{ "best_metric": null, "best_model_checkpoint": null, "epoch": 75.0, "global_step": 187500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "learning_rate": 4.98682624510651e-05, "loss": 7.2061, "step": 500 }, { "epoch": 0.2, "eval_accuracy": 0.11605996287252233, "eval_loss": 6.695810794830322, "eval_runtime": 233.7076, "eval_samples_per_second": 21.394, "eval_steps_per_second": 2.674, "step": 500 }, { "epoch": 0.4, "learning_rate": 4.973492485092856e-05, "loss": 6.6051, "step": 1000 }, { "epoch": 0.4, "eval_accuracy": 0.13034278742499958, "eval_loss": 6.552731037139893, "eval_runtime": 230.8673, "eval_samples_per_second": 21.657, "eval_steps_per_second": 2.707, "step": 1000 }, { "epoch": 0.6, "learning_rate": 4.960158725079202e-05, "loss": 6.5016, "step": 1500 }, { "epoch": 0.6, "eval_accuracy": 0.13822830162292474, "eval_loss": 6.471951007843018, "eval_runtime": 231.166, "eval_samples_per_second": 21.629, "eval_steps_per_second": 2.704, "step": 1500 }, { "epoch": 0.8, "learning_rate": 4.946824965065549e-05, "loss": 6.4189, "step": 2000 }, { "epoch": 0.8, "eval_accuracy": 0.14243438880927284, "eval_loss": 6.3796162605285645, "eval_runtime": 231.3451, "eval_samples_per_second": 21.613, "eval_steps_per_second": 2.702, "step": 2000 }, { "epoch": 1.0, "learning_rate": 4.933491205051895e-05, "loss": 6.3648, "step": 2500 }, { "epoch": 1.0, "eval_accuracy": 0.14476689484585437, "eval_loss": 6.3224334716796875, "eval_runtime": 231.3236, "eval_samples_per_second": 21.615, "eval_steps_per_second": 2.702, "step": 2500 }, { "epoch": 1.2, "learning_rate": 4.9201574450382416e-05, "loss": 6.2787, "step": 3000 }, { "epoch": 1.2, "eval_accuracy": 0.14111015350213132, "eval_loss": 6.2787089347839355, "eval_runtime": 230.6423, "eval_samples_per_second": 21.679, "eval_steps_per_second": 2.71, "step": 3000 }, { "epoch": 1.4, "learning_rate": 4.9068236850245876e-05, "loss": 6.2583, "step": 3500 }, { "epoch": 1.4, "eval_accuracy": 0.1445680523766434, "eval_loss": 6.2466654777526855, "eval_runtime": 230.7323, "eval_samples_per_second": 21.67, "eval_steps_per_second": 2.709, "step": 3500 }, { "epoch": 1.6, "learning_rate": 4.893489925010934e-05, "loss": 6.2211, "step": 4000 }, { "epoch": 1.6, "eval_accuracy": 0.14751256197797077, "eval_loss": 6.216161251068115, "eval_runtime": 230.8211, "eval_samples_per_second": 21.662, "eval_steps_per_second": 2.708, "step": 4000 }, { "epoch": 1.8, "learning_rate": 4.8801561649972796e-05, "loss": 6.1897, "step": 4500 }, { "epoch": 1.8, "eval_accuracy": 0.14658194493087173, "eval_loss": 6.193264961242676, "eval_runtime": 231.0835, "eval_samples_per_second": 21.637, "eval_steps_per_second": 2.705, "step": 4500 }, { "epoch": 2.0, "learning_rate": 4.866822404983626e-05, "loss": 6.1625, "step": 5000 }, { "epoch": 2.0, "eval_accuracy": 0.14833689917741957, "eval_loss": 6.170398235321045, "eval_runtime": 231.1062, "eval_samples_per_second": 21.635, "eval_steps_per_second": 2.704, "step": 5000 }, { "epoch": 2.2, "learning_rate": 4.853488644969972e-05, "loss": 6.1412, "step": 5500 }, { "epoch": 2.2, "eval_accuracy": 0.1484326081453751, "eval_loss": 6.152679920196533, "eval_runtime": 230.9877, "eval_samples_per_second": 21.646, "eval_steps_per_second": 2.706, "step": 5500 }, { "epoch": 2.4, "learning_rate": 4.840154884956319e-05, "loss": 6.1062, "step": 6000 }, { "epoch": 2.4, "eval_accuracy": 0.14922723927442746, "eval_loss": 6.129592418670654, "eval_runtime": 230.7218, "eval_samples_per_second": 21.671, "eval_steps_per_second": 2.709, "step": 6000 }, { "epoch": 2.6, "learning_rate": 4.826821124942665e-05, "loss": 6.1003, "step": 6500 }, { "epoch": 2.6, "eval_accuracy": 0.14833042176191666, "eval_loss": 6.127459526062012, "eval_runtime": 231.3952, "eval_samples_per_second": 21.608, "eval_steps_per_second": 2.701, "step": 6500 }, { "epoch": 2.8, "learning_rate": 4.8134873649290116e-05, "loss": 6.0944, "step": 7000 }, { "epoch": 2.8, "eval_accuracy": 0.14956795181846647, "eval_loss": 6.098344326019287, "eval_runtime": 230.8927, "eval_samples_per_second": 21.655, "eval_steps_per_second": 2.707, "step": 7000 }, { "epoch": 3.0, "learning_rate": 4.8001536049153576e-05, "loss": 6.077, "step": 7500 }, { "epoch": 3.0, "eval_accuracy": 0.1509302093387027, "eval_loss": 6.0839033126831055, "eval_runtime": 230.9459, "eval_samples_per_second": 21.65, "eval_steps_per_second": 2.706, "step": 7500 }, { "epoch": 3.2, "learning_rate": 4.7868198449017036e-05, "loss": 6.0419, "step": 8000 }, { "epoch": 3.2, "eval_accuracy": 0.15035993408116505, "eval_loss": 6.074743270874023, "eval_runtime": 230.7806, "eval_samples_per_second": 21.666, "eval_steps_per_second": 2.708, "step": 8000 }, { "epoch": 3.4, "learning_rate": 4.7734860848880495e-05, "loss": 6.0264, "step": 8500 }, { "epoch": 3.4, "eval_accuracy": 0.15058128131222967, "eval_loss": 6.07292366027832, "eval_runtime": 231.2041, "eval_samples_per_second": 21.626, "eval_steps_per_second": 2.703, "step": 8500 }, { "epoch": 3.6, "learning_rate": 4.760152324874396e-05, "loss": 6.0222, "step": 9000 }, { "epoch": 3.6, "eval_accuracy": 0.1504468752084306, "eval_loss": 6.058495998382568, "eval_runtime": 231.1986, "eval_samples_per_second": 21.626, "eval_steps_per_second": 2.703, "step": 9000 }, { "epoch": 3.8, "learning_rate": 4.746818564860742e-05, "loss": 6.0067, "step": 9500 }, { "epoch": 3.8, "eval_accuracy": 0.15003014928058153, "eval_loss": 6.051761627197266, "eval_runtime": 231.3307, "eval_samples_per_second": 21.614, "eval_steps_per_second": 2.702, "step": 9500 }, { "epoch": 4.0, "learning_rate": 4.733484804847089e-05, "loss": 6.0045, "step": 10000 }, { "epoch": 4.0, "eval_accuracy": 0.15037748890129463, "eval_loss": 6.030020236968994, "eval_runtime": 231.5038, "eval_samples_per_second": 21.598, "eval_steps_per_second": 2.7, "step": 10000 }, { "epoch": 4.2, "learning_rate": 4.720151044833435e-05, "loss": 5.9659, "step": 10500 }, { "epoch": 4.2, "eval_accuracy": 0.15042165912398572, "eval_loss": 6.024837970733643, "eval_runtime": 230.5925, "eval_samples_per_second": 21.683, "eval_steps_per_second": 2.71, "step": 10500 }, { "epoch": 4.4, "learning_rate": 4.706817284819781e-05, "loss": 5.9542, "step": 11000 }, { "epoch": 4.4, "eval_accuracy": 0.15116976604679064, "eval_loss": 6.014277935028076, "eval_runtime": 230.7651, "eval_samples_per_second": 21.667, "eval_steps_per_second": 2.708, "step": 11000 }, { "epoch": 4.6, "learning_rate": 4.6934835248061275e-05, "loss": 5.9479, "step": 11500 }, { "epoch": 4.6, "eval_accuracy": 0.15137721677585322, "eval_loss": 5.98906135559082, "eval_runtime": 231.6346, "eval_samples_per_second": 21.586, "eval_steps_per_second": 2.698, "step": 11500 }, { "epoch": 4.8, "learning_rate": 4.6801497647924735e-05, "loss": 5.9506, "step": 12000 }, { "epoch": 4.8, "eval_accuracy": 0.1517101264223344, "eval_loss": 5.9826836585998535, "eval_runtime": 231.7066, "eval_samples_per_second": 21.579, "eval_steps_per_second": 2.697, "step": 12000 }, { "epoch": 5.0, "learning_rate": 4.66681600477882e-05, "loss": 5.9358, "step": 12500 }, { "epoch": 5.0, "eval_accuracy": 0.15087804601873653, "eval_loss": 5.9973015785217285, "eval_runtime": 230.661, "eval_samples_per_second": 21.677, "eval_steps_per_second": 2.71, "step": 12500 }, { "epoch": 5.2, "learning_rate": 4.653482244765166e-05, "loss": 5.9114, "step": 13000 }, { "epoch": 5.2, "eval_accuracy": 0.15052932437024058, "eval_loss": 5.976053714752197, "eval_runtime": 231.2672, "eval_samples_per_second": 21.62, "eval_steps_per_second": 2.703, "step": 13000 }, { "epoch": 5.4, "learning_rate": 4.640148484751513e-05, "loss": 5.9089, "step": 13500 }, { "epoch": 5.4, "eval_accuracy": 0.15164112985030448, "eval_loss": 5.963654518127441, "eval_runtime": 230.7206, "eval_samples_per_second": 21.671, "eval_steps_per_second": 2.709, "step": 13500 }, { "epoch": 5.6, "learning_rate": 4.626814724737859e-05, "loss": 5.9008, "step": 14000 }, { "epoch": 5.6, "eval_accuracy": 0.15150205172883927, "eval_loss": 5.95345401763916, "eval_runtime": 230.8946, "eval_samples_per_second": 21.655, "eval_steps_per_second": 2.707, "step": 14000 }, { "epoch": 5.8, "learning_rate": 4.613480964724205e-05, "loss": 5.9007, "step": 14500 }, { "epoch": 5.8, "eval_accuracy": 0.15299477003435696, "eval_loss": 5.934290409088135, "eval_runtime": 230.6423, "eval_samples_per_second": 21.679, "eval_steps_per_second": 2.71, "step": 14500 }, { "epoch": 6.0, "learning_rate": 4.600147204710551e-05, "loss": 5.8734, "step": 15000 }, { "epoch": 6.0, "eval_accuracy": 0.1532304554860595, "eval_loss": 5.92548131942749, "eval_runtime": 230.5299, "eval_samples_per_second": 21.689, "eval_steps_per_second": 2.711, "step": 15000 }, { "epoch": 6.2, "learning_rate": 4.5868134446968975e-05, "loss": 5.8519, "step": 15500 }, { "epoch": 6.2, "eval_accuracy": 0.15272607511053005, "eval_loss": 5.92126989364624, "eval_runtime": 230.8715, "eval_samples_per_second": 21.657, "eval_steps_per_second": 2.707, "step": 15500 }, { "epoch": 6.4, "learning_rate": 4.5734796846832435e-05, "loss": 5.8383, "step": 16000 }, { "epoch": 6.4, "eval_accuracy": 0.15125044872561924, "eval_loss": 5.912574768066406, "eval_runtime": 230.9209, "eval_samples_per_second": 21.652, "eval_steps_per_second": 2.707, "step": 16000 }, { "epoch": 6.6, "learning_rate": 4.56014592466959e-05, "loss": 5.8461, "step": 16500 }, { "epoch": 6.6, "eval_accuracy": 0.15246080472538365, "eval_loss": 5.904086589813232, "eval_runtime": 230.6847, "eval_samples_per_second": 21.675, "eval_steps_per_second": 2.709, "step": 16500 }, { "epoch": 6.8, "learning_rate": 4.546812164655936e-05, "loss": 5.8387, "step": 17000 }, { "epoch": 6.8, "eval_accuracy": 0.1517369949687959, "eval_loss": 5.8923492431640625, "eval_runtime": 230.6969, "eval_samples_per_second": 21.673, "eval_steps_per_second": 2.709, "step": 17000 }, { "epoch": 7.0, "learning_rate": 4.533478404642282e-05, "loss": 5.831, "step": 17500 }, { "epoch": 7.0, "eval_accuracy": 0.15579792043399637, "eval_loss": 5.878210067749023, "eval_runtime": 230.6131, "eval_samples_per_second": 21.681, "eval_steps_per_second": 2.71, "step": 17500 }, { "epoch": 7.2, "learning_rate": 4.520144644628628e-05, "loss": 5.8003, "step": 18000 }, { "epoch": 7.2, "eval_accuracy": 0.15538990825688073, "eval_loss": 5.865967750549316, "eval_runtime": 230.4846, "eval_samples_per_second": 21.693, "eval_steps_per_second": 2.712, "step": 18000 }, { "epoch": 7.4, "learning_rate": 4.506810884614975e-05, "loss": 5.7832, "step": 18500 }, { "epoch": 7.4, "eval_accuracy": 0.1559906228220237, "eval_loss": 5.85080099105835, "eval_runtime": 230.509, "eval_samples_per_second": 21.691, "eval_steps_per_second": 2.711, "step": 18500 }, { "epoch": 7.6, "learning_rate": 4.493477124601321e-05, "loss": 5.7902, "step": 19000 }, { "epoch": 7.6, "eval_accuracy": 0.15576768816506403, "eval_loss": 5.849486827850342, "eval_runtime": 231.3532, "eval_samples_per_second": 21.612, "eval_steps_per_second": 2.701, "step": 19000 }, { "epoch": 7.8, "learning_rate": 4.4801433645876674e-05, "loss": 5.7707, "step": 19500 }, { "epoch": 7.8, "eval_accuracy": 0.15526944235643156, "eval_loss": 5.8376359939575195, "eval_runtime": 231.4064, "eval_samples_per_second": 21.607, "eval_steps_per_second": 2.701, "step": 19500 }, { "epoch": 8.0, "learning_rate": 4.4668096045740134e-05, "loss": 5.7638, "step": 20000 }, { "epoch": 8.0, "eval_accuracy": 0.156442207844077, "eval_loss": 5.828929901123047, "eval_runtime": 231.698, "eval_samples_per_second": 21.58, "eval_steps_per_second": 2.697, "step": 20000 }, { "epoch": 8.2, "learning_rate": 4.4534758445603594e-05, "loss": 5.741, "step": 20500 }, { "epoch": 8.2, "eval_accuracy": 0.1573580556184791, "eval_loss": 5.823002815246582, "eval_runtime": 230.5869, "eval_samples_per_second": 21.684, "eval_steps_per_second": 2.71, "step": 20500 }, { "epoch": 8.4, "learning_rate": 4.4401420845467054e-05, "loss": 5.7291, "step": 21000 }, { "epoch": 8.4, "eval_accuracy": 0.1574160541095799, "eval_loss": 5.810952186584473, "eval_runtime": 231.4576, "eval_samples_per_second": 21.602, "eval_steps_per_second": 2.7, "step": 21000 }, { "epoch": 8.6, "learning_rate": 4.426808324533052e-05, "loss": 5.7206, "step": 21500 }, { "epoch": 8.6, "eval_accuracy": 0.15753739088780072, "eval_loss": 5.801361560821533, "eval_runtime": 231.427, "eval_samples_per_second": 21.605, "eval_steps_per_second": 2.701, "step": 21500 }, { "epoch": 8.8, "learning_rate": 4.413474564519398e-05, "loss": 5.6974, "step": 22000 }, { "epoch": 8.8, "eval_accuracy": 0.16051140143149828, "eval_loss": 5.7644429206848145, "eval_runtime": 230.2201, "eval_samples_per_second": 21.718, "eval_steps_per_second": 2.715, "step": 22000 }, { "epoch": 9.0, "learning_rate": 4.400140804505745e-05, "loss": 5.6954, "step": 22500 }, { "epoch": 9.0, "eval_accuracy": 0.1638391355182993, "eval_loss": 5.740447521209717, "eval_runtime": 230.3833, "eval_samples_per_second": 21.703, "eval_steps_per_second": 2.713, "step": 22500 }, { "epoch": 9.2, "learning_rate": 4.386807044492091e-05, "loss": 5.6467, "step": 23000 }, { "epoch": 9.2, "eval_accuracy": 0.16680106362418612, "eval_loss": 5.703954696655273, "eval_runtime": 231.5174, "eval_samples_per_second": 21.597, "eval_steps_per_second": 2.7, "step": 23000 }, { "epoch": 9.4, "learning_rate": 4.3734732844784374e-05, "loss": 5.6134, "step": 23500 }, { "epoch": 9.4, "eval_accuracy": 0.17379691610883538, "eval_loss": 5.665645122528076, "eval_runtime": 230.5546, "eval_samples_per_second": 21.687, "eval_steps_per_second": 2.711, "step": 23500 }, { "epoch": 9.6, "learning_rate": 4.360139524464783e-05, "loss": 5.5855, "step": 24000 }, { "epoch": 9.6, "eval_accuracy": 0.17866926158214308, "eval_loss": 5.626242637634277, "eval_runtime": 231.3553, "eval_samples_per_second": 21.612, "eval_steps_per_second": 2.701, "step": 24000 }, { "epoch": 9.8, "learning_rate": 4.3468057644511294e-05, "loss": 5.5374, "step": 24500 }, { "epoch": 9.8, "eval_accuracy": 0.1882605678906917, "eval_loss": 5.558737277984619, "eval_runtime": 231.4004, "eval_samples_per_second": 21.608, "eval_steps_per_second": 2.701, "step": 24500 }, { "epoch": 10.0, "learning_rate": 4.3334720044374753e-05, "loss": 5.4678, "step": 25000 }, { "epoch": 10.0, "eval_accuracy": 0.20085203692159995, "eval_loss": 5.438808917999268, "eval_runtime": 230.4228, "eval_samples_per_second": 21.699, "eval_steps_per_second": 2.712, "step": 25000 }, { "epoch": 10.2, "learning_rate": 4.320138244423822e-05, "loss": 5.3324, "step": 25500 }, { "epoch": 10.2, "eval_accuracy": 0.22031610383030917, "eval_loss": 5.270298480987549, "eval_runtime": 231.4936, "eval_samples_per_second": 21.599, "eval_steps_per_second": 2.7, "step": 25500 }, { "epoch": 10.4, "learning_rate": 4.306804484410168e-05, "loss": 5.1849, "step": 26000 }, { "epoch": 10.4, "eval_accuracy": 0.24342581021411314, "eval_loss": 5.090771675109863, "eval_runtime": 231.6154, "eval_samples_per_second": 21.588, "eval_steps_per_second": 2.698, "step": 26000 }, { "epoch": 10.6, "learning_rate": 4.293470724396515e-05, "loss": 5.0273, "step": 26500 }, { "epoch": 10.6, "eval_accuracy": 0.2657009801806347, "eval_loss": 4.91030740737915, "eval_runtime": 231.4751, "eval_samples_per_second": 21.601, "eval_steps_per_second": 2.7, "step": 26500 }, { "epoch": 10.8, "learning_rate": 4.2801369643828607e-05, "loss": 4.8718, "step": 27000 }, { "epoch": 10.8, "eval_accuracy": 0.2844097476529729, "eval_loss": 4.763670921325684, "eval_runtime": 231.3758, "eval_samples_per_second": 21.61, "eval_steps_per_second": 2.701, "step": 27000 }, { "epoch": 11.0, "learning_rate": 4.2668032043692066e-05, "loss": 4.7523, "step": 27500 }, { "epoch": 11.0, "eval_accuracy": 0.30234914938862306, "eval_loss": 4.606442928314209, "eval_runtime": 231.2813, "eval_samples_per_second": 21.619, "eval_steps_per_second": 2.702, "step": 27500 }, { "epoch": 11.2, "learning_rate": 4.2534694443555526e-05, "loss": 4.5814, "step": 28000 }, { "epoch": 11.2, "eval_accuracy": 0.32203576764067926, "eval_loss": 4.43984317779541, "eval_runtime": 231.3348, "eval_samples_per_second": 21.614, "eval_steps_per_second": 2.702, "step": 28000 }, { "epoch": 11.4, "learning_rate": 4.240135684341899e-05, "loss": 4.4627, "step": 28500 }, { "epoch": 11.4, "eval_accuracy": 0.3375924905212874, "eval_loss": 4.300474643707275, "eval_runtime": 231.0917, "eval_samples_per_second": 21.636, "eval_steps_per_second": 2.705, "step": 28500 }, { "epoch": 11.6, "learning_rate": 4.226801924328245e-05, "loss": 4.3228, "step": 29000 }, { "epoch": 11.6, "eval_accuracy": 0.352034906941449, "eval_loss": 4.177072048187256, "eval_runtime": 231.0017, "eval_samples_per_second": 21.645, "eval_steps_per_second": 2.706, "step": 29000 }, { "epoch": 11.8, "learning_rate": 4.213468164314592e-05, "loss": 4.1885, "step": 29500 }, { "epoch": 11.8, "eval_accuracy": 0.3631866171945581, "eval_loss": 4.0783257484436035, "eval_runtime": 231.2677, "eval_samples_per_second": 21.62, "eval_steps_per_second": 2.702, "step": 29500 }, { "epoch": 12.0, "learning_rate": 4.200134404300938e-05, "loss": 4.0772, "step": 30000 }, { "epoch": 12.0, "eval_accuracy": 0.3765043895715632, "eval_loss": 3.9657578468322754, "eval_runtime": 231.1257, "eval_samples_per_second": 21.633, "eval_steps_per_second": 2.704, "step": 30000 }, { "epoch": 12.2, "learning_rate": 4.186800644287284e-05, "loss": 3.9602, "step": 30500 }, { "epoch": 12.2, "eval_accuracy": 0.38800160873770595, "eval_loss": 3.8685555458068848, "eval_runtime": 231.7873, "eval_samples_per_second": 21.571, "eval_steps_per_second": 2.696, "step": 30500 }, { "epoch": 12.4, "learning_rate": 4.17346688427363e-05, "loss": 3.8622, "step": 31000 }, { "epoch": 12.4, "eval_accuracy": 0.3967645113361764, "eval_loss": 3.7885961532592773, "eval_runtime": 231.6543, "eval_samples_per_second": 21.584, "eval_steps_per_second": 2.698, "step": 31000 }, { "epoch": 12.6, "learning_rate": 4.1601331242599766e-05, "loss": 3.7958, "step": 31500 }, { "epoch": 12.6, "eval_accuracy": 0.4074396657589834, "eval_loss": 3.69677472114563, "eval_runtime": 231.2453, "eval_samples_per_second": 21.622, "eval_steps_per_second": 2.703, "step": 31500 }, { "epoch": 12.8, "learning_rate": 4.1467993642463226e-05, "loss": 3.7245, "step": 32000 }, { "epoch": 12.8, "eval_accuracy": 0.41293986747928324, "eval_loss": 3.6479530334472656, "eval_runtime": 230.7287, "eval_samples_per_second": 21.67, "eval_steps_per_second": 2.709, "step": 32000 }, { "epoch": 13.0, "learning_rate": 4.133465604232669e-05, "loss": 3.6503, "step": 32500 }, { "epoch": 13.0, "eval_accuracy": 0.42037487909815563, "eval_loss": 3.577112913131714, "eval_runtime": 231.289, "eval_samples_per_second": 21.618, "eval_steps_per_second": 2.702, "step": 32500 }, { "epoch": 13.2, "learning_rate": 4.120131844219015e-05, "loss": 3.5569, "step": 33000 }, { "epoch": 13.2, "eval_accuracy": 0.42860995864438833, "eval_loss": 3.5102858543395996, "eval_runtime": 231.3523, "eval_samples_per_second": 21.612, "eval_steps_per_second": 2.702, "step": 33000 }, { "epoch": 13.4, "learning_rate": 4.106798084205362e-05, "loss": 3.5151, "step": 33500 }, { "epoch": 13.4, "eval_accuracy": 0.43582590959536877, "eval_loss": 3.461106300354004, "eval_runtime": 231.2514, "eval_samples_per_second": 21.621, "eval_steps_per_second": 2.703, "step": 33500 }, { "epoch": 13.6, "learning_rate": 4.093464324191707e-05, "loss": 3.4388, "step": 34000 }, { "epoch": 13.6, "eval_accuracy": 0.4410264100641506, "eval_loss": 3.411881923675537, "eval_runtime": 232.8711, "eval_samples_per_second": 21.471, "eval_steps_per_second": 2.684, "step": 34000 }, { "epoch": 13.8, "learning_rate": 4.080130564178054e-05, "loss": 3.41, "step": 34500 }, { "epoch": 13.8, "eval_accuracy": 0.4486495293386135, "eval_loss": 3.3569633960723877, "eval_runtime": 233.0322, "eval_samples_per_second": 21.456, "eval_steps_per_second": 2.682, "step": 34500 }, { "epoch": 14.0, "learning_rate": 4.0667968041644e-05, "loss": 3.3447, "step": 35000 }, { "epoch": 14.0, "eval_accuracy": 0.4518131893672265, "eval_loss": 3.3158445358276367, "eval_runtime": 232.0508, "eval_samples_per_second": 21.547, "eval_steps_per_second": 2.693, "step": 35000 }, { "epoch": 14.2, "learning_rate": 4.0534630441507466e-05, "loss": 3.2678, "step": 35500 }, { "epoch": 14.2, "eval_accuracy": 0.4585323577940368, "eval_loss": 3.2717108726501465, "eval_runtime": 231.5502, "eval_samples_per_second": 21.594, "eval_steps_per_second": 2.699, "step": 35500 }, { "epoch": 14.4, "learning_rate": 4.0401292841370925e-05, "loss": 3.2395, "step": 36000 }, { "epoch": 14.4, "eval_accuracy": 0.46290830741037076, "eval_loss": 3.22344970703125, "eval_runtime": 231.32, "eval_samples_per_second": 21.615, "eval_steps_per_second": 2.702, "step": 36000 }, { "epoch": 14.6, "learning_rate": 4.026795524123439e-05, "loss": 3.2033, "step": 36500 }, { "epoch": 14.6, "eval_accuracy": 0.46970301506869355, "eval_loss": 3.17228627204895, "eval_runtime": 231.2086, "eval_samples_per_second": 21.625, "eval_steps_per_second": 2.703, "step": 36500 }, { "epoch": 14.8, "learning_rate": 4.013461764109785e-05, "loss": 3.1739, "step": 37000 }, { "epoch": 14.8, "eval_accuracy": 0.4746977130977131, "eval_loss": 3.140921115875244, "eval_runtime": 231.0625, "eval_samples_per_second": 21.639, "eval_steps_per_second": 2.705, "step": 37000 }, { "epoch": 15.0, "learning_rate": 4.000128004096131e-05, "loss": 3.1467, "step": 37500 }, { "epoch": 15.0, "eval_accuracy": 0.47823785730692026, "eval_loss": 3.1042473316192627, "eval_runtime": 231.1531, "eval_samples_per_second": 21.631, "eval_steps_per_second": 2.704, "step": 37500 }, { "epoch": 15.2, "learning_rate": 3.986794244082477e-05, "loss": 3.0736, "step": 38000 }, { "epoch": 15.2, "eval_accuracy": 0.48393500600560524, "eval_loss": 3.0560505390167236, "eval_runtime": 231.7054, "eval_samples_per_second": 21.579, "eval_steps_per_second": 2.697, "step": 38000 }, { "epoch": 15.4, "learning_rate": 3.973460484068824e-05, "loss": 3.0468, "step": 38500 }, { "epoch": 15.4, "eval_accuracy": 0.48689248404671853, "eval_loss": 3.0274672508239746, "eval_runtime": 231.6854, "eval_samples_per_second": 21.581, "eval_steps_per_second": 2.698, "step": 38500 }, { "epoch": 15.6, "learning_rate": 3.96012672405517e-05, "loss": 3.0105, "step": 39000 }, { "epoch": 15.6, "eval_accuracy": 0.48984264091336255, "eval_loss": 3.0050787925720215, "eval_runtime": 231.6235, "eval_samples_per_second": 21.587, "eval_steps_per_second": 2.698, "step": 39000 }, { "epoch": 15.8, "learning_rate": 3.9467929640415165e-05, "loss": 2.9828, "step": 39500 }, { "epoch": 15.8, "eval_accuracy": 0.4949883527454243, "eval_loss": 2.968860149383545, "eval_runtime": 231.5803, "eval_samples_per_second": 21.591, "eval_steps_per_second": 2.699, "step": 39500 }, { "epoch": 16.0, "learning_rate": 3.9334592040278625e-05, "loss": 2.9523, "step": 40000 }, { "epoch": 16.0, "eval_accuracy": 0.4958962606673906, "eval_loss": 2.9481143951416016, "eval_runtime": 232.0468, "eval_samples_per_second": 21.547, "eval_steps_per_second": 2.693, "step": 40000 }, { "epoch": 16.2, "learning_rate": 3.9201254440142085e-05, "loss": 2.8951, "step": 40500 }, { "epoch": 16.2, "eval_accuracy": 0.5039497778353822, "eval_loss": 2.8917958736419678, "eval_runtime": 231.5273, "eval_samples_per_second": 21.596, "eval_steps_per_second": 2.699, "step": 40500 }, { "epoch": 16.4, "learning_rate": 3.9067916840005545e-05, "loss": 2.8614, "step": 41000 }, { "epoch": 16.4, "eval_accuracy": 0.5053813663933169, "eval_loss": 2.873403549194336, "eval_runtime": 231.1155, "eval_samples_per_second": 21.634, "eval_steps_per_second": 2.704, "step": 41000 }, { "epoch": 16.6, "learning_rate": 3.893457923986901e-05, "loss": 2.8422, "step": 41500 }, { "epoch": 16.6, "eval_accuracy": 0.5083243546165059, "eval_loss": 2.848686695098877, "eval_runtime": 231.2389, "eval_samples_per_second": 21.623, "eval_steps_per_second": 2.703, "step": 41500 }, { "epoch": 16.8, "learning_rate": 3.880124163973247e-05, "loss": 2.8184, "step": 42000 }, { "epoch": 16.8, "eval_accuracy": 0.5137553298782741, "eval_loss": 2.8223233222961426, "eval_runtime": 231.3203, "eval_samples_per_second": 21.615, "eval_steps_per_second": 2.702, "step": 42000 }, { "epoch": 17.0, "learning_rate": 3.866790403959594e-05, "loss": 2.7806, "step": 42500 }, { "epoch": 17.0, "eval_accuracy": 0.5166754490698434, "eval_loss": 2.7964563369750977, "eval_runtime": 231.5987, "eval_samples_per_second": 21.589, "eval_steps_per_second": 2.699, "step": 42500 }, { "epoch": 17.2, "learning_rate": 3.85345664394594e-05, "loss": 2.7356, "step": 43000 }, { "epoch": 17.2, "eval_accuracy": 0.5208632038038585, "eval_loss": 2.7596287727355957, "eval_runtime": 231.5176, "eval_samples_per_second": 21.597, "eval_steps_per_second": 2.7, "step": 43000 }, { "epoch": 17.4, "learning_rate": 3.8401228839322865e-05, "loss": 2.7357, "step": 43500 }, { "epoch": 17.4, "eval_accuracy": 0.5249955157546486, "eval_loss": 2.740691900253296, "eval_runtime": 231.4323, "eval_samples_per_second": 21.605, "eval_steps_per_second": 2.701, "step": 43500 }, { "epoch": 17.6, "learning_rate": 3.826789123918632e-05, "loss": 2.7015, "step": 44000 }, { "epoch": 17.6, "eval_accuracy": 0.5272358467037992, "eval_loss": 2.713502883911133, "eval_runtime": 231.2385, "eval_samples_per_second": 21.623, "eval_steps_per_second": 2.703, "step": 44000 }, { "epoch": 17.8, "learning_rate": 3.8134553639049784e-05, "loss": 2.688, "step": 44500 }, { "epoch": 17.8, "eval_accuracy": 0.5288542866847373, "eval_loss": 2.693544387817383, "eval_runtime": 231.2235, "eval_samples_per_second": 21.624, "eval_steps_per_second": 2.703, "step": 44500 }, { "epoch": 18.0, "learning_rate": 3.8001216038913244e-05, "loss": 2.6582, "step": 45000 }, { "epoch": 18.0, "eval_accuracy": 0.5341693899145807, "eval_loss": 2.6571600437164307, "eval_runtime": 231.6454, "eval_samples_per_second": 21.585, "eval_steps_per_second": 2.698, "step": 45000 }, { "epoch": 18.2, "learning_rate": 3.786787843877671e-05, "loss": 2.6186, "step": 45500 }, { "epoch": 18.2, "eval_accuracy": 0.5356641818648343, "eval_loss": 2.6396398544311523, "eval_runtime": 231.8663, "eval_samples_per_second": 21.564, "eval_steps_per_second": 2.696, "step": 45500 }, { "epoch": 18.4, "learning_rate": 3.773454083864017e-05, "loss": 2.6071, "step": 46000 }, { "epoch": 18.4, "eval_accuracy": 0.5376610769046638, "eval_loss": 2.626965045928955, "eval_runtime": 231.641, "eval_samples_per_second": 21.585, "eval_steps_per_second": 2.698, "step": 46000 }, { "epoch": 18.6, "learning_rate": 3.760120323850364e-05, "loss": 2.5891, "step": 46500 }, { "epoch": 18.6, "eval_accuracy": 0.5406689339011392, "eval_loss": 2.6109817028045654, "eval_runtime": 231.5799, "eval_samples_per_second": 21.591, "eval_steps_per_second": 2.699, "step": 46500 }, { "epoch": 18.8, "learning_rate": 3.74678656383671e-05, "loss": 2.558, "step": 47000 }, { "epoch": 18.8, "eval_accuracy": 0.5434645737490298, "eval_loss": 2.5874485969543457, "eval_runtime": 231.7825, "eval_samples_per_second": 21.572, "eval_steps_per_second": 2.696, "step": 47000 }, { "epoch": 19.0, "learning_rate": 3.733452803823056e-05, "loss": 2.5521, "step": 47500 }, { "epoch": 19.0, "eval_accuracy": 0.5464595139947923, "eval_loss": 2.553973913192749, "eval_runtime": 231.9001, "eval_samples_per_second": 21.561, "eval_steps_per_second": 2.695, "step": 47500 }, { "epoch": 19.2, "learning_rate": 3.720119043809402e-05, "loss": 2.5086, "step": 48000 }, { "epoch": 19.2, "eval_accuracy": 0.5504046970853146, "eval_loss": 2.529578924179077, "eval_runtime": 231.5991, "eval_samples_per_second": 21.589, "eval_steps_per_second": 2.699, "step": 48000 }, { "epoch": 19.4, "learning_rate": 3.7067852837957484e-05, "loss": 2.4933, "step": 48500 }, { "epoch": 19.4, "eval_accuracy": 0.552335013374818, "eval_loss": 2.5199332237243652, "eval_runtime": 231.9578, "eval_samples_per_second": 21.556, "eval_steps_per_second": 2.694, "step": 48500 }, { "epoch": 19.6, "learning_rate": 3.6934515237820944e-05, "loss": 2.4924, "step": 49000 }, { "epoch": 19.6, "eval_accuracy": 0.5550407179657637, "eval_loss": 2.503727674484253, "eval_runtime": 231.4473, "eval_samples_per_second": 21.603, "eval_steps_per_second": 2.7, "step": 49000 }, { "epoch": 19.8, "learning_rate": 3.680117763768441e-05, "loss": 2.4633, "step": 49500 }, { "epoch": 19.8, "eval_accuracy": 0.5567173130747701, "eval_loss": 2.47917103767395, "eval_runtime": 231.5225, "eval_samples_per_second": 21.596, "eval_steps_per_second": 2.7, "step": 49500 }, { "epoch": 20.0, "learning_rate": 3.666784003754787e-05, "loss": 2.4426, "step": 50000 }, { "epoch": 20.0, "eval_accuracy": 0.5599728349523613, "eval_loss": 2.472404718399048, "eval_runtime": 231.2837, "eval_samples_per_second": 21.618, "eval_steps_per_second": 2.702, "step": 50000 }, { "epoch": 20.2, "learning_rate": 3.653450243741133e-05, "loss": 2.4106, "step": 50500 }, { "epoch": 20.2, "eval_accuracy": 0.5625706985347937, "eval_loss": 2.439568042755127, "eval_runtime": 232.2415, "eval_samples_per_second": 21.529, "eval_steps_per_second": 2.691, "step": 50500 }, { "epoch": 20.4, "learning_rate": 3.640116483727479e-05, "loss": 2.4103, "step": 51000 }, { "epoch": 20.4, "eval_accuracy": 0.563057566908029, "eval_loss": 2.42592453956604, "eval_runtime": 232.7771, "eval_samples_per_second": 21.48, "eval_steps_per_second": 2.685, "step": 51000 }, { "epoch": 20.6, "learning_rate": 3.626782723713826e-05, "loss": 2.3783, "step": 51500 }, { "epoch": 20.6, "eval_accuracy": 0.5672294768360642, "eval_loss": 2.4072492122650146, "eval_runtime": 232.8388, "eval_samples_per_second": 21.474, "eval_steps_per_second": 2.684, "step": 51500 }, { "epoch": 20.8, "learning_rate": 3.613448963700172e-05, "loss": 2.3712, "step": 52000 }, { "epoch": 20.8, "eval_accuracy": 0.567927338493045, "eval_loss": 2.4054574966430664, "eval_runtime": 231.9123, "eval_samples_per_second": 21.56, "eval_steps_per_second": 2.695, "step": 52000 }, { "epoch": 21.0, "learning_rate": 3.6001152036865183e-05, "loss": 2.3616, "step": 52500 }, { "epoch": 21.0, "eval_accuracy": 0.5724379744645745, "eval_loss": 2.3781449794769287, "eval_runtime": 231.3479, "eval_samples_per_second": 21.612, "eval_steps_per_second": 2.702, "step": 52500 }, { "epoch": 21.2, "learning_rate": 3.586781443672864e-05, "loss": 2.3274, "step": 53000 }, { "epoch": 21.2, "eval_accuracy": 0.5746179093591486, "eval_loss": 2.3627405166625977, "eval_runtime": 231.8262, "eval_samples_per_second": 21.568, "eval_steps_per_second": 2.696, "step": 53000 }, { "epoch": 21.4, "learning_rate": 3.573447683659211e-05, "loss": 2.3133, "step": 53500 }, { "epoch": 21.4, "eval_accuracy": 0.5750550403629329, "eval_loss": 2.358633279800415, "eval_runtime": 231.5004, "eval_samples_per_second": 21.598, "eval_steps_per_second": 2.7, "step": 53500 }, { "epoch": 21.6, "learning_rate": 3.560113923645556e-05, "loss": 2.3076, "step": 54000 }, { "epoch": 21.6, "eval_accuracy": 0.5785171432769237, "eval_loss": 2.3207192420959473, "eval_runtime": 231.9813, "eval_samples_per_second": 21.553, "eval_steps_per_second": 2.694, "step": 54000 }, { "epoch": 21.8, "learning_rate": 3.546780163631903e-05, "loss": 2.2991, "step": 54500 }, { "epoch": 21.8, "eval_accuracy": 0.579623361683169, "eval_loss": 2.31518292427063, "eval_runtime": 231.8156, "eval_samples_per_second": 21.569, "eval_steps_per_second": 2.696, "step": 54500 }, { "epoch": 22.0, "learning_rate": 3.533446403618249e-05, "loss": 2.2831, "step": 55000 }, { "epoch": 22.0, "eval_accuracy": 0.5815027321493463, "eval_loss": 2.3000547885894775, "eval_runtime": 231.8488, "eval_samples_per_second": 21.566, "eval_steps_per_second": 2.696, "step": 55000 }, { "epoch": 22.2, "learning_rate": 3.5201126436045956e-05, "loss": 2.2461, "step": 55500 }, { "epoch": 22.2, "eval_accuracy": 0.5822362290673837, "eval_loss": 2.294421911239624, "eval_runtime": 231.0966, "eval_samples_per_second": 21.636, "eval_steps_per_second": 2.704, "step": 55500 }, { "epoch": 22.4, "learning_rate": 3.5067788835909416e-05, "loss": 2.2467, "step": 56000 }, { "epoch": 22.4, "eval_accuracy": 0.5855738229102827, "eval_loss": 2.2848546504974365, "eval_runtime": 232.0538, "eval_samples_per_second": 21.547, "eval_steps_per_second": 2.693, "step": 56000 }, { "epoch": 22.6, "learning_rate": 3.493445123577288e-05, "loss": 2.2199, "step": 56500 }, { "epoch": 22.6, "eval_accuracy": 0.5863217741131097, "eval_loss": 2.277578592300415, "eval_runtime": 231.9613, "eval_samples_per_second": 21.555, "eval_steps_per_second": 2.694, "step": 56500 }, { "epoch": 22.8, "learning_rate": 3.480111363563634e-05, "loss": 2.2279, "step": 57000 }, { "epoch": 22.8, "eval_accuracy": 0.5884500762486337, "eval_loss": 2.2576725482940674, "eval_runtime": 231.4399, "eval_samples_per_second": 21.604, "eval_steps_per_second": 2.7, "step": 57000 }, { "epoch": 23.0, "learning_rate": 3.46677760354998e-05, "loss": 2.2048, "step": 57500 }, { "epoch": 23.0, "eval_accuracy": 0.5885816360134521, "eval_loss": 2.256614923477173, "eval_runtime": 231.8872, "eval_samples_per_second": 21.562, "eval_steps_per_second": 2.695, "step": 57500 }, { "epoch": 23.2, "learning_rate": 3.453443843536326e-05, "loss": 2.1704, "step": 58000 }, { "epoch": 23.2, "eval_accuracy": 0.5914266042687368, "eval_loss": 2.2453179359436035, "eval_runtime": 232.1021, "eval_samples_per_second": 21.542, "eval_steps_per_second": 2.693, "step": 58000 }, { "epoch": 23.4, "learning_rate": 3.440110083522673e-05, "loss": 2.1682, "step": 58500 }, { "epoch": 23.4, "eval_accuracy": 0.5927180608978089, "eval_loss": 2.2313756942749023, "eval_runtime": 231.9349, "eval_samples_per_second": 21.558, "eval_steps_per_second": 2.695, "step": 58500 }, { "epoch": 23.6, "learning_rate": 3.426776323509019e-05, "loss": 2.1592, "step": 59000 }, { "epoch": 23.6, "eval_accuracy": 0.5960976878150253, "eval_loss": 2.2097034454345703, "eval_runtime": 232.3164, "eval_samples_per_second": 21.522, "eval_steps_per_second": 2.69, "step": 59000 }, { "epoch": 23.8, "learning_rate": 3.4134425634953656e-05, "loss": 2.1547, "step": 59500 }, { "epoch": 23.8, "eval_accuracy": 0.5972384920529272, "eval_loss": 2.198352336883545, "eval_runtime": 231.637, "eval_samples_per_second": 21.585, "eval_steps_per_second": 2.698, "step": 59500 }, { "epoch": 24.0, "learning_rate": 3.4001088034817116e-05, "loss": 2.1558, "step": 60000 }, { "epoch": 24.0, "eval_accuracy": 0.5992973741183971, "eval_loss": 2.186568021774292, "eval_runtime": 231.7166, "eval_samples_per_second": 21.578, "eval_steps_per_second": 2.697, "step": 60000 }, { "epoch": 24.2, "learning_rate": 3.3867750434680576e-05, "loss": 2.1189, "step": 60500 }, { "epoch": 24.2, "eval_accuracy": 0.6008984588334022, "eval_loss": 2.167475700378418, "eval_runtime": 231.504, "eval_samples_per_second": 21.598, "eval_steps_per_second": 2.7, "step": 60500 }, { "epoch": 24.4, "learning_rate": 3.3734412834544036e-05, "loss": 2.1088, "step": 61000 }, { "epoch": 24.4, "eval_accuracy": 0.602769414408557, "eval_loss": 2.1612794399261475, "eval_runtime": 232.1378, "eval_samples_per_second": 21.539, "eval_steps_per_second": 2.692, "step": 61000 }, { "epoch": 24.6, "learning_rate": 3.36010752344075e-05, "loss": 2.1164, "step": 61500 }, { "epoch": 24.6, "eval_accuracy": 0.6046435839795687, "eval_loss": 2.1531035900115967, "eval_runtime": 231.9129, "eval_samples_per_second": 21.56, "eval_steps_per_second": 2.695, "step": 61500 }, { "epoch": 24.8, "learning_rate": 3.346773763427096e-05, "loss": 2.094, "step": 62000 }, { "epoch": 24.8, "eval_accuracy": 0.6041164404544668, "eval_loss": 2.1506526470184326, "eval_runtime": 231.7484, "eval_samples_per_second": 21.575, "eval_steps_per_second": 2.697, "step": 62000 }, { "epoch": 25.0, "learning_rate": 3.333440003413443e-05, "loss": 2.0977, "step": 62500 }, { "epoch": 25.0, "eval_accuracy": 0.6063366902269225, "eval_loss": 2.1298508644104004, "eval_runtime": 231.8473, "eval_samples_per_second": 21.566, "eval_steps_per_second": 2.696, "step": 62500 }, { "epoch": 25.2, "learning_rate": 3.320106243399789e-05, "loss": 2.0657, "step": 63000 }, { "epoch": 25.2, "eval_accuracy": 0.6070928402963278, "eval_loss": 2.121816873550415, "eval_runtime": 231.4696, "eval_samples_per_second": 21.601, "eval_steps_per_second": 2.7, "step": 63000 }, { "epoch": 25.4, "learning_rate": 3.3067724833861355e-05, "loss": 2.051, "step": 63500 }, { "epoch": 25.4, "eval_accuracy": 0.6082587307585552, "eval_loss": 2.123283624649048, "eval_runtime": 231.5258, "eval_samples_per_second": 21.596, "eval_steps_per_second": 2.699, "step": 63500 }, { "epoch": 25.6, "learning_rate": 3.293438723372481e-05, "loss": 2.0482, "step": 64000 }, { "epoch": 25.6, "eval_accuracy": 0.6100416241732469, "eval_loss": 2.106915235519409, "eval_runtime": 231.6727, "eval_samples_per_second": 21.582, "eval_steps_per_second": 2.698, "step": 64000 }, { "epoch": 25.8, "learning_rate": 3.2801049633588275e-05, "loss": 2.04, "step": 64500 }, { "epoch": 25.8, "eval_accuracy": 0.6119977784355264, "eval_loss": 2.0985183715820312, "eval_runtime": 231.5457, "eval_samples_per_second": 21.594, "eval_steps_per_second": 2.699, "step": 64500 }, { "epoch": 26.0, "learning_rate": 3.2667712033451735e-05, "loss": 2.0341, "step": 65000 }, { "epoch": 26.0, "eval_accuracy": 0.6128028880096977, "eval_loss": 2.092878818511963, "eval_runtime": 233.0294, "eval_samples_per_second": 21.457, "eval_steps_per_second": 2.682, "step": 65000 }, { "epoch": 26.2, "learning_rate": 3.25343744333152e-05, "loss": 2.0207, "step": 65500 }, { "epoch": 26.2, "eval_accuracy": 0.6150510766517234, "eval_loss": 2.0767383575439453, "eval_runtime": 233.0148, "eval_samples_per_second": 21.458, "eval_steps_per_second": 2.682, "step": 65500 }, { "epoch": 26.4, "learning_rate": 3.240103683317866e-05, "loss": 2.0044, "step": 66000 }, { "epoch": 26.4, "eval_accuracy": 0.6161581773821563, "eval_loss": 2.0671825408935547, "eval_runtime": 232.3839, "eval_samples_per_second": 21.516, "eval_steps_per_second": 2.69, "step": 66000 }, { "epoch": 26.6, "learning_rate": 3.226769923304213e-05, "loss": 2.0037, "step": 66500 }, { "epoch": 26.6, "eval_accuracy": 0.6159318730896034, "eval_loss": 2.0623385906219482, "eval_runtime": 231.4425, "eval_samples_per_second": 21.604, "eval_steps_per_second": 2.7, "step": 66500 }, { "epoch": 26.8, "learning_rate": 3.213436163290559e-05, "loss": 2.0081, "step": 67000 }, { "epoch": 26.8, "eval_accuracy": 0.6164338068021428, "eval_loss": 2.061392068862915, "eval_runtime": 231.5016, "eval_samples_per_second": 21.598, "eval_steps_per_second": 2.7, "step": 67000 }, { "epoch": 27.0, "learning_rate": 3.200102403276905e-05, "loss": 1.9847, "step": 67500 }, { "epoch": 27.0, "eval_accuracy": 0.6186114673017036, "eval_loss": 2.0499088764190674, "eval_runtime": 231.4759, "eval_samples_per_second": 21.601, "eval_steps_per_second": 2.7, "step": 67500 }, { "epoch": 27.2, "learning_rate": 3.186768643263251e-05, "loss": 1.9465, "step": 68000 }, { "epoch": 27.2, "eval_accuracy": 0.619952747351619, "eval_loss": 2.039889097213745, "eval_runtime": 231.1562, "eval_samples_per_second": 21.63, "eval_steps_per_second": 2.704, "step": 68000 }, { "epoch": 27.4, "learning_rate": 3.1734348832495975e-05, "loss": 1.9573, "step": 68500 }, { "epoch": 27.4, "eval_accuracy": 0.6209867552531732, "eval_loss": 2.0353338718414307, "eval_runtime": 231.215, "eval_samples_per_second": 21.625, "eval_steps_per_second": 2.703, "step": 68500 }, { "epoch": 27.6, "learning_rate": 3.1601011232359435e-05, "loss": 1.9682, "step": 69000 }, { "epoch": 27.6, "eval_accuracy": 0.6227358192316012, "eval_loss": 2.018660306930542, "eval_runtime": 232.2045, "eval_samples_per_second": 21.533, "eval_steps_per_second": 2.692, "step": 69000 }, { "epoch": 27.8, "learning_rate": 3.14676736322229e-05, "loss": 1.9573, "step": 69500 }, { "epoch": 27.8, "eval_accuracy": 0.6228562509132936, "eval_loss": 2.0251047611236572, "eval_runtime": 231.49, "eval_samples_per_second": 21.599, "eval_steps_per_second": 2.7, "step": 69500 }, { "epoch": 28.0, "learning_rate": 3.133433603208636e-05, "loss": 1.9491, "step": 70000 }, { "epoch": 28.0, "eval_accuracy": 0.6245234243399657, "eval_loss": 2.0086443424224854, "eval_runtime": 231.5412, "eval_samples_per_second": 21.594, "eval_steps_per_second": 2.699, "step": 70000 }, { "epoch": 28.2, "learning_rate": 3.120099843194982e-05, "loss": 1.903, "step": 70500 }, { "epoch": 28.2, "eval_accuracy": 0.6246465089539988, "eval_loss": 2.0067226886749268, "eval_runtime": 231.828, "eval_samples_per_second": 21.568, "eval_steps_per_second": 2.696, "step": 70500 }, { "epoch": 28.4, "learning_rate": 3.106766083181329e-05, "loss": 1.9152, "step": 71000 }, { "epoch": 28.4, "eval_accuracy": 0.6263943801774015, "eval_loss": 1.992945909500122, "eval_runtime": 232.4718, "eval_samples_per_second": 21.508, "eval_steps_per_second": 2.688, "step": 71000 }, { "epoch": 28.6, "learning_rate": 3.093432323167675e-05, "loss": 1.9188, "step": 71500 }, { "epoch": 28.6, "eval_accuracy": 0.6274427330569298, "eval_loss": 1.9857181310653687, "eval_runtime": 231.0919, "eval_samples_per_second": 21.636, "eval_steps_per_second": 2.705, "step": 71500 }, { "epoch": 28.8, "learning_rate": 3.0800985631540214e-05, "loss": 1.9232, "step": 72000 }, { "epoch": 28.8, "eval_accuracy": 0.6286685951318984, "eval_loss": 1.9795554876327515, "eval_runtime": 232.0512, "eval_samples_per_second": 21.547, "eval_steps_per_second": 2.693, "step": 72000 }, { "epoch": 29.0, "learning_rate": 3.0667648031403674e-05, "loss": 1.9011, "step": 72500 }, { "epoch": 29.0, "eval_accuracy": 0.62893259547976, "eval_loss": 1.9790831804275513, "eval_runtime": 231.7972, "eval_samples_per_second": 21.571, "eval_steps_per_second": 2.696, "step": 72500 }, { "epoch": 29.2, "learning_rate": 3.053431043126714e-05, "loss": 1.8733, "step": 73000 }, { "epoch": 29.2, "eval_accuracy": 0.6289149729933008, "eval_loss": 1.9699796438217163, "eval_runtime": 231.6438, "eval_samples_per_second": 21.585, "eval_steps_per_second": 2.698, "step": 73000 }, { "epoch": 29.4, "learning_rate": 3.0400972831130597e-05, "loss": 1.8731, "step": 73500 }, { "epoch": 29.4, "eval_accuracy": 0.6302774206622085, "eval_loss": 1.9584376811981201, "eval_runtime": 231.8307, "eval_samples_per_second": 21.567, "eval_steps_per_second": 2.696, "step": 73500 }, { "epoch": 29.6, "learning_rate": 3.0267635230994064e-05, "loss": 1.8812, "step": 74000 }, { "epoch": 29.6, "eval_accuracy": 0.6322679604528252, "eval_loss": 1.9572980403900146, "eval_runtime": 231.0721, "eval_samples_per_second": 21.638, "eval_steps_per_second": 2.705, "step": 74000 }, { "epoch": 29.8, "learning_rate": 3.013429763085752e-05, "loss": 1.8674, "step": 74500 }, { "epoch": 29.8, "eval_accuracy": 0.6318260719202453, "eval_loss": 1.9501464366912842, "eval_runtime": 232.0123, "eval_samples_per_second": 21.551, "eval_steps_per_second": 2.694, "step": 74500 }, { "epoch": 30.0, "learning_rate": 3.0000960030720987e-05, "loss": 1.8572, "step": 75000 }, { "epoch": 30.0, "eval_accuracy": 0.633327222731439, "eval_loss": 1.9454320669174194, "eval_runtime": 231.3259, "eval_samples_per_second": 21.615, "eval_steps_per_second": 2.702, "step": 75000 }, { "epoch": 30.2, "learning_rate": 2.9867622430584447e-05, "loss": 1.849, "step": 75500 }, { "epoch": 30.2, "eval_accuracy": 0.6352029775815805, "eval_loss": 1.9374988079071045, "eval_runtime": 232.1415, "eval_samples_per_second": 21.539, "eval_steps_per_second": 2.692, "step": 75500 }, { "epoch": 30.4, "learning_rate": 2.973428483044791e-05, "loss": 1.8332, "step": 76000 }, { "epoch": 30.4, "eval_accuracy": 0.6343171251873188, "eval_loss": 1.9343605041503906, "eval_runtime": 231.7675, "eval_samples_per_second": 21.573, "eval_steps_per_second": 2.697, "step": 76000 }, { "epoch": 30.6, "learning_rate": 2.960094723031137e-05, "loss": 1.8413, "step": 76500 }, { "epoch": 30.6, "eval_accuracy": 0.6339970956919431, "eval_loss": 1.9292726516723633, "eval_runtime": 232.1306, "eval_samples_per_second": 21.54, "eval_steps_per_second": 2.692, "step": 76500 }, { "epoch": 30.8, "learning_rate": 2.9467609630174837e-05, "loss": 1.8298, "step": 77000 }, { "epoch": 30.8, "eval_accuracy": 0.6370566744044308, "eval_loss": 1.922782063484192, "eval_runtime": 231.4445, "eval_samples_per_second": 21.603, "eval_steps_per_second": 2.7, "step": 77000 }, { "epoch": 31.0, "learning_rate": 2.9334272030038297e-05, "loss": 1.8336, "step": 77500 }, { "epoch": 31.0, "eval_accuracy": 0.6371772588144194, "eval_loss": 1.921502709388733, "eval_runtime": 232.2546, "eval_samples_per_second": 21.528, "eval_steps_per_second": 2.691, "step": 77500 }, { "epoch": 31.2, "learning_rate": 2.920093442990176e-05, "loss": 1.8122, "step": 78000 }, { "epoch": 31.2, "eval_accuracy": 0.6386797799633273, "eval_loss": 1.9133163690567017, "eval_runtime": 231.9897, "eval_samples_per_second": 21.553, "eval_steps_per_second": 2.694, "step": 78000 }, { "epoch": 31.4, "learning_rate": 2.906759682976522e-05, "loss": 1.8001, "step": 78500 }, { "epoch": 31.4, "eval_accuracy": 0.6382722577512613, "eval_loss": 1.9119086265563965, "eval_runtime": 231.6865, "eval_samples_per_second": 21.581, "eval_steps_per_second": 2.698, "step": 78500 }, { "epoch": 31.6, "learning_rate": 2.8934259229628687e-05, "loss": 1.7934, "step": 79000 }, { "epoch": 31.6, "eval_accuracy": 0.6387368757023453, "eval_loss": 1.908843755722046, "eval_runtime": 231.6821, "eval_samples_per_second": 21.581, "eval_steps_per_second": 2.698, "step": 79000 }, { "epoch": 31.8, "learning_rate": 2.8800921629492143e-05, "loss": 1.8079, "step": 79500 }, { "epoch": 31.8, "eval_accuracy": 0.6417437173027998, "eval_loss": 1.8940019607543945, "eval_runtime": 231.6685, "eval_samples_per_second": 21.583, "eval_steps_per_second": 2.698, "step": 79500 }, { "epoch": 32.0, "learning_rate": 2.866758402935561e-05, "loss": 1.8017, "step": 80000 }, { "epoch": 32.0, "eval_accuracy": 0.6409578370303184, "eval_loss": 1.8889141082763672, "eval_runtime": 232.1525, "eval_samples_per_second": 21.538, "eval_steps_per_second": 2.692, "step": 80000 }, { "epoch": 32.2, "learning_rate": 2.853424642921907e-05, "loss": 1.7789, "step": 80500 }, { "epoch": 32.2, "eval_accuracy": 0.6423419234856536, "eval_loss": 1.8882757425308228, "eval_runtime": 232.9517, "eval_samples_per_second": 21.464, "eval_steps_per_second": 2.683, "step": 80500 }, { "epoch": 32.4, "learning_rate": 2.8400908829082533e-05, "loss": 1.7739, "step": 81000 }, { "epoch": 32.4, "eval_accuracy": 0.6419453574431457, "eval_loss": 1.883570671081543, "eval_runtime": 232.6796, "eval_samples_per_second": 21.489, "eval_steps_per_second": 2.686, "step": 81000 }, { "epoch": 32.6, "learning_rate": 2.8267571228945993e-05, "loss": 1.7602, "step": 81500 }, { "epoch": 32.6, "eval_accuracy": 0.6432658377580925, "eval_loss": 1.8794612884521484, "eval_runtime": 232.1193, "eval_samples_per_second": 21.541, "eval_steps_per_second": 2.693, "step": 81500 }, { "epoch": 32.8, "learning_rate": 2.813423362880946e-05, "loss": 1.7731, "step": 82000 }, { "epoch": 32.8, "eval_accuracy": 0.6439001005386474, "eval_loss": 1.876919150352478, "eval_runtime": 232.0655, "eval_samples_per_second": 21.546, "eval_steps_per_second": 2.693, "step": 82000 }, { "epoch": 33.0, "learning_rate": 2.800089602867292e-05, "loss": 1.7784, "step": 82500 }, { "epoch": 33.0, "eval_accuracy": 0.6466631763933027, "eval_loss": 1.858984351158142, "eval_runtime": 232.3936, "eval_samples_per_second": 21.515, "eval_steps_per_second": 2.689, "step": 82500 }, { "epoch": 33.2, "learning_rate": 2.7867558428536383e-05, "loss": 1.7506, "step": 83000 }, { "epoch": 33.2, "eval_accuracy": 0.6447427010573747, "eval_loss": 1.8664191961288452, "eval_runtime": 231.8269, "eval_samples_per_second": 21.568, "eval_steps_per_second": 2.696, "step": 83000 }, { "epoch": 33.4, "learning_rate": 2.7734220828399843e-05, "loss": 1.7307, "step": 83500 }, { "epoch": 33.4, "eval_accuracy": 0.647232934792391, "eval_loss": 1.8552578687667847, "eval_runtime": 231.9354, "eval_samples_per_second": 21.558, "eval_steps_per_second": 2.695, "step": 83500 }, { "epoch": 33.6, "learning_rate": 2.760088322826331e-05, "loss": 1.748, "step": 84000 }, { "epoch": 33.6, "eval_accuracy": 0.6470427709961263, "eval_loss": 1.8523156642913818, "eval_runtime": 231.5672, "eval_samples_per_second": 21.592, "eval_steps_per_second": 2.699, "step": 84000 }, { "epoch": 33.8, "learning_rate": 2.7467545628126766e-05, "loss": 1.7285, "step": 84500 }, { "epoch": 33.8, "eval_accuracy": 0.6490748366917534, "eval_loss": 1.8396527767181396, "eval_runtime": 231.3951, "eval_samples_per_second": 21.608, "eval_steps_per_second": 2.701, "step": 84500 }, { "epoch": 34.0, "learning_rate": 2.7334208027990233e-05, "loss": 1.7426, "step": 85000 }, { "epoch": 34.0, "eval_accuracy": 0.6492367478939033, "eval_loss": 1.8321189880371094, "eval_runtime": 231.7169, "eval_samples_per_second": 21.578, "eval_steps_per_second": 2.697, "step": 85000 }, { "epoch": 34.2, "learning_rate": 2.7200870427853693e-05, "loss": 1.7128, "step": 85500 }, { "epoch": 34.2, "eval_accuracy": 0.6506795203856116, "eval_loss": 1.8220070600509644, "eval_runtime": 231.6693, "eval_samples_per_second": 21.582, "eval_steps_per_second": 2.698, "step": 85500 }, { "epoch": 34.4, "learning_rate": 2.7067532827717156e-05, "loss": 1.7155, "step": 86000 }, { "epoch": 34.4, "eval_accuracy": 0.6479166597570238, "eval_loss": 1.8486615419387817, "eval_runtime": 232.1633, "eval_samples_per_second": 21.537, "eval_steps_per_second": 2.692, "step": 86000 }, { "epoch": 34.6, "learning_rate": 2.6934195227580616e-05, "loss": 1.7143, "step": 86500 }, { "epoch": 34.6, "eval_accuracy": 0.650382482023844, "eval_loss": 1.8266839981079102, "eval_runtime": 231.6644, "eval_samples_per_second": 21.583, "eval_steps_per_second": 2.698, "step": 86500 }, { "epoch": 34.8, "learning_rate": 2.6800857627444082e-05, "loss": 1.7197, "step": 87000 }, { "epoch": 34.8, "eval_accuracy": 0.6498565162886434, "eval_loss": 1.8367702960968018, "eval_runtime": 231.461, "eval_samples_per_second": 21.602, "eval_steps_per_second": 2.7, "step": 87000 }, { "epoch": 35.0, "learning_rate": 2.6667520027307542e-05, "loss": 1.7043, "step": 87500 }, { "epoch": 35.0, "eval_accuracy": 0.6523924018595729, "eval_loss": 1.8128160238265991, "eval_runtime": 231.9645, "eval_samples_per_second": 21.555, "eval_steps_per_second": 2.694, "step": 87500 }, { "epoch": 35.2, "learning_rate": 2.6534182427171006e-05, "loss": 1.6931, "step": 88000 }, { "epoch": 35.2, "eval_accuracy": 0.6517473657677497, "eval_loss": 1.8211960792541504, "eval_runtime": 232.1504, "eval_samples_per_second": 21.538, "eval_steps_per_second": 2.692, "step": 88000 }, { "epoch": 35.4, "learning_rate": 2.6400844827034466e-05, "loss": 1.6873, "step": 88500 }, { "epoch": 35.4, "eval_accuracy": 0.6531137724550898, "eval_loss": 1.811033010482788, "eval_runtime": 231.7269, "eval_samples_per_second": 21.577, "eval_steps_per_second": 2.697, "step": 88500 }, { "epoch": 35.6, "learning_rate": 2.626750722689793e-05, "loss": 1.684, "step": 89000 }, { "epoch": 35.6, "eval_accuracy": 0.6529288870496202, "eval_loss": 1.8144562244415283, "eval_runtime": 231.5171, "eval_samples_per_second": 21.597, "eval_steps_per_second": 2.7, "step": 89000 }, { "epoch": 35.8, "learning_rate": 2.613416962676139e-05, "loss": 1.6802, "step": 89500 }, { "epoch": 35.8, "eval_accuracy": 0.6537266770878372, "eval_loss": 1.8046207427978516, "eval_runtime": 232.1963, "eval_samples_per_second": 21.534, "eval_steps_per_second": 2.692, "step": 89500 }, { "epoch": 36.0, "learning_rate": 2.6000832026624855e-05, "loss": 1.6807, "step": 90000 }, { "epoch": 36.0, "eval_accuracy": 0.654968497660255, "eval_loss": 1.8015984296798706, "eval_runtime": 232.2236, "eval_samples_per_second": 21.531, "eval_steps_per_second": 2.691, "step": 90000 }, { "epoch": 36.2, "learning_rate": 2.5867494426488315e-05, "loss": 1.6612, "step": 90500 }, { "epoch": 36.2, "eval_accuracy": 0.653872168300964, "eval_loss": 1.7996736764907837, "eval_runtime": 231.7061, "eval_samples_per_second": 21.579, "eval_steps_per_second": 2.697, "step": 90500 }, { "epoch": 36.4, "learning_rate": 2.573415682635178e-05, "loss": 1.6586, "step": 91000 }, { "epoch": 36.4, "eval_accuracy": 0.6536951243476204, "eval_loss": 1.8013685941696167, "eval_runtime": 232.2979, "eval_samples_per_second": 21.524, "eval_steps_per_second": 2.691, "step": 91000 }, { "epoch": 36.6, "learning_rate": 2.560081922621524e-05, "loss": 1.658, "step": 91500 }, { "epoch": 36.6, "eval_accuracy": 0.6565445408983954, "eval_loss": 1.7937599420547485, "eval_runtime": 231.4385, "eval_samples_per_second": 21.604, "eval_steps_per_second": 2.701, "step": 91500 }, { "epoch": 36.8, "learning_rate": 2.5467481626078705e-05, "loss": 1.6623, "step": 92000 }, { "epoch": 36.8, "eval_accuracy": 0.65856217010757, "eval_loss": 1.7775884866714478, "eval_runtime": 232.2205, "eval_samples_per_second": 21.531, "eval_steps_per_second": 2.691, "step": 92000 }, { "epoch": 37.0, "learning_rate": 2.5334144025942165e-05, "loss": 1.6618, "step": 92500 }, { "epoch": 37.0, "eval_accuracy": 0.6573262777234046, "eval_loss": 1.7884029150009155, "eval_runtime": 231.6843, "eval_samples_per_second": 21.581, "eval_steps_per_second": 2.698, "step": 92500 }, { "epoch": 37.2, "learning_rate": 2.520080642580563e-05, "loss": 1.6453, "step": 93000 }, { "epoch": 37.2, "eval_accuracy": 0.6571095648699937, "eval_loss": 1.7870711088180542, "eval_runtime": 231.5772, "eval_samples_per_second": 21.591, "eval_steps_per_second": 2.699, "step": 93000 }, { "epoch": 37.4, "learning_rate": 2.5067468825669088e-05, "loss": 1.6462, "step": 93500 }, { "epoch": 37.4, "eval_accuracy": 0.6584839807589097, "eval_loss": 1.7780765295028687, "eval_runtime": 231.8264, "eval_samples_per_second": 21.568, "eval_steps_per_second": 2.696, "step": 93500 }, { "epoch": 37.6, "learning_rate": 2.493413122553255e-05, "loss": 1.6353, "step": 94000 }, { "epoch": 37.6, "eval_accuracy": 0.6583383343335334, "eval_loss": 1.780821442604065, "eval_runtime": 231.9584, "eval_samples_per_second": 21.556, "eval_steps_per_second": 2.694, "step": 94000 }, { "epoch": 37.8, "learning_rate": 2.480079362539601e-05, "loss": 1.6507, "step": 94500 }, { "epoch": 37.8, "eval_accuracy": 0.660343714861709, "eval_loss": 1.766579508781433, "eval_runtime": 232.2004, "eval_samples_per_second": 21.533, "eval_steps_per_second": 2.692, "step": 94500 }, { "epoch": 38.0, "learning_rate": 2.4667456025259475e-05, "loss": 1.6383, "step": 95000 }, { "epoch": 38.0, "eval_accuracy": 0.6605969144871192, "eval_loss": 1.762445330619812, "eval_runtime": 231.2813, "eval_samples_per_second": 21.619, "eval_steps_per_second": 2.702, "step": 95000 }, { "epoch": 38.2, "learning_rate": 2.4534118425122938e-05, "loss": 1.6299, "step": 95500 }, { "epoch": 38.2, "eval_accuracy": 0.6605303776246108, "eval_loss": 1.7652736902236938, "eval_runtime": 233.0257, "eval_samples_per_second": 21.457, "eval_steps_per_second": 2.682, "step": 95500 }, { "epoch": 38.4, "learning_rate": 2.4400780824986398e-05, "loss": 1.6085, "step": 96000 }, { "epoch": 38.4, "eval_accuracy": 0.6610363508099403, "eval_loss": 1.752347469329834, "eval_runtime": 233.1846, "eval_samples_per_second": 21.442, "eval_steps_per_second": 2.68, "step": 96000 }, { "epoch": 38.6, "learning_rate": 2.426744322484986e-05, "loss": 1.6155, "step": 96500 }, { "epoch": 38.6, "eval_accuracy": 0.6612273667271772, "eval_loss": 1.7521241903305054, "eval_runtime": 232.3209, "eval_samples_per_second": 21.522, "eval_steps_per_second": 2.69, "step": 96500 }, { "epoch": 38.8, "learning_rate": 2.4134105624713325e-05, "loss": 1.6106, "step": 97000 }, { "epoch": 38.8, "eval_accuracy": 0.6604985873358816, "eval_loss": 1.763405442237854, "eval_runtime": 232.258, "eval_samples_per_second": 21.528, "eval_steps_per_second": 2.691, "step": 97000 }, { "epoch": 39.0, "learning_rate": 2.4000768024576788e-05, "loss": 1.6201, "step": 97500 }, { "epoch": 39.0, "eval_accuracy": 0.6624809349278767, "eval_loss": 1.7461175918579102, "eval_runtime": 231.8365, "eval_samples_per_second": 21.567, "eval_steps_per_second": 2.696, "step": 97500 }, { "epoch": 39.2, "learning_rate": 2.3867430424440248e-05, "loss": 1.5835, "step": 98000 }, { "epoch": 39.2, "eval_accuracy": 0.6617336081815064, "eval_loss": 1.750459909439087, "eval_runtime": 232.4138, "eval_samples_per_second": 21.513, "eval_steps_per_second": 2.689, "step": 98000 }, { "epoch": 39.4, "learning_rate": 2.373409282430371e-05, "loss": 1.5885, "step": 98500 }, { "epoch": 39.4, "eval_accuracy": 0.6622954419917079, "eval_loss": 1.7476534843444824, "eval_runtime": 232.0419, "eval_samples_per_second": 21.548, "eval_steps_per_second": 2.693, "step": 98500 }, { "epoch": 39.6, "learning_rate": 2.3600755224167174e-05, "loss": 1.5988, "step": 99000 }, { "epoch": 39.6, "eval_accuracy": 0.6634760353188246, "eval_loss": 1.7444517612457275, "eval_runtime": 232.4705, "eval_samples_per_second": 21.508, "eval_steps_per_second": 2.689, "step": 99000 }, { "epoch": 39.8, "learning_rate": 2.3467417624030638e-05, "loss": 1.6013, "step": 99500 }, { "epoch": 39.8, "eval_accuracy": 0.6637441413471821, "eval_loss": 1.7407046556472778, "eval_runtime": 231.1683, "eval_samples_per_second": 21.629, "eval_steps_per_second": 2.704, "step": 99500 }, { "epoch": 40.0, "learning_rate": 2.33340800238941e-05, "loss": 1.594, "step": 100000 }, { "epoch": 40.0, "eval_accuracy": 0.6655929575351373, "eval_loss": 1.7335906028747559, "eval_runtime": 231.9333, "eval_samples_per_second": 21.558, "eval_steps_per_second": 2.695, "step": 100000 }, { "epoch": 40.2, "learning_rate": 2.3200742423757564e-05, "loss": 1.5741, "step": 100500 }, { "epoch": 40.2, "eval_accuracy": 0.6636754906359572, "eval_loss": 1.734755516052246, "eval_runtime": 232.1572, "eval_samples_per_second": 21.537, "eval_steps_per_second": 2.692, "step": 100500 }, { "epoch": 40.4, "learning_rate": 2.3067404823621024e-05, "loss": 1.5744, "step": 101000 }, { "epoch": 40.4, "eval_accuracy": 0.6653410869753469, "eval_loss": 1.7242318391799927, "eval_runtime": 232.2909, "eval_samples_per_second": 21.525, "eval_steps_per_second": 2.691, "step": 101000 }, { "epoch": 40.6, "learning_rate": 2.2934067223484487e-05, "loss": 1.5809, "step": 101500 }, { "epoch": 40.6, "eval_accuracy": 0.6660700357978522, "eval_loss": 1.726152777671814, "eval_runtime": 231.4564, "eval_samples_per_second": 21.602, "eval_steps_per_second": 2.7, "step": 101500 }, { "epoch": 40.8, "learning_rate": 2.280072962334795e-05, "loss": 1.5723, "step": 102000 }, { "epoch": 40.8, "eval_accuracy": 0.6664572676418679, "eval_loss": 1.7256526947021484, "eval_runtime": 231.2938, "eval_samples_per_second": 21.618, "eval_steps_per_second": 2.702, "step": 102000 }, { "epoch": 41.0, "learning_rate": 2.266739202321141e-05, "loss": 1.5695, "step": 102500 }, { "epoch": 41.0, "eval_accuracy": 0.6664388539576068, "eval_loss": 1.71815824508667, "eval_runtime": 232.2361, "eval_samples_per_second": 21.53, "eval_steps_per_second": 2.691, "step": 102500 }, { "epoch": 41.2, "learning_rate": 2.2534054423074874e-05, "loss": 1.5462, "step": 103000 }, { "epoch": 41.2, "eval_accuracy": 0.6659556456182217, "eval_loss": 1.7257441282272339, "eval_runtime": 232.4495, "eval_samples_per_second": 21.51, "eval_steps_per_second": 2.689, "step": 103000 }, { "epoch": 41.4, "learning_rate": 2.2400716822938337e-05, "loss": 1.5545, "step": 103500 }, { "epoch": 41.4, "eval_accuracy": 0.6685517195414556, "eval_loss": 1.710143804550171, "eval_runtime": 231.3349, "eval_samples_per_second": 21.614, "eval_steps_per_second": 2.702, "step": 103500 }, { "epoch": 41.6, "learning_rate": 2.2267379222801797e-05, "loss": 1.5574, "step": 104000 }, { "epoch": 41.6, "eval_accuracy": 0.6684205612462655, "eval_loss": 1.7108157873153687, "eval_runtime": 231.6198, "eval_samples_per_second": 21.587, "eval_steps_per_second": 2.698, "step": 104000 }, { "epoch": 41.8, "learning_rate": 2.213404162266526e-05, "loss": 1.5485, "step": 104500 }, { "epoch": 41.8, "eval_accuracy": 0.6665381047535768, "eval_loss": 1.7164138555526733, "eval_runtime": 231.1733, "eval_samples_per_second": 21.629, "eval_steps_per_second": 2.704, "step": 104500 }, { "epoch": 42.0, "learning_rate": 2.2000704022528724e-05, "loss": 1.5487, "step": 105000 }, { "epoch": 42.0, "eval_accuracy": 0.6693558557063685, "eval_loss": 1.7079566717147827, "eval_runtime": 231.1865, "eval_samples_per_second": 21.628, "eval_steps_per_second": 2.703, "step": 105000 }, { "epoch": 42.2, "learning_rate": 2.1867366422392187e-05, "loss": 1.5278, "step": 105500 }, { "epoch": 42.2, "eval_accuracy": 0.6685844037369237, "eval_loss": 1.709234595298767, "eval_runtime": 232.3941, "eval_samples_per_second": 21.515, "eval_steps_per_second": 2.689, "step": 105500 }, { "epoch": 42.4, "learning_rate": 2.1734028822255647e-05, "loss": 1.5282, "step": 106000 }, { "epoch": 42.4, "eval_accuracy": 0.6689898717577476, "eval_loss": 1.7052183151245117, "eval_runtime": 231.6514, "eval_samples_per_second": 21.584, "eval_steps_per_second": 2.698, "step": 106000 }, { "epoch": 42.6, "learning_rate": 2.160069122211911e-05, "loss": 1.5468, "step": 106500 }, { "epoch": 42.6, "eval_accuracy": 0.6704241186507764, "eval_loss": 1.7057934999465942, "eval_runtime": 232.2002, "eval_samples_per_second": 21.533, "eval_steps_per_second": 2.692, "step": 106500 }, { "epoch": 42.8, "learning_rate": 2.1467353621982573e-05, "loss": 1.5375, "step": 107000 }, { "epoch": 42.8, "eval_accuracy": 0.6689407289213416, "eval_loss": 1.7019603252410889, "eval_runtime": 231.6252, "eval_samples_per_second": 21.587, "eval_steps_per_second": 2.698, "step": 107000 }, { "epoch": 43.0, "learning_rate": 2.1334016021846033e-05, "loss": 1.5301, "step": 107500 }, { "epoch": 43.0, "eval_accuracy": 0.67096851406744, "eval_loss": 1.694966435432434, "eval_runtime": 231.3192, "eval_samples_per_second": 21.615, "eval_steps_per_second": 2.702, "step": 107500 }, { "epoch": 43.2, "learning_rate": 2.1200678421709497e-05, "loss": 1.5224, "step": 108000 }, { "epoch": 43.2, "eval_accuracy": 0.6701957265809324, "eval_loss": 1.6989938020706177, "eval_runtime": 231.5003, "eval_samples_per_second": 21.598, "eval_steps_per_second": 2.7, "step": 108000 }, { "epoch": 43.4, "learning_rate": 2.106734082157296e-05, "loss": 1.5105, "step": 108500 }, { "epoch": 43.4, "eval_accuracy": 0.6714945598779761, "eval_loss": 1.691909909248352, "eval_runtime": 232.1406, "eval_samples_per_second": 21.539, "eval_steps_per_second": 2.692, "step": 108500 }, { "epoch": 43.6, "learning_rate": 2.093400322143642e-05, "loss": 1.5179, "step": 109000 }, { "epoch": 43.6, "eval_accuracy": 0.6724466215607879, "eval_loss": 1.6844831705093384, "eval_runtime": 231.9646, "eval_samples_per_second": 21.555, "eval_steps_per_second": 2.694, "step": 109000 }, { "epoch": 43.8, "learning_rate": 2.0800665621299883e-05, "loss": 1.518, "step": 109500 }, { "epoch": 43.8, "eval_accuracy": 0.6721125557875637, "eval_loss": 1.6838468313217163, "eval_runtime": 232.4955, "eval_samples_per_second": 21.506, "eval_steps_per_second": 2.688, "step": 109500 }, { "epoch": 44.0, "learning_rate": 2.0667328021163346e-05, "loss": 1.5191, "step": 110000 }, { "epoch": 44.0, "eval_accuracy": 0.6715394411366622, "eval_loss": 1.6877400875091553, "eval_runtime": 231.8955, "eval_samples_per_second": 21.561, "eval_steps_per_second": 2.695, "step": 110000 }, { "epoch": 44.2, "learning_rate": 2.053399042102681e-05, "loss": 1.4984, "step": 110500 }, { "epoch": 44.2, "eval_accuracy": 0.6712061803124136, "eval_loss": 1.692290186882019, "eval_runtime": 231.7743, "eval_samples_per_second": 21.573, "eval_steps_per_second": 2.697, "step": 110500 }, { "epoch": 44.4, "learning_rate": 2.040065282089027e-05, "loss": 1.5051, "step": 111000 }, { "epoch": 44.4, "eval_accuracy": 0.6722261856343719, "eval_loss": 1.684216022491455, "eval_runtime": 232.2158, "eval_samples_per_second": 21.532, "eval_steps_per_second": 2.691, "step": 111000 }, { "epoch": 44.6, "learning_rate": 2.0267315220753733e-05, "loss": 1.4993, "step": 111500 }, { "epoch": 44.6, "eval_accuracy": 0.6741077814035333, "eval_loss": 1.6768300533294678, "eval_runtime": 232.5116, "eval_samples_per_second": 21.504, "eval_steps_per_second": 2.688, "step": 111500 }, { "epoch": 44.8, "learning_rate": 2.0133977620617196e-05, "loss": 1.5035, "step": 112000 }, { "epoch": 44.8, "eval_accuracy": 0.6727453029224042, "eval_loss": 1.6816706657409668, "eval_runtime": 232.9219, "eval_samples_per_second": 21.466, "eval_steps_per_second": 2.683, "step": 112000 }, { "epoch": 45.0, "learning_rate": 2.0000640020480656e-05, "loss": 1.5047, "step": 112500 }, { "epoch": 45.0, "eval_accuracy": 0.6732629405102925, "eval_loss": 1.6727675199508667, "eval_runtime": 233.5892, "eval_samples_per_second": 21.405, "eval_steps_per_second": 2.676, "step": 112500 }, { "epoch": 45.2, "learning_rate": 1.986730242034412e-05, "loss": 1.4788, "step": 113000 }, { "epoch": 45.2, "eval_accuracy": 0.6720030354064183, "eval_loss": 1.6824595928192139, "eval_runtime": 232.4735, "eval_samples_per_second": 21.508, "eval_steps_per_second": 2.688, "step": 113000 }, { "epoch": 45.4, "learning_rate": 1.9733964820207583e-05, "loss": 1.4841, "step": 113500 }, { "epoch": 45.4, "eval_accuracy": 0.6734870787809277, "eval_loss": 1.6770191192626953, "eval_runtime": 232.5524, "eval_samples_per_second": 21.501, "eval_steps_per_second": 2.688, "step": 113500 }, { "epoch": 45.6, "learning_rate": 1.9600627220071042e-05, "loss": 1.4863, "step": 114000 }, { "epoch": 45.6, "eval_accuracy": 0.6752743985944378, "eval_loss": 1.6587978601455688, "eval_runtime": 231.3799, "eval_samples_per_second": 21.609, "eval_steps_per_second": 2.701, "step": 114000 }, { "epoch": 45.8, "learning_rate": 1.9467289619934506e-05, "loss": 1.4859, "step": 114500 }, { "epoch": 45.8, "eval_accuracy": 0.6741216184724784, "eval_loss": 1.6681159734725952, "eval_runtime": 232.5066, "eval_samples_per_second": 21.505, "eval_steps_per_second": 2.688, "step": 114500 }, { "epoch": 46.0, "learning_rate": 1.933395201979797e-05, "loss": 1.4839, "step": 115000 }, { "epoch": 46.0, "eval_accuracy": 0.67397265751252, "eval_loss": 1.6657896041870117, "eval_runtime": 231.5604, "eval_samples_per_second": 21.593, "eval_steps_per_second": 2.699, "step": 115000 }, { "epoch": 46.2, "learning_rate": 1.9200614419661432e-05, "loss": 1.4633, "step": 115500 }, { "epoch": 46.2, "eval_accuracy": 0.6765235484774519, "eval_loss": 1.6601226329803467, "eval_runtime": 231.3619, "eval_samples_per_second": 21.611, "eval_steps_per_second": 2.701, "step": 115500 }, { "epoch": 46.4, "learning_rate": 1.9067276819524892e-05, "loss": 1.4725, "step": 116000 }, { "epoch": 46.4, "eval_accuracy": 0.6752809587927104, "eval_loss": 1.6586965322494507, "eval_runtime": 231.6681, "eval_samples_per_second": 21.583, "eval_steps_per_second": 2.698, "step": 116000 }, { "epoch": 46.6, "learning_rate": 1.8933939219388355e-05, "loss": 1.4703, "step": 116500 }, { "epoch": 46.6, "eval_accuracy": 0.6755936231903356, "eval_loss": 1.664272665977478, "eval_runtime": 231.4478, "eval_samples_per_second": 21.603, "eval_steps_per_second": 2.7, "step": 116500 }, { "epoch": 46.8, "learning_rate": 1.880060161925182e-05, "loss": 1.4763, "step": 117000 }, { "epoch": 46.8, "eval_accuracy": 0.6759434654429518, "eval_loss": 1.6583046913146973, "eval_runtime": 231.3856, "eval_samples_per_second": 21.609, "eval_steps_per_second": 2.701, "step": 117000 }, { "epoch": 47.0, "learning_rate": 1.866726401911528e-05, "loss": 1.4825, "step": 117500 }, { "epoch": 47.0, "eval_accuracy": 0.6766304076891632, "eval_loss": 1.6487648487091064, "eval_runtime": 231.4489, "eval_samples_per_second": 21.603, "eval_steps_per_second": 2.7, "step": 117500 }, { "epoch": 47.2, "learning_rate": 1.8533926418978742e-05, "loss": 1.4496, "step": 118000 }, { "epoch": 47.2, "eval_accuracy": 0.6772409050087971, "eval_loss": 1.648973822593689, "eval_runtime": 232.2521, "eval_samples_per_second": 21.528, "eval_steps_per_second": 2.691, "step": 118000 }, { "epoch": 47.4, "learning_rate": 1.8400588818842205e-05, "loss": 1.457, "step": 118500 }, { "epoch": 47.4, "eval_accuracy": 0.6777788151191017, "eval_loss": 1.6462332010269165, "eval_runtime": 231.6165, "eval_samples_per_second": 21.587, "eval_steps_per_second": 2.698, "step": 118500 }, { "epoch": 47.6, "learning_rate": 1.8267251218705665e-05, "loss": 1.4541, "step": 119000 }, { "epoch": 47.6, "eval_accuracy": 0.6799176285554341, "eval_loss": 1.63682222366333, "eval_runtime": 232.1826, "eval_samples_per_second": 21.535, "eval_steps_per_second": 2.692, "step": 119000 }, { "epoch": 47.8, "learning_rate": 1.813391361856913e-05, "loss": 1.4561, "step": 119500 }, { "epoch": 47.8, "eval_accuracy": 0.6777792584202286, "eval_loss": 1.6403781175613403, "eval_runtime": 231.5617, "eval_samples_per_second": 21.593, "eval_steps_per_second": 2.699, "step": 119500 }, { "epoch": 48.0, "learning_rate": 1.8000576018432592e-05, "loss": 1.4547, "step": 120000 }, { "epoch": 48.0, "eval_accuracy": 0.6790187161680793, "eval_loss": 1.638481855392456, "eval_runtime": 231.6285, "eval_samples_per_second": 21.586, "eval_steps_per_second": 2.698, "step": 120000 }, { "epoch": 48.2, "learning_rate": 1.7867238418296055e-05, "loss": 1.4406, "step": 120500 }, { "epoch": 48.2, "eval_accuracy": 0.6799383927561591, "eval_loss": 1.6374049186706543, "eval_runtime": 231.2859, "eval_samples_per_second": 21.618, "eval_steps_per_second": 2.702, "step": 120500 }, { "epoch": 48.4, "learning_rate": 1.7733900818159515e-05, "loss": 1.4374, "step": 121000 }, { "epoch": 48.4, "eval_accuracy": 0.6799176059126742, "eval_loss": 1.6319433450698853, "eval_runtime": 231.6988, "eval_samples_per_second": 21.58, "eval_steps_per_second": 2.697, "step": 121000 }, { "epoch": 48.6, "learning_rate": 1.7600563218022978e-05, "loss": 1.4395, "step": 121500 }, { "epoch": 48.6, "eval_accuracy": 0.6786781490626186, "eval_loss": 1.642459511756897, "eval_runtime": 231.6229, "eval_samples_per_second": 21.587, "eval_steps_per_second": 2.698, "step": 121500 }, { "epoch": 48.8, "learning_rate": 1.746722561788644e-05, "loss": 1.4347, "step": 122000 }, { "epoch": 48.8, "eval_accuracy": 0.6814051123983632, "eval_loss": 1.6252304315567017, "eval_runtime": 232.473, "eval_samples_per_second": 21.508, "eval_steps_per_second": 2.688, "step": 122000 }, { "epoch": 49.0, "learning_rate": 1.73338880177499e-05, "loss": 1.4392, "step": 122500 }, { "epoch": 49.0, "eval_accuracy": 0.6801429877795327, "eval_loss": 1.635969638824463, "eval_runtime": 231.885, "eval_samples_per_second": 21.562, "eval_steps_per_second": 2.695, "step": 122500 }, { "epoch": 49.2, "learning_rate": 1.7200550417613365e-05, "loss": 1.439, "step": 123000 }, { "epoch": 49.2, "eval_accuracy": 0.68263718810269, "eval_loss": 1.6232901811599731, "eval_runtime": 232.2203, "eval_samples_per_second": 21.531, "eval_steps_per_second": 2.691, "step": 123000 }, { "epoch": 49.4, "learning_rate": 1.7067212817476828e-05, "loss": 1.4223, "step": 123500 }, { "epoch": 49.4, "eval_accuracy": 0.6809202734397155, "eval_loss": 1.6261595487594604, "eval_runtime": 231.4902, "eval_samples_per_second": 21.599, "eval_steps_per_second": 2.7, "step": 123500 }, { "epoch": 49.6, "learning_rate": 1.6933875217340288e-05, "loss": 1.4292, "step": 124000 }, { "epoch": 49.6, "eval_accuracy": 0.6810616944045385, "eval_loss": 1.6292299032211304, "eval_runtime": 231.4763, "eval_samples_per_second": 21.6, "eval_steps_per_second": 2.7, "step": 124000 }, { "epoch": 49.8, "learning_rate": 1.680053761720375e-05, "loss": 1.4237, "step": 124500 }, { "epoch": 49.8, "eval_accuracy": 0.6812210767774028, "eval_loss": 1.6226879358291626, "eval_runtime": 231.3771, "eval_samples_per_second": 21.61, "eval_steps_per_second": 2.701, "step": 124500 }, { "epoch": 50.0, "learning_rate": 1.6667200017067214e-05, "loss": 1.4241, "step": 125000 }, { "epoch": 50.0, "eval_accuracy": 0.6810288384521558, "eval_loss": 1.6230130195617676, "eval_runtime": 231.4942, "eval_samples_per_second": 21.599, "eval_steps_per_second": 2.7, "step": 125000 }, { "epoch": 50.2, "learning_rate": 1.6533862416930678e-05, "loss": 1.4118, "step": 125500 }, { "epoch": 50.2, "eval_accuracy": 0.6822409697872935, "eval_loss": 1.6255977153778076, "eval_runtime": 231.568, "eval_samples_per_second": 21.592, "eval_steps_per_second": 2.699, "step": 125500 }, { "epoch": 50.4, "learning_rate": 1.6400524816794138e-05, "loss": 1.4225, "step": 126000 }, { "epoch": 50.4, "eval_accuracy": 0.6816822007419128, "eval_loss": 1.6251336336135864, "eval_runtime": 231.5464, "eval_samples_per_second": 21.594, "eval_steps_per_second": 2.699, "step": 126000 }, { "epoch": 50.6, "learning_rate": 1.62671872166576e-05, "loss": 1.4122, "step": 126500 }, { "epoch": 50.6, "eval_accuracy": 0.6827178759221865, "eval_loss": 1.6178197860717773, "eval_runtime": 232.3558, "eval_samples_per_second": 21.519, "eval_steps_per_second": 2.69, "step": 126500 }, { "epoch": 50.8, "learning_rate": 1.6133849616521064e-05, "loss": 1.4081, "step": 127000 }, { "epoch": 50.8, "eval_accuracy": 0.6812576306483431, "eval_loss": 1.6189851760864258, "eval_runtime": 231.411, "eval_samples_per_second": 21.607, "eval_steps_per_second": 2.701, "step": 127000 }, { "epoch": 51.0, "learning_rate": 1.6000512016384524e-05, "loss": 1.4058, "step": 127500 }, { "epoch": 51.0, "eval_accuracy": 0.6835776111558344, "eval_loss": 1.6182929277420044, "eval_runtime": 231.4214, "eval_samples_per_second": 21.606, "eval_steps_per_second": 2.701, "step": 127500 }, { "epoch": 51.2, "learning_rate": 1.5867174416247987e-05, "loss": 1.3985, "step": 128000 }, { "epoch": 51.2, "eval_accuracy": 0.6816691554670649, "eval_loss": 1.619876503944397, "eval_runtime": 232.0714, "eval_samples_per_second": 21.545, "eval_steps_per_second": 2.693, "step": 128000 }, { "epoch": 51.4, "learning_rate": 1.573383681611145e-05, "loss": 1.3967, "step": 128500 }, { "epoch": 51.4, "eval_accuracy": 0.6829394364835076, "eval_loss": 1.6167734861373901, "eval_runtime": 233.6677, "eval_samples_per_second": 21.398, "eval_steps_per_second": 2.675, "step": 128500 }, { "epoch": 51.6, "learning_rate": 1.560049921597491e-05, "loss": 1.4113, "step": 129000 }, { "epoch": 51.6, "eval_accuracy": 0.6831813209285004, "eval_loss": 1.612316608428955, "eval_runtime": 232.8487, "eval_samples_per_second": 21.473, "eval_steps_per_second": 2.684, "step": 129000 }, { "epoch": 51.8, "learning_rate": 1.5467161615838374e-05, "loss": 1.3876, "step": 129500 }, { "epoch": 51.8, "eval_accuracy": 0.6841063178736425, "eval_loss": 1.6078130006790161, "eval_runtime": 232.4944, "eval_samples_per_second": 21.506, "eval_steps_per_second": 2.688, "step": 129500 }, { "epoch": 52.0, "learning_rate": 1.5333824015701837e-05, "loss": 1.4027, "step": 130000 }, { "epoch": 52.0, "eval_accuracy": 0.6846912751008356, "eval_loss": 1.6028109788894653, "eval_runtime": 232.0149, "eval_samples_per_second": 21.55, "eval_steps_per_second": 2.694, "step": 130000 }, { "epoch": 52.2, "learning_rate": 1.5200486415565299e-05, "loss": 1.3939, "step": 130500 }, { "epoch": 52.2, "eval_accuracy": 0.6844753106931615, "eval_loss": 1.6081105470657349, "eval_runtime": 231.5632, "eval_samples_per_second": 21.592, "eval_steps_per_second": 2.699, "step": 130500 }, { "epoch": 52.4, "learning_rate": 1.506714881542876e-05, "loss": 1.391, "step": 131000 }, { "epoch": 52.4, "eval_accuracy": 0.6848551411978101, "eval_loss": 1.6033966541290283, "eval_runtime": 232.425, "eval_samples_per_second": 21.512, "eval_steps_per_second": 2.689, "step": 131000 }, { "epoch": 52.6, "learning_rate": 1.4933811215292224e-05, "loss": 1.3895, "step": 131500 }, { "epoch": 52.6, "eval_accuracy": 0.6849530771970824, "eval_loss": 1.6016286611557007, "eval_runtime": 231.6757, "eval_samples_per_second": 21.582, "eval_steps_per_second": 2.698, "step": 131500 }, { "epoch": 52.8, "learning_rate": 1.4800473615155685e-05, "loss": 1.3858, "step": 132000 }, { "epoch": 52.8, "eval_accuracy": 0.6847438399813156, "eval_loss": 1.6009891033172607, "eval_runtime": 231.6508, "eval_samples_per_second": 21.584, "eval_steps_per_second": 2.698, "step": 132000 }, { "epoch": 53.0, "learning_rate": 1.4667136015019148e-05, "loss": 1.3852, "step": 132500 }, { "epoch": 53.0, "eval_accuracy": 0.6862125510988115, "eval_loss": 1.5885919332504272, "eval_runtime": 231.798, "eval_samples_per_second": 21.571, "eval_steps_per_second": 2.696, "step": 132500 }, { "epoch": 53.2, "learning_rate": 1.453379841488261e-05, "loss": 1.3716, "step": 133000 }, { "epoch": 53.2, "eval_accuracy": 0.686239673683405, "eval_loss": 1.596373438835144, "eval_runtime": 232.5907, "eval_samples_per_second": 21.497, "eval_steps_per_second": 2.687, "step": 133000 }, { "epoch": 53.4, "learning_rate": 1.4400460814746072e-05, "loss": 1.3727, "step": 133500 }, { "epoch": 53.4, "eval_accuracy": 0.6875449053992921, "eval_loss": 1.5952101945877075, "eval_runtime": 231.5617, "eval_samples_per_second": 21.593, "eval_steps_per_second": 2.699, "step": 133500 }, { "epoch": 53.6, "learning_rate": 1.4267123214609535e-05, "loss": 1.3656, "step": 134000 }, { "epoch": 53.6, "eval_accuracy": 0.6850110757234091, "eval_loss": 1.6030853986740112, "eval_runtime": 232.6646, "eval_samples_per_second": 21.49, "eval_steps_per_second": 2.686, "step": 134000 }, { "epoch": 53.8, "learning_rate": 1.4133785614472997e-05, "loss": 1.3873, "step": 134500 }, { "epoch": 53.8, "eval_accuracy": 0.6867045238563649, "eval_loss": 1.5926910638809204, "eval_runtime": 231.8443, "eval_samples_per_second": 21.566, "eval_steps_per_second": 2.696, "step": 134500 }, { "epoch": 54.0, "learning_rate": 1.400044801433646e-05, "loss": 1.3742, "step": 135000 }, { "epoch": 54.0, "eval_accuracy": 0.6858065721683885, "eval_loss": 1.596977949142456, "eval_runtime": 231.6888, "eval_samples_per_second": 21.581, "eval_steps_per_second": 2.698, "step": 135000 }, { "epoch": 54.2, "learning_rate": 1.3867110414199921e-05, "loss": 1.3687, "step": 135500 }, { "epoch": 54.2, "eval_accuracy": 0.6863366864299514, "eval_loss": 1.5954158306121826, "eval_runtime": 232.5257, "eval_samples_per_second": 21.503, "eval_steps_per_second": 2.688, "step": 135500 }, { "epoch": 54.4, "learning_rate": 1.3733772814063383e-05, "loss": 1.359, "step": 136000 }, { "epoch": 54.4, "eval_accuracy": 0.6872515010006671, "eval_loss": 1.5854052305221558, "eval_runtime": 231.6594, "eval_samples_per_second": 21.583, "eval_steps_per_second": 2.698, "step": 136000 }, { "epoch": 54.6, "learning_rate": 1.3600435213926846e-05, "loss": 1.3696, "step": 136500 }, { "epoch": 54.6, "eval_accuracy": 0.6877612475473026, "eval_loss": 1.5902482271194458, "eval_runtime": 231.5097, "eval_samples_per_second": 21.597, "eval_steps_per_second": 2.7, "step": 136500 }, { "epoch": 54.8, "learning_rate": 1.3467097613790308e-05, "loss": 1.38, "step": 137000 }, { "epoch": 54.8, "eval_accuracy": 0.6871327341859661, "eval_loss": 1.5869797468185425, "eval_runtime": 232.8368, "eval_samples_per_second": 21.474, "eval_steps_per_second": 2.684, "step": 137000 }, { "epoch": 55.0, "learning_rate": 1.3333760013653771e-05, "loss": 1.3529, "step": 137500 }, { "epoch": 55.0, "eval_accuracy": 0.6879042788016817, "eval_loss": 1.5887551307678223, "eval_runtime": 232.8567, "eval_samples_per_second": 21.472, "eval_steps_per_second": 2.684, "step": 137500 }, { "epoch": 55.2, "learning_rate": 1.3200422413517233e-05, "loss": 1.3479, "step": 138000 }, { "epoch": 55.2, "eval_accuracy": 0.6888780042450111, "eval_loss": 1.5719605684280396, "eval_runtime": 231.8675, "eval_samples_per_second": 21.564, "eval_steps_per_second": 2.696, "step": 138000 }, { "epoch": 55.4, "learning_rate": 1.3067084813380694e-05, "loss": 1.3558, "step": 138500 }, { "epoch": 55.4, "eval_accuracy": 0.6876944245034697, "eval_loss": 1.58102285861969, "eval_runtime": 232.7287, "eval_samples_per_second": 21.484, "eval_steps_per_second": 2.686, "step": 138500 }, { "epoch": 55.6, "learning_rate": 1.2933747213244158e-05, "loss": 1.3565, "step": 139000 }, { "epoch": 55.6, "eval_accuracy": 0.6909087883376879, "eval_loss": 1.5686722993850708, "eval_runtime": 232.323, "eval_samples_per_second": 21.522, "eval_steps_per_second": 2.69, "step": 139000 }, { "epoch": 55.8, "learning_rate": 1.280040961310762e-05, "loss": 1.351, "step": 139500 }, { "epoch": 55.8, "eval_accuracy": 0.6897409911780831, "eval_loss": 1.5762348175048828, "eval_runtime": 232.2112, "eval_samples_per_second": 21.532, "eval_steps_per_second": 2.692, "step": 139500 }, { "epoch": 56.0, "learning_rate": 1.2667072012971083e-05, "loss": 1.3698, "step": 140000 }, { "epoch": 56.0, "eval_accuracy": 0.6881191410145602, "eval_loss": 1.5784891843795776, "eval_runtime": 232.3446, "eval_samples_per_second": 21.52, "eval_steps_per_second": 2.69, "step": 140000 }, { "epoch": 56.2, "learning_rate": 1.2533734412834544e-05, "loss": 1.3388, "step": 140500 }, { "epoch": 56.2, "eval_accuracy": 0.6882200986804969, "eval_loss": 1.5766631364822388, "eval_runtime": 233.4176, "eval_samples_per_second": 21.421, "eval_steps_per_second": 2.678, "step": 140500 }, { "epoch": 56.4, "learning_rate": 1.2400396812698006e-05, "loss": 1.3433, "step": 141000 }, { "epoch": 56.4, "eval_accuracy": 0.6896203829799986, "eval_loss": 1.5752308368682861, "eval_runtime": 233.3927, "eval_samples_per_second": 21.423, "eval_steps_per_second": 2.678, "step": 141000 }, { "epoch": 56.6, "learning_rate": 1.2267059212561469e-05, "loss": 1.3505, "step": 141500 }, { "epoch": 56.6, "eval_accuracy": 0.6890066251596424, "eval_loss": 1.5754320621490479, "eval_runtime": 231.7194, "eval_samples_per_second": 21.578, "eval_steps_per_second": 2.697, "step": 141500 }, { "epoch": 56.8, "learning_rate": 1.213372161242493e-05, "loss": 1.3429, "step": 142000 }, { "epoch": 56.8, "eval_accuracy": 0.6895795829707522, "eval_loss": 1.5772255659103394, "eval_runtime": 232.2785, "eval_samples_per_second": 21.526, "eval_steps_per_second": 2.691, "step": 142000 }, { "epoch": 57.0, "learning_rate": 1.2000384012288394e-05, "loss": 1.337, "step": 142500 }, { "epoch": 57.0, "eval_accuracy": 0.69004771461609, "eval_loss": 1.5732284784317017, "eval_runtime": 231.5917, "eval_samples_per_second": 21.59, "eval_steps_per_second": 2.699, "step": 142500 }, { "epoch": 57.2, "learning_rate": 1.1867046412151855e-05, "loss": 1.3398, "step": 143000 }, { "epoch": 57.2, "eval_accuracy": 0.690381858161266, "eval_loss": 1.5680711269378662, "eval_runtime": 232.6848, "eval_samples_per_second": 21.488, "eval_steps_per_second": 2.686, "step": 143000 }, { "epoch": 57.4, "learning_rate": 1.1733708812015319e-05, "loss": 1.3334, "step": 143500 }, { "epoch": 57.4, "eval_accuracy": 0.690041861053248, "eval_loss": 1.5696121454238892, "eval_runtime": 231.7038, "eval_samples_per_second": 21.579, "eval_steps_per_second": 2.697, "step": 143500 }, { "epoch": 57.6, "learning_rate": 1.1600371211878782e-05, "loss": 1.3384, "step": 144000 }, { "epoch": 57.6, "eval_accuracy": 0.6907987077717064, "eval_loss": 1.5674421787261963, "eval_runtime": 231.9577, "eval_samples_per_second": 21.556, "eval_steps_per_second": 2.694, "step": 144000 }, { "epoch": 57.8, "learning_rate": 1.1467033611742244e-05, "loss": 1.33, "step": 144500 }, { "epoch": 57.8, "eval_accuracy": 0.6915707537755306, "eval_loss": 1.5592304468154907, "eval_runtime": 232.0926, "eval_samples_per_second": 21.543, "eval_steps_per_second": 2.693, "step": 144500 }, { "epoch": 58.0, "learning_rate": 1.1333696011605705e-05, "loss": 1.327, "step": 145000 }, { "epoch": 58.0, "eval_accuracy": 0.6924366115221547, "eval_loss": 1.5497514009475708, "eval_runtime": 232.0721, "eval_samples_per_second": 21.545, "eval_steps_per_second": 2.693, "step": 145000 }, { "epoch": 58.2, "learning_rate": 1.1200358411469169e-05, "loss": 1.3234, "step": 145500 }, { "epoch": 58.2, "eval_accuracy": 0.6909872386858108, "eval_loss": 1.5625743865966797, "eval_runtime": 231.8743, "eval_samples_per_second": 21.563, "eval_steps_per_second": 2.695, "step": 145500 }, { "epoch": 58.4, "learning_rate": 1.106702081133263e-05, "loss": 1.3266, "step": 146000 }, { "epoch": 58.4, "eval_accuracy": 0.6892814371257485, "eval_loss": 1.5743225812911987, "eval_runtime": 232.1904, "eval_samples_per_second": 21.534, "eval_steps_per_second": 2.692, "step": 146000 }, { "epoch": 58.6, "learning_rate": 1.0933683211196093e-05, "loss": 1.3152, "step": 146500 }, { "epoch": 58.6, "eval_accuracy": 0.6911878441987919, "eval_loss": 1.567984700202942, "eval_runtime": 232.1539, "eval_samples_per_second": 21.537, "eval_steps_per_second": 2.692, "step": 146500 }, { "epoch": 58.8, "learning_rate": 1.0800345611059555e-05, "loss": 1.3279, "step": 147000 }, { "epoch": 58.8, "eval_accuracy": 0.6919366138448707, "eval_loss": 1.558101773262024, "eval_runtime": 231.3447, "eval_samples_per_second": 21.613, "eval_steps_per_second": 2.702, "step": 147000 }, { "epoch": 59.0, "learning_rate": 1.0667008010923017e-05, "loss": 1.3172, "step": 147500 }, { "epoch": 59.0, "eval_accuracy": 0.691684386719777, "eval_loss": 1.5645062923431396, "eval_runtime": 232.1387, "eval_samples_per_second": 21.539, "eval_steps_per_second": 2.692, "step": 147500 }, { "epoch": 59.2, "learning_rate": 1.053367041078648e-05, "loss": 1.3073, "step": 148000 }, { "epoch": 59.2, "eval_accuracy": 0.6923759867524124, "eval_loss": 1.5578668117523193, "eval_runtime": 232.0325, "eval_samples_per_second": 21.549, "eval_steps_per_second": 2.694, "step": 148000 }, { "epoch": 59.4, "learning_rate": 1.0400332810649941e-05, "loss": 1.307, "step": 148500 }, { "epoch": 59.4, "eval_accuracy": 0.6938674761086743, "eval_loss": 1.546762466430664, "eval_runtime": 232.2513, "eval_samples_per_second": 21.528, "eval_steps_per_second": 2.691, "step": 148500 }, { "epoch": 59.6, "learning_rate": 1.0266995210513405e-05, "loss": 1.3164, "step": 149000 }, { "epoch": 59.6, "eval_accuracy": 0.6929686873814422, "eval_loss": 1.5518994331359863, "eval_runtime": 231.1014, "eval_samples_per_second": 21.636, "eval_steps_per_second": 2.704, "step": 149000 }, { "epoch": 59.8, "learning_rate": 1.0133657610376866e-05, "loss": 1.3037, "step": 149500 }, { "epoch": 59.8, "eval_accuracy": 0.6916677464475209, "eval_loss": 1.5627799034118652, "eval_runtime": 232.3485, "eval_samples_per_second": 21.519, "eval_steps_per_second": 2.69, "step": 149500 }, { "epoch": 60.0, "learning_rate": 1.0000320010240328e-05, "loss": 1.3171, "step": 150000 }, { "epoch": 60.0, "eval_accuracy": 0.6933963519284755, "eval_loss": 1.5489420890808105, "eval_runtime": 231.4333, "eval_samples_per_second": 21.604, "eval_steps_per_second": 2.701, "step": 150000 }, { "epoch": 60.2, "learning_rate": 9.866982410103791e-06, "loss": 1.3035, "step": 150500 }, { "epoch": 60.2, "eval_accuracy": 0.6930868044367229, "eval_loss": 1.5499061346054077, "eval_runtime": 232.1383, "eval_samples_per_second": 21.539, "eval_steps_per_second": 2.692, "step": 150500 }, { "epoch": 60.4, "learning_rate": 9.733644809967253e-06, "loss": 1.3109, "step": 151000 }, { "epoch": 60.4, "eval_accuracy": 0.6922226734934385, "eval_loss": 1.5608468055725098, "eval_runtime": 232.2684, "eval_samples_per_second": 21.527, "eval_steps_per_second": 2.691, "step": 151000 }, { "epoch": 60.6, "learning_rate": 9.600307209830716e-06, "loss": 1.304, "step": 151500 }, { "epoch": 60.6, "eval_accuracy": 0.6914924071448069, "eval_loss": 1.5611767768859863, "eval_runtime": 232.4752, "eval_samples_per_second": 21.508, "eval_steps_per_second": 2.688, "step": 151500 }, { "epoch": 60.8, "learning_rate": 9.466969609694178e-06, "loss": 1.3104, "step": 152000 }, { "epoch": 60.8, "eval_accuracy": 0.6932900396672637, "eval_loss": 1.551135540008545, "eval_runtime": 231.5047, "eval_samples_per_second": 21.598, "eval_steps_per_second": 2.7, "step": 152000 }, { "epoch": 61.0, "learning_rate": 9.33363200955764e-06, "loss": 1.3071, "step": 152500 }, { "epoch": 61.0, "eval_accuracy": 0.693459881852489, "eval_loss": 1.5469086170196533, "eval_runtime": 233.8113, "eval_samples_per_second": 21.385, "eval_steps_per_second": 2.673, "step": 152500 }, { "epoch": 61.2, "learning_rate": 9.200294409421103e-06, "loss": 1.2935, "step": 153000 }, { "epoch": 61.2, "eval_accuracy": 0.6941589679567207, "eval_loss": 1.5484933853149414, "eval_runtime": 232.5867, "eval_samples_per_second": 21.497, "eval_steps_per_second": 2.687, "step": 153000 }, { "epoch": 61.4, "learning_rate": 9.066956809284564e-06, "loss": 1.2866, "step": 153500 }, { "epoch": 61.4, "eval_accuracy": 0.6940421521647191, "eval_loss": 1.5462923049926758, "eval_runtime": 232.5951, "eval_samples_per_second": 21.497, "eval_steps_per_second": 2.687, "step": 153500 }, { "epoch": 61.6, "learning_rate": 8.933619209148027e-06, "loss": 1.2926, "step": 154000 }, { "epoch": 61.6, "eval_accuracy": 0.6956126566432729, "eval_loss": 1.540623426437378, "eval_runtime": 231.888, "eval_samples_per_second": 21.562, "eval_steps_per_second": 2.695, "step": 154000 }, { "epoch": 61.8, "learning_rate": 8.800281609011489e-06, "loss": 1.3029, "step": 154500 }, { "epoch": 61.8, "eval_accuracy": 0.6944681219450475, "eval_loss": 1.5423588752746582, "eval_runtime": 232.5594, "eval_samples_per_second": 21.5, "eval_steps_per_second": 2.687, "step": 154500 }, { "epoch": 62.0, "learning_rate": 8.66694400887495e-06, "loss": 1.2921, "step": 155000 }, { "epoch": 62.0, "eval_accuracy": 0.6944209552091726, "eval_loss": 1.5446220636367798, "eval_runtime": 232.6244, "eval_samples_per_second": 21.494, "eval_steps_per_second": 2.687, "step": 155000 }, { "epoch": 62.2, "learning_rate": 8.533606408738414e-06, "loss": 1.2765, "step": 155500 }, { "epoch": 62.2, "eval_accuracy": 0.6952571916908579, "eval_loss": 1.5396584272384644, "eval_runtime": 231.3433, "eval_samples_per_second": 21.613, "eval_steps_per_second": 2.702, "step": 155500 }, { "epoch": 62.4, "learning_rate": 8.400268808601876e-06, "loss": 1.275, "step": 156000 }, { "epoch": 62.4, "eval_accuracy": 0.6944803801909601, "eval_loss": 1.5469422340393066, "eval_runtime": 231.6508, "eval_samples_per_second": 21.584, "eval_steps_per_second": 2.698, "step": 156000 }, { "epoch": 62.6, "learning_rate": 8.266931208465339e-06, "loss": 1.2909, "step": 156500 }, { "epoch": 62.6, "eval_accuracy": 0.69449131348161, "eval_loss": 1.5426616668701172, "eval_runtime": 232.6399, "eval_samples_per_second": 21.492, "eval_steps_per_second": 2.687, "step": 156500 }, { "epoch": 62.8, "learning_rate": 8.1335936083288e-06, "loss": 1.2869, "step": 157000 }, { "epoch": 62.8, "eval_accuracy": 0.6949301108724598, "eval_loss": 1.5387953519821167, "eval_runtime": 231.8255, "eval_samples_per_second": 21.568, "eval_steps_per_second": 2.696, "step": 157000 }, { "epoch": 63.0, "learning_rate": 8.000256008192262e-06, "loss": 1.2883, "step": 157500 }, { "epoch": 63.0, "eval_accuracy": 0.694801592969147, "eval_loss": 1.5375311374664307, "eval_runtime": 231.7315, "eval_samples_per_second": 21.577, "eval_steps_per_second": 2.697, "step": 157500 }, { "epoch": 63.2, "learning_rate": 7.866918408055725e-06, "loss": 1.2673, "step": 158000 }, { "epoch": 63.2, "eval_accuracy": 0.6947923905923195, "eval_loss": 1.5423495769500732, "eval_runtime": 231.5823, "eval_samples_per_second": 21.591, "eval_steps_per_second": 2.699, "step": 158000 }, { "epoch": 63.4, "learning_rate": 7.733580807919187e-06, "loss": 1.2754, "step": 158500 }, { "epoch": 63.4, "eval_accuracy": 0.6957325609622272, "eval_loss": 1.5359729528427124, "eval_runtime": 231.7033, "eval_samples_per_second": 21.579, "eval_steps_per_second": 2.697, "step": 158500 }, { "epoch": 63.6, "learning_rate": 7.600243207782649e-06, "loss": 1.2772, "step": 159000 }, { "epoch": 63.6, "eval_accuracy": 0.6952351743013445, "eval_loss": 1.5331131219863892, "eval_runtime": 232.042, "eval_samples_per_second": 21.548, "eval_steps_per_second": 2.693, "step": 159000 }, { "epoch": 63.8, "learning_rate": 7.466905607646112e-06, "loss": 1.283, "step": 159500 }, { "epoch": 63.8, "eval_accuracy": 0.6955319177297024, "eval_loss": 1.5353832244873047, "eval_runtime": 231.7244, "eval_samples_per_second": 21.577, "eval_steps_per_second": 2.697, "step": 159500 }, { "epoch": 64.0, "learning_rate": 7.333568007509574e-06, "loss": 1.2737, "step": 160000 }, { "epoch": 64.0, "eval_accuracy": 0.6960759535765355, "eval_loss": 1.5387910604476929, "eval_runtime": 232.0979, "eval_samples_per_second": 21.543, "eval_steps_per_second": 2.693, "step": 160000 }, { "epoch": 64.2, "learning_rate": 7.200230407373036e-06, "loss": 1.2681, "step": 160500 }, { "epoch": 64.2, "eval_accuracy": 0.695161846872553, "eval_loss": 1.5381580591201782, "eval_runtime": 231.5418, "eval_samples_per_second": 21.594, "eval_steps_per_second": 2.699, "step": 160500 }, { "epoch": 64.4, "learning_rate": 7.066892807236498e-06, "loss": 1.2769, "step": 161000 }, { "epoch": 64.4, "eval_accuracy": 0.6958243929135146, "eval_loss": 1.535032868385315, "eval_runtime": 231.507, "eval_samples_per_second": 21.598, "eval_steps_per_second": 2.7, "step": 161000 }, { "epoch": 64.6, "learning_rate": 6.933555207099961e-06, "loss": 1.2668, "step": 161500 }, { "epoch": 64.6, "eval_accuracy": 0.6956000439629246, "eval_loss": 1.534529447555542, "eval_runtime": 231.4208, "eval_samples_per_second": 21.606, "eval_steps_per_second": 2.701, "step": 161500 }, { "epoch": 64.8, "learning_rate": 6.800217606963423e-06, "loss": 1.2795, "step": 162000 }, { "epoch": 64.8, "eval_accuracy": 0.6983860770024292, "eval_loss": 1.519789695739746, "eval_runtime": 232.0063, "eval_samples_per_second": 21.551, "eval_steps_per_second": 2.694, "step": 162000 }, { "epoch": 65.0, "learning_rate": 6.666880006826886e-06, "loss": 1.2632, "step": 162500 }, { "epoch": 65.0, "eval_accuracy": 0.696454274062425, "eval_loss": 1.5323538780212402, "eval_runtime": 231.6081, "eval_samples_per_second": 21.588, "eval_steps_per_second": 2.699, "step": 162500 }, { "epoch": 65.2, "learning_rate": 6.533542406690347e-06, "loss": 1.2646, "step": 163000 }, { "epoch": 65.2, "eval_accuracy": 0.6962969375320464, "eval_loss": 1.5420233011245728, "eval_runtime": 231.6221, "eval_samples_per_second": 21.587, "eval_steps_per_second": 2.698, "step": 163000 }, { "epoch": 65.4, "learning_rate": 6.40020480655381e-06, "loss": 1.2739, "step": 163500 }, { "epoch": 65.4, "eval_accuracy": 0.6972270479754109, "eval_loss": 1.5307587385177612, "eval_runtime": 231.6515, "eval_samples_per_second": 21.584, "eval_steps_per_second": 2.698, "step": 163500 }, { "epoch": 65.6, "learning_rate": 6.266867206417272e-06, "loss": 1.2667, "step": 164000 }, { "epoch": 65.6, "eval_accuracy": 0.6978264999683358, "eval_loss": 1.5220121145248413, "eval_runtime": 231.7833, "eval_samples_per_second": 21.572, "eval_steps_per_second": 2.696, "step": 164000 }, { "epoch": 65.8, "learning_rate": 6.1335296062807345e-06, "loss": 1.26, "step": 164500 }, { "epoch": 65.8, "eval_accuracy": 0.6961458718130477, "eval_loss": 1.5283021926879883, "eval_runtime": 231.7518, "eval_samples_per_second": 21.575, "eval_steps_per_second": 2.697, "step": 164500 }, { "epoch": 66.0, "learning_rate": 6.000192006144197e-06, "loss": 1.2714, "step": 165000 }, { "epoch": 66.0, "eval_accuracy": 0.6977360596277253, "eval_loss": 1.5235345363616943, "eval_runtime": 231.6521, "eval_samples_per_second": 21.584, "eval_steps_per_second": 2.698, "step": 165000 }, { "epoch": 66.2, "learning_rate": 5.866854406007659e-06, "loss": 1.2652, "step": 165500 }, { "epoch": 66.2, "eval_accuracy": 0.6973007712082262, "eval_loss": 1.5269626379013062, "eval_runtime": 231.9589, "eval_samples_per_second": 21.556, "eval_steps_per_second": 2.694, "step": 165500 }, { "epoch": 66.4, "learning_rate": 5.733516805871122e-06, "loss": 1.2554, "step": 166000 }, { "epoch": 66.4, "eval_accuracy": 0.6958699662616962, "eval_loss": 1.5355974435806274, "eval_runtime": 231.9704, "eval_samples_per_second": 21.554, "eval_steps_per_second": 2.694, "step": 166000 }, { "epoch": 66.6, "learning_rate": 5.600179205734584e-06, "loss": 1.2666, "step": 166500 }, { "epoch": 66.6, "eval_accuracy": 0.6968431335413323, "eval_loss": 1.523085355758667, "eval_runtime": 231.5172, "eval_samples_per_second": 21.597, "eval_steps_per_second": 2.7, "step": 166500 }, { "epoch": 66.8, "learning_rate": 5.466841605598047e-06, "loss": 1.2634, "step": 167000 }, { "epoch": 66.8, "eval_accuracy": 0.6994266502566565, "eval_loss": 1.5169812440872192, "eval_runtime": 231.7934, "eval_samples_per_second": 21.571, "eval_steps_per_second": 2.696, "step": 167000 }, { "epoch": 67.0, "learning_rate": 5.333504005461508e-06, "loss": 1.2485, "step": 167500 }, { "epoch": 67.0, "eval_accuracy": 0.6988332070858084, "eval_loss": 1.5204789638519287, "eval_runtime": 232.7606, "eval_samples_per_second": 21.481, "eval_steps_per_second": 2.685, "step": 167500 }, { "epoch": 67.2, "learning_rate": 5.200166405324971e-06, "loss": 1.2397, "step": 168000 }, { "epoch": 67.2, "eval_accuracy": 0.6982430767582624, "eval_loss": 1.5262999534606934, "eval_runtime": 232.7735, "eval_samples_per_second": 21.48, "eval_steps_per_second": 2.685, "step": 168000 }, { "epoch": 67.4, "learning_rate": 5.066828805188433e-06, "loss": 1.2416, "step": 168500 }, { "epoch": 67.4, "eval_accuracy": 0.7000725051705876, "eval_loss": 1.5132619142532349, "eval_runtime": 232.8101, "eval_samples_per_second": 21.477, "eval_steps_per_second": 2.685, "step": 168500 }, { "epoch": 67.6, "learning_rate": 4.933491205051896e-06, "loss": 1.2615, "step": 169000 }, { "epoch": 67.6, "eval_accuracy": 0.6990387592769552, "eval_loss": 1.514991283416748, "eval_runtime": 233.0379, "eval_samples_per_second": 21.456, "eval_steps_per_second": 2.682, "step": 169000 }, { "epoch": 67.8, "learning_rate": 4.800153604915358e-06, "loss": 1.254, "step": 169500 }, { "epoch": 67.8, "eval_accuracy": 0.6989797619444537, "eval_loss": 1.5213098526000977, "eval_runtime": 231.7854, "eval_samples_per_second": 21.572, "eval_steps_per_second": 2.696, "step": 169500 }, { "epoch": 68.0, "learning_rate": 4.66681600477882e-06, "loss": 1.2463, "step": 170000 }, { "epoch": 68.0, "eval_accuracy": 0.6994830576307364, "eval_loss": 1.5156911611557007, "eval_runtime": 231.8883, "eval_samples_per_second": 21.562, "eval_steps_per_second": 2.695, "step": 170000 }, { "epoch": 68.2, "learning_rate": 4.533478404642282e-06, "loss": 1.2412, "step": 170500 }, { "epoch": 68.2, "eval_accuracy": 0.7000436351039415, "eval_loss": 1.508183479309082, "eval_runtime": 231.4591, "eval_samples_per_second": 21.602, "eval_steps_per_second": 2.7, "step": 170500 }, { "epoch": 68.4, "learning_rate": 4.4001408045057445e-06, "loss": 1.248, "step": 171000 }, { "epoch": 68.4, "eval_accuracy": 0.6992357860200856, "eval_loss": 1.516764760017395, "eval_runtime": 231.362, "eval_samples_per_second": 21.611, "eval_steps_per_second": 2.701, "step": 171000 }, { "epoch": 68.6, "learning_rate": 4.266803204369207e-06, "loss": 1.2468, "step": 171500 }, { "epoch": 68.6, "eval_accuracy": 0.6990639521278373, "eval_loss": 1.5186537504196167, "eval_runtime": 231.4765, "eval_samples_per_second": 21.6, "eval_steps_per_second": 2.7, "step": 171500 }, { "epoch": 68.8, "learning_rate": 4.1334656042326694e-06, "loss": 1.246, "step": 172000 }, { "epoch": 68.8, "eval_accuracy": 0.7000655096618438, "eval_loss": 1.50938880443573, "eval_runtime": 231.6881, "eval_samples_per_second": 21.581, "eval_steps_per_second": 2.698, "step": 172000 }, { "epoch": 69.0, "learning_rate": 4.000128004096131e-06, "loss": 1.2443, "step": 172500 }, { "epoch": 69.0, "eval_accuracy": 0.698370463354622, "eval_loss": 1.5186454057693481, "eval_runtime": 231.6998, "eval_samples_per_second": 21.58, "eval_steps_per_second": 2.697, "step": 172500 }, { "epoch": 69.2, "learning_rate": 3.8667904039595935e-06, "loss": 1.2451, "step": 173000 }, { "epoch": 69.2, "eval_accuracy": 0.6988172430355427, "eval_loss": 1.5156738758087158, "eval_runtime": 231.6043, "eval_samples_per_second": 21.589, "eval_steps_per_second": 2.699, "step": 173000 }, { "epoch": 69.4, "learning_rate": 3.733452803823056e-06, "loss": 1.2375, "step": 173500 }, { "epoch": 69.4, "eval_accuracy": 0.7002490046471991, "eval_loss": 1.5102007389068604, "eval_runtime": 231.5872, "eval_samples_per_second": 21.59, "eval_steps_per_second": 2.699, "step": 173500 }, { "epoch": 69.6, "learning_rate": 3.600115203686518e-06, "loss": 1.2441, "step": 174000 }, { "epoch": 69.6, "eval_accuracy": 0.7000056553747683, "eval_loss": 1.5142953395843506, "eval_runtime": 231.917, "eval_samples_per_second": 21.559, "eval_steps_per_second": 2.695, "step": 174000 }, { "epoch": 69.8, "learning_rate": 3.4667776035499804e-06, "loss": 1.2335, "step": 174500 }, { "epoch": 69.8, "eval_accuracy": 0.6984952065719414, "eval_loss": 1.5172849893569946, "eval_runtime": 231.8749, "eval_samples_per_second": 21.563, "eval_steps_per_second": 2.695, "step": 174500 }, { "epoch": 70.0, "learning_rate": 3.333440003413443e-06, "loss": 1.2361, "step": 175000 }, { "epoch": 70.0, "eval_accuracy": 0.7000938923367872, "eval_loss": 1.5102351903915405, "eval_runtime": 231.661, "eval_samples_per_second": 21.583, "eval_steps_per_second": 2.698, "step": 175000 }, { "epoch": 70.2, "learning_rate": 3.200102403276905e-06, "loss": 1.23, "step": 175500 }, { "epoch": 70.2, "eval_accuracy": 0.6997497747973176, "eval_loss": 1.515488624572754, "eval_runtime": 231.3654, "eval_samples_per_second": 21.611, "eval_steps_per_second": 2.701, "step": 175500 }, { "epoch": 70.4, "learning_rate": 3.0667648031403673e-06, "loss": 1.2401, "step": 176000 }, { "epoch": 70.4, "eval_accuracy": 0.7005253823912052, "eval_loss": 1.5026744604110718, "eval_runtime": 231.8606, "eval_samples_per_second": 21.565, "eval_steps_per_second": 2.696, "step": 176000 }, { "epoch": 70.6, "learning_rate": 2.9334272030038297e-06, "loss": 1.2346, "step": 176500 }, { "epoch": 70.6, "eval_accuracy": 0.699492636060484, "eval_loss": 1.5123237371444702, "eval_runtime": 231.4499, "eval_samples_per_second": 21.603, "eval_steps_per_second": 2.7, "step": 176500 }, { "epoch": 70.8, "learning_rate": 2.800089602867292e-06, "loss": 1.2306, "step": 177000 }, { "epoch": 70.8, "eval_accuracy": 0.6984030423306806, "eval_loss": 1.515058994293213, "eval_runtime": 231.9805, "eval_samples_per_second": 21.554, "eval_steps_per_second": 2.694, "step": 177000 }, { "epoch": 71.0, "learning_rate": 2.666752002730754e-06, "loss": 1.2333, "step": 177500 }, { "epoch": 71.0, "eval_accuracy": 0.6999873478411421, "eval_loss": 1.512516736984253, "eval_runtime": 232.103, "eval_samples_per_second": 21.542, "eval_steps_per_second": 2.693, "step": 177500 }, { "epoch": 71.2, "learning_rate": 2.5334144025942166e-06, "loss": 1.2248, "step": 178000 }, { "epoch": 71.2, "eval_accuracy": 0.6991863430659604, "eval_loss": 1.5198755264282227, "eval_runtime": 231.8985, "eval_samples_per_second": 21.561, "eval_steps_per_second": 2.695, "step": 178000 }, { "epoch": 71.4, "learning_rate": 2.400076802457679e-06, "loss": 1.2385, "step": 178500 }, { "epoch": 71.4, "eval_accuracy": 0.7000136463336296, "eval_loss": 1.510788917541504, "eval_runtime": 231.9773, "eval_samples_per_second": 21.554, "eval_steps_per_second": 2.694, "step": 178500 }, { "epoch": 71.6, "learning_rate": 2.266739202321141e-06, "loss": 1.2278, "step": 179000 }, { "epoch": 71.6, "eval_accuracy": 0.7000376979559702, "eval_loss": 1.5091618299484253, "eval_runtime": 231.9158, "eval_samples_per_second": 21.56, "eval_steps_per_second": 2.695, "step": 179000 }, { "epoch": 71.8, "learning_rate": 2.1334016021846035e-06, "loss": 1.2278, "step": 179500 }, { "epoch": 71.8, "eval_accuracy": 0.6989206577075939, "eval_loss": 1.5162503719329834, "eval_runtime": 232.1036, "eval_samples_per_second": 21.542, "eval_steps_per_second": 2.693, "step": 179500 }, { "epoch": 72.0, "learning_rate": 2.0000640020480655e-06, "loss": 1.2242, "step": 180000 }, { "epoch": 72.0, "eval_accuracy": 0.7009711915464922, "eval_loss": 1.5056333541870117, "eval_runtime": 231.7421, "eval_samples_per_second": 21.576, "eval_steps_per_second": 2.697, "step": 180000 }, { "epoch": 72.2, "learning_rate": 1.866726401911528e-06, "loss": 1.2208, "step": 180500 }, { "epoch": 72.2, "eval_accuracy": 0.7022552583268642, "eval_loss": 1.4967743158340454, "eval_runtime": 232.2745, "eval_samples_per_second": 21.526, "eval_steps_per_second": 2.691, "step": 180500 }, { "epoch": 72.4, "learning_rate": 1.7333888017749902e-06, "loss": 1.2216, "step": 181000 }, { "epoch": 72.4, "eval_accuracy": 0.700648538703923, "eval_loss": 1.5097259283065796, "eval_runtime": 232.0858, "eval_samples_per_second": 21.544, "eval_steps_per_second": 2.693, "step": 181000 }, { "epoch": 72.6, "learning_rate": 1.6000512016384524e-06, "loss": 1.2271, "step": 181500 }, { "epoch": 72.6, "eval_accuracy": 0.7012992208675475, "eval_loss": 1.4988183975219727, "eval_runtime": 232.0081, "eval_samples_per_second": 21.551, "eval_steps_per_second": 2.694, "step": 181500 }, { "epoch": 72.8, "learning_rate": 1.4667136015019148e-06, "loss": 1.2302, "step": 182000 }, { "epoch": 72.8, "eval_accuracy": 0.6996726840571242, "eval_loss": 1.514102578163147, "eval_runtime": 231.967, "eval_samples_per_second": 21.555, "eval_steps_per_second": 2.694, "step": 182000 }, { "epoch": 73.0, "learning_rate": 1.333376001365377e-06, "loss": 1.2268, "step": 182500 }, { "epoch": 73.0, "eval_accuracy": 0.7015639472509312, "eval_loss": 1.4995648860931396, "eval_runtime": 232.3566, "eval_samples_per_second": 21.519, "eval_steps_per_second": 2.69, "step": 182500 }, { "epoch": 73.2, "learning_rate": 1.2000384012288395e-06, "loss": 1.2258, "step": 183000 }, { "epoch": 73.2, "eval_accuracy": 0.7007709925697089, "eval_loss": 1.501629114151001, "eval_runtime": 231.7525, "eval_samples_per_second": 21.575, "eval_steps_per_second": 2.697, "step": 183000 }, { "epoch": 73.4, "learning_rate": 1.0667008010923017e-06, "loss": 1.2244, "step": 183500 }, { "epoch": 73.4, "eval_accuracy": 0.7012282262174829, "eval_loss": 1.5031931400299072, "eval_runtime": 231.5977, "eval_samples_per_second": 21.589, "eval_steps_per_second": 2.699, "step": 183500 }, { "epoch": 73.6, "learning_rate": 9.33363200955764e-07, "loss": 1.2117, "step": 184000 }, { "epoch": 73.6, "eval_accuracy": 0.7002899246996493, "eval_loss": 1.5096535682678223, "eval_runtime": 231.7141, "eval_samples_per_second": 21.578, "eval_steps_per_second": 2.697, "step": 184000 }, { "epoch": 73.8, "learning_rate": 8.000256008192262e-07, "loss": 1.2279, "step": 184500 }, { "epoch": 73.8, "eval_accuracy": 0.7011673514817666, "eval_loss": 1.5058051347732544, "eval_runtime": 231.9567, "eval_samples_per_second": 21.556, "eval_steps_per_second": 2.694, "step": 184500 }, { "epoch": 74.0, "learning_rate": 6.666880006826885e-07, "loss": 1.2274, "step": 185000 }, { "epoch": 74.0, "eval_accuracy": 0.7014659184827526, "eval_loss": 1.5029548406600952, "eval_runtime": 233.2086, "eval_samples_per_second": 21.44, "eval_steps_per_second": 2.68, "step": 185000 }, { "epoch": 74.2, "learning_rate": 5.333504005461509e-07, "loss": 1.2117, "step": 185500 }, { "epoch": 74.2, "eval_accuracy": 0.700834731208563, "eval_loss": 1.5085574388504028, "eval_runtime": 232.873, "eval_samples_per_second": 21.471, "eval_steps_per_second": 2.684, "step": 185500 }, { "epoch": 74.4, "learning_rate": 4.000128004096131e-07, "loss": 1.2223, "step": 186000 }, { "epoch": 74.4, "eval_accuracy": 0.701793034494251, "eval_loss": 1.4998304843902588, "eval_runtime": 233.0397, "eval_samples_per_second": 21.456, "eval_steps_per_second": 2.682, "step": 186000 }, { "epoch": 74.6, "learning_rate": 2.6667520027307544e-07, "loss": 1.227, "step": 186500 }, { "epoch": 74.6, "eval_accuracy": 0.7014006218321054, "eval_loss": 1.5059279203414917, "eval_runtime": 231.9768, "eval_samples_per_second": 21.554, "eval_steps_per_second": 2.694, "step": 186500 }, { "epoch": 74.8, "learning_rate": 1.3333760013653772e-07, "loss": 1.2168, "step": 187000 }, { "epoch": 74.8, "eval_accuracy": 0.7011164815850444, "eval_loss": 1.4984267950057983, "eval_runtime": 231.8422, "eval_samples_per_second": 21.566, "eval_steps_per_second": 2.696, "step": 187000 }, { "epoch": 75.0, "learning_rate": 0.0, "loss": 1.2093, "step": 187500 }, { "epoch": 75.0, "eval_accuracy": 0.7017719448166707, "eval_loss": 1.5077435970306396, "eval_runtime": 231.8651, "eval_samples_per_second": 21.564, "eval_steps_per_second": 2.696, "step": 187500 }, { "epoch": 75.0, "step": 187500, "total_flos": 3.879632807909937e+17, "train_loss": 2.3812346901041668, "train_runtime": 293209.3669, "train_samples_per_second": 5.116, "train_steps_per_second": 0.639 } ], "max_steps": 187500, "num_train_epochs": 75, "total_flos": 3.879632807909937e+17, "trial_name": null, "trial_params": null }