|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 75.0, |
|
"global_step": 187500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.98682624510651e-05, |
|
"loss": 7.2061, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.11605996287252233, |
|
"eval_loss": 6.695810794830322, |
|
"eval_runtime": 233.7076, |
|
"eval_samples_per_second": 21.394, |
|
"eval_steps_per_second": 2.674, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.973492485092856e-05, |
|
"loss": 6.6051, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.13034278742499958, |
|
"eval_loss": 6.552731037139893, |
|
"eval_runtime": 230.8673, |
|
"eval_samples_per_second": 21.657, |
|
"eval_steps_per_second": 2.707, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.960158725079202e-05, |
|
"loss": 6.5016, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_accuracy": 0.13822830162292474, |
|
"eval_loss": 6.471951007843018, |
|
"eval_runtime": 231.166, |
|
"eval_samples_per_second": 21.629, |
|
"eval_steps_per_second": 2.704, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.946824965065549e-05, |
|
"loss": 6.4189, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_accuracy": 0.14243438880927284, |
|
"eval_loss": 6.3796162605285645, |
|
"eval_runtime": 231.3451, |
|
"eval_samples_per_second": 21.613, |
|
"eval_steps_per_second": 2.702, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.933491205051895e-05, |
|
"loss": 6.3648, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.14476689484585437, |
|
"eval_loss": 6.3224334716796875, |
|
"eval_runtime": 231.3236, |
|
"eval_samples_per_second": 21.615, |
|
"eval_steps_per_second": 2.702, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 4.9201574450382416e-05, |
|
"loss": 6.2787, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_accuracy": 0.14111015350213132, |
|
"eval_loss": 6.2787089347839355, |
|
"eval_runtime": 230.6423, |
|
"eval_samples_per_second": 21.679, |
|
"eval_steps_per_second": 2.71, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 4.9068236850245876e-05, |
|
"loss": 6.2583, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_accuracy": 0.1445680523766434, |
|
"eval_loss": 6.2466654777526855, |
|
"eval_runtime": 230.7323, |
|
"eval_samples_per_second": 21.67, |
|
"eval_steps_per_second": 2.709, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.893489925010934e-05, |
|
"loss": 6.2211, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_accuracy": 0.14751256197797077, |
|
"eval_loss": 6.216161251068115, |
|
"eval_runtime": 230.8211, |
|
"eval_samples_per_second": 21.662, |
|
"eval_steps_per_second": 2.708, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 4.8801561649972796e-05, |
|
"loss": 6.1897, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_accuracy": 0.14658194493087173, |
|
"eval_loss": 6.193264961242676, |
|
"eval_runtime": 231.0835, |
|
"eval_samples_per_second": 21.637, |
|
"eval_steps_per_second": 2.705, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.866822404983626e-05, |
|
"loss": 6.1625, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.14833689917741957, |
|
"eval_loss": 6.170398235321045, |
|
"eval_runtime": 231.1062, |
|
"eval_samples_per_second": 21.635, |
|
"eval_steps_per_second": 2.704, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 4.853488644969972e-05, |
|
"loss": 6.1412, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_accuracy": 0.1484326081453751, |
|
"eval_loss": 6.152679920196533, |
|
"eval_runtime": 230.9877, |
|
"eval_samples_per_second": 21.646, |
|
"eval_steps_per_second": 2.706, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 4.840154884956319e-05, |
|
"loss": 6.1062, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_accuracy": 0.14922723927442746, |
|
"eval_loss": 6.129592418670654, |
|
"eval_runtime": 230.7218, |
|
"eval_samples_per_second": 21.671, |
|
"eval_steps_per_second": 2.709, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 4.826821124942665e-05, |
|
"loss": 6.1003, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"eval_accuracy": 0.14833042176191666, |
|
"eval_loss": 6.127459526062012, |
|
"eval_runtime": 231.3952, |
|
"eval_samples_per_second": 21.608, |
|
"eval_steps_per_second": 2.701, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 4.8134873649290116e-05, |
|
"loss": 6.0944, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_accuracy": 0.14956795181846647, |
|
"eval_loss": 6.098344326019287, |
|
"eval_runtime": 230.8927, |
|
"eval_samples_per_second": 21.655, |
|
"eval_steps_per_second": 2.707, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.8001536049153576e-05, |
|
"loss": 6.077, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.1509302093387027, |
|
"eval_loss": 6.0839033126831055, |
|
"eval_runtime": 230.9459, |
|
"eval_samples_per_second": 21.65, |
|
"eval_steps_per_second": 2.706, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 4.7868198449017036e-05, |
|
"loss": 6.0419, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_accuracy": 0.15035993408116505, |
|
"eval_loss": 6.074743270874023, |
|
"eval_runtime": 230.7806, |
|
"eval_samples_per_second": 21.666, |
|
"eval_steps_per_second": 2.708, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 4.7734860848880495e-05, |
|
"loss": 6.0264, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"eval_accuracy": 0.15058128131222967, |
|
"eval_loss": 6.07292366027832, |
|
"eval_runtime": 231.2041, |
|
"eval_samples_per_second": 21.626, |
|
"eval_steps_per_second": 2.703, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 4.760152324874396e-05, |
|
"loss": 6.0222, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"eval_accuracy": 0.1504468752084306, |
|
"eval_loss": 6.058495998382568, |
|
"eval_runtime": 231.1986, |
|
"eval_samples_per_second": 21.626, |
|
"eval_steps_per_second": 2.703, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 4.746818564860742e-05, |
|
"loss": 6.0067, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"eval_accuracy": 0.15003014928058153, |
|
"eval_loss": 6.051761627197266, |
|
"eval_runtime": 231.3307, |
|
"eval_samples_per_second": 21.614, |
|
"eval_steps_per_second": 2.702, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.733484804847089e-05, |
|
"loss": 6.0045, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.15037748890129463, |
|
"eval_loss": 6.030020236968994, |
|
"eval_runtime": 231.5038, |
|
"eval_samples_per_second": 21.598, |
|
"eval_steps_per_second": 2.7, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 4.720151044833435e-05, |
|
"loss": 5.9659, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"eval_accuracy": 0.15042165912398572, |
|
"eval_loss": 6.024837970733643, |
|
"eval_runtime": 230.5925, |
|
"eval_samples_per_second": 21.683, |
|
"eval_steps_per_second": 2.71, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 4.706817284819781e-05, |
|
"loss": 5.9542, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"eval_accuracy": 0.15116976604679064, |
|
"eval_loss": 6.014277935028076, |
|
"eval_runtime": 230.7651, |
|
"eval_samples_per_second": 21.667, |
|
"eval_steps_per_second": 2.708, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 4.6934835248061275e-05, |
|
"loss": 5.9479, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"eval_accuracy": 0.15137721677585322, |
|
"eval_loss": 5.98906135559082, |
|
"eval_runtime": 231.6346, |
|
"eval_samples_per_second": 21.586, |
|
"eval_steps_per_second": 2.698, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 4.6801497647924735e-05, |
|
"loss": 5.9506, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"eval_accuracy": 0.1517101264223344, |
|
"eval_loss": 5.9826836585998535, |
|
"eval_runtime": 231.7066, |
|
"eval_samples_per_second": 21.579, |
|
"eval_steps_per_second": 2.697, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.66681600477882e-05, |
|
"loss": 5.9358, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.15087804601873653, |
|
"eval_loss": 5.9973015785217285, |
|
"eval_runtime": 230.661, |
|
"eval_samples_per_second": 21.677, |
|
"eval_steps_per_second": 2.71, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 4.653482244765166e-05, |
|
"loss": 5.9114, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"eval_accuracy": 0.15052932437024058, |
|
"eval_loss": 5.976053714752197, |
|
"eval_runtime": 231.2672, |
|
"eval_samples_per_second": 21.62, |
|
"eval_steps_per_second": 2.703, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 4.640148484751513e-05, |
|
"loss": 5.9089, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"eval_accuracy": 0.15164112985030448, |
|
"eval_loss": 5.963654518127441, |
|
"eval_runtime": 230.7206, |
|
"eval_samples_per_second": 21.671, |
|
"eval_steps_per_second": 2.709, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 4.626814724737859e-05, |
|
"loss": 5.9008, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"eval_accuracy": 0.15150205172883927, |
|
"eval_loss": 5.95345401763916, |
|
"eval_runtime": 230.8946, |
|
"eval_samples_per_second": 21.655, |
|
"eval_steps_per_second": 2.707, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 4.613480964724205e-05, |
|
"loss": 5.9007, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"eval_accuracy": 0.15299477003435696, |
|
"eval_loss": 5.934290409088135, |
|
"eval_runtime": 230.6423, |
|
"eval_samples_per_second": 21.679, |
|
"eval_steps_per_second": 2.71, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.600147204710551e-05, |
|
"loss": 5.8734, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.1532304554860595, |
|
"eval_loss": 5.92548131942749, |
|
"eval_runtime": 230.5299, |
|
"eval_samples_per_second": 21.689, |
|
"eval_steps_per_second": 2.711, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 4.5868134446968975e-05, |
|
"loss": 5.8519, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"eval_accuracy": 0.15272607511053005, |
|
"eval_loss": 5.92126989364624, |
|
"eval_runtime": 230.8715, |
|
"eval_samples_per_second": 21.657, |
|
"eval_steps_per_second": 2.707, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 4.5734796846832435e-05, |
|
"loss": 5.8383, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"eval_accuracy": 0.15125044872561924, |
|
"eval_loss": 5.912574768066406, |
|
"eval_runtime": 230.9209, |
|
"eval_samples_per_second": 21.652, |
|
"eval_steps_per_second": 2.707, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 4.56014592466959e-05, |
|
"loss": 5.8461, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"eval_accuracy": 0.15246080472538365, |
|
"eval_loss": 5.904086589813232, |
|
"eval_runtime": 230.6847, |
|
"eval_samples_per_second": 21.675, |
|
"eval_steps_per_second": 2.709, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 4.546812164655936e-05, |
|
"loss": 5.8387, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"eval_accuracy": 0.1517369949687959, |
|
"eval_loss": 5.8923492431640625, |
|
"eval_runtime": 230.6969, |
|
"eval_samples_per_second": 21.673, |
|
"eval_steps_per_second": 2.709, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 4.533478404642282e-05, |
|
"loss": 5.831, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.15579792043399637, |
|
"eval_loss": 5.878210067749023, |
|
"eval_runtime": 230.6131, |
|
"eval_samples_per_second": 21.681, |
|
"eval_steps_per_second": 2.71, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 4.520144644628628e-05, |
|
"loss": 5.8003, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"eval_accuracy": 0.15538990825688073, |
|
"eval_loss": 5.865967750549316, |
|
"eval_runtime": 230.4846, |
|
"eval_samples_per_second": 21.693, |
|
"eval_steps_per_second": 2.712, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 4.506810884614975e-05, |
|
"loss": 5.7832, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"eval_accuracy": 0.1559906228220237, |
|
"eval_loss": 5.85080099105835, |
|
"eval_runtime": 230.509, |
|
"eval_samples_per_second": 21.691, |
|
"eval_steps_per_second": 2.711, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 4.493477124601321e-05, |
|
"loss": 5.7902, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"eval_accuracy": 0.15576768816506403, |
|
"eval_loss": 5.849486827850342, |
|
"eval_runtime": 231.3532, |
|
"eval_samples_per_second": 21.612, |
|
"eval_steps_per_second": 2.701, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 4.4801433645876674e-05, |
|
"loss": 5.7707, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"eval_accuracy": 0.15526944235643156, |
|
"eval_loss": 5.8376359939575195, |
|
"eval_runtime": 231.4064, |
|
"eval_samples_per_second": 21.607, |
|
"eval_steps_per_second": 2.701, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.4668096045740134e-05, |
|
"loss": 5.7638, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.156442207844077, |
|
"eval_loss": 5.828929901123047, |
|
"eval_runtime": 231.698, |
|
"eval_samples_per_second": 21.58, |
|
"eval_steps_per_second": 2.697, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 4.4534758445603594e-05, |
|
"loss": 5.741, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"eval_accuracy": 0.1573580556184791, |
|
"eval_loss": 5.823002815246582, |
|
"eval_runtime": 230.5869, |
|
"eval_samples_per_second": 21.684, |
|
"eval_steps_per_second": 2.71, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 4.4401420845467054e-05, |
|
"loss": 5.7291, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"eval_accuracy": 0.1574160541095799, |
|
"eval_loss": 5.810952186584473, |
|
"eval_runtime": 231.4576, |
|
"eval_samples_per_second": 21.602, |
|
"eval_steps_per_second": 2.7, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 4.426808324533052e-05, |
|
"loss": 5.7206, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"eval_accuracy": 0.15753739088780072, |
|
"eval_loss": 5.801361560821533, |
|
"eval_runtime": 231.427, |
|
"eval_samples_per_second": 21.605, |
|
"eval_steps_per_second": 2.701, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 4.413474564519398e-05, |
|
"loss": 5.6974, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"eval_accuracy": 0.16051140143149828, |
|
"eval_loss": 5.7644429206848145, |
|
"eval_runtime": 230.2201, |
|
"eval_samples_per_second": 21.718, |
|
"eval_steps_per_second": 2.715, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4.400140804505745e-05, |
|
"loss": 5.6954, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.1638391355182993, |
|
"eval_loss": 5.740447521209717, |
|
"eval_runtime": 230.3833, |
|
"eval_samples_per_second": 21.703, |
|
"eval_steps_per_second": 2.713, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 4.386807044492091e-05, |
|
"loss": 5.6467, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"eval_accuracy": 0.16680106362418612, |
|
"eval_loss": 5.703954696655273, |
|
"eval_runtime": 231.5174, |
|
"eval_samples_per_second": 21.597, |
|
"eval_steps_per_second": 2.7, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 4.3734732844784374e-05, |
|
"loss": 5.6134, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"eval_accuracy": 0.17379691610883538, |
|
"eval_loss": 5.665645122528076, |
|
"eval_runtime": 230.5546, |
|
"eval_samples_per_second": 21.687, |
|
"eval_steps_per_second": 2.711, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 4.360139524464783e-05, |
|
"loss": 5.5855, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"eval_accuracy": 0.17866926158214308, |
|
"eval_loss": 5.626242637634277, |
|
"eval_runtime": 231.3553, |
|
"eval_samples_per_second": 21.612, |
|
"eval_steps_per_second": 2.701, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 4.3468057644511294e-05, |
|
"loss": 5.5374, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"eval_accuracy": 0.1882605678906917, |
|
"eval_loss": 5.558737277984619, |
|
"eval_runtime": 231.4004, |
|
"eval_samples_per_second": 21.608, |
|
"eval_steps_per_second": 2.701, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 4.3334720044374753e-05, |
|
"loss": 5.4678, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.20085203692159995, |
|
"eval_loss": 5.438808917999268, |
|
"eval_runtime": 230.4228, |
|
"eval_samples_per_second": 21.699, |
|
"eval_steps_per_second": 2.712, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 10.2, |
|
"learning_rate": 4.320138244423822e-05, |
|
"loss": 5.3324, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 10.2, |
|
"eval_accuracy": 0.22031610383030917, |
|
"eval_loss": 5.270298480987549, |
|
"eval_runtime": 231.4936, |
|
"eval_samples_per_second": 21.599, |
|
"eval_steps_per_second": 2.7, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"learning_rate": 4.306804484410168e-05, |
|
"loss": 5.1849, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"eval_accuracy": 0.24342581021411314, |
|
"eval_loss": 5.090771675109863, |
|
"eval_runtime": 231.6154, |
|
"eval_samples_per_second": 21.588, |
|
"eval_steps_per_second": 2.698, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 10.6, |
|
"learning_rate": 4.293470724396515e-05, |
|
"loss": 5.0273, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 10.6, |
|
"eval_accuracy": 0.2657009801806347, |
|
"eval_loss": 4.91030740737915, |
|
"eval_runtime": 231.4751, |
|
"eval_samples_per_second": 21.601, |
|
"eval_steps_per_second": 2.7, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"learning_rate": 4.2801369643828607e-05, |
|
"loss": 4.8718, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"eval_accuracy": 0.2844097476529729, |
|
"eval_loss": 4.763670921325684, |
|
"eval_runtime": 231.3758, |
|
"eval_samples_per_second": 21.61, |
|
"eval_steps_per_second": 2.701, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 4.2668032043692066e-05, |
|
"loss": 4.7523, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.30234914938862306, |
|
"eval_loss": 4.606442928314209, |
|
"eval_runtime": 231.2813, |
|
"eval_samples_per_second": 21.619, |
|
"eval_steps_per_second": 2.702, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 4.2534694443555526e-05, |
|
"loss": 4.5814, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"eval_accuracy": 0.32203576764067926, |
|
"eval_loss": 4.43984317779541, |
|
"eval_runtime": 231.3348, |
|
"eval_samples_per_second": 21.614, |
|
"eval_steps_per_second": 2.702, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 11.4, |
|
"learning_rate": 4.240135684341899e-05, |
|
"loss": 4.4627, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 11.4, |
|
"eval_accuracy": 0.3375924905212874, |
|
"eval_loss": 4.300474643707275, |
|
"eval_runtime": 231.0917, |
|
"eval_samples_per_second": 21.636, |
|
"eval_steps_per_second": 2.705, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 11.6, |
|
"learning_rate": 4.226801924328245e-05, |
|
"loss": 4.3228, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 11.6, |
|
"eval_accuracy": 0.352034906941449, |
|
"eval_loss": 4.177072048187256, |
|
"eval_runtime": 231.0017, |
|
"eval_samples_per_second": 21.645, |
|
"eval_steps_per_second": 2.706, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 11.8, |
|
"learning_rate": 4.213468164314592e-05, |
|
"loss": 4.1885, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 11.8, |
|
"eval_accuracy": 0.3631866171945581, |
|
"eval_loss": 4.0783257484436035, |
|
"eval_runtime": 231.2677, |
|
"eval_samples_per_second": 21.62, |
|
"eval_steps_per_second": 2.702, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 4.200134404300938e-05, |
|
"loss": 4.0772, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.3765043895715632, |
|
"eval_loss": 3.9657578468322754, |
|
"eval_runtime": 231.1257, |
|
"eval_samples_per_second": 21.633, |
|
"eval_steps_per_second": 2.704, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 12.2, |
|
"learning_rate": 4.186800644287284e-05, |
|
"loss": 3.9602, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 12.2, |
|
"eval_accuracy": 0.38800160873770595, |
|
"eval_loss": 3.8685555458068848, |
|
"eval_runtime": 231.7873, |
|
"eval_samples_per_second": 21.571, |
|
"eval_steps_per_second": 2.696, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"learning_rate": 4.17346688427363e-05, |
|
"loss": 3.8622, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"eval_accuracy": 0.3967645113361764, |
|
"eval_loss": 3.7885961532592773, |
|
"eval_runtime": 231.6543, |
|
"eval_samples_per_second": 21.584, |
|
"eval_steps_per_second": 2.698, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 12.6, |
|
"learning_rate": 4.1601331242599766e-05, |
|
"loss": 3.7958, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 12.6, |
|
"eval_accuracy": 0.4074396657589834, |
|
"eval_loss": 3.69677472114563, |
|
"eval_runtime": 231.2453, |
|
"eval_samples_per_second": 21.622, |
|
"eval_steps_per_second": 2.703, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 4.1467993642463226e-05, |
|
"loss": 3.7245, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"eval_accuracy": 0.41293986747928324, |
|
"eval_loss": 3.6479530334472656, |
|
"eval_runtime": 230.7287, |
|
"eval_samples_per_second": 21.67, |
|
"eval_steps_per_second": 2.709, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 4.133465604232669e-05, |
|
"loss": 3.6503, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.42037487909815563, |
|
"eval_loss": 3.577112913131714, |
|
"eval_runtime": 231.289, |
|
"eval_samples_per_second": 21.618, |
|
"eval_steps_per_second": 2.702, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 13.2, |
|
"learning_rate": 4.120131844219015e-05, |
|
"loss": 3.5569, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 13.2, |
|
"eval_accuracy": 0.42860995864438833, |
|
"eval_loss": 3.5102858543395996, |
|
"eval_runtime": 231.3523, |
|
"eval_samples_per_second": 21.612, |
|
"eval_steps_per_second": 2.702, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 13.4, |
|
"learning_rate": 4.106798084205362e-05, |
|
"loss": 3.5151, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 13.4, |
|
"eval_accuracy": 0.43582590959536877, |
|
"eval_loss": 3.461106300354004, |
|
"eval_runtime": 231.2514, |
|
"eval_samples_per_second": 21.621, |
|
"eval_steps_per_second": 2.703, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"learning_rate": 4.093464324191707e-05, |
|
"loss": 3.4388, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"eval_accuracy": 0.4410264100641506, |
|
"eval_loss": 3.411881923675537, |
|
"eval_runtime": 232.8711, |
|
"eval_samples_per_second": 21.471, |
|
"eval_steps_per_second": 2.684, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 13.8, |
|
"learning_rate": 4.080130564178054e-05, |
|
"loss": 3.41, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 13.8, |
|
"eval_accuracy": 0.4486495293386135, |
|
"eval_loss": 3.3569633960723877, |
|
"eval_runtime": 233.0322, |
|
"eval_samples_per_second": 21.456, |
|
"eval_steps_per_second": 2.682, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 4.0667968041644e-05, |
|
"loss": 3.3447, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.4518131893672265, |
|
"eval_loss": 3.3158445358276367, |
|
"eval_runtime": 232.0508, |
|
"eval_samples_per_second": 21.547, |
|
"eval_steps_per_second": 2.693, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 14.2, |
|
"learning_rate": 4.0534630441507466e-05, |
|
"loss": 3.2678, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 14.2, |
|
"eval_accuracy": 0.4585323577940368, |
|
"eval_loss": 3.2717108726501465, |
|
"eval_runtime": 231.5502, |
|
"eval_samples_per_second": 21.594, |
|
"eval_steps_per_second": 2.699, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"learning_rate": 4.0401292841370925e-05, |
|
"loss": 3.2395, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"eval_accuracy": 0.46290830741037076, |
|
"eval_loss": 3.22344970703125, |
|
"eval_runtime": 231.32, |
|
"eval_samples_per_second": 21.615, |
|
"eval_steps_per_second": 2.702, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 14.6, |
|
"learning_rate": 4.026795524123439e-05, |
|
"loss": 3.2033, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 14.6, |
|
"eval_accuracy": 0.46970301506869355, |
|
"eval_loss": 3.17228627204895, |
|
"eval_runtime": 231.2086, |
|
"eval_samples_per_second": 21.625, |
|
"eval_steps_per_second": 2.703, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 14.8, |
|
"learning_rate": 4.013461764109785e-05, |
|
"loss": 3.1739, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 14.8, |
|
"eval_accuracy": 0.4746977130977131, |
|
"eval_loss": 3.140921115875244, |
|
"eval_runtime": 231.0625, |
|
"eval_samples_per_second": 21.639, |
|
"eval_steps_per_second": 2.705, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 4.000128004096131e-05, |
|
"loss": 3.1467, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.47823785730692026, |
|
"eval_loss": 3.1042473316192627, |
|
"eval_runtime": 231.1531, |
|
"eval_samples_per_second": 21.631, |
|
"eval_steps_per_second": 2.704, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 15.2, |
|
"learning_rate": 3.986794244082477e-05, |
|
"loss": 3.0736, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 15.2, |
|
"eval_accuracy": 0.48393500600560524, |
|
"eval_loss": 3.0560505390167236, |
|
"eval_runtime": 231.7054, |
|
"eval_samples_per_second": 21.579, |
|
"eval_steps_per_second": 2.697, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 15.4, |
|
"learning_rate": 3.973460484068824e-05, |
|
"loss": 3.0468, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 15.4, |
|
"eval_accuracy": 0.48689248404671853, |
|
"eval_loss": 3.0274672508239746, |
|
"eval_runtime": 231.6854, |
|
"eval_samples_per_second": 21.581, |
|
"eval_steps_per_second": 2.698, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 15.6, |
|
"learning_rate": 3.96012672405517e-05, |
|
"loss": 3.0105, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 15.6, |
|
"eval_accuracy": 0.48984264091336255, |
|
"eval_loss": 3.0050787925720215, |
|
"eval_runtime": 231.6235, |
|
"eval_samples_per_second": 21.587, |
|
"eval_steps_per_second": 2.698, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 15.8, |
|
"learning_rate": 3.9467929640415165e-05, |
|
"loss": 2.9828, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 15.8, |
|
"eval_accuracy": 0.4949883527454243, |
|
"eval_loss": 2.968860149383545, |
|
"eval_runtime": 231.5803, |
|
"eval_samples_per_second": 21.591, |
|
"eval_steps_per_second": 2.699, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 3.9334592040278625e-05, |
|
"loss": 2.9523, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.4958962606673906, |
|
"eval_loss": 2.9481143951416016, |
|
"eval_runtime": 232.0468, |
|
"eval_samples_per_second": 21.547, |
|
"eval_steps_per_second": 2.693, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 16.2, |
|
"learning_rate": 3.9201254440142085e-05, |
|
"loss": 2.8951, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 16.2, |
|
"eval_accuracy": 0.5039497778353822, |
|
"eval_loss": 2.8917958736419678, |
|
"eval_runtime": 231.5273, |
|
"eval_samples_per_second": 21.596, |
|
"eval_steps_per_second": 2.699, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 16.4, |
|
"learning_rate": 3.9067916840005545e-05, |
|
"loss": 2.8614, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 16.4, |
|
"eval_accuracy": 0.5053813663933169, |
|
"eval_loss": 2.873403549194336, |
|
"eval_runtime": 231.1155, |
|
"eval_samples_per_second": 21.634, |
|
"eval_steps_per_second": 2.704, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 16.6, |
|
"learning_rate": 3.893457923986901e-05, |
|
"loss": 2.8422, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 16.6, |
|
"eval_accuracy": 0.5083243546165059, |
|
"eval_loss": 2.848686695098877, |
|
"eval_runtime": 231.2389, |
|
"eval_samples_per_second": 21.623, |
|
"eval_steps_per_second": 2.703, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 16.8, |
|
"learning_rate": 3.880124163973247e-05, |
|
"loss": 2.8184, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 16.8, |
|
"eval_accuracy": 0.5137553298782741, |
|
"eval_loss": 2.8223233222961426, |
|
"eval_runtime": 231.3203, |
|
"eval_samples_per_second": 21.615, |
|
"eval_steps_per_second": 2.702, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 3.866790403959594e-05, |
|
"loss": 2.7806, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.5166754490698434, |
|
"eval_loss": 2.7964563369750977, |
|
"eval_runtime": 231.5987, |
|
"eval_samples_per_second": 21.589, |
|
"eval_steps_per_second": 2.699, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 17.2, |
|
"learning_rate": 3.85345664394594e-05, |
|
"loss": 2.7356, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 17.2, |
|
"eval_accuracy": 0.5208632038038585, |
|
"eval_loss": 2.7596287727355957, |
|
"eval_runtime": 231.5176, |
|
"eval_samples_per_second": 21.597, |
|
"eval_steps_per_second": 2.7, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 17.4, |
|
"learning_rate": 3.8401228839322865e-05, |
|
"loss": 2.7357, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 17.4, |
|
"eval_accuracy": 0.5249955157546486, |
|
"eval_loss": 2.740691900253296, |
|
"eval_runtime": 231.4323, |
|
"eval_samples_per_second": 21.605, |
|
"eval_steps_per_second": 2.701, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 17.6, |
|
"learning_rate": 3.826789123918632e-05, |
|
"loss": 2.7015, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 17.6, |
|
"eval_accuracy": 0.5272358467037992, |
|
"eval_loss": 2.713502883911133, |
|
"eval_runtime": 231.2385, |
|
"eval_samples_per_second": 21.623, |
|
"eval_steps_per_second": 2.703, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 17.8, |
|
"learning_rate": 3.8134553639049784e-05, |
|
"loss": 2.688, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 17.8, |
|
"eval_accuracy": 0.5288542866847373, |
|
"eval_loss": 2.693544387817383, |
|
"eval_runtime": 231.2235, |
|
"eval_samples_per_second": 21.624, |
|
"eval_steps_per_second": 2.703, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 3.8001216038913244e-05, |
|
"loss": 2.6582, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.5341693899145807, |
|
"eval_loss": 2.6571600437164307, |
|
"eval_runtime": 231.6454, |
|
"eval_samples_per_second": 21.585, |
|
"eval_steps_per_second": 2.698, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 18.2, |
|
"learning_rate": 3.786787843877671e-05, |
|
"loss": 2.6186, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 18.2, |
|
"eval_accuracy": 0.5356641818648343, |
|
"eval_loss": 2.6396398544311523, |
|
"eval_runtime": 231.8663, |
|
"eval_samples_per_second": 21.564, |
|
"eval_steps_per_second": 2.696, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"learning_rate": 3.773454083864017e-05, |
|
"loss": 2.6071, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"eval_accuracy": 0.5376610769046638, |
|
"eval_loss": 2.626965045928955, |
|
"eval_runtime": 231.641, |
|
"eval_samples_per_second": 21.585, |
|
"eval_steps_per_second": 2.698, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 18.6, |
|
"learning_rate": 3.760120323850364e-05, |
|
"loss": 2.5891, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 18.6, |
|
"eval_accuracy": 0.5406689339011392, |
|
"eval_loss": 2.6109817028045654, |
|
"eval_runtime": 231.5799, |
|
"eval_samples_per_second": 21.591, |
|
"eval_steps_per_second": 2.699, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 18.8, |
|
"learning_rate": 3.74678656383671e-05, |
|
"loss": 2.558, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 18.8, |
|
"eval_accuracy": 0.5434645737490298, |
|
"eval_loss": 2.5874485969543457, |
|
"eval_runtime": 231.7825, |
|
"eval_samples_per_second": 21.572, |
|
"eval_steps_per_second": 2.696, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 3.733452803823056e-05, |
|
"loss": 2.5521, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.5464595139947923, |
|
"eval_loss": 2.553973913192749, |
|
"eval_runtime": 231.9001, |
|
"eval_samples_per_second": 21.561, |
|
"eval_steps_per_second": 2.695, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"learning_rate": 3.720119043809402e-05, |
|
"loss": 2.5086, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"eval_accuracy": 0.5504046970853146, |
|
"eval_loss": 2.529578924179077, |
|
"eval_runtime": 231.5991, |
|
"eval_samples_per_second": 21.589, |
|
"eval_steps_per_second": 2.699, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 19.4, |
|
"learning_rate": 3.7067852837957484e-05, |
|
"loss": 2.4933, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 19.4, |
|
"eval_accuracy": 0.552335013374818, |
|
"eval_loss": 2.5199332237243652, |
|
"eval_runtime": 231.9578, |
|
"eval_samples_per_second": 21.556, |
|
"eval_steps_per_second": 2.694, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 19.6, |
|
"learning_rate": 3.6934515237820944e-05, |
|
"loss": 2.4924, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 19.6, |
|
"eval_accuracy": 0.5550407179657637, |
|
"eval_loss": 2.503727674484253, |
|
"eval_runtime": 231.4473, |
|
"eval_samples_per_second": 21.603, |
|
"eval_steps_per_second": 2.7, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 19.8, |
|
"learning_rate": 3.680117763768441e-05, |
|
"loss": 2.4633, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 19.8, |
|
"eval_accuracy": 0.5567173130747701, |
|
"eval_loss": 2.47917103767395, |
|
"eval_runtime": 231.5225, |
|
"eval_samples_per_second": 21.596, |
|
"eval_steps_per_second": 2.7, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 3.666784003754787e-05, |
|
"loss": 2.4426, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.5599728349523613, |
|
"eval_loss": 2.472404718399048, |
|
"eval_runtime": 231.2837, |
|
"eval_samples_per_second": 21.618, |
|
"eval_steps_per_second": 2.702, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 20.2, |
|
"learning_rate": 3.653450243741133e-05, |
|
"loss": 2.4106, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 20.2, |
|
"eval_accuracy": 0.5625706985347937, |
|
"eval_loss": 2.439568042755127, |
|
"eval_runtime": 232.2415, |
|
"eval_samples_per_second": 21.529, |
|
"eval_steps_per_second": 2.691, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 20.4, |
|
"learning_rate": 3.640116483727479e-05, |
|
"loss": 2.4103, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 20.4, |
|
"eval_accuracy": 0.563057566908029, |
|
"eval_loss": 2.42592453956604, |
|
"eval_runtime": 232.7771, |
|
"eval_samples_per_second": 21.48, |
|
"eval_steps_per_second": 2.685, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 20.6, |
|
"learning_rate": 3.626782723713826e-05, |
|
"loss": 2.3783, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 20.6, |
|
"eval_accuracy": 0.5672294768360642, |
|
"eval_loss": 2.4072492122650146, |
|
"eval_runtime": 232.8388, |
|
"eval_samples_per_second": 21.474, |
|
"eval_steps_per_second": 2.684, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 20.8, |
|
"learning_rate": 3.613448963700172e-05, |
|
"loss": 2.3712, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 20.8, |
|
"eval_accuracy": 0.567927338493045, |
|
"eval_loss": 2.4054574966430664, |
|
"eval_runtime": 231.9123, |
|
"eval_samples_per_second": 21.56, |
|
"eval_steps_per_second": 2.695, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"learning_rate": 3.6001152036865183e-05, |
|
"loss": 2.3616, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.5724379744645745, |
|
"eval_loss": 2.3781449794769287, |
|
"eval_runtime": 231.3479, |
|
"eval_samples_per_second": 21.612, |
|
"eval_steps_per_second": 2.702, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 21.2, |
|
"learning_rate": 3.586781443672864e-05, |
|
"loss": 2.3274, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 21.2, |
|
"eval_accuracy": 0.5746179093591486, |
|
"eval_loss": 2.3627405166625977, |
|
"eval_runtime": 231.8262, |
|
"eval_samples_per_second": 21.568, |
|
"eval_steps_per_second": 2.696, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 21.4, |
|
"learning_rate": 3.573447683659211e-05, |
|
"loss": 2.3133, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 21.4, |
|
"eval_accuracy": 0.5750550403629329, |
|
"eval_loss": 2.358633279800415, |
|
"eval_runtime": 231.5004, |
|
"eval_samples_per_second": 21.598, |
|
"eval_steps_per_second": 2.7, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 21.6, |
|
"learning_rate": 3.560113923645556e-05, |
|
"loss": 2.3076, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 21.6, |
|
"eval_accuracy": 0.5785171432769237, |
|
"eval_loss": 2.3207192420959473, |
|
"eval_runtime": 231.9813, |
|
"eval_samples_per_second": 21.553, |
|
"eval_steps_per_second": 2.694, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 21.8, |
|
"learning_rate": 3.546780163631903e-05, |
|
"loss": 2.2991, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 21.8, |
|
"eval_accuracy": 0.579623361683169, |
|
"eval_loss": 2.31518292427063, |
|
"eval_runtime": 231.8156, |
|
"eval_samples_per_second": 21.569, |
|
"eval_steps_per_second": 2.696, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 3.533446403618249e-05, |
|
"loss": 2.2831, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.5815027321493463, |
|
"eval_loss": 2.3000547885894775, |
|
"eval_runtime": 231.8488, |
|
"eval_samples_per_second": 21.566, |
|
"eval_steps_per_second": 2.696, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 22.2, |
|
"learning_rate": 3.5201126436045956e-05, |
|
"loss": 2.2461, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 22.2, |
|
"eval_accuracy": 0.5822362290673837, |
|
"eval_loss": 2.294421911239624, |
|
"eval_runtime": 231.0966, |
|
"eval_samples_per_second": 21.636, |
|
"eval_steps_per_second": 2.704, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 22.4, |
|
"learning_rate": 3.5067788835909416e-05, |
|
"loss": 2.2467, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 22.4, |
|
"eval_accuracy": 0.5855738229102827, |
|
"eval_loss": 2.2848546504974365, |
|
"eval_runtime": 232.0538, |
|
"eval_samples_per_second": 21.547, |
|
"eval_steps_per_second": 2.693, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 22.6, |
|
"learning_rate": 3.493445123577288e-05, |
|
"loss": 2.2199, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 22.6, |
|
"eval_accuracy": 0.5863217741131097, |
|
"eval_loss": 2.277578592300415, |
|
"eval_runtime": 231.9613, |
|
"eval_samples_per_second": 21.555, |
|
"eval_steps_per_second": 2.694, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 22.8, |
|
"learning_rate": 3.480111363563634e-05, |
|
"loss": 2.2279, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 22.8, |
|
"eval_accuracy": 0.5884500762486337, |
|
"eval_loss": 2.2576725482940674, |
|
"eval_runtime": 231.4399, |
|
"eval_samples_per_second": 21.604, |
|
"eval_steps_per_second": 2.7, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"learning_rate": 3.46677760354998e-05, |
|
"loss": 2.2048, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.5885816360134521, |
|
"eval_loss": 2.256614923477173, |
|
"eval_runtime": 231.8872, |
|
"eval_samples_per_second": 21.562, |
|
"eval_steps_per_second": 2.695, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 23.2, |
|
"learning_rate": 3.453443843536326e-05, |
|
"loss": 2.1704, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 23.2, |
|
"eval_accuracy": 0.5914266042687368, |
|
"eval_loss": 2.2453179359436035, |
|
"eval_runtime": 232.1021, |
|
"eval_samples_per_second": 21.542, |
|
"eval_steps_per_second": 2.693, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 23.4, |
|
"learning_rate": 3.440110083522673e-05, |
|
"loss": 2.1682, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 23.4, |
|
"eval_accuracy": 0.5927180608978089, |
|
"eval_loss": 2.2313756942749023, |
|
"eval_runtime": 231.9349, |
|
"eval_samples_per_second": 21.558, |
|
"eval_steps_per_second": 2.695, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 23.6, |
|
"learning_rate": 3.426776323509019e-05, |
|
"loss": 2.1592, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 23.6, |
|
"eval_accuracy": 0.5960976878150253, |
|
"eval_loss": 2.2097034454345703, |
|
"eval_runtime": 232.3164, |
|
"eval_samples_per_second": 21.522, |
|
"eval_steps_per_second": 2.69, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 23.8, |
|
"learning_rate": 3.4134425634953656e-05, |
|
"loss": 2.1547, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 23.8, |
|
"eval_accuracy": 0.5972384920529272, |
|
"eval_loss": 2.198352336883545, |
|
"eval_runtime": 231.637, |
|
"eval_samples_per_second": 21.585, |
|
"eval_steps_per_second": 2.698, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 3.4001088034817116e-05, |
|
"loss": 2.1558, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.5992973741183971, |
|
"eval_loss": 2.186568021774292, |
|
"eval_runtime": 231.7166, |
|
"eval_samples_per_second": 21.578, |
|
"eval_steps_per_second": 2.697, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 24.2, |
|
"learning_rate": 3.3867750434680576e-05, |
|
"loss": 2.1189, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 24.2, |
|
"eval_accuracy": 0.6008984588334022, |
|
"eval_loss": 2.167475700378418, |
|
"eval_runtime": 231.504, |
|
"eval_samples_per_second": 21.598, |
|
"eval_steps_per_second": 2.7, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 24.4, |
|
"learning_rate": 3.3734412834544036e-05, |
|
"loss": 2.1088, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 24.4, |
|
"eval_accuracy": 0.602769414408557, |
|
"eval_loss": 2.1612794399261475, |
|
"eval_runtime": 232.1378, |
|
"eval_samples_per_second": 21.539, |
|
"eval_steps_per_second": 2.692, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 24.6, |
|
"learning_rate": 3.36010752344075e-05, |
|
"loss": 2.1164, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 24.6, |
|
"eval_accuracy": 0.6046435839795687, |
|
"eval_loss": 2.1531035900115967, |
|
"eval_runtime": 231.9129, |
|
"eval_samples_per_second": 21.56, |
|
"eval_steps_per_second": 2.695, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 24.8, |
|
"learning_rate": 3.346773763427096e-05, |
|
"loss": 2.094, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 24.8, |
|
"eval_accuracy": 0.6041164404544668, |
|
"eval_loss": 2.1506526470184326, |
|
"eval_runtime": 231.7484, |
|
"eval_samples_per_second": 21.575, |
|
"eval_steps_per_second": 2.697, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 3.333440003413443e-05, |
|
"loss": 2.0977, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.6063366902269225, |
|
"eval_loss": 2.1298508644104004, |
|
"eval_runtime": 231.8473, |
|
"eval_samples_per_second": 21.566, |
|
"eval_steps_per_second": 2.696, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 25.2, |
|
"learning_rate": 3.320106243399789e-05, |
|
"loss": 2.0657, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 25.2, |
|
"eval_accuracy": 0.6070928402963278, |
|
"eval_loss": 2.121816873550415, |
|
"eval_runtime": 231.4696, |
|
"eval_samples_per_second": 21.601, |
|
"eval_steps_per_second": 2.7, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 25.4, |
|
"learning_rate": 3.3067724833861355e-05, |
|
"loss": 2.051, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 25.4, |
|
"eval_accuracy": 0.6082587307585552, |
|
"eval_loss": 2.123283624649048, |
|
"eval_runtime": 231.5258, |
|
"eval_samples_per_second": 21.596, |
|
"eval_steps_per_second": 2.699, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 25.6, |
|
"learning_rate": 3.293438723372481e-05, |
|
"loss": 2.0482, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 25.6, |
|
"eval_accuracy": 0.6100416241732469, |
|
"eval_loss": 2.106915235519409, |
|
"eval_runtime": 231.6727, |
|
"eval_samples_per_second": 21.582, |
|
"eval_steps_per_second": 2.698, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 25.8, |
|
"learning_rate": 3.2801049633588275e-05, |
|
"loss": 2.04, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 25.8, |
|
"eval_accuracy": 0.6119977784355264, |
|
"eval_loss": 2.0985183715820312, |
|
"eval_runtime": 231.5457, |
|
"eval_samples_per_second": 21.594, |
|
"eval_steps_per_second": 2.699, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 3.2667712033451735e-05, |
|
"loss": 2.0341, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.6128028880096977, |
|
"eval_loss": 2.092878818511963, |
|
"eval_runtime": 233.0294, |
|
"eval_samples_per_second": 21.457, |
|
"eval_steps_per_second": 2.682, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 26.2, |
|
"learning_rate": 3.25343744333152e-05, |
|
"loss": 2.0207, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 26.2, |
|
"eval_accuracy": 0.6150510766517234, |
|
"eval_loss": 2.0767383575439453, |
|
"eval_runtime": 233.0148, |
|
"eval_samples_per_second": 21.458, |
|
"eval_steps_per_second": 2.682, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 26.4, |
|
"learning_rate": 3.240103683317866e-05, |
|
"loss": 2.0044, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 26.4, |
|
"eval_accuracy": 0.6161581773821563, |
|
"eval_loss": 2.0671825408935547, |
|
"eval_runtime": 232.3839, |
|
"eval_samples_per_second": 21.516, |
|
"eval_steps_per_second": 2.69, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 26.6, |
|
"learning_rate": 3.226769923304213e-05, |
|
"loss": 2.0037, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 26.6, |
|
"eval_accuracy": 0.6159318730896034, |
|
"eval_loss": 2.0623385906219482, |
|
"eval_runtime": 231.4425, |
|
"eval_samples_per_second": 21.604, |
|
"eval_steps_per_second": 2.7, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 26.8, |
|
"learning_rate": 3.213436163290559e-05, |
|
"loss": 2.0081, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 26.8, |
|
"eval_accuracy": 0.6164338068021428, |
|
"eval_loss": 2.061392068862915, |
|
"eval_runtime": 231.5016, |
|
"eval_samples_per_second": 21.598, |
|
"eval_steps_per_second": 2.7, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"learning_rate": 3.200102403276905e-05, |
|
"loss": 1.9847, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.6186114673017036, |
|
"eval_loss": 2.0499088764190674, |
|
"eval_runtime": 231.4759, |
|
"eval_samples_per_second": 21.601, |
|
"eval_steps_per_second": 2.7, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 27.2, |
|
"learning_rate": 3.186768643263251e-05, |
|
"loss": 1.9465, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 27.2, |
|
"eval_accuracy": 0.619952747351619, |
|
"eval_loss": 2.039889097213745, |
|
"eval_runtime": 231.1562, |
|
"eval_samples_per_second": 21.63, |
|
"eval_steps_per_second": 2.704, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 27.4, |
|
"learning_rate": 3.1734348832495975e-05, |
|
"loss": 1.9573, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 27.4, |
|
"eval_accuracy": 0.6209867552531732, |
|
"eval_loss": 2.0353338718414307, |
|
"eval_runtime": 231.215, |
|
"eval_samples_per_second": 21.625, |
|
"eval_steps_per_second": 2.703, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 27.6, |
|
"learning_rate": 3.1601011232359435e-05, |
|
"loss": 1.9682, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 27.6, |
|
"eval_accuracy": 0.6227358192316012, |
|
"eval_loss": 2.018660306930542, |
|
"eval_runtime": 232.2045, |
|
"eval_samples_per_second": 21.533, |
|
"eval_steps_per_second": 2.692, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 27.8, |
|
"learning_rate": 3.14676736322229e-05, |
|
"loss": 1.9573, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 27.8, |
|
"eval_accuracy": 0.6228562509132936, |
|
"eval_loss": 2.0251047611236572, |
|
"eval_runtime": 231.49, |
|
"eval_samples_per_second": 21.599, |
|
"eval_steps_per_second": 2.7, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 3.133433603208636e-05, |
|
"loss": 1.9491, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.6245234243399657, |
|
"eval_loss": 2.0086443424224854, |
|
"eval_runtime": 231.5412, |
|
"eval_samples_per_second": 21.594, |
|
"eval_steps_per_second": 2.699, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 28.2, |
|
"learning_rate": 3.120099843194982e-05, |
|
"loss": 1.903, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 28.2, |
|
"eval_accuracy": 0.6246465089539988, |
|
"eval_loss": 2.0067226886749268, |
|
"eval_runtime": 231.828, |
|
"eval_samples_per_second": 21.568, |
|
"eval_steps_per_second": 2.696, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 28.4, |
|
"learning_rate": 3.106766083181329e-05, |
|
"loss": 1.9152, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 28.4, |
|
"eval_accuracy": 0.6263943801774015, |
|
"eval_loss": 1.992945909500122, |
|
"eval_runtime": 232.4718, |
|
"eval_samples_per_second": 21.508, |
|
"eval_steps_per_second": 2.688, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 28.6, |
|
"learning_rate": 3.093432323167675e-05, |
|
"loss": 1.9188, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 28.6, |
|
"eval_accuracy": 0.6274427330569298, |
|
"eval_loss": 1.9857181310653687, |
|
"eval_runtime": 231.0919, |
|
"eval_samples_per_second": 21.636, |
|
"eval_steps_per_second": 2.705, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 28.8, |
|
"learning_rate": 3.0800985631540214e-05, |
|
"loss": 1.9232, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 28.8, |
|
"eval_accuracy": 0.6286685951318984, |
|
"eval_loss": 1.9795554876327515, |
|
"eval_runtime": 232.0512, |
|
"eval_samples_per_second": 21.547, |
|
"eval_steps_per_second": 2.693, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"learning_rate": 3.0667648031403674e-05, |
|
"loss": 1.9011, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.62893259547976, |
|
"eval_loss": 1.9790831804275513, |
|
"eval_runtime": 231.7972, |
|
"eval_samples_per_second": 21.571, |
|
"eval_steps_per_second": 2.696, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 29.2, |
|
"learning_rate": 3.053431043126714e-05, |
|
"loss": 1.8733, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 29.2, |
|
"eval_accuracy": 0.6289149729933008, |
|
"eval_loss": 1.9699796438217163, |
|
"eval_runtime": 231.6438, |
|
"eval_samples_per_second": 21.585, |
|
"eval_steps_per_second": 2.698, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 29.4, |
|
"learning_rate": 3.0400972831130597e-05, |
|
"loss": 1.8731, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 29.4, |
|
"eval_accuracy": 0.6302774206622085, |
|
"eval_loss": 1.9584376811981201, |
|
"eval_runtime": 231.8307, |
|
"eval_samples_per_second": 21.567, |
|
"eval_steps_per_second": 2.696, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 29.6, |
|
"learning_rate": 3.0267635230994064e-05, |
|
"loss": 1.8812, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 29.6, |
|
"eval_accuracy": 0.6322679604528252, |
|
"eval_loss": 1.9572980403900146, |
|
"eval_runtime": 231.0721, |
|
"eval_samples_per_second": 21.638, |
|
"eval_steps_per_second": 2.705, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 29.8, |
|
"learning_rate": 3.013429763085752e-05, |
|
"loss": 1.8674, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 29.8, |
|
"eval_accuracy": 0.6318260719202453, |
|
"eval_loss": 1.9501464366912842, |
|
"eval_runtime": 232.0123, |
|
"eval_samples_per_second": 21.551, |
|
"eval_steps_per_second": 2.694, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 3.0000960030720987e-05, |
|
"loss": 1.8572, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.633327222731439, |
|
"eval_loss": 1.9454320669174194, |
|
"eval_runtime": 231.3259, |
|
"eval_samples_per_second": 21.615, |
|
"eval_steps_per_second": 2.702, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 30.2, |
|
"learning_rate": 2.9867622430584447e-05, |
|
"loss": 1.849, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 30.2, |
|
"eval_accuracy": 0.6352029775815805, |
|
"eval_loss": 1.9374988079071045, |
|
"eval_runtime": 232.1415, |
|
"eval_samples_per_second": 21.539, |
|
"eval_steps_per_second": 2.692, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 30.4, |
|
"learning_rate": 2.973428483044791e-05, |
|
"loss": 1.8332, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 30.4, |
|
"eval_accuracy": 0.6343171251873188, |
|
"eval_loss": 1.9343605041503906, |
|
"eval_runtime": 231.7675, |
|
"eval_samples_per_second": 21.573, |
|
"eval_steps_per_second": 2.697, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 30.6, |
|
"learning_rate": 2.960094723031137e-05, |
|
"loss": 1.8413, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 30.6, |
|
"eval_accuracy": 0.6339970956919431, |
|
"eval_loss": 1.9292726516723633, |
|
"eval_runtime": 232.1306, |
|
"eval_samples_per_second": 21.54, |
|
"eval_steps_per_second": 2.692, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 30.8, |
|
"learning_rate": 2.9467609630174837e-05, |
|
"loss": 1.8298, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 30.8, |
|
"eval_accuracy": 0.6370566744044308, |
|
"eval_loss": 1.922782063484192, |
|
"eval_runtime": 231.4445, |
|
"eval_samples_per_second": 21.603, |
|
"eval_steps_per_second": 2.7, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"learning_rate": 2.9334272030038297e-05, |
|
"loss": 1.8336, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.6371772588144194, |
|
"eval_loss": 1.921502709388733, |
|
"eval_runtime": 232.2546, |
|
"eval_samples_per_second": 21.528, |
|
"eval_steps_per_second": 2.691, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 31.2, |
|
"learning_rate": 2.920093442990176e-05, |
|
"loss": 1.8122, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 31.2, |
|
"eval_accuracy": 0.6386797799633273, |
|
"eval_loss": 1.9133163690567017, |
|
"eval_runtime": 231.9897, |
|
"eval_samples_per_second": 21.553, |
|
"eval_steps_per_second": 2.694, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 31.4, |
|
"learning_rate": 2.906759682976522e-05, |
|
"loss": 1.8001, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 31.4, |
|
"eval_accuracy": 0.6382722577512613, |
|
"eval_loss": 1.9119086265563965, |
|
"eval_runtime": 231.6865, |
|
"eval_samples_per_second": 21.581, |
|
"eval_steps_per_second": 2.698, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 31.6, |
|
"learning_rate": 2.8934259229628687e-05, |
|
"loss": 1.7934, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 31.6, |
|
"eval_accuracy": 0.6387368757023453, |
|
"eval_loss": 1.908843755722046, |
|
"eval_runtime": 231.6821, |
|
"eval_samples_per_second": 21.581, |
|
"eval_steps_per_second": 2.698, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 31.8, |
|
"learning_rate": 2.8800921629492143e-05, |
|
"loss": 1.8079, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 31.8, |
|
"eval_accuracy": 0.6417437173027998, |
|
"eval_loss": 1.8940019607543945, |
|
"eval_runtime": 231.6685, |
|
"eval_samples_per_second": 21.583, |
|
"eval_steps_per_second": 2.698, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 2.866758402935561e-05, |
|
"loss": 1.8017, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.6409578370303184, |
|
"eval_loss": 1.8889141082763672, |
|
"eval_runtime": 232.1525, |
|
"eval_samples_per_second": 21.538, |
|
"eval_steps_per_second": 2.692, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 32.2, |
|
"learning_rate": 2.853424642921907e-05, |
|
"loss": 1.7789, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 32.2, |
|
"eval_accuracy": 0.6423419234856536, |
|
"eval_loss": 1.8882757425308228, |
|
"eval_runtime": 232.9517, |
|
"eval_samples_per_second": 21.464, |
|
"eval_steps_per_second": 2.683, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 32.4, |
|
"learning_rate": 2.8400908829082533e-05, |
|
"loss": 1.7739, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 32.4, |
|
"eval_accuracy": 0.6419453574431457, |
|
"eval_loss": 1.883570671081543, |
|
"eval_runtime": 232.6796, |
|
"eval_samples_per_second": 21.489, |
|
"eval_steps_per_second": 2.686, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 32.6, |
|
"learning_rate": 2.8267571228945993e-05, |
|
"loss": 1.7602, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 32.6, |
|
"eval_accuracy": 0.6432658377580925, |
|
"eval_loss": 1.8794612884521484, |
|
"eval_runtime": 232.1193, |
|
"eval_samples_per_second": 21.541, |
|
"eval_steps_per_second": 2.693, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 32.8, |
|
"learning_rate": 2.813423362880946e-05, |
|
"loss": 1.7731, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 32.8, |
|
"eval_accuracy": 0.6439001005386474, |
|
"eval_loss": 1.876919150352478, |
|
"eval_runtime": 232.0655, |
|
"eval_samples_per_second": 21.546, |
|
"eval_steps_per_second": 2.693, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"learning_rate": 2.800089602867292e-05, |
|
"loss": 1.7784, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.6466631763933027, |
|
"eval_loss": 1.858984351158142, |
|
"eval_runtime": 232.3936, |
|
"eval_samples_per_second": 21.515, |
|
"eval_steps_per_second": 2.689, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 33.2, |
|
"learning_rate": 2.7867558428536383e-05, |
|
"loss": 1.7506, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 33.2, |
|
"eval_accuracy": 0.6447427010573747, |
|
"eval_loss": 1.8664191961288452, |
|
"eval_runtime": 231.8269, |
|
"eval_samples_per_second": 21.568, |
|
"eval_steps_per_second": 2.696, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 33.4, |
|
"learning_rate": 2.7734220828399843e-05, |
|
"loss": 1.7307, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 33.4, |
|
"eval_accuracy": 0.647232934792391, |
|
"eval_loss": 1.8552578687667847, |
|
"eval_runtime": 231.9354, |
|
"eval_samples_per_second": 21.558, |
|
"eval_steps_per_second": 2.695, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 33.6, |
|
"learning_rate": 2.760088322826331e-05, |
|
"loss": 1.748, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 33.6, |
|
"eval_accuracy": 0.6470427709961263, |
|
"eval_loss": 1.8523156642913818, |
|
"eval_runtime": 231.5672, |
|
"eval_samples_per_second": 21.592, |
|
"eval_steps_per_second": 2.699, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 33.8, |
|
"learning_rate": 2.7467545628126766e-05, |
|
"loss": 1.7285, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 33.8, |
|
"eval_accuracy": 0.6490748366917534, |
|
"eval_loss": 1.8396527767181396, |
|
"eval_runtime": 231.3951, |
|
"eval_samples_per_second": 21.608, |
|
"eval_steps_per_second": 2.701, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"learning_rate": 2.7334208027990233e-05, |
|
"loss": 1.7426, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.6492367478939033, |
|
"eval_loss": 1.8321189880371094, |
|
"eval_runtime": 231.7169, |
|
"eval_samples_per_second": 21.578, |
|
"eval_steps_per_second": 2.697, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 34.2, |
|
"learning_rate": 2.7200870427853693e-05, |
|
"loss": 1.7128, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 34.2, |
|
"eval_accuracy": 0.6506795203856116, |
|
"eval_loss": 1.8220070600509644, |
|
"eval_runtime": 231.6693, |
|
"eval_samples_per_second": 21.582, |
|
"eval_steps_per_second": 2.698, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 34.4, |
|
"learning_rate": 2.7067532827717156e-05, |
|
"loss": 1.7155, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 34.4, |
|
"eval_accuracy": 0.6479166597570238, |
|
"eval_loss": 1.8486615419387817, |
|
"eval_runtime": 232.1633, |
|
"eval_samples_per_second": 21.537, |
|
"eval_steps_per_second": 2.692, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 34.6, |
|
"learning_rate": 2.6934195227580616e-05, |
|
"loss": 1.7143, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 34.6, |
|
"eval_accuracy": 0.650382482023844, |
|
"eval_loss": 1.8266839981079102, |
|
"eval_runtime": 231.6644, |
|
"eval_samples_per_second": 21.583, |
|
"eval_steps_per_second": 2.698, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 34.8, |
|
"learning_rate": 2.6800857627444082e-05, |
|
"loss": 1.7197, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 34.8, |
|
"eval_accuracy": 0.6498565162886434, |
|
"eval_loss": 1.8367702960968018, |
|
"eval_runtime": 231.461, |
|
"eval_samples_per_second": 21.602, |
|
"eval_steps_per_second": 2.7, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"learning_rate": 2.6667520027307542e-05, |
|
"loss": 1.7043, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.6523924018595729, |
|
"eval_loss": 1.8128160238265991, |
|
"eval_runtime": 231.9645, |
|
"eval_samples_per_second": 21.555, |
|
"eval_steps_per_second": 2.694, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 35.2, |
|
"learning_rate": 2.6534182427171006e-05, |
|
"loss": 1.6931, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 35.2, |
|
"eval_accuracy": 0.6517473657677497, |
|
"eval_loss": 1.8211960792541504, |
|
"eval_runtime": 232.1504, |
|
"eval_samples_per_second": 21.538, |
|
"eval_steps_per_second": 2.692, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 35.4, |
|
"learning_rate": 2.6400844827034466e-05, |
|
"loss": 1.6873, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 35.4, |
|
"eval_accuracy": 0.6531137724550898, |
|
"eval_loss": 1.811033010482788, |
|
"eval_runtime": 231.7269, |
|
"eval_samples_per_second": 21.577, |
|
"eval_steps_per_second": 2.697, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 35.6, |
|
"learning_rate": 2.626750722689793e-05, |
|
"loss": 1.684, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 35.6, |
|
"eval_accuracy": 0.6529288870496202, |
|
"eval_loss": 1.8144562244415283, |
|
"eval_runtime": 231.5171, |
|
"eval_samples_per_second": 21.597, |
|
"eval_steps_per_second": 2.7, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 35.8, |
|
"learning_rate": 2.613416962676139e-05, |
|
"loss": 1.6802, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 35.8, |
|
"eval_accuracy": 0.6537266770878372, |
|
"eval_loss": 1.8046207427978516, |
|
"eval_runtime": 232.1963, |
|
"eval_samples_per_second": 21.534, |
|
"eval_steps_per_second": 2.692, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"learning_rate": 2.6000832026624855e-05, |
|
"loss": 1.6807, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.654968497660255, |
|
"eval_loss": 1.8015984296798706, |
|
"eval_runtime": 232.2236, |
|
"eval_samples_per_second": 21.531, |
|
"eval_steps_per_second": 2.691, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 36.2, |
|
"learning_rate": 2.5867494426488315e-05, |
|
"loss": 1.6612, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 36.2, |
|
"eval_accuracy": 0.653872168300964, |
|
"eval_loss": 1.7996736764907837, |
|
"eval_runtime": 231.7061, |
|
"eval_samples_per_second": 21.579, |
|
"eval_steps_per_second": 2.697, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 36.4, |
|
"learning_rate": 2.573415682635178e-05, |
|
"loss": 1.6586, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 36.4, |
|
"eval_accuracy": 0.6536951243476204, |
|
"eval_loss": 1.8013685941696167, |
|
"eval_runtime": 232.2979, |
|
"eval_samples_per_second": 21.524, |
|
"eval_steps_per_second": 2.691, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 36.6, |
|
"learning_rate": 2.560081922621524e-05, |
|
"loss": 1.658, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 36.6, |
|
"eval_accuracy": 0.6565445408983954, |
|
"eval_loss": 1.7937599420547485, |
|
"eval_runtime": 231.4385, |
|
"eval_samples_per_second": 21.604, |
|
"eval_steps_per_second": 2.701, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 36.8, |
|
"learning_rate": 2.5467481626078705e-05, |
|
"loss": 1.6623, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 36.8, |
|
"eval_accuracy": 0.65856217010757, |
|
"eval_loss": 1.7775884866714478, |
|
"eval_runtime": 232.2205, |
|
"eval_samples_per_second": 21.531, |
|
"eval_steps_per_second": 2.691, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"learning_rate": 2.5334144025942165e-05, |
|
"loss": 1.6618, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.6573262777234046, |
|
"eval_loss": 1.7884029150009155, |
|
"eval_runtime": 231.6843, |
|
"eval_samples_per_second": 21.581, |
|
"eval_steps_per_second": 2.698, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 37.2, |
|
"learning_rate": 2.520080642580563e-05, |
|
"loss": 1.6453, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 37.2, |
|
"eval_accuracy": 0.6571095648699937, |
|
"eval_loss": 1.7870711088180542, |
|
"eval_runtime": 231.5772, |
|
"eval_samples_per_second": 21.591, |
|
"eval_steps_per_second": 2.699, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 37.4, |
|
"learning_rate": 2.5067468825669088e-05, |
|
"loss": 1.6462, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 37.4, |
|
"eval_accuracy": 0.6584839807589097, |
|
"eval_loss": 1.7780765295028687, |
|
"eval_runtime": 231.8264, |
|
"eval_samples_per_second": 21.568, |
|
"eval_steps_per_second": 2.696, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 37.6, |
|
"learning_rate": 2.493413122553255e-05, |
|
"loss": 1.6353, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 37.6, |
|
"eval_accuracy": 0.6583383343335334, |
|
"eval_loss": 1.780821442604065, |
|
"eval_runtime": 231.9584, |
|
"eval_samples_per_second": 21.556, |
|
"eval_steps_per_second": 2.694, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 37.8, |
|
"learning_rate": 2.480079362539601e-05, |
|
"loss": 1.6507, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 37.8, |
|
"eval_accuracy": 0.660343714861709, |
|
"eval_loss": 1.766579508781433, |
|
"eval_runtime": 232.2004, |
|
"eval_samples_per_second": 21.533, |
|
"eval_steps_per_second": 2.692, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"learning_rate": 2.4667456025259475e-05, |
|
"loss": 1.6383, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.6605969144871192, |
|
"eval_loss": 1.762445330619812, |
|
"eval_runtime": 231.2813, |
|
"eval_samples_per_second": 21.619, |
|
"eval_steps_per_second": 2.702, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 38.2, |
|
"learning_rate": 2.4534118425122938e-05, |
|
"loss": 1.6299, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 38.2, |
|
"eval_accuracy": 0.6605303776246108, |
|
"eval_loss": 1.7652736902236938, |
|
"eval_runtime": 233.0257, |
|
"eval_samples_per_second": 21.457, |
|
"eval_steps_per_second": 2.682, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 38.4, |
|
"learning_rate": 2.4400780824986398e-05, |
|
"loss": 1.6085, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 38.4, |
|
"eval_accuracy": 0.6610363508099403, |
|
"eval_loss": 1.752347469329834, |
|
"eval_runtime": 233.1846, |
|
"eval_samples_per_second": 21.442, |
|
"eval_steps_per_second": 2.68, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 38.6, |
|
"learning_rate": 2.426744322484986e-05, |
|
"loss": 1.6155, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 38.6, |
|
"eval_accuracy": 0.6612273667271772, |
|
"eval_loss": 1.7521241903305054, |
|
"eval_runtime": 232.3209, |
|
"eval_samples_per_second": 21.522, |
|
"eval_steps_per_second": 2.69, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 38.8, |
|
"learning_rate": 2.4134105624713325e-05, |
|
"loss": 1.6106, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 38.8, |
|
"eval_accuracy": 0.6604985873358816, |
|
"eval_loss": 1.763405442237854, |
|
"eval_runtime": 232.258, |
|
"eval_samples_per_second": 21.528, |
|
"eval_steps_per_second": 2.691, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"learning_rate": 2.4000768024576788e-05, |
|
"loss": 1.6201, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.6624809349278767, |
|
"eval_loss": 1.7461175918579102, |
|
"eval_runtime": 231.8365, |
|
"eval_samples_per_second": 21.567, |
|
"eval_steps_per_second": 2.696, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 39.2, |
|
"learning_rate": 2.3867430424440248e-05, |
|
"loss": 1.5835, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 39.2, |
|
"eval_accuracy": 0.6617336081815064, |
|
"eval_loss": 1.750459909439087, |
|
"eval_runtime": 232.4138, |
|
"eval_samples_per_second": 21.513, |
|
"eval_steps_per_second": 2.689, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 39.4, |
|
"learning_rate": 2.373409282430371e-05, |
|
"loss": 1.5885, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 39.4, |
|
"eval_accuracy": 0.6622954419917079, |
|
"eval_loss": 1.7476534843444824, |
|
"eval_runtime": 232.0419, |
|
"eval_samples_per_second": 21.548, |
|
"eval_steps_per_second": 2.693, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 39.6, |
|
"learning_rate": 2.3600755224167174e-05, |
|
"loss": 1.5988, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 39.6, |
|
"eval_accuracy": 0.6634760353188246, |
|
"eval_loss": 1.7444517612457275, |
|
"eval_runtime": 232.4705, |
|
"eval_samples_per_second": 21.508, |
|
"eval_steps_per_second": 2.689, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 39.8, |
|
"learning_rate": 2.3467417624030638e-05, |
|
"loss": 1.6013, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 39.8, |
|
"eval_accuracy": 0.6637441413471821, |
|
"eval_loss": 1.7407046556472778, |
|
"eval_runtime": 231.1683, |
|
"eval_samples_per_second": 21.629, |
|
"eval_steps_per_second": 2.704, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 2.33340800238941e-05, |
|
"loss": 1.594, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.6655929575351373, |
|
"eval_loss": 1.7335906028747559, |
|
"eval_runtime": 231.9333, |
|
"eval_samples_per_second": 21.558, |
|
"eval_steps_per_second": 2.695, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 40.2, |
|
"learning_rate": 2.3200742423757564e-05, |
|
"loss": 1.5741, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 40.2, |
|
"eval_accuracy": 0.6636754906359572, |
|
"eval_loss": 1.734755516052246, |
|
"eval_runtime": 232.1572, |
|
"eval_samples_per_second": 21.537, |
|
"eval_steps_per_second": 2.692, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 40.4, |
|
"learning_rate": 2.3067404823621024e-05, |
|
"loss": 1.5744, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 40.4, |
|
"eval_accuracy": 0.6653410869753469, |
|
"eval_loss": 1.7242318391799927, |
|
"eval_runtime": 232.2909, |
|
"eval_samples_per_second": 21.525, |
|
"eval_steps_per_second": 2.691, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 40.6, |
|
"learning_rate": 2.2934067223484487e-05, |
|
"loss": 1.5809, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 40.6, |
|
"eval_accuracy": 0.6660700357978522, |
|
"eval_loss": 1.726152777671814, |
|
"eval_runtime": 231.4564, |
|
"eval_samples_per_second": 21.602, |
|
"eval_steps_per_second": 2.7, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 40.8, |
|
"learning_rate": 2.280072962334795e-05, |
|
"loss": 1.5723, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 40.8, |
|
"eval_accuracy": 0.6664572676418679, |
|
"eval_loss": 1.7256526947021484, |
|
"eval_runtime": 231.2938, |
|
"eval_samples_per_second": 21.618, |
|
"eval_steps_per_second": 2.702, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"learning_rate": 2.266739202321141e-05, |
|
"loss": 1.5695, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.6664388539576068, |
|
"eval_loss": 1.71815824508667, |
|
"eval_runtime": 232.2361, |
|
"eval_samples_per_second": 21.53, |
|
"eval_steps_per_second": 2.691, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 41.2, |
|
"learning_rate": 2.2534054423074874e-05, |
|
"loss": 1.5462, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 41.2, |
|
"eval_accuracy": 0.6659556456182217, |
|
"eval_loss": 1.7257441282272339, |
|
"eval_runtime": 232.4495, |
|
"eval_samples_per_second": 21.51, |
|
"eval_steps_per_second": 2.689, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 41.4, |
|
"learning_rate": 2.2400716822938337e-05, |
|
"loss": 1.5545, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 41.4, |
|
"eval_accuracy": 0.6685517195414556, |
|
"eval_loss": 1.710143804550171, |
|
"eval_runtime": 231.3349, |
|
"eval_samples_per_second": 21.614, |
|
"eval_steps_per_second": 2.702, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 41.6, |
|
"learning_rate": 2.2267379222801797e-05, |
|
"loss": 1.5574, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 41.6, |
|
"eval_accuracy": 0.6684205612462655, |
|
"eval_loss": 1.7108157873153687, |
|
"eval_runtime": 231.6198, |
|
"eval_samples_per_second": 21.587, |
|
"eval_steps_per_second": 2.698, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 41.8, |
|
"learning_rate": 2.213404162266526e-05, |
|
"loss": 1.5485, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 41.8, |
|
"eval_accuracy": 0.6665381047535768, |
|
"eval_loss": 1.7164138555526733, |
|
"eval_runtime": 231.1733, |
|
"eval_samples_per_second": 21.629, |
|
"eval_steps_per_second": 2.704, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"learning_rate": 2.2000704022528724e-05, |
|
"loss": 1.5487, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.6693558557063685, |
|
"eval_loss": 1.7079566717147827, |
|
"eval_runtime": 231.1865, |
|
"eval_samples_per_second": 21.628, |
|
"eval_steps_per_second": 2.703, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 42.2, |
|
"learning_rate": 2.1867366422392187e-05, |
|
"loss": 1.5278, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 42.2, |
|
"eval_accuracy": 0.6685844037369237, |
|
"eval_loss": 1.709234595298767, |
|
"eval_runtime": 232.3941, |
|
"eval_samples_per_second": 21.515, |
|
"eval_steps_per_second": 2.689, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 42.4, |
|
"learning_rate": 2.1734028822255647e-05, |
|
"loss": 1.5282, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 42.4, |
|
"eval_accuracy": 0.6689898717577476, |
|
"eval_loss": 1.7052183151245117, |
|
"eval_runtime": 231.6514, |
|
"eval_samples_per_second": 21.584, |
|
"eval_steps_per_second": 2.698, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 42.6, |
|
"learning_rate": 2.160069122211911e-05, |
|
"loss": 1.5468, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 42.6, |
|
"eval_accuracy": 0.6704241186507764, |
|
"eval_loss": 1.7057934999465942, |
|
"eval_runtime": 232.2002, |
|
"eval_samples_per_second": 21.533, |
|
"eval_steps_per_second": 2.692, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 42.8, |
|
"learning_rate": 2.1467353621982573e-05, |
|
"loss": 1.5375, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 42.8, |
|
"eval_accuracy": 0.6689407289213416, |
|
"eval_loss": 1.7019603252410889, |
|
"eval_runtime": 231.6252, |
|
"eval_samples_per_second": 21.587, |
|
"eval_steps_per_second": 2.698, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"learning_rate": 2.1334016021846033e-05, |
|
"loss": 1.5301, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.67096851406744, |
|
"eval_loss": 1.694966435432434, |
|
"eval_runtime": 231.3192, |
|
"eval_samples_per_second": 21.615, |
|
"eval_steps_per_second": 2.702, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 43.2, |
|
"learning_rate": 2.1200678421709497e-05, |
|
"loss": 1.5224, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 43.2, |
|
"eval_accuracy": 0.6701957265809324, |
|
"eval_loss": 1.6989938020706177, |
|
"eval_runtime": 231.5003, |
|
"eval_samples_per_second": 21.598, |
|
"eval_steps_per_second": 2.7, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 43.4, |
|
"learning_rate": 2.106734082157296e-05, |
|
"loss": 1.5105, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 43.4, |
|
"eval_accuracy": 0.6714945598779761, |
|
"eval_loss": 1.691909909248352, |
|
"eval_runtime": 232.1406, |
|
"eval_samples_per_second": 21.539, |
|
"eval_steps_per_second": 2.692, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 43.6, |
|
"learning_rate": 2.093400322143642e-05, |
|
"loss": 1.5179, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 43.6, |
|
"eval_accuracy": 0.6724466215607879, |
|
"eval_loss": 1.6844831705093384, |
|
"eval_runtime": 231.9646, |
|
"eval_samples_per_second": 21.555, |
|
"eval_steps_per_second": 2.694, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 43.8, |
|
"learning_rate": 2.0800665621299883e-05, |
|
"loss": 1.518, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 43.8, |
|
"eval_accuracy": 0.6721125557875637, |
|
"eval_loss": 1.6838468313217163, |
|
"eval_runtime": 232.4955, |
|
"eval_samples_per_second": 21.506, |
|
"eval_steps_per_second": 2.688, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"learning_rate": 2.0667328021163346e-05, |
|
"loss": 1.5191, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.6715394411366622, |
|
"eval_loss": 1.6877400875091553, |
|
"eval_runtime": 231.8955, |
|
"eval_samples_per_second": 21.561, |
|
"eval_steps_per_second": 2.695, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 44.2, |
|
"learning_rate": 2.053399042102681e-05, |
|
"loss": 1.4984, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 44.2, |
|
"eval_accuracy": 0.6712061803124136, |
|
"eval_loss": 1.692290186882019, |
|
"eval_runtime": 231.7743, |
|
"eval_samples_per_second": 21.573, |
|
"eval_steps_per_second": 2.697, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 44.4, |
|
"learning_rate": 2.040065282089027e-05, |
|
"loss": 1.5051, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 44.4, |
|
"eval_accuracy": 0.6722261856343719, |
|
"eval_loss": 1.684216022491455, |
|
"eval_runtime": 232.2158, |
|
"eval_samples_per_second": 21.532, |
|
"eval_steps_per_second": 2.691, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 44.6, |
|
"learning_rate": 2.0267315220753733e-05, |
|
"loss": 1.4993, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 44.6, |
|
"eval_accuracy": 0.6741077814035333, |
|
"eval_loss": 1.6768300533294678, |
|
"eval_runtime": 232.5116, |
|
"eval_samples_per_second": 21.504, |
|
"eval_steps_per_second": 2.688, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 44.8, |
|
"learning_rate": 2.0133977620617196e-05, |
|
"loss": 1.5035, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 44.8, |
|
"eval_accuracy": 0.6727453029224042, |
|
"eval_loss": 1.6816706657409668, |
|
"eval_runtime": 232.9219, |
|
"eval_samples_per_second": 21.466, |
|
"eval_steps_per_second": 2.683, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"learning_rate": 2.0000640020480656e-05, |
|
"loss": 1.5047, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.6732629405102925, |
|
"eval_loss": 1.6727675199508667, |
|
"eval_runtime": 233.5892, |
|
"eval_samples_per_second": 21.405, |
|
"eval_steps_per_second": 2.676, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 45.2, |
|
"learning_rate": 1.986730242034412e-05, |
|
"loss": 1.4788, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 45.2, |
|
"eval_accuracy": 0.6720030354064183, |
|
"eval_loss": 1.6824595928192139, |
|
"eval_runtime": 232.4735, |
|
"eval_samples_per_second": 21.508, |
|
"eval_steps_per_second": 2.688, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 45.4, |
|
"learning_rate": 1.9733964820207583e-05, |
|
"loss": 1.4841, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 45.4, |
|
"eval_accuracy": 0.6734870787809277, |
|
"eval_loss": 1.6770191192626953, |
|
"eval_runtime": 232.5524, |
|
"eval_samples_per_second": 21.501, |
|
"eval_steps_per_second": 2.688, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 45.6, |
|
"learning_rate": 1.9600627220071042e-05, |
|
"loss": 1.4863, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 45.6, |
|
"eval_accuracy": 0.6752743985944378, |
|
"eval_loss": 1.6587978601455688, |
|
"eval_runtime": 231.3799, |
|
"eval_samples_per_second": 21.609, |
|
"eval_steps_per_second": 2.701, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 45.8, |
|
"learning_rate": 1.9467289619934506e-05, |
|
"loss": 1.4859, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 45.8, |
|
"eval_accuracy": 0.6741216184724784, |
|
"eval_loss": 1.6681159734725952, |
|
"eval_runtime": 232.5066, |
|
"eval_samples_per_second": 21.505, |
|
"eval_steps_per_second": 2.688, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"learning_rate": 1.933395201979797e-05, |
|
"loss": 1.4839, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.67397265751252, |
|
"eval_loss": 1.6657896041870117, |
|
"eval_runtime": 231.5604, |
|
"eval_samples_per_second": 21.593, |
|
"eval_steps_per_second": 2.699, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 46.2, |
|
"learning_rate": 1.9200614419661432e-05, |
|
"loss": 1.4633, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 46.2, |
|
"eval_accuracy": 0.6765235484774519, |
|
"eval_loss": 1.6601226329803467, |
|
"eval_runtime": 231.3619, |
|
"eval_samples_per_second": 21.611, |
|
"eval_steps_per_second": 2.701, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 46.4, |
|
"learning_rate": 1.9067276819524892e-05, |
|
"loss": 1.4725, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 46.4, |
|
"eval_accuracy": 0.6752809587927104, |
|
"eval_loss": 1.6586965322494507, |
|
"eval_runtime": 231.6681, |
|
"eval_samples_per_second": 21.583, |
|
"eval_steps_per_second": 2.698, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 46.6, |
|
"learning_rate": 1.8933939219388355e-05, |
|
"loss": 1.4703, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 46.6, |
|
"eval_accuracy": 0.6755936231903356, |
|
"eval_loss": 1.664272665977478, |
|
"eval_runtime": 231.4478, |
|
"eval_samples_per_second": 21.603, |
|
"eval_steps_per_second": 2.7, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 46.8, |
|
"learning_rate": 1.880060161925182e-05, |
|
"loss": 1.4763, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 46.8, |
|
"eval_accuracy": 0.6759434654429518, |
|
"eval_loss": 1.6583046913146973, |
|
"eval_runtime": 231.3856, |
|
"eval_samples_per_second": 21.609, |
|
"eval_steps_per_second": 2.701, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"learning_rate": 1.866726401911528e-05, |
|
"loss": 1.4825, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.6766304076891632, |
|
"eval_loss": 1.6487648487091064, |
|
"eval_runtime": 231.4489, |
|
"eval_samples_per_second": 21.603, |
|
"eval_steps_per_second": 2.7, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 47.2, |
|
"learning_rate": 1.8533926418978742e-05, |
|
"loss": 1.4496, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 47.2, |
|
"eval_accuracy": 0.6772409050087971, |
|
"eval_loss": 1.648973822593689, |
|
"eval_runtime": 232.2521, |
|
"eval_samples_per_second": 21.528, |
|
"eval_steps_per_second": 2.691, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 47.4, |
|
"learning_rate": 1.8400588818842205e-05, |
|
"loss": 1.457, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 47.4, |
|
"eval_accuracy": 0.6777788151191017, |
|
"eval_loss": 1.6462332010269165, |
|
"eval_runtime": 231.6165, |
|
"eval_samples_per_second": 21.587, |
|
"eval_steps_per_second": 2.698, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 47.6, |
|
"learning_rate": 1.8267251218705665e-05, |
|
"loss": 1.4541, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 47.6, |
|
"eval_accuracy": 0.6799176285554341, |
|
"eval_loss": 1.63682222366333, |
|
"eval_runtime": 232.1826, |
|
"eval_samples_per_second": 21.535, |
|
"eval_steps_per_second": 2.692, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 47.8, |
|
"learning_rate": 1.813391361856913e-05, |
|
"loss": 1.4561, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 47.8, |
|
"eval_accuracy": 0.6777792584202286, |
|
"eval_loss": 1.6403781175613403, |
|
"eval_runtime": 231.5617, |
|
"eval_samples_per_second": 21.593, |
|
"eval_steps_per_second": 2.699, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"learning_rate": 1.8000576018432592e-05, |
|
"loss": 1.4547, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.6790187161680793, |
|
"eval_loss": 1.638481855392456, |
|
"eval_runtime": 231.6285, |
|
"eval_samples_per_second": 21.586, |
|
"eval_steps_per_second": 2.698, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 48.2, |
|
"learning_rate": 1.7867238418296055e-05, |
|
"loss": 1.4406, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 48.2, |
|
"eval_accuracy": 0.6799383927561591, |
|
"eval_loss": 1.6374049186706543, |
|
"eval_runtime": 231.2859, |
|
"eval_samples_per_second": 21.618, |
|
"eval_steps_per_second": 2.702, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 48.4, |
|
"learning_rate": 1.7733900818159515e-05, |
|
"loss": 1.4374, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 48.4, |
|
"eval_accuracy": 0.6799176059126742, |
|
"eval_loss": 1.6319433450698853, |
|
"eval_runtime": 231.6988, |
|
"eval_samples_per_second": 21.58, |
|
"eval_steps_per_second": 2.697, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 48.6, |
|
"learning_rate": 1.7600563218022978e-05, |
|
"loss": 1.4395, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 48.6, |
|
"eval_accuracy": 0.6786781490626186, |
|
"eval_loss": 1.642459511756897, |
|
"eval_runtime": 231.6229, |
|
"eval_samples_per_second": 21.587, |
|
"eval_steps_per_second": 2.698, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 48.8, |
|
"learning_rate": 1.746722561788644e-05, |
|
"loss": 1.4347, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 48.8, |
|
"eval_accuracy": 0.6814051123983632, |
|
"eval_loss": 1.6252304315567017, |
|
"eval_runtime": 232.473, |
|
"eval_samples_per_second": 21.508, |
|
"eval_steps_per_second": 2.688, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"learning_rate": 1.73338880177499e-05, |
|
"loss": 1.4392, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.6801429877795327, |
|
"eval_loss": 1.635969638824463, |
|
"eval_runtime": 231.885, |
|
"eval_samples_per_second": 21.562, |
|
"eval_steps_per_second": 2.695, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 49.2, |
|
"learning_rate": 1.7200550417613365e-05, |
|
"loss": 1.439, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 49.2, |
|
"eval_accuracy": 0.68263718810269, |
|
"eval_loss": 1.6232901811599731, |
|
"eval_runtime": 232.2203, |
|
"eval_samples_per_second": 21.531, |
|
"eval_steps_per_second": 2.691, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 49.4, |
|
"learning_rate": 1.7067212817476828e-05, |
|
"loss": 1.4223, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 49.4, |
|
"eval_accuracy": 0.6809202734397155, |
|
"eval_loss": 1.6261595487594604, |
|
"eval_runtime": 231.4902, |
|
"eval_samples_per_second": 21.599, |
|
"eval_steps_per_second": 2.7, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 49.6, |
|
"learning_rate": 1.6933875217340288e-05, |
|
"loss": 1.4292, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 49.6, |
|
"eval_accuracy": 0.6810616944045385, |
|
"eval_loss": 1.6292299032211304, |
|
"eval_runtime": 231.4763, |
|
"eval_samples_per_second": 21.6, |
|
"eval_steps_per_second": 2.7, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 49.8, |
|
"learning_rate": 1.680053761720375e-05, |
|
"loss": 1.4237, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 49.8, |
|
"eval_accuracy": 0.6812210767774028, |
|
"eval_loss": 1.6226879358291626, |
|
"eval_runtime": 231.3771, |
|
"eval_samples_per_second": 21.61, |
|
"eval_steps_per_second": 2.701, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 1.6667200017067214e-05, |
|
"loss": 1.4241, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.6810288384521558, |
|
"eval_loss": 1.6230130195617676, |
|
"eval_runtime": 231.4942, |
|
"eval_samples_per_second": 21.599, |
|
"eval_steps_per_second": 2.7, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 50.2, |
|
"learning_rate": 1.6533862416930678e-05, |
|
"loss": 1.4118, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 50.2, |
|
"eval_accuracy": 0.6822409697872935, |
|
"eval_loss": 1.6255977153778076, |
|
"eval_runtime": 231.568, |
|
"eval_samples_per_second": 21.592, |
|
"eval_steps_per_second": 2.699, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 50.4, |
|
"learning_rate": 1.6400524816794138e-05, |
|
"loss": 1.4225, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 50.4, |
|
"eval_accuracy": 0.6816822007419128, |
|
"eval_loss": 1.6251336336135864, |
|
"eval_runtime": 231.5464, |
|
"eval_samples_per_second": 21.594, |
|
"eval_steps_per_second": 2.699, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 50.6, |
|
"learning_rate": 1.62671872166576e-05, |
|
"loss": 1.4122, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 50.6, |
|
"eval_accuracy": 0.6827178759221865, |
|
"eval_loss": 1.6178197860717773, |
|
"eval_runtime": 232.3558, |
|
"eval_samples_per_second": 21.519, |
|
"eval_steps_per_second": 2.69, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 50.8, |
|
"learning_rate": 1.6133849616521064e-05, |
|
"loss": 1.4081, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 50.8, |
|
"eval_accuracy": 0.6812576306483431, |
|
"eval_loss": 1.6189851760864258, |
|
"eval_runtime": 231.411, |
|
"eval_samples_per_second": 21.607, |
|
"eval_steps_per_second": 2.701, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"learning_rate": 1.6000512016384524e-05, |
|
"loss": 1.4058, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.6835776111558344, |
|
"eval_loss": 1.6182929277420044, |
|
"eval_runtime": 231.4214, |
|
"eval_samples_per_second": 21.606, |
|
"eval_steps_per_second": 2.701, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 51.2, |
|
"learning_rate": 1.5867174416247987e-05, |
|
"loss": 1.3985, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 51.2, |
|
"eval_accuracy": 0.6816691554670649, |
|
"eval_loss": 1.619876503944397, |
|
"eval_runtime": 232.0714, |
|
"eval_samples_per_second": 21.545, |
|
"eval_steps_per_second": 2.693, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 51.4, |
|
"learning_rate": 1.573383681611145e-05, |
|
"loss": 1.3967, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 51.4, |
|
"eval_accuracy": 0.6829394364835076, |
|
"eval_loss": 1.6167734861373901, |
|
"eval_runtime": 233.6677, |
|
"eval_samples_per_second": 21.398, |
|
"eval_steps_per_second": 2.675, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 51.6, |
|
"learning_rate": 1.560049921597491e-05, |
|
"loss": 1.4113, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 51.6, |
|
"eval_accuracy": 0.6831813209285004, |
|
"eval_loss": 1.612316608428955, |
|
"eval_runtime": 232.8487, |
|
"eval_samples_per_second": 21.473, |
|
"eval_steps_per_second": 2.684, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 51.8, |
|
"learning_rate": 1.5467161615838374e-05, |
|
"loss": 1.3876, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 51.8, |
|
"eval_accuracy": 0.6841063178736425, |
|
"eval_loss": 1.6078130006790161, |
|
"eval_runtime": 232.4944, |
|
"eval_samples_per_second": 21.506, |
|
"eval_steps_per_second": 2.688, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"learning_rate": 1.5333824015701837e-05, |
|
"loss": 1.4027, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.6846912751008356, |
|
"eval_loss": 1.6028109788894653, |
|
"eval_runtime": 232.0149, |
|
"eval_samples_per_second": 21.55, |
|
"eval_steps_per_second": 2.694, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 52.2, |
|
"learning_rate": 1.5200486415565299e-05, |
|
"loss": 1.3939, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 52.2, |
|
"eval_accuracy": 0.6844753106931615, |
|
"eval_loss": 1.6081105470657349, |
|
"eval_runtime": 231.5632, |
|
"eval_samples_per_second": 21.592, |
|
"eval_steps_per_second": 2.699, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 52.4, |
|
"learning_rate": 1.506714881542876e-05, |
|
"loss": 1.391, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 52.4, |
|
"eval_accuracy": 0.6848551411978101, |
|
"eval_loss": 1.6033966541290283, |
|
"eval_runtime": 232.425, |
|
"eval_samples_per_second": 21.512, |
|
"eval_steps_per_second": 2.689, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 52.6, |
|
"learning_rate": 1.4933811215292224e-05, |
|
"loss": 1.3895, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 52.6, |
|
"eval_accuracy": 0.6849530771970824, |
|
"eval_loss": 1.6016286611557007, |
|
"eval_runtime": 231.6757, |
|
"eval_samples_per_second": 21.582, |
|
"eval_steps_per_second": 2.698, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 52.8, |
|
"learning_rate": 1.4800473615155685e-05, |
|
"loss": 1.3858, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 52.8, |
|
"eval_accuracy": 0.6847438399813156, |
|
"eval_loss": 1.6009891033172607, |
|
"eval_runtime": 231.6508, |
|
"eval_samples_per_second": 21.584, |
|
"eval_steps_per_second": 2.698, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"learning_rate": 1.4667136015019148e-05, |
|
"loss": 1.3852, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.6862125510988115, |
|
"eval_loss": 1.5885919332504272, |
|
"eval_runtime": 231.798, |
|
"eval_samples_per_second": 21.571, |
|
"eval_steps_per_second": 2.696, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 53.2, |
|
"learning_rate": 1.453379841488261e-05, |
|
"loss": 1.3716, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 53.2, |
|
"eval_accuracy": 0.686239673683405, |
|
"eval_loss": 1.596373438835144, |
|
"eval_runtime": 232.5907, |
|
"eval_samples_per_second": 21.497, |
|
"eval_steps_per_second": 2.687, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 53.4, |
|
"learning_rate": 1.4400460814746072e-05, |
|
"loss": 1.3727, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 53.4, |
|
"eval_accuracy": 0.6875449053992921, |
|
"eval_loss": 1.5952101945877075, |
|
"eval_runtime": 231.5617, |
|
"eval_samples_per_second": 21.593, |
|
"eval_steps_per_second": 2.699, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 53.6, |
|
"learning_rate": 1.4267123214609535e-05, |
|
"loss": 1.3656, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 53.6, |
|
"eval_accuracy": 0.6850110757234091, |
|
"eval_loss": 1.6030853986740112, |
|
"eval_runtime": 232.6646, |
|
"eval_samples_per_second": 21.49, |
|
"eval_steps_per_second": 2.686, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 53.8, |
|
"learning_rate": 1.4133785614472997e-05, |
|
"loss": 1.3873, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 53.8, |
|
"eval_accuracy": 0.6867045238563649, |
|
"eval_loss": 1.5926910638809204, |
|
"eval_runtime": 231.8443, |
|
"eval_samples_per_second": 21.566, |
|
"eval_steps_per_second": 2.696, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"learning_rate": 1.400044801433646e-05, |
|
"loss": 1.3742, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.6858065721683885, |
|
"eval_loss": 1.596977949142456, |
|
"eval_runtime": 231.6888, |
|
"eval_samples_per_second": 21.581, |
|
"eval_steps_per_second": 2.698, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 54.2, |
|
"learning_rate": 1.3867110414199921e-05, |
|
"loss": 1.3687, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 54.2, |
|
"eval_accuracy": 0.6863366864299514, |
|
"eval_loss": 1.5954158306121826, |
|
"eval_runtime": 232.5257, |
|
"eval_samples_per_second": 21.503, |
|
"eval_steps_per_second": 2.688, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 54.4, |
|
"learning_rate": 1.3733772814063383e-05, |
|
"loss": 1.359, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 54.4, |
|
"eval_accuracy": 0.6872515010006671, |
|
"eval_loss": 1.5854052305221558, |
|
"eval_runtime": 231.6594, |
|
"eval_samples_per_second": 21.583, |
|
"eval_steps_per_second": 2.698, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 54.6, |
|
"learning_rate": 1.3600435213926846e-05, |
|
"loss": 1.3696, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 54.6, |
|
"eval_accuracy": 0.6877612475473026, |
|
"eval_loss": 1.5902482271194458, |
|
"eval_runtime": 231.5097, |
|
"eval_samples_per_second": 21.597, |
|
"eval_steps_per_second": 2.7, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 54.8, |
|
"learning_rate": 1.3467097613790308e-05, |
|
"loss": 1.38, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 54.8, |
|
"eval_accuracy": 0.6871327341859661, |
|
"eval_loss": 1.5869797468185425, |
|
"eval_runtime": 232.8368, |
|
"eval_samples_per_second": 21.474, |
|
"eval_steps_per_second": 2.684, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"learning_rate": 1.3333760013653771e-05, |
|
"loss": 1.3529, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.6879042788016817, |
|
"eval_loss": 1.5887551307678223, |
|
"eval_runtime": 232.8567, |
|
"eval_samples_per_second": 21.472, |
|
"eval_steps_per_second": 2.684, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 55.2, |
|
"learning_rate": 1.3200422413517233e-05, |
|
"loss": 1.3479, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 55.2, |
|
"eval_accuracy": 0.6888780042450111, |
|
"eval_loss": 1.5719605684280396, |
|
"eval_runtime": 231.8675, |
|
"eval_samples_per_second": 21.564, |
|
"eval_steps_per_second": 2.696, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 55.4, |
|
"learning_rate": 1.3067084813380694e-05, |
|
"loss": 1.3558, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 55.4, |
|
"eval_accuracy": 0.6876944245034697, |
|
"eval_loss": 1.58102285861969, |
|
"eval_runtime": 232.7287, |
|
"eval_samples_per_second": 21.484, |
|
"eval_steps_per_second": 2.686, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 55.6, |
|
"learning_rate": 1.2933747213244158e-05, |
|
"loss": 1.3565, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 55.6, |
|
"eval_accuracy": 0.6909087883376879, |
|
"eval_loss": 1.5686722993850708, |
|
"eval_runtime": 232.323, |
|
"eval_samples_per_second": 21.522, |
|
"eval_steps_per_second": 2.69, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 55.8, |
|
"learning_rate": 1.280040961310762e-05, |
|
"loss": 1.351, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 55.8, |
|
"eval_accuracy": 0.6897409911780831, |
|
"eval_loss": 1.5762348175048828, |
|
"eval_runtime": 232.2112, |
|
"eval_samples_per_second": 21.532, |
|
"eval_steps_per_second": 2.692, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"learning_rate": 1.2667072012971083e-05, |
|
"loss": 1.3698, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.6881191410145602, |
|
"eval_loss": 1.5784891843795776, |
|
"eval_runtime": 232.3446, |
|
"eval_samples_per_second": 21.52, |
|
"eval_steps_per_second": 2.69, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 56.2, |
|
"learning_rate": 1.2533734412834544e-05, |
|
"loss": 1.3388, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 56.2, |
|
"eval_accuracy": 0.6882200986804969, |
|
"eval_loss": 1.5766631364822388, |
|
"eval_runtime": 233.4176, |
|
"eval_samples_per_second": 21.421, |
|
"eval_steps_per_second": 2.678, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 56.4, |
|
"learning_rate": 1.2400396812698006e-05, |
|
"loss": 1.3433, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 56.4, |
|
"eval_accuracy": 0.6896203829799986, |
|
"eval_loss": 1.5752308368682861, |
|
"eval_runtime": 233.3927, |
|
"eval_samples_per_second": 21.423, |
|
"eval_steps_per_second": 2.678, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 56.6, |
|
"learning_rate": 1.2267059212561469e-05, |
|
"loss": 1.3505, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 56.6, |
|
"eval_accuracy": 0.6890066251596424, |
|
"eval_loss": 1.5754320621490479, |
|
"eval_runtime": 231.7194, |
|
"eval_samples_per_second": 21.578, |
|
"eval_steps_per_second": 2.697, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 56.8, |
|
"learning_rate": 1.213372161242493e-05, |
|
"loss": 1.3429, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 56.8, |
|
"eval_accuracy": 0.6895795829707522, |
|
"eval_loss": 1.5772255659103394, |
|
"eval_runtime": 232.2785, |
|
"eval_samples_per_second": 21.526, |
|
"eval_steps_per_second": 2.691, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"learning_rate": 1.2000384012288394e-05, |
|
"loss": 1.337, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.69004771461609, |
|
"eval_loss": 1.5732284784317017, |
|
"eval_runtime": 231.5917, |
|
"eval_samples_per_second": 21.59, |
|
"eval_steps_per_second": 2.699, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 57.2, |
|
"learning_rate": 1.1867046412151855e-05, |
|
"loss": 1.3398, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 57.2, |
|
"eval_accuracy": 0.690381858161266, |
|
"eval_loss": 1.5680711269378662, |
|
"eval_runtime": 232.6848, |
|
"eval_samples_per_second": 21.488, |
|
"eval_steps_per_second": 2.686, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 57.4, |
|
"learning_rate": 1.1733708812015319e-05, |
|
"loss": 1.3334, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 57.4, |
|
"eval_accuracy": 0.690041861053248, |
|
"eval_loss": 1.5696121454238892, |
|
"eval_runtime": 231.7038, |
|
"eval_samples_per_second": 21.579, |
|
"eval_steps_per_second": 2.697, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 57.6, |
|
"learning_rate": 1.1600371211878782e-05, |
|
"loss": 1.3384, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 57.6, |
|
"eval_accuracy": 0.6907987077717064, |
|
"eval_loss": 1.5674421787261963, |
|
"eval_runtime": 231.9577, |
|
"eval_samples_per_second": 21.556, |
|
"eval_steps_per_second": 2.694, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 57.8, |
|
"learning_rate": 1.1467033611742244e-05, |
|
"loss": 1.33, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 57.8, |
|
"eval_accuracy": 0.6915707537755306, |
|
"eval_loss": 1.5592304468154907, |
|
"eval_runtime": 232.0926, |
|
"eval_samples_per_second": 21.543, |
|
"eval_steps_per_second": 2.693, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"learning_rate": 1.1333696011605705e-05, |
|
"loss": 1.327, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.6924366115221547, |
|
"eval_loss": 1.5497514009475708, |
|
"eval_runtime": 232.0721, |
|
"eval_samples_per_second": 21.545, |
|
"eval_steps_per_second": 2.693, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 58.2, |
|
"learning_rate": 1.1200358411469169e-05, |
|
"loss": 1.3234, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 58.2, |
|
"eval_accuracy": 0.6909872386858108, |
|
"eval_loss": 1.5625743865966797, |
|
"eval_runtime": 231.8743, |
|
"eval_samples_per_second": 21.563, |
|
"eval_steps_per_second": 2.695, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 58.4, |
|
"learning_rate": 1.106702081133263e-05, |
|
"loss": 1.3266, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 58.4, |
|
"eval_accuracy": 0.6892814371257485, |
|
"eval_loss": 1.5743225812911987, |
|
"eval_runtime": 232.1904, |
|
"eval_samples_per_second": 21.534, |
|
"eval_steps_per_second": 2.692, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 58.6, |
|
"learning_rate": 1.0933683211196093e-05, |
|
"loss": 1.3152, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 58.6, |
|
"eval_accuracy": 0.6911878441987919, |
|
"eval_loss": 1.567984700202942, |
|
"eval_runtime": 232.1539, |
|
"eval_samples_per_second": 21.537, |
|
"eval_steps_per_second": 2.692, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 58.8, |
|
"learning_rate": 1.0800345611059555e-05, |
|
"loss": 1.3279, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 58.8, |
|
"eval_accuracy": 0.6919366138448707, |
|
"eval_loss": 1.558101773262024, |
|
"eval_runtime": 231.3447, |
|
"eval_samples_per_second": 21.613, |
|
"eval_steps_per_second": 2.702, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"learning_rate": 1.0667008010923017e-05, |
|
"loss": 1.3172, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.691684386719777, |
|
"eval_loss": 1.5645062923431396, |
|
"eval_runtime": 232.1387, |
|
"eval_samples_per_second": 21.539, |
|
"eval_steps_per_second": 2.692, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 59.2, |
|
"learning_rate": 1.053367041078648e-05, |
|
"loss": 1.3073, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 59.2, |
|
"eval_accuracy": 0.6923759867524124, |
|
"eval_loss": 1.5578668117523193, |
|
"eval_runtime": 232.0325, |
|
"eval_samples_per_second": 21.549, |
|
"eval_steps_per_second": 2.694, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 59.4, |
|
"learning_rate": 1.0400332810649941e-05, |
|
"loss": 1.307, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 59.4, |
|
"eval_accuracy": 0.6938674761086743, |
|
"eval_loss": 1.546762466430664, |
|
"eval_runtime": 232.2513, |
|
"eval_samples_per_second": 21.528, |
|
"eval_steps_per_second": 2.691, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 59.6, |
|
"learning_rate": 1.0266995210513405e-05, |
|
"loss": 1.3164, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 59.6, |
|
"eval_accuracy": 0.6929686873814422, |
|
"eval_loss": 1.5518994331359863, |
|
"eval_runtime": 231.1014, |
|
"eval_samples_per_second": 21.636, |
|
"eval_steps_per_second": 2.704, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 59.8, |
|
"learning_rate": 1.0133657610376866e-05, |
|
"loss": 1.3037, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 59.8, |
|
"eval_accuracy": 0.6916677464475209, |
|
"eval_loss": 1.5627799034118652, |
|
"eval_runtime": 232.3485, |
|
"eval_samples_per_second": 21.519, |
|
"eval_steps_per_second": 2.69, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 1.0000320010240328e-05, |
|
"loss": 1.3171, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.6933963519284755, |
|
"eval_loss": 1.5489420890808105, |
|
"eval_runtime": 231.4333, |
|
"eval_samples_per_second": 21.604, |
|
"eval_steps_per_second": 2.701, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 60.2, |
|
"learning_rate": 9.866982410103791e-06, |
|
"loss": 1.3035, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 60.2, |
|
"eval_accuracy": 0.6930868044367229, |
|
"eval_loss": 1.5499061346054077, |
|
"eval_runtime": 232.1383, |
|
"eval_samples_per_second": 21.539, |
|
"eval_steps_per_second": 2.692, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 60.4, |
|
"learning_rate": 9.733644809967253e-06, |
|
"loss": 1.3109, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 60.4, |
|
"eval_accuracy": 0.6922226734934385, |
|
"eval_loss": 1.5608468055725098, |
|
"eval_runtime": 232.2684, |
|
"eval_samples_per_second": 21.527, |
|
"eval_steps_per_second": 2.691, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 60.6, |
|
"learning_rate": 9.600307209830716e-06, |
|
"loss": 1.304, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 60.6, |
|
"eval_accuracy": 0.6914924071448069, |
|
"eval_loss": 1.5611767768859863, |
|
"eval_runtime": 232.4752, |
|
"eval_samples_per_second": 21.508, |
|
"eval_steps_per_second": 2.688, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 60.8, |
|
"learning_rate": 9.466969609694178e-06, |
|
"loss": 1.3104, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 60.8, |
|
"eval_accuracy": 0.6932900396672637, |
|
"eval_loss": 1.551135540008545, |
|
"eval_runtime": 231.5047, |
|
"eval_samples_per_second": 21.598, |
|
"eval_steps_per_second": 2.7, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"learning_rate": 9.33363200955764e-06, |
|
"loss": 1.3071, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.693459881852489, |
|
"eval_loss": 1.5469086170196533, |
|
"eval_runtime": 233.8113, |
|
"eval_samples_per_second": 21.385, |
|
"eval_steps_per_second": 2.673, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 61.2, |
|
"learning_rate": 9.200294409421103e-06, |
|
"loss": 1.2935, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 61.2, |
|
"eval_accuracy": 0.6941589679567207, |
|
"eval_loss": 1.5484933853149414, |
|
"eval_runtime": 232.5867, |
|
"eval_samples_per_second": 21.497, |
|
"eval_steps_per_second": 2.687, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 61.4, |
|
"learning_rate": 9.066956809284564e-06, |
|
"loss": 1.2866, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 61.4, |
|
"eval_accuracy": 0.6940421521647191, |
|
"eval_loss": 1.5462923049926758, |
|
"eval_runtime": 232.5951, |
|
"eval_samples_per_second": 21.497, |
|
"eval_steps_per_second": 2.687, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 61.6, |
|
"learning_rate": 8.933619209148027e-06, |
|
"loss": 1.2926, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 61.6, |
|
"eval_accuracy": 0.6956126566432729, |
|
"eval_loss": 1.540623426437378, |
|
"eval_runtime": 231.888, |
|
"eval_samples_per_second": 21.562, |
|
"eval_steps_per_second": 2.695, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 61.8, |
|
"learning_rate": 8.800281609011489e-06, |
|
"loss": 1.3029, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 61.8, |
|
"eval_accuracy": 0.6944681219450475, |
|
"eval_loss": 1.5423588752746582, |
|
"eval_runtime": 232.5594, |
|
"eval_samples_per_second": 21.5, |
|
"eval_steps_per_second": 2.687, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"learning_rate": 8.66694400887495e-06, |
|
"loss": 1.2921, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.6944209552091726, |
|
"eval_loss": 1.5446220636367798, |
|
"eval_runtime": 232.6244, |
|
"eval_samples_per_second": 21.494, |
|
"eval_steps_per_second": 2.687, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 62.2, |
|
"learning_rate": 8.533606408738414e-06, |
|
"loss": 1.2765, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 62.2, |
|
"eval_accuracy": 0.6952571916908579, |
|
"eval_loss": 1.5396584272384644, |
|
"eval_runtime": 231.3433, |
|
"eval_samples_per_second": 21.613, |
|
"eval_steps_per_second": 2.702, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 62.4, |
|
"learning_rate": 8.400268808601876e-06, |
|
"loss": 1.275, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 62.4, |
|
"eval_accuracy": 0.6944803801909601, |
|
"eval_loss": 1.5469422340393066, |
|
"eval_runtime": 231.6508, |
|
"eval_samples_per_second": 21.584, |
|
"eval_steps_per_second": 2.698, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 62.6, |
|
"learning_rate": 8.266931208465339e-06, |
|
"loss": 1.2909, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 62.6, |
|
"eval_accuracy": 0.69449131348161, |
|
"eval_loss": 1.5426616668701172, |
|
"eval_runtime": 232.6399, |
|
"eval_samples_per_second": 21.492, |
|
"eval_steps_per_second": 2.687, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 62.8, |
|
"learning_rate": 8.1335936083288e-06, |
|
"loss": 1.2869, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 62.8, |
|
"eval_accuracy": 0.6949301108724598, |
|
"eval_loss": 1.5387953519821167, |
|
"eval_runtime": 231.8255, |
|
"eval_samples_per_second": 21.568, |
|
"eval_steps_per_second": 2.696, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"learning_rate": 8.000256008192262e-06, |
|
"loss": 1.2883, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.694801592969147, |
|
"eval_loss": 1.5375311374664307, |
|
"eval_runtime": 231.7315, |
|
"eval_samples_per_second": 21.577, |
|
"eval_steps_per_second": 2.697, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 63.2, |
|
"learning_rate": 7.866918408055725e-06, |
|
"loss": 1.2673, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 63.2, |
|
"eval_accuracy": 0.6947923905923195, |
|
"eval_loss": 1.5423495769500732, |
|
"eval_runtime": 231.5823, |
|
"eval_samples_per_second": 21.591, |
|
"eval_steps_per_second": 2.699, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 63.4, |
|
"learning_rate": 7.733580807919187e-06, |
|
"loss": 1.2754, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 63.4, |
|
"eval_accuracy": 0.6957325609622272, |
|
"eval_loss": 1.5359729528427124, |
|
"eval_runtime": 231.7033, |
|
"eval_samples_per_second": 21.579, |
|
"eval_steps_per_second": 2.697, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 63.6, |
|
"learning_rate": 7.600243207782649e-06, |
|
"loss": 1.2772, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 63.6, |
|
"eval_accuracy": 0.6952351743013445, |
|
"eval_loss": 1.5331131219863892, |
|
"eval_runtime": 232.042, |
|
"eval_samples_per_second": 21.548, |
|
"eval_steps_per_second": 2.693, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 63.8, |
|
"learning_rate": 7.466905607646112e-06, |
|
"loss": 1.283, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 63.8, |
|
"eval_accuracy": 0.6955319177297024, |
|
"eval_loss": 1.5353832244873047, |
|
"eval_runtime": 231.7244, |
|
"eval_samples_per_second": 21.577, |
|
"eval_steps_per_second": 2.697, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"learning_rate": 7.333568007509574e-06, |
|
"loss": 1.2737, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.6960759535765355, |
|
"eval_loss": 1.5387910604476929, |
|
"eval_runtime": 232.0979, |
|
"eval_samples_per_second": 21.543, |
|
"eval_steps_per_second": 2.693, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 64.2, |
|
"learning_rate": 7.200230407373036e-06, |
|
"loss": 1.2681, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 64.2, |
|
"eval_accuracy": 0.695161846872553, |
|
"eval_loss": 1.5381580591201782, |
|
"eval_runtime": 231.5418, |
|
"eval_samples_per_second": 21.594, |
|
"eval_steps_per_second": 2.699, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 64.4, |
|
"learning_rate": 7.066892807236498e-06, |
|
"loss": 1.2769, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 64.4, |
|
"eval_accuracy": 0.6958243929135146, |
|
"eval_loss": 1.535032868385315, |
|
"eval_runtime": 231.507, |
|
"eval_samples_per_second": 21.598, |
|
"eval_steps_per_second": 2.7, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 64.6, |
|
"learning_rate": 6.933555207099961e-06, |
|
"loss": 1.2668, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 64.6, |
|
"eval_accuracy": 0.6956000439629246, |
|
"eval_loss": 1.534529447555542, |
|
"eval_runtime": 231.4208, |
|
"eval_samples_per_second": 21.606, |
|
"eval_steps_per_second": 2.701, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 64.8, |
|
"learning_rate": 6.800217606963423e-06, |
|
"loss": 1.2795, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 64.8, |
|
"eval_accuracy": 0.6983860770024292, |
|
"eval_loss": 1.519789695739746, |
|
"eval_runtime": 232.0063, |
|
"eval_samples_per_second": 21.551, |
|
"eval_steps_per_second": 2.694, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"learning_rate": 6.666880006826886e-06, |
|
"loss": 1.2632, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.696454274062425, |
|
"eval_loss": 1.5323538780212402, |
|
"eval_runtime": 231.6081, |
|
"eval_samples_per_second": 21.588, |
|
"eval_steps_per_second": 2.699, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 65.2, |
|
"learning_rate": 6.533542406690347e-06, |
|
"loss": 1.2646, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 65.2, |
|
"eval_accuracy": 0.6962969375320464, |
|
"eval_loss": 1.5420233011245728, |
|
"eval_runtime": 231.6221, |
|
"eval_samples_per_second": 21.587, |
|
"eval_steps_per_second": 2.698, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 65.4, |
|
"learning_rate": 6.40020480655381e-06, |
|
"loss": 1.2739, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 65.4, |
|
"eval_accuracy": 0.6972270479754109, |
|
"eval_loss": 1.5307587385177612, |
|
"eval_runtime": 231.6515, |
|
"eval_samples_per_second": 21.584, |
|
"eval_steps_per_second": 2.698, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 65.6, |
|
"learning_rate": 6.266867206417272e-06, |
|
"loss": 1.2667, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 65.6, |
|
"eval_accuracy": 0.6978264999683358, |
|
"eval_loss": 1.5220121145248413, |
|
"eval_runtime": 231.7833, |
|
"eval_samples_per_second": 21.572, |
|
"eval_steps_per_second": 2.696, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 65.8, |
|
"learning_rate": 6.1335296062807345e-06, |
|
"loss": 1.26, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 65.8, |
|
"eval_accuracy": 0.6961458718130477, |
|
"eval_loss": 1.5283021926879883, |
|
"eval_runtime": 231.7518, |
|
"eval_samples_per_second": 21.575, |
|
"eval_steps_per_second": 2.697, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"learning_rate": 6.000192006144197e-06, |
|
"loss": 1.2714, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.6977360596277253, |
|
"eval_loss": 1.5235345363616943, |
|
"eval_runtime": 231.6521, |
|
"eval_samples_per_second": 21.584, |
|
"eval_steps_per_second": 2.698, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 66.2, |
|
"learning_rate": 5.866854406007659e-06, |
|
"loss": 1.2652, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 66.2, |
|
"eval_accuracy": 0.6973007712082262, |
|
"eval_loss": 1.5269626379013062, |
|
"eval_runtime": 231.9589, |
|
"eval_samples_per_second": 21.556, |
|
"eval_steps_per_second": 2.694, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 66.4, |
|
"learning_rate": 5.733516805871122e-06, |
|
"loss": 1.2554, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 66.4, |
|
"eval_accuracy": 0.6958699662616962, |
|
"eval_loss": 1.5355974435806274, |
|
"eval_runtime": 231.9704, |
|
"eval_samples_per_second": 21.554, |
|
"eval_steps_per_second": 2.694, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 66.6, |
|
"learning_rate": 5.600179205734584e-06, |
|
"loss": 1.2666, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 66.6, |
|
"eval_accuracy": 0.6968431335413323, |
|
"eval_loss": 1.523085355758667, |
|
"eval_runtime": 231.5172, |
|
"eval_samples_per_second": 21.597, |
|
"eval_steps_per_second": 2.7, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 66.8, |
|
"learning_rate": 5.466841605598047e-06, |
|
"loss": 1.2634, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 66.8, |
|
"eval_accuracy": 0.6994266502566565, |
|
"eval_loss": 1.5169812440872192, |
|
"eval_runtime": 231.7934, |
|
"eval_samples_per_second": 21.571, |
|
"eval_steps_per_second": 2.696, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"learning_rate": 5.333504005461508e-06, |
|
"loss": 1.2485, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.6988332070858084, |
|
"eval_loss": 1.5204789638519287, |
|
"eval_runtime": 232.7606, |
|
"eval_samples_per_second": 21.481, |
|
"eval_steps_per_second": 2.685, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 67.2, |
|
"learning_rate": 5.200166405324971e-06, |
|
"loss": 1.2397, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 67.2, |
|
"eval_accuracy": 0.6982430767582624, |
|
"eval_loss": 1.5262999534606934, |
|
"eval_runtime": 232.7735, |
|
"eval_samples_per_second": 21.48, |
|
"eval_steps_per_second": 2.685, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 67.4, |
|
"learning_rate": 5.066828805188433e-06, |
|
"loss": 1.2416, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 67.4, |
|
"eval_accuracy": 0.7000725051705876, |
|
"eval_loss": 1.5132619142532349, |
|
"eval_runtime": 232.8101, |
|
"eval_samples_per_second": 21.477, |
|
"eval_steps_per_second": 2.685, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 67.6, |
|
"learning_rate": 4.933491205051896e-06, |
|
"loss": 1.2615, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 67.6, |
|
"eval_accuracy": 0.6990387592769552, |
|
"eval_loss": 1.514991283416748, |
|
"eval_runtime": 233.0379, |
|
"eval_samples_per_second": 21.456, |
|
"eval_steps_per_second": 2.682, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 67.8, |
|
"learning_rate": 4.800153604915358e-06, |
|
"loss": 1.254, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 67.8, |
|
"eval_accuracy": 0.6989797619444537, |
|
"eval_loss": 1.5213098526000977, |
|
"eval_runtime": 231.7854, |
|
"eval_samples_per_second": 21.572, |
|
"eval_steps_per_second": 2.696, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"learning_rate": 4.66681600477882e-06, |
|
"loss": 1.2463, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.6994830576307364, |
|
"eval_loss": 1.5156911611557007, |
|
"eval_runtime": 231.8883, |
|
"eval_samples_per_second": 21.562, |
|
"eval_steps_per_second": 2.695, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 68.2, |
|
"learning_rate": 4.533478404642282e-06, |
|
"loss": 1.2412, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 68.2, |
|
"eval_accuracy": 0.7000436351039415, |
|
"eval_loss": 1.508183479309082, |
|
"eval_runtime": 231.4591, |
|
"eval_samples_per_second": 21.602, |
|
"eval_steps_per_second": 2.7, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 68.4, |
|
"learning_rate": 4.4001408045057445e-06, |
|
"loss": 1.248, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 68.4, |
|
"eval_accuracy": 0.6992357860200856, |
|
"eval_loss": 1.516764760017395, |
|
"eval_runtime": 231.362, |
|
"eval_samples_per_second": 21.611, |
|
"eval_steps_per_second": 2.701, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 68.6, |
|
"learning_rate": 4.266803204369207e-06, |
|
"loss": 1.2468, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 68.6, |
|
"eval_accuracy": 0.6990639521278373, |
|
"eval_loss": 1.5186537504196167, |
|
"eval_runtime": 231.4765, |
|
"eval_samples_per_second": 21.6, |
|
"eval_steps_per_second": 2.7, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 68.8, |
|
"learning_rate": 4.1334656042326694e-06, |
|
"loss": 1.246, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 68.8, |
|
"eval_accuracy": 0.7000655096618438, |
|
"eval_loss": 1.50938880443573, |
|
"eval_runtime": 231.6881, |
|
"eval_samples_per_second": 21.581, |
|
"eval_steps_per_second": 2.698, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"learning_rate": 4.000128004096131e-06, |
|
"loss": 1.2443, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.698370463354622, |
|
"eval_loss": 1.5186454057693481, |
|
"eval_runtime": 231.6998, |
|
"eval_samples_per_second": 21.58, |
|
"eval_steps_per_second": 2.697, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 69.2, |
|
"learning_rate": 3.8667904039595935e-06, |
|
"loss": 1.2451, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 69.2, |
|
"eval_accuracy": 0.6988172430355427, |
|
"eval_loss": 1.5156738758087158, |
|
"eval_runtime": 231.6043, |
|
"eval_samples_per_second": 21.589, |
|
"eval_steps_per_second": 2.699, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 69.4, |
|
"learning_rate": 3.733452803823056e-06, |
|
"loss": 1.2375, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 69.4, |
|
"eval_accuracy": 0.7002490046471991, |
|
"eval_loss": 1.5102007389068604, |
|
"eval_runtime": 231.5872, |
|
"eval_samples_per_second": 21.59, |
|
"eval_steps_per_second": 2.699, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 69.6, |
|
"learning_rate": 3.600115203686518e-06, |
|
"loss": 1.2441, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 69.6, |
|
"eval_accuracy": 0.7000056553747683, |
|
"eval_loss": 1.5142953395843506, |
|
"eval_runtime": 231.917, |
|
"eval_samples_per_second": 21.559, |
|
"eval_steps_per_second": 2.695, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 69.8, |
|
"learning_rate": 3.4667776035499804e-06, |
|
"loss": 1.2335, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 69.8, |
|
"eval_accuracy": 0.6984952065719414, |
|
"eval_loss": 1.5172849893569946, |
|
"eval_runtime": 231.8749, |
|
"eval_samples_per_second": 21.563, |
|
"eval_steps_per_second": 2.695, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"learning_rate": 3.333440003413443e-06, |
|
"loss": 1.2361, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.7000938923367872, |
|
"eval_loss": 1.5102351903915405, |
|
"eval_runtime": 231.661, |
|
"eval_samples_per_second": 21.583, |
|
"eval_steps_per_second": 2.698, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 70.2, |
|
"learning_rate": 3.200102403276905e-06, |
|
"loss": 1.23, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 70.2, |
|
"eval_accuracy": 0.6997497747973176, |
|
"eval_loss": 1.515488624572754, |
|
"eval_runtime": 231.3654, |
|
"eval_samples_per_second": 21.611, |
|
"eval_steps_per_second": 2.701, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 70.4, |
|
"learning_rate": 3.0667648031403673e-06, |
|
"loss": 1.2401, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 70.4, |
|
"eval_accuracy": 0.7005253823912052, |
|
"eval_loss": 1.5026744604110718, |
|
"eval_runtime": 231.8606, |
|
"eval_samples_per_second": 21.565, |
|
"eval_steps_per_second": 2.696, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 70.6, |
|
"learning_rate": 2.9334272030038297e-06, |
|
"loss": 1.2346, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 70.6, |
|
"eval_accuracy": 0.699492636060484, |
|
"eval_loss": 1.5123237371444702, |
|
"eval_runtime": 231.4499, |
|
"eval_samples_per_second": 21.603, |
|
"eval_steps_per_second": 2.7, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 70.8, |
|
"learning_rate": 2.800089602867292e-06, |
|
"loss": 1.2306, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 70.8, |
|
"eval_accuracy": 0.6984030423306806, |
|
"eval_loss": 1.515058994293213, |
|
"eval_runtime": 231.9805, |
|
"eval_samples_per_second": 21.554, |
|
"eval_steps_per_second": 2.694, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"learning_rate": 2.666752002730754e-06, |
|
"loss": 1.2333, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.6999873478411421, |
|
"eval_loss": 1.512516736984253, |
|
"eval_runtime": 232.103, |
|
"eval_samples_per_second": 21.542, |
|
"eval_steps_per_second": 2.693, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 71.2, |
|
"learning_rate": 2.5334144025942166e-06, |
|
"loss": 1.2248, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 71.2, |
|
"eval_accuracy": 0.6991863430659604, |
|
"eval_loss": 1.5198755264282227, |
|
"eval_runtime": 231.8985, |
|
"eval_samples_per_second": 21.561, |
|
"eval_steps_per_second": 2.695, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 71.4, |
|
"learning_rate": 2.400076802457679e-06, |
|
"loss": 1.2385, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 71.4, |
|
"eval_accuracy": 0.7000136463336296, |
|
"eval_loss": 1.510788917541504, |
|
"eval_runtime": 231.9773, |
|
"eval_samples_per_second": 21.554, |
|
"eval_steps_per_second": 2.694, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 71.6, |
|
"learning_rate": 2.266739202321141e-06, |
|
"loss": 1.2278, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 71.6, |
|
"eval_accuracy": 0.7000376979559702, |
|
"eval_loss": 1.5091618299484253, |
|
"eval_runtime": 231.9158, |
|
"eval_samples_per_second": 21.56, |
|
"eval_steps_per_second": 2.695, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 71.8, |
|
"learning_rate": 2.1334016021846035e-06, |
|
"loss": 1.2278, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 71.8, |
|
"eval_accuracy": 0.6989206577075939, |
|
"eval_loss": 1.5162503719329834, |
|
"eval_runtime": 232.1036, |
|
"eval_samples_per_second": 21.542, |
|
"eval_steps_per_second": 2.693, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"learning_rate": 2.0000640020480655e-06, |
|
"loss": 1.2242, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.7009711915464922, |
|
"eval_loss": 1.5056333541870117, |
|
"eval_runtime": 231.7421, |
|
"eval_samples_per_second": 21.576, |
|
"eval_steps_per_second": 2.697, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 72.2, |
|
"learning_rate": 1.866726401911528e-06, |
|
"loss": 1.2208, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 72.2, |
|
"eval_accuracy": 0.7022552583268642, |
|
"eval_loss": 1.4967743158340454, |
|
"eval_runtime": 232.2745, |
|
"eval_samples_per_second": 21.526, |
|
"eval_steps_per_second": 2.691, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 72.4, |
|
"learning_rate": 1.7333888017749902e-06, |
|
"loss": 1.2216, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 72.4, |
|
"eval_accuracy": 0.700648538703923, |
|
"eval_loss": 1.5097259283065796, |
|
"eval_runtime": 232.0858, |
|
"eval_samples_per_second": 21.544, |
|
"eval_steps_per_second": 2.693, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 72.6, |
|
"learning_rate": 1.6000512016384524e-06, |
|
"loss": 1.2271, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 72.6, |
|
"eval_accuracy": 0.7012992208675475, |
|
"eval_loss": 1.4988183975219727, |
|
"eval_runtime": 232.0081, |
|
"eval_samples_per_second": 21.551, |
|
"eval_steps_per_second": 2.694, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 72.8, |
|
"learning_rate": 1.4667136015019148e-06, |
|
"loss": 1.2302, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 72.8, |
|
"eval_accuracy": 0.6996726840571242, |
|
"eval_loss": 1.514102578163147, |
|
"eval_runtime": 231.967, |
|
"eval_samples_per_second": 21.555, |
|
"eval_steps_per_second": 2.694, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"learning_rate": 1.333376001365377e-06, |
|
"loss": 1.2268, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.7015639472509312, |
|
"eval_loss": 1.4995648860931396, |
|
"eval_runtime": 232.3566, |
|
"eval_samples_per_second": 21.519, |
|
"eval_steps_per_second": 2.69, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 73.2, |
|
"learning_rate": 1.2000384012288395e-06, |
|
"loss": 1.2258, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 73.2, |
|
"eval_accuracy": 0.7007709925697089, |
|
"eval_loss": 1.501629114151001, |
|
"eval_runtime": 231.7525, |
|
"eval_samples_per_second": 21.575, |
|
"eval_steps_per_second": 2.697, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 73.4, |
|
"learning_rate": 1.0667008010923017e-06, |
|
"loss": 1.2244, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 73.4, |
|
"eval_accuracy": 0.7012282262174829, |
|
"eval_loss": 1.5031931400299072, |
|
"eval_runtime": 231.5977, |
|
"eval_samples_per_second": 21.589, |
|
"eval_steps_per_second": 2.699, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 73.6, |
|
"learning_rate": 9.33363200955764e-07, |
|
"loss": 1.2117, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 73.6, |
|
"eval_accuracy": 0.7002899246996493, |
|
"eval_loss": 1.5096535682678223, |
|
"eval_runtime": 231.7141, |
|
"eval_samples_per_second": 21.578, |
|
"eval_steps_per_second": 2.697, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 73.8, |
|
"learning_rate": 8.000256008192262e-07, |
|
"loss": 1.2279, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 73.8, |
|
"eval_accuracy": 0.7011673514817666, |
|
"eval_loss": 1.5058051347732544, |
|
"eval_runtime": 231.9567, |
|
"eval_samples_per_second": 21.556, |
|
"eval_steps_per_second": 2.694, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"learning_rate": 6.666880006826885e-07, |
|
"loss": 1.2274, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.7014659184827526, |
|
"eval_loss": 1.5029548406600952, |
|
"eval_runtime": 233.2086, |
|
"eval_samples_per_second": 21.44, |
|
"eval_steps_per_second": 2.68, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 74.2, |
|
"learning_rate": 5.333504005461509e-07, |
|
"loss": 1.2117, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 74.2, |
|
"eval_accuracy": 0.700834731208563, |
|
"eval_loss": 1.5085574388504028, |
|
"eval_runtime": 232.873, |
|
"eval_samples_per_second": 21.471, |
|
"eval_steps_per_second": 2.684, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 74.4, |
|
"learning_rate": 4.000128004096131e-07, |
|
"loss": 1.2223, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 74.4, |
|
"eval_accuracy": 0.701793034494251, |
|
"eval_loss": 1.4998304843902588, |
|
"eval_runtime": 233.0397, |
|
"eval_samples_per_second": 21.456, |
|
"eval_steps_per_second": 2.682, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 74.6, |
|
"learning_rate": 2.6667520027307544e-07, |
|
"loss": 1.227, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 74.6, |
|
"eval_accuracy": 0.7014006218321054, |
|
"eval_loss": 1.5059279203414917, |
|
"eval_runtime": 231.9768, |
|
"eval_samples_per_second": 21.554, |
|
"eval_steps_per_second": 2.694, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 74.8, |
|
"learning_rate": 1.3333760013653772e-07, |
|
"loss": 1.2168, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 74.8, |
|
"eval_accuracy": 0.7011164815850444, |
|
"eval_loss": 1.4984267950057983, |
|
"eval_runtime": 231.8422, |
|
"eval_samples_per_second": 21.566, |
|
"eval_steps_per_second": 2.696, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"learning_rate": 0.0, |
|
"loss": 1.2093, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.7017719448166707, |
|
"eval_loss": 1.5077435970306396, |
|
"eval_runtime": 231.8651, |
|
"eval_samples_per_second": 21.564, |
|
"eval_steps_per_second": 2.696, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"step": 187500, |
|
"total_flos": 3.879632807909937e+17, |
|
"train_loss": 2.3812346901041668, |
|
"train_runtime": 293209.3669, |
|
"train_samples_per_second": 5.116, |
|
"train_steps_per_second": 0.639 |
|
} |
|
], |
|
"max_steps": 187500, |
|
"num_train_epochs": 75, |
|
"total_flos": 3.879632807909937e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|