|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 69.3769684356573, |
|
"global_step": 500000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.9999999999999997e-06, |
|
"loss": 0.4842, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 5.999999999999999e-06, |
|
"loss": 0.4793, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 0.45401322841644287, |
|
"eval_runtime": 1.5264, |
|
"eval_samples_per_second": 121.199, |
|
"eval_steps_per_second": 7.862, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 8.999999999999999e-06, |
|
"loss": 0.4743, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.1999999999999999e-05, |
|
"loss": 0.4701, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 0.4466768801212311, |
|
"eval_runtime": 1.2467, |
|
"eval_samples_per_second": 148.397, |
|
"eval_steps_per_second": 9.626, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.4999999999999999e-05, |
|
"loss": 0.4687, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.7999999999999997e-05, |
|
"loss": 0.4673, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 0.4303785264492035, |
|
"eval_runtime": 1.4639, |
|
"eval_samples_per_second": 126.371, |
|
"eval_steps_per_second": 8.197, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.1e-05, |
|
"loss": 0.4669, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.3999999999999997e-05, |
|
"loss": 0.4669, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 0.44127601385116577, |
|
"eval_runtime": 1.4871, |
|
"eval_samples_per_second": 124.403, |
|
"eval_steps_per_second": 8.069, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.6999999999999996e-05, |
|
"loss": 0.4663, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.9999999999999997e-05, |
|
"loss": 0.4668, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 0.4368070960044861, |
|
"eval_runtime": 1.5671, |
|
"eval_samples_per_second": 118.051, |
|
"eval_steps_per_second": 7.657, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.2999999999999996e-05, |
|
"loss": 0.467, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.5999999999999994e-05, |
|
"loss": 0.4676, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_loss": 0.4358096420764923, |
|
"eval_runtime": 1.613, |
|
"eval_samples_per_second": 114.696, |
|
"eval_steps_per_second": 7.44, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.9e-05, |
|
"loss": 0.4683, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.4691, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_loss": 0.43669232726097107, |
|
"eval_runtime": 1.3117, |
|
"eval_samples_per_second": 141.037, |
|
"eval_steps_per_second": 9.148, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.4999999999999996e-05, |
|
"loss": 0.4697, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 4.7999999999999994e-05, |
|
"loss": 0.4693, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_loss": 0.4429354965686798, |
|
"eval_runtime": 1.2244, |
|
"eval_samples_per_second": 151.099, |
|
"eval_steps_per_second": 9.801, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 5.1e-05, |
|
"loss": 0.4703, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 5.399999999999999e-05, |
|
"loss": 0.4709, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_loss": 0.43878915905952454, |
|
"eval_runtime": 1.2254, |
|
"eval_samples_per_second": 150.966, |
|
"eval_steps_per_second": 9.792, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 5.6999999999999996e-05, |
|
"loss": 0.4711, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 5.9981999999999996e-05, |
|
"loss": 0.4722, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_loss": 0.44534820318222046, |
|
"eval_runtime": 1.1709, |
|
"eval_samples_per_second": 157.999, |
|
"eval_steps_per_second": 10.249, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 6.298199999999999e-05, |
|
"loss": 0.4713, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 6.598199999999999e-05, |
|
"loss": 0.4729, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_loss": 0.4415363073348999, |
|
"eval_runtime": 1.2422, |
|
"eval_samples_per_second": 148.934, |
|
"eval_steps_per_second": 9.661, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 6.8982e-05, |
|
"loss": 0.4733, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 7.198199999999999e-05, |
|
"loss": 0.4732, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_loss": 0.4510314166545868, |
|
"eval_runtime": 1.3224, |
|
"eval_samples_per_second": 139.898, |
|
"eval_steps_per_second": 9.074, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 7.4982e-05, |
|
"loss": 0.4748, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 7.7982e-05, |
|
"loss": 0.4751, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_loss": 0.44608551263809204, |
|
"eval_runtime": 1.3056, |
|
"eval_samples_per_second": 141.703, |
|
"eval_steps_per_second": 9.192, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 8.098199999999999e-05, |
|
"loss": 0.4755, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 8.3982e-05, |
|
"loss": 0.4765, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_loss": 0.4448258578777313, |
|
"eval_runtime": 1.3312, |
|
"eval_samples_per_second": 138.973, |
|
"eval_steps_per_second": 9.014, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 8.698199999999999e-05, |
|
"loss": 0.4771, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 8.9982e-05, |
|
"loss": 0.477, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_loss": 0.4497504234313965, |
|
"eval_runtime": 1.4193, |
|
"eval_samples_per_second": 130.344, |
|
"eval_steps_per_second": 8.455, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 9.298199999999998e-05, |
|
"loss": 0.4776, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 9.598199999999999e-05, |
|
"loss": 0.4779, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_loss": 0.4446822702884674, |
|
"eval_runtime": 1.2963, |
|
"eval_samples_per_second": 142.717, |
|
"eval_steps_per_second": 9.257, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 9.8982e-05, |
|
"loss": 0.4796, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 0.00010197599999999999, |
|
"loss": 0.4795, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_loss": 0.44301700592041016, |
|
"eval_runtime": 1.2604, |
|
"eval_samples_per_second": 146.78, |
|
"eval_steps_per_second": 9.521, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 0.00010496999999999999, |
|
"loss": 0.4804, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 0.00010796999999999998, |
|
"loss": 0.481, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_loss": 0.4499399662017822, |
|
"eval_runtime": 1.2921, |
|
"eval_samples_per_second": 143.181, |
|
"eval_steps_per_second": 9.287, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 0.00011096999999999999, |
|
"loss": 0.4814, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 0.00011397, |
|
"loss": 0.4821, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"eval_loss": 0.4550692141056061, |
|
"eval_runtime": 1.2086, |
|
"eval_samples_per_second": 153.074, |
|
"eval_steps_per_second": 9.929, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 0.00011697, |
|
"loss": 0.483, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 0.00011996999999999998, |
|
"loss": 0.4829, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"eval_loss": 0.4518587291240692, |
|
"eval_runtime": 1.2357, |
|
"eval_samples_per_second": 149.708, |
|
"eval_steps_per_second": 9.711, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 0.00012297, |
|
"loss": 0.4838, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 0.00012597, |
|
"loss": 0.4838, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"eval_loss": 0.45201268792152405, |
|
"eval_runtime": 1.2706, |
|
"eval_samples_per_second": 145.603, |
|
"eval_steps_per_second": 9.444, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 0.00012896999999999998, |
|
"loss": 0.4846, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 0.00013197, |
|
"loss": 0.4856, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"eval_loss": 0.46331045031547546, |
|
"eval_runtime": 1.4645, |
|
"eval_samples_per_second": 126.326, |
|
"eval_steps_per_second": 8.194, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 0.000134958, |
|
"loss": 0.4848, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 0.00013795799999999998, |
|
"loss": 0.4857, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"eval_loss": 0.45755112171173096, |
|
"eval_runtime": 1.2529, |
|
"eval_samples_per_second": 147.652, |
|
"eval_steps_per_second": 9.577, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 0.00014095799999999997, |
|
"loss": 0.4867, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 0.000143958, |
|
"loss": 0.4869, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"eval_loss": 0.44849660992622375, |
|
"eval_runtime": 1.3684, |
|
"eval_samples_per_second": 135.198, |
|
"eval_steps_per_second": 8.77, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 0.00014695799999999998, |
|
"loss": 0.4874, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 0.000149958, |
|
"loss": 0.4882, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"eval_loss": 0.4590979516506195, |
|
"eval_runtime": 1.3063, |
|
"eval_samples_per_second": 141.622, |
|
"eval_steps_per_second": 9.186, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 0.0001499996278877012, |
|
"loss": 0.4888, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 0.00014999849034570668, |
|
"loss": 0.4883, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"eval_loss": 0.4644538164138794, |
|
"eval_runtime": 1.393, |
|
"eval_samples_per_second": 132.808, |
|
"eval_steps_per_second": 8.615, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 0.00014999658731143647, |
|
"loss": 0.4892, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 0.00014999392490663588, |
|
"loss": 0.4889, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"eval_loss": 0.456952303647995, |
|
"eval_runtime": 1.2777, |
|
"eval_samples_per_second": 144.79, |
|
"eval_steps_per_second": 9.392, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 0.00014999052298598836, |
|
"loss": 0.4912, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 0.0001499863312870767, |
|
"loss": 0.4884, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"eval_loss": 0.4571981132030487, |
|
"eval_runtime": 1.4323, |
|
"eval_samples_per_second": 129.165, |
|
"eval_steps_per_second": 8.378, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 0.0001499813742288589, |
|
"loss": 0.4887, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 0.00014997565186554453, |
|
"loss": 0.4897, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"eval_loss": 0.4552990198135376, |
|
"eval_runtime": 1.4111, |
|
"eval_samples_per_second": 131.102, |
|
"eval_steps_per_second": 8.504, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 0.00014996916425971245, |
|
"loss": 0.4882, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 0.0001499619114823101, |
|
"loss": 0.4883, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"eval_loss": 0.453418105840683, |
|
"eval_runtime": 1.3895, |
|
"eval_samples_per_second": 133.14, |
|
"eval_steps_per_second": 8.636, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 0.00014995389361265266, |
|
"loss": 0.4889, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 0.00014994511073842227, |
|
"loss": 0.4881, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"eval_loss": 0.45868200063705444, |
|
"eval_runtime": 1.3438, |
|
"eval_samples_per_second": 137.672, |
|
"eval_steps_per_second": 8.93, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 0.00014993556295566702, |
|
"loss": 0.4887, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 0.0001499252503688, |
|
"loss": 0.4889, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"eval_loss": 0.4631924331188202, |
|
"eval_runtime": 1.339, |
|
"eval_samples_per_second": 138.166, |
|
"eval_steps_per_second": 8.962, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 0.00014991417309059794, |
|
"loss": 0.4885, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 0.0001499023312422002, |
|
"loss": 0.4886, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"eval_loss": 0.45870065689086914, |
|
"eval_runtime": 1.3071, |
|
"eval_samples_per_second": 141.531, |
|
"eval_steps_per_second": 9.18, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 0.00014988972495310734, |
|
"loss": 0.4888, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 0.0001498763543611797, |
|
"loss": 0.4883, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"eval_loss": 0.46206343173980713, |
|
"eval_runtime": 1.3419, |
|
"eval_samples_per_second": 137.861, |
|
"eval_steps_per_second": 8.942, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 0.00014986221961263597, |
|
"loss": 0.4884, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 0.00014984732086205146, |
|
"loss": 0.4876, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"eval_loss": 0.452243447303772, |
|
"eval_runtime": 1.311, |
|
"eval_samples_per_second": 141.113, |
|
"eval_steps_per_second": 9.153, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 0.00014983165827235656, |
|
"loss": 0.4879, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.00014981523201483478, |
|
"loss": 0.4878, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.45599275827407837, |
|
"eval_runtime": 1.3198, |
|
"eval_samples_per_second": 140.175, |
|
"eval_steps_per_second": 9.092, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 0.0001497980422691211, |
|
"loss": 0.4879, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 0.00014978008922319978, |
|
"loss": 0.4883, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"eval_loss": 0.45790329575538635, |
|
"eval_runtime": 1.2841, |
|
"eval_samples_per_second": 144.071, |
|
"eval_steps_per_second": 9.345, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 0.00014976137307340248, |
|
"loss": 0.4875, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 0.0001497419337437372, |
|
"loss": 0.4882, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"eval_loss": 0.4554036259651184, |
|
"eval_runtime": 1.4061, |
|
"eval_samples_per_second": 131.565, |
|
"eval_steps_per_second": 8.534, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 0.00014972169353371555, |
|
"loss": 0.4882, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 0.0001497006908584238, |
|
"loss": 0.4883, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"eval_loss": 0.4587613046169281, |
|
"eval_runtime": 1.383, |
|
"eval_samples_per_second": 133.763, |
|
"eval_steps_per_second": 8.677, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 0.00014967892594754389, |
|
"loss": 0.4877, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 0.00014965639903909335, |
|
"loss": 0.4872, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"eval_loss": 0.4561161398887634, |
|
"eval_runtime": 1.2745, |
|
"eval_samples_per_second": 145.151, |
|
"eval_steps_per_second": 9.415, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 0.00014963329971066358, |
|
"loss": 0.4911, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 0.00014960925564539397, |
|
"loss": 0.4868, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"eval_loss": 0.46137019991874695, |
|
"eval_runtime": 1.4076, |
|
"eval_samples_per_second": 131.433, |
|
"eval_steps_per_second": 8.525, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 0.000149584450344457, |
|
"loss": 0.4873, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"learning_rate": 0.00014955888407911953, |
|
"loss": 0.4875, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"eval_loss": 0.4583793580532074, |
|
"eval_runtime": 1.1996, |
|
"eval_samples_per_second": 154.218, |
|
"eval_steps_per_second": 10.003, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 0.00014953255712897006, |
|
"loss": 0.4872, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"learning_rate": 0.00014950546978191592, |
|
"loss": 0.4868, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"eval_loss": 0.46193328499794006, |
|
"eval_runtime": 1.2487, |
|
"eval_samples_per_second": 148.152, |
|
"eval_steps_per_second": 9.61, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 0.00014947762233418003, |
|
"loss": 0.4882, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 0.00014944901509029757, |
|
"loss": 0.4874, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"eval_loss": 0.4519434869289398, |
|
"eval_runtime": 1.2829, |
|
"eval_samples_per_second": 144.21, |
|
"eval_steps_per_second": 9.354, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"learning_rate": 0.0001494196483631128, |
|
"loss": 0.4874, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 0.00014938952247377555, |
|
"loss": 0.4874, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"eval_loss": 0.4625035524368286, |
|
"eval_runtime": 1.3409, |
|
"eval_samples_per_second": 137.972, |
|
"eval_steps_per_second": 8.95, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"learning_rate": 0.00014935863775173772, |
|
"loss": 0.487, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 0.00014932699453474967, |
|
"loss": 0.487, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"eval_loss": 0.45792409777641296, |
|
"eval_runtime": 1.3874, |
|
"eval_samples_per_second": 133.339, |
|
"eval_steps_per_second": 8.649, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 0.00014929459316885652, |
|
"loss": 0.4872, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 0.0001492614340083945, |
|
"loss": 0.4872, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"eval_loss": 0.4534100592136383, |
|
"eval_runtime": 1.3917, |
|
"eval_samples_per_second": 132.927, |
|
"eval_steps_per_second": 8.622, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"learning_rate": 0.00014922751741598678, |
|
"loss": 0.4868, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"learning_rate": 0.00014919284376253988, |
|
"loss": 0.4872, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"eval_loss": 0.45163026452064514, |
|
"eval_runtime": 1.3427, |
|
"eval_samples_per_second": 137.777, |
|
"eval_steps_per_second": 8.937, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"learning_rate": 0.00014915741342723924, |
|
"loss": 0.4864, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 0.00014912122679754534, |
|
"loss": 0.4865, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"eval_loss": 0.4634714126586914, |
|
"eval_runtime": 1.339, |
|
"eval_samples_per_second": 138.163, |
|
"eval_steps_per_second": 8.962, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 0.0001490842842691894, |
|
"loss": 0.4866, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"learning_rate": 0.00014904658624616897, |
|
"loss": 0.4865, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"eval_loss": 0.4610464572906494, |
|
"eval_runtime": 1.3562, |
|
"eval_samples_per_second": 136.406, |
|
"eval_steps_per_second": 8.848, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 0.00014900821080024855, |
|
"loss": 0.4869, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 0.00014896900454183386, |
|
"loss": 0.4863, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"eval_loss": 0.45153000950813293, |
|
"eval_runtime": 1.3758, |
|
"eval_samples_per_second": 134.464, |
|
"eval_steps_per_second": 8.722, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"learning_rate": 0.00014892904404943528, |
|
"loss": 0.4864, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"learning_rate": 0.00014888832976005437, |
|
"loss": 0.4861, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"eval_loss": 0.45839324593544006, |
|
"eval_runtime": 1.3527, |
|
"eval_samples_per_second": 136.761, |
|
"eval_steps_per_second": 8.871, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 0.00014884694580576338, |
|
"loss": 0.4857, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"learning_rate": 0.00014880472677172968, |
|
"loss": 0.4866, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"eval_loss": 0.4540667235851288, |
|
"eval_runtime": 1.4425, |
|
"eval_samples_per_second": 128.253, |
|
"eval_steps_per_second": 8.319, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 0.00014876175530022752, |
|
"loss": 0.4862, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 0.000148718031861186, |
|
"loss": 0.4862, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"eval_loss": 0.45079880952835083, |
|
"eval_runtime": 1.426, |
|
"eval_samples_per_second": 129.729, |
|
"eval_steps_per_second": 8.415, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"learning_rate": 0.00014867400540032418, |
|
"loss": 0.4945, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"learning_rate": 0.00014862878697647535, |
|
"loss": 0.4863, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"eval_loss": 0.45646485686302185, |
|
"eval_runtime": 1.2918, |
|
"eval_samples_per_second": 143.21, |
|
"eval_steps_per_second": 9.289, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 0.00014858281803920744, |
|
"loss": 0.4853, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"learning_rate": 0.0001485360990912294, |
|
"loss": 0.486, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"eval_loss": 0.4665206968784332, |
|
"eval_runtime": 1.3746, |
|
"eval_samples_per_second": 134.585, |
|
"eval_steps_per_second": 8.73, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"learning_rate": 0.00014848863064345212, |
|
"loss": 0.486, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 0.000148440413214983, |
|
"loss": 0.486, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"eval_loss": 0.45648473501205444, |
|
"eval_runtime": 1.418, |
|
"eval_samples_per_second": 130.464, |
|
"eval_steps_per_second": 8.463, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"learning_rate": 0.00014839144733312009, |
|
"loss": 0.4852, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 0.0001483417335333464, |
|
"loss": 0.4861, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"eval_loss": 0.4579944908618927, |
|
"eval_runtime": 1.4339, |
|
"eval_samples_per_second": 129.019, |
|
"eval_steps_per_second": 8.369, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"learning_rate": 0.0001482912723593241, |
|
"loss": 0.4852, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 8.19, |
|
"learning_rate": 0.00014824006436288853, |
|
"loss": 0.4852, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 8.19, |
|
"eval_loss": 0.45960545539855957, |
|
"eval_runtime": 1.4194, |
|
"eval_samples_per_second": 130.337, |
|
"eval_steps_per_second": 8.454, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 0.00014818811010404212, |
|
"loss": 0.4849, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 0.00014813541015094828, |
|
"loss": 0.4846, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"eval_loss": 0.45274996757507324, |
|
"eval_runtime": 1.438, |
|
"eval_samples_per_second": 128.654, |
|
"eval_steps_per_second": 8.345, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"learning_rate": 0.0001480819650799253, |
|
"loss": 0.4854, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"learning_rate": 0.0001480277754754399, |
|
"loss": 0.4848, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"eval_loss": 0.4504894018173218, |
|
"eval_runtime": 1.4132, |
|
"eval_samples_per_second": 130.905, |
|
"eval_steps_per_second": 8.491, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"learning_rate": 0.00014797284193010093, |
|
"loss": 0.4849, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 0.00014791716504465282, |
|
"loss": 0.4849, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"eval_loss": 0.4406717121601105, |
|
"eval_runtime": 1.3298, |
|
"eval_samples_per_second": 139.116, |
|
"eval_steps_per_second": 9.024, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 8.67, |
|
"learning_rate": 0.00014786074542796906, |
|
"loss": 0.4847, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"learning_rate": 0.00014780358369704557, |
|
"loss": 0.4851, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"eval_loss": 0.4578949809074402, |
|
"eval_runtime": 1.2912, |
|
"eval_samples_per_second": 143.275, |
|
"eval_steps_per_second": 9.294, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 8.81, |
|
"learning_rate": 0.00014774568047699386, |
|
"loss": 0.4844, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"learning_rate": 0.00014768703640103426, |
|
"loss": 0.4848, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"eval_loss": 0.4559178650379181, |
|
"eval_runtime": 1.4061, |
|
"eval_samples_per_second": 131.569, |
|
"eval_steps_per_second": 8.534, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"learning_rate": 0.000147627652110489, |
|
"loss": 0.4848, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 0.00014756752825477516, |
|
"loss": 0.4851, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"eval_loss": 0.45050284266471863, |
|
"eval_runtime": 1.4888, |
|
"eval_samples_per_second": 124.265, |
|
"eval_steps_per_second": 8.06, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 0.00014750666549139763, |
|
"loss": 0.4852, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"learning_rate": 0.0001474454362920812, |
|
"loss": 0.4846, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"eval_loss": 0.4615005850791931, |
|
"eval_runtime": 1.3552, |
|
"eval_samples_per_second": 136.507, |
|
"eval_steps_per_second": 8.854, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"learning_rate": 0.00014738310214159147, |
|
"loss": 0.4851, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 0.00014732003110029247, |
|
"loss": 0.4842, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"eval_loss": 0.4617568850517273, |
|
"eval_runtime": 1.4135, |
|
"eval_samples_per_second": 130.884, |
|
"eval_steps_per_second": 8.49, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"learning_rate": 0.00014725622385791898, |
|
"loss": 0.484, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"learning_rate": 0.00014719168111225673, |
|
"loss": 0.484, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"eval_loss": 0.4558795690536499, |
|
"eval_runtime": 1.3419, |
|
"eval_samples_per_second": 137.865, |
|
"eval_steps_per_second": 8.943, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"learning_rate": 0.00014712640356913495, |
|
"loss": 0.484, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 0.00014706039194241832, |
|
"loss": 0.4841, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"eval_loss": 0.46133604645729065, |
|
"eval_runtime": 1.3387, |
|
"eval_samples_per_second": 138.192, |
|
"eval_steps_per_second": 8.964, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 9.64, |
|
"learning_rate": 0.0001469936469539994, |
|
"loss": 0.4837, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 9.71, |
|
"learning_rate": 0.00014692616933379077, |
|
"loss": 0.484, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 9.71, |
|
"eval_loss": 0.4527389109134674, |
|
"eval_runtime": 1.3393, |
|
"eval_samples_per_second": 138.131, |
|
"eval_steps_per_second": 8.96, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"learning_rate": 0.0001468579598197168, |
|
"loss": 0.484, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"learning_rate": 0.00014678901915770584, |
|
"loss": 0.4842, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"eval_loss": 0.4482715427875519, |
|
"eval_runtime": 1.2999, |
|
"eval_samples_per_second": 142.321, |
|
"eval_steps_per_second": 9.232, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 9.92, |
|
"learning_rate": 0.00014671934810168194, |
|
"loss": 0.4843, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"learning_rate": 0.00014664894741355652, |
|
"loss": 0.4842, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"eval_loss": 0.45848214626312256, |
|
"eval_runtime": 1.4072, |
|
"eval_samples_per_second": 131.462, |
|
"eval_steps_per_second": 8.527, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 10.06, |
|
"learning_rate": 0.00014657781786322035, |
|
"loss": 0.4836, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 10.13, |
|
"learning_rate": 0.00014650596022853471, |
|
"loss": 0.4837, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 10.13, |
|
"eval_loss": 0.45845624804496765, |
|
"eval_runtime": 1.3792, |
|
"eval_samples_per_second": 134.135, |
|
"eval_steps_per_second": 8.701, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 10.2, |
|
"learning_rate": 0.00014643337529532323, |
|
"loss": 0.4838, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 10.27, |
|
"learning_rate": 0.00014636021120475917, |
|
"loss": 0.4833, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 10.27, |
|
"eval_loss": 0.4540862739086151, |
|
"eval_runtime": 1.2824, |
|
"eval_samples_per_second": 144.264, |
|
"eval_steps_per_second": 9.358, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 10.34, |
|
"learning_rate": 0.00014628617551437316, |
|
"loss": 0.4836, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 10.41, |
|
"learning_rate": 0.0001462114149289917, |
|
"loss": 0.4836, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 10.41, |
|
"eval_loss": 0.45281586050987244, |
|
"eval_runtime": 1.381, |
|
"eval_samples_per_second": 133.963, |
|
"eval_steps_per_second": 8.69, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 10.48, |
|
"learning_rate": 0.00014613593026618457, |
|
"loss": 0.4834, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 10.55, |
|
"learning_rate": 0.00014605972235143998, |
|
"loss": 0.4832, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 10.55, |
|
"eval_loss": 0.44752052426338196, |
|
"eval_runtime": 1.2887, |
|
"eval_samples_per_second": 143.559, |
|
"eval_steps_per_second": 9.312, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 10.61, |
|
"learning_rate": 0.00014598356489470423, |
|
"loss": 0.4841, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 10.68, |
|
"learning_rate": 0.00014590592019575967, |
|
"loss": 0.4836, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 10.68, |
|
"eval_loss": 0.4524598717689514, |
|
"eval_runtime": 1.395, |
|
"eval_samples_per_second": 132.618, |
|
"eval_steps_per_second": 8.602, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 10.75, |
|
"learning_rate": 0.00014582755476023087, |
|
"loss": 0.4838, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 10.82, |
|
"learning_rate": 0.00014574846944510973, |
|
"loss": 0.4826, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 10.82, |
|
"eval_loss": 0.4561995565891266, |
|
"eval_runtime": 1.2744, |
|
"eval_samples_per_second": 145.166, |
|
"eval_steps_per_second": 9.416, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 10.89, |
|
"learning_rate": 0.00014566866511526059, |
|
"loss": 0.483, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 10.96, |
|
"learning_rate": 0.00014558814264341083, |
|
"loss": 0.4824, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 10.96, |
|
"eval_loss": 0.4501703679561615, |
|
"eval_runtime": 1.361, |
|
"eval_samples_per_second": 135.932, |
|
"eval_steps_per_second": 8.817, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 11.03, |
|
"learning_rate": 0.0001455069029101412, |
|
"loss": 0.4832, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 11.1, |
|
"learning_rate": 0.00014542494680387652, |
|
"loss": 0.4828, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 11.1, |
|
"eval_loss": 0.4528810381889343, |
|
"eval_runtime": 1.3979, |
|
"eval_samples_per_second": 132.344, |
|
"eval_steps_per_second": 8.585, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 11.17, |
|
"learning_rate": 0.0001453422752208756, |
|
"loss": 0.4829, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 11.24, |
|
"learning_rate": 0.00014525888906522161, |
|
"loss": 0.4829, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 11.24, |
|
"eval_loss": 0.452385812997818, |
|
"eval_runtime": 1.3808, |
|
"eval_samples_per_second": 133.981, |
|
"eval_steps_per_second": 8.691, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 11.31, |
|
"learning_rate": 0.00014517478924881222, |
|
"loss": 0.4826, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 11.38, |
|
"learning_rate": 0.00014508997669134948, |
|
"loss": 0.4823, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 11.38, |
|
"eval_loss": 0.4506407380104065, |
|
"eval_runtime": 1.3541, |
|
"eval_samples_per_second": 136.62, |
|
"eval_steps_per_second": 8.862, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 11.45, |
|
"learning_rate": 0.00014500445232033005, |
|
"loss": 0.4827, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 11.52, |
|
"learning_rate": 0.0001449183902503642, |
|
"loss": 0.4827, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 11.52, |
|
"eval_loss": 0.4510784149169922, |
|
"eval_runtime": 1.3772, |
|
"eval_samples_per_second": 134.329, |
|
"eval_steps_per_second": 8.713, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 11.59, |
|
"learning_rate": 0.00014483144648477184, |
|
"loss": 0.4826, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 11.66, |
|
"learning_rate": 0.00014474379373286762, |
|
"loss": 0.4823, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 11.66, |
|
"eval_loss": 0.45063039660453796, |
|
"eval_runtime": 1.308, |
|
"eval_samples_per_second": 141.432, |
|
"eval_steps_per_second": 9.174, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 11.72, |
|
"learning_rate": 0.00014465543295320805, |
|
"loss": 0.482, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 11.79, |
|
"learning_rate": 0.00014456636511209239, |
|
"loss": 0.4827, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 11.79, |
|
"eval_loss": 0.45612767338752747, |
|
"eval_runtime": 1.4022, |
|
"eval_samples_per_second": 131.931, |
|
"eval_steps_per_second": 8.558, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 11.86, |
|
"learning_rate": 0.0001444765911835523, |
|
"loss": 0.4819, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 11.93, |
|
"learning_rate": 0.0001443861121493411, |
|
"loss": 0.4832, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 11.93, |
|
"eval_loss": 0.4471343159675598, |
|
"eval_runtime": 1.4636, |
|
"eval_samples_per_second": 126.398, |
|
"eval_steps_per_second": 8.199, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 0.000144294928998923, |
|
"loss": 0.4828, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 12.07, |
|
"learning_rate": 0.00014420304272946233, |
|
"loss": 0.482, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 12.07, |
|
"eval_loss": 0.447856605052948, |
|
"eval_runtime": 1.3062, |
|
"eval_samples_per_second": 141.638, |
|
"eval_steps_per_second": 9.187, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 12.14, |
|
"learning_rate": 0.0001441104543458126, |
|
"loss": 0.4819, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 12.21, |
|
"learning_rate": 0.0001440173521384988, |
|
"loss": 0.4819, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 12.21, |
|
"eval_loss": 0.4560653567314148, |
|
"eval_runtime": 1.2789, |
|
"eval_samples_per_second": 144.65, |
|
"eval_steps_per_second": 9.383, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 12.28, |
|
"learning_rate": 0.00014392336397087298, |
|
"loss": 0.482, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 12.35, |
|
"learning_rate": 0.00014382867674758018, |
|
"loss": 0.4816, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 12.35, |
|
"eval_loss": 0.45898643136024475, |
|
"eval_runtime": 1.4452, |
|
"eval_samples_per_second": 128.01, |
|
"eval_steps_per_second": 8.303, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 12.42, |
|
"learning_rate": 0.00014373367443415487, |
|
"loss": 0.4822, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 12.49, |
|
"learning_rate": 0.0001436375949994348, |
|
"loss": 0.4818, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 12.49, |
|
"eval_loss": 0.4468829035758972, |
|
"eval_runtime": 1.4183, |
|
"eval_samples_per_second": 130.434, |
|
"eval_steps_per_second": 8.461, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 12.56, |
|
"learning_rate": 0.00014354081963417143, |
|
"loss": 0.4817, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 12.63, |
|
"learning_rate": 0.0001434433493966847, |
|
"loss": 0.4815, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 12.63, |
|
"eval_loss": 0.46334829926490784, |
|
"eval_runtime": 1.3078, |
|
"eval_samples_per_second": 141.454, |
|
"eval_steps_per_second": 9.175, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 12.7, |
|
"learning_rate": 0.00014334518535289345, |
|
"loss": 0.4817, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 12.77, |
|
"learning_rate": 0.00014324632857630403, |
|
"loss": 0.4822, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 12.77, |
|
"eval_loss": 0.4566173553466797, |
|
"eval_runtime": 1.4054, |
|
"eval_samples_per_second": 131.633, |
|
"eval_steps_per_second": 8.538, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 12.83, |
|
"learning_rate": 0.0001431467801479982, |
|
"loss": 0.4817, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"learning_rate": 0.0001430465411566217, |
|
"loss": 0.4816, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"eval_loss": 0.4547722339630127, |
|
"eval_runtime": 1.4224, |
|
"eval_samples_per_second": 130.065, |
|
"eval_steps_per_second": 8.437, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 12.97, |
|
"learning_rate": 0.00014294561269837208, |
|
"loss": 0.4819, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 13.04, |
|
"learning_rate": 0.00014284399587698678, |
|
"loss": 0.4824, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 13.04, |
|
"eval_loss": 0.45478177070617676, |
|
"eval_runtime": 1.3566, |
|
"eval_samples_per_second": 136.367, |
|
"eval_steps_per_second": 8.845, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 13.11, |
|
"learning_rate": 0.00014274251296038978, |
|
"loss": 0.4857, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 13.18, |
|
"learning_rate": 0.0001426395282386502, |
|
"loss": 0.4812, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 13.18, |
|
"eval_loss": 0.4533080756664276, |
|
"eval_runtime": 1.4018, |
|
"eval_samples_per_second": 131.972, |
|
"eval_steps_per_second": 8.56, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 13.25, |
|
"learning_rate": 0.00014253585850106554, |
|
"loss": 0.4808, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 13.32, |
|
"learning_rate": 0.00014243150488135145, |
|
"loss": 0.4809, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 13.32, |
|
"eval_loss": 0.45463913679122925, |
|
"eval_runtime": 1.2723, |
|
"eval_samples_per_second": 145.405, |
|
"eval_steps_per_second": 9.432, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 13.39, |
|
"learning_rate": 0.0001423264685207024, |
|
"loss": 0.4816, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 13.46, |
|
"learning_rate": 0.00014222075056777916, |
|
"loss": 0.481, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 13.46, |
|
"eval_loss": 0.4589781165122986, |
|
"eval_runtime": 1.4685, |
|
"eval_samples_per_second": 125.98, |
|
"eval_steps_per_second": 8.172, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 13.53, |
|
"learning_rate": 0.00014211435217869638, |
|
"loss": 0.4812, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"learning_rate": 0.00014200727451700978, |
|
"loss": 0.4807, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"eval_loss": 0.44650688767433167, |
|
"eval_runtime": 1.2669, |
|
"eval_samples_per_second": 146.022, |
|
"eval_steps_per_second": 9.472, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 13.67, |
|
"learning_rate": 0.00014189951875370348, |
|
"loss": 0.4811, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 13.74, |
|
"learning_rate": 0.00014179108606717728, |
|
"loss": 0.4808, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 13.74, |
|
"eval_loss": 0.45313671231269836, |
|
"eval_runtime": 1.434, |
|
"eval_samples_per_second": 129.013, |
|
"eval_steps_per_second": 8.368, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 13.81, |
|
"learning_rate": 0.00014168197764323368, |
|
"loss": 0.4809, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 13.88, |
|
"learning_rate": 0.00014157219467506488, |
|
"loss": 0.4806, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 13.88, |
|
"eval_loss": 0.4459179937839508, |
|
"eval_runtime": 1.3942, |
|
"eval_samples_per_second": 132.692, |
|
"eval_steps_per_second": 8.607, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"learning_rate": 0.0001414617383632398, |
|
"loss": 0.4806, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"learning_rate": 0.0001413506099156911, |
|
"loss": 0.4809, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"eval_loss": 0.4517055153846741, |
|
"eval_runtime": 1.3095, |
|
"eval_samples_per_second": 141.281, |
|
"eval_steps_per_second": 9.164, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 14.08, |
|
"learning_rate": 0.00014123881054770157, |
|
"loss": 0.4806, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"learning_rate": 0.0001411263414818912, |
|
"loss": 0.4801, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"eval_loss": 0.45189717411994934, |
|
"eval_runtime": 1.2719, |
|
"eval_samples_per_second": 145.454, |
|
"eval_steps_per_second": 9.435, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 14.22, |
|
"learning_rate": 0.0001410132039482037, |
|
"loss": 0.4805, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"learning_rate": 0.000140899399183893, |
|
"loss": 0.4801, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"eval_loss": 0.4547403156757355, |
|
"eval_runtime": 1.3022, |
|
"eval_samples_per_second": 142.067, |
|
"eval_steps_per_second": 9.215, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 14.36, |
|
"learning_rate": 0.00014078492843350979, |
|
"loss": 0.48, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 14.43, |
|
"learning_rate": 0.00014066979294888782, |
|
"loss": 0.4805, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 14.43, |
|
"eval_loss": 0.4516749978065491, |
|
"eval_runtime": 1.2682, |
|
"eval_samples_per_second": 145.873, |
|
"eval_steps_per_second": 9.462, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 14.5, |
|
"learning_rate": 0.00014055399398913034, |
|
"loss": 0.4806, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 14.57, |
|
"learning_rate": 0.00014043753282059617, |
|
"loss": 0.4799, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 14.57, |
|
"eval_loss": 0.4491257071495056, |
|
"eval_runtime": 1.2323, |
|
"eval_samples_per_second": 150.131, |
|
"eval_steps_per_second": 9.738, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 14.64, |
|
"learning_rate": 0.00014032041071688605, |
|
"loss": 0.4804, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 14.71, |
|
"learning_rate": 0.0001402026289588285, |
|
"loss": 0.4805, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 14.71, |
|
"eval_loss": 0.45593592524528503, |
|
"eval_runtime": 1.2562, |
|
"eval_samples_per_second": 147.265, |
|
"eval_steps_per_second": 9.552, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 14.78, |
|
"learning_rate": 0.00014008466390471485, |
|
"loss": 0.4805, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 14.85, |
|
"learning_rate": 0.0001399655693349842, |
|
"loss": 0.48, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 14.85, |
|
"eval_loss": 0.4550573527812958, |
|
"eval_runtime": 1.3288, |
|
"eval_samples_per_second": 139.229, |
|
"eval_steps_per_second": 9.031, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 14.92, |
|
"learning_rate": 0.00013984581899139457, |
|
"loss": 0.4799, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"learning_rate": 0.00013972565564541377, |
|
"loss": 0.4796, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"eval_loss": 0.4537319242954254, |
|
"eval_runtime": 1.2437, |
|
"eval_samples_per_second": 148.751, |
|
"eval_steps_per_second": 9.649, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 15.05, |
|
"learning_rate": 0.00013960459899495145, |
|
"loss": 0.4803, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 15.12, |
|
"learning_rate": 0.00013948289051814402, |
|
"loss": 0.4801, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 15.12, |
|
"eval_loss": 0.4508979320526123, |
|
"eval_runtime": 1.3574, |
|
"eval_samples_per_second": 136.289, |
|
"eval_steps_per_second": 8.84, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 15.19, |
|
"learning_rate": 0.00013936053154597593, |
|
"loss": 0.4797, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 15.26, |
|
"learning_rate": 0.0001392375234165452, |
|
"loss": 0.4797, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 15.26, |
|
"eval_loss": 0.44824209809303284, |
|
"eval_runtime": 1.3552, |
|
"eval_samples_per_second": 136.513, |
|
"eval_steps_per_second": 8.855, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 15.33, |
|
"learning_rate": 0.0001391138674750491, |
|
"loss": 0.4798, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 15.4, |
|
"learning_rate": 0.00013898956507376917, |
|
"loss": 0.4798, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 15.4, |
|
"eval_loss": 0.446604460477829, |
|
"eval_runtime": 1.2361, |
|
"eval_samples_per_second": 149.667, |
|
"eval_steps_per_second": 9.708, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 15.47, |
|
"learning_rate": 0.00013886461757205655, |
|
"loss": 0.48, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 15.54, |
|
"learning_rate": 0.0001387390263363171, |
|
"loss": 0.4789, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 15.54, |
|
"eval_loss": 0.4444972574710846, |
|
"eval_runtime": 1.2832, |
|
"eval_samples_per_second": 144.17, |
|
"eval_steps_per_second": 9.352, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 15.61, |
|
"learning_rate": 0.00013861279273999646, |
|
"loss": 0.4798, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 15.68, |
|
"learning_rate": 0.00013848668131967977, |
|
"loss": 0.4808, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 15.68, |
|
"eval_loss": 0.4493043124675751, |
|
"eval_runtime": 1.4157, |
|
"eval_samples_per_second": 130.682, |
|
"eval_steps_per_second": 8.477, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 15.75, |
|
"learning_rate": 0.00013835942664208942, |
|
"loss": 0.4799, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 15.82, |
|
"learning_rate": 0.00013823127937490458, |
|
"loss": 0.4789, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 15.82, |
|
"eval_loss": 0.447512686252594, |
|
"eval_runtime": 1.3478, |
|
"eval_samples_per_second": 137.263, |
|
"eval_steps_per_second": 8.904, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 15.89, |
|
"learning_rate": 0.00013810249529977794, |
|
"loss": 0.4791, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 15.96, |
|
"learning_rate": 0.00013797307582507154, |
|
"loss": 0.4792, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 15.96, |
|
"eval_loss": 0.4542599320411682, |
|
"eval_runtime": 1.3264, |
|
"eval_samples_per_second": 139.478, |
|
"eval_steps_per_second": 9.047, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 0.00013784302236609602, |
|
"loss": 0.4792, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 16.1, |
|
"learning_rate": 0.00013771233634509523, |
|
"loss": 0.4787, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 16.1, |
|
"eval_loss": 0.44714024662971497, |
|
"eval_runtime": 1.3014, |
|
"eval_samples_per_second": 142.155, |
|
"eval_steps_per_second": 9.221, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 16.16, |
|
"learning_rate": 0.00013758101919123046, |
|
"loss": 0.4789, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 16.23, |
|
"learning_rate": 0.00013744907234056512, |
|
"loss": 0.4796, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 16.23, |
|
"eval_loss": 0.456470787525177, |
|
"eval_runtime": 1.4121, |
|
"eval_samples_per_second": 131.013, |
|
"eval_steps_per_second": 8.498, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 16.3, |
|
"learning_rate": 0.00013731649723604881, |
|
"loss": 0.4792, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 16.37, |
|
"learning_rate": 0.00013718329532750163, |
|
"loss": 0.4787, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 16.37, |
|
"eval_loss": 0.45149141550064087, |
|
"eval_runtime": 1.422, |
|
"eval_samples_per_second": 130.101, |
|
"eval_steps_per_second": 8.439, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 16.44, |
|
"learning_rate": 0.00013704946807159836, |
|
"loss": 0.479, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 16.51, |
|
"learning_rate": 0.00013691501693185236, |
|
"loss": 0.4788, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 16.51, |
|
"eval_loss": 0.4449474513530731, |
|
"eval_runtime": 1.3546, |
|
"eval_samples_per_second": 136.571, |
|
"eval_steps_per_second": 8.859, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 16.58, |
|
"learning_rate": 0.0001367799433785998, |
|
"loss": 0.4792, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 16.65, |
|
"learning_rate": 0.0001366442488889834, |
|
"loss": 0.4783, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 16.65, |
|
"eval_loss": 0.4454094171524048, |
|
"eval_runtime": 1.3113, |
|
"eval_samples_per_second": 141.083, |
|
"eval_steps_per_second": 9.151, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 16.72, |
|
"learning_rate": 0.00013650793494693634, |
|
"loss": 0.4787, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 16.79, |
|
"learning_rate": 0.0001363710030431661, |
|
"loss": 0.4787, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 16.79, |
|
"eval_loss": 0.44856345653533936, |
|
"eval_runtime": 1.3509, |
|
"eval_samples_per_second": 136.945, |
|
"eval_steps_per_second": 8.883, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 16.86, |
|
"learning_rate": 0.00013623345467513802, |
|
"loss": 0.4784, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 16.93, |
|
"learning_rate": 0.00013609529134705898, |
|
"loss": 0.4789, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 16.93, |
|
"eval_loss": 0.4480277895927429, |
|
"eval_runtime": 1.4149, |
|
"eval_samples_per_second": 130.754, |
|
"eval_steps_per_second": 8.481, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 0.00013595679273463004, |
|
"loss": 0.4786, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 17.07, |
|
"learning_rate": 0.0001358174052482977, |
|
"loss": 0.4782, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 17.07, |
|
"eval_loss": 0.4529421329498291, |
|
"eval_runtime": 1.3006, |
|
"eval_samples_per_second": 142.246, |
|
"eval_steps_per_second": 9.227, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 17.14, |
|
"learning_rate": 0.0001356774073517643, |
|
"loss": 0.4779, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 17.21, |
|
"learning_rate": 0.00013553680057602438, |
|
"loss": 0.4782, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 17.21, |
|
"eval_loss": 0.448131263256073, |
|
"eval_runtime": 1.4454, |
|
"eval_samples_per_second": 127.994, |
|
"eval_steps_per_second": 8.302, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 17.27, |
|
"learning_rate": 0.00013539558645873117, |
|
"loss": 0.4784, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 17.34, |
|
"learning_rate": 0.00013525405078756463, |
|
"loss": 0.4777, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 17.34, |
|
"eval_loss": 0.452815443277359, |
|
"eval_runtime": 1.3521, |
|
"eval_samples_per_second": 136.827, |
|
"eval_steps_per_second": 8.875, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 17.41, |
|
"learning_rate": 0.00013511162783361505, |
|
"loss": 0.4785, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 17.48, |
|
"learning_rate": 0.00013496860218773315, |
|
"loss": 0.4779, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 17.48, |
|
"eval_loss": 0.4513993561267853, |
|
"eval_runtime": 1.4627, |
|
"eval_samples_per_second": 126.479, |
|
"eval_steps_per_second": 8.204, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 17.55, |
|
"learning_rate": 0.00013482497541402445, |
|
"loss": 0.4781, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 17.62, |
|
"learning_rate": 0.00013468074908316836, |
|
"loss": 0.4781, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 17.62, |
|
"eval_loss": 0.45199939608573914, |
|
"eval_runtime": 1.3306, |
|
"eval_samples_per_second": 139.031, |
|
"eval_steps_per_second": 9.018, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 17.69, |
|
"learning_rate": 0.0001345359247724009, |
|
"loss": 0.4783, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 17.76, |
|
"learning_rate": 0.0001343905040654976, |
|
"loss": 0.4776, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 17.76, |
|
"eval_loss": 0.44953423738479614, |
|
"eval_runtime": 1.2748, |
|
"eval_samples_per_second": 145.125, |
|
"eval_steps_per_second": 9.413, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 17.83, |
|
"learning_rate": 0.00013424448855275597, |
|
"loss": 0.4782, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 17.9, |
|
"learning_rate": 0.00013409817363937984, |
|
"loss": 0.4777, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 17.9, |
|
"eval_loss": 0.4501388669013977, |
|
"eval_runtime": 1.3709, |
|
"eval_samples_per_second": 134.949, |
|
"eval_steps_per_second": 8.753, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 17.97, |
|
"learning_rate": 0.00013395156446640005, |
|
"loss": 0.478, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 18.04, |
|
"learning_rate": 0.00013380377767805118, |
|
"loss": 0.4783, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 18.04, |
|
"eval_loss": 0.4528443217277527, |
|
"eval_runtime": 1.4085, |
|
"eval_samples_per_second": 131.344, |
|
"eval_steps_per_second": 8.52, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 18.11, |
|
"learning_rate": 0.00013365540250020975, |
|
"loss": 0.4781, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 18.18, |
|
"learning_rate": 0.00013350644055548294, |
|
"loss": 0.4771, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 18.18, |
|
"eval_loss": 0.44982901215553284, |
|
"eval_runtime": 1.2299, |
|
"eval_samples_per_second": 150.422, |
|
"eval_steps_per_second": 9.757, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 18.25, |
|
"learning_rate": 0.0001333568934728947, |
|
"loss": 0.4772, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 18.32, |
|
"learning_rate": 0.00013320676288786797, |
|
"loss": 0.4775, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 18.32, |
|
"eval_loss": 0.4524603486061096, |
|
"eval_runtime": 1.3159, |
|
"eval_samples_per_second": 140.59, |
|
"eval_steps_per_second": 9.119, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 18.38, |
|
"learning_rate": 0.00013305605044220678, |
|
"loss": 0.4771, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 18.45, |
|
"learning_rate": 0.00013290475778407828, |
|
"loss": 0.4772, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 18.45, |
|
"eval_loss": 0.4482344388961792, |
|
"eval_runtime": 1.3869, |
|
"eval_samples_per_second": 133.391, |
|
"eval_steps_per_second": 8.652, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 18.52, |
|
"learning_rate": 0.00013275288656799475, |
|
"loss": 0.4779, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 18.59, |
|
"learning_rate": 0.00013260104939421987, |
|
"loss": 0.4775, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 18.59, |
|
"eval_loss": 0.4531766176223755, |
|
"eval_runtime": 1.3099, |
|
"eval_samples_per_second": 141.231, |
|
"eval_steps_per_second": 9.161, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 18.66, |
|
"learning_rate": 0.00013244802834864357, |
|
"loss": 0.4767, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 18.73, |
|
"learning_rate": 0.00013229443373983214, |
|
"loss": 0.4769, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 18.73, |
|
"eval_loss": 0.4537143111228943, |
|
"eval_runtime": 1.2689, |
|
"eval_samples_per_second": 145.794, |
|
"eval_steps_per_second": 9.457, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 18.8, |
|
"learning_rate": 0.00013214026724747165, |
|
"loss": 0.4775, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 18.87, |
|
"learning_rate": 0.00013198553055750217, |
|
"loss": 0.4776, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 18.87, |
|
"eval_loss": 0.45088058710098267, |
|
"eval_runtime": 1.3285, |
|
"eval_samples_per_second": 139.259, |
|
"eval_steps_per_second": 9.033, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 18.94, |
|
"learning_rate": 0.00013183022536209928, |
|
"loss": 0.4772, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"learning_rate": 0.0001316743533596558, |
|
"loss": 0.4775, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"eval_loss": 0.44637706875801086, |
|
"eval_runtime": 1.4739, |
|
"eval_samples_per_second": 125.521, |
|
"eval_steps_per_second": 8.142, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 19.08, |
|
"learning_rate": 0.0001315182296918089, |
|
"loss": 0.4767, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 19.15, |
|
"learning_rate": 0.00013136123032030968, |
|
"loss": 0.4769, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 19.15, |
|
"eval_loss": 0.4463551342487335, |
|
"eval_runtime": 1.2873, |
|
"eval_samples_per_second": 143.713, |
|
"eval_steps_per_second": 9.322, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 19.22, |
|
"learning_rate": 0.0001312036692706245, |
|
"loss": 0.4766, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 19.29, |
|
"learning_rate": 0.0001310455482658157, |
|
"loss": 0.4772, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 19.29, |
|
"eval_loss": 0.44989240169525146, |
|
"eval_runtime": 1.3958, |
|
"eval_samples_per_second": 132.539, |
|
"eval_steps_per_second": 8.597, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 19.36, |
|
"learning_rate": 0.00013088686903506928, |
|
"loss": 0.4766, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 19.43, |
|
"learning_rate": 0.0001307276333136759, |
|
"loss": 0.4766, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 19.43, |
|
"eval_loss": 0.4428194761276245, |
|
"eval_runtime": 1.3007, |
|
"eval_samples_per_second": 142.227, |
|
"eval_steps_per_second": 9.226, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 19.49, |
|
"learning_rate": 0.00013056880323663773, |
|
"loss": 0.4783, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 19.56, |
|
"learning_rate": 0.00013040846307693418, |
|
"loss": 0.4764, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 19.56, |
|
"eval_loss": 0.45361649990081787, |
|
"eval_runtime": 1.2902, |
|
"eval_samples_per_second": 143.387, |
|
"eval_steps_per_second": 9.301, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 19.63, |
|
"learning_rate": 0.00013024757165835455, |
|
"loss": 0.4762, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 19.7, |
|
"learning_rate": 0.00013008613074038167, |
|
"loss": 0.477, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 19.7, |
|
"eval_loss": 0.44440191984176636, |
|
"eval_runtime": 1.4302, |
|
"eval_samples_per_second": 129.355, |
|
"eval_steps_per_second": 8.391, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 19.77, |
|
"learning_rate": 0.00012992414208850748, |
|
"loss": 0.4762, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 19.84, |
|
"learning_rate": 0.000129761607474214, |
|
"loss": 0.4764, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 19.84, |
|
"eval_loss": 0.4482267498970032, |
|
"eval_runtime": 1.3355, |
|
"eval_samples_per_second": 138.528, |
|
"eval_steps_per_second": 8.986, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 19.91, |
|
"learning_rate": 0.00012959852867495364, |
|
"loss": 0.4765, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 19.98, |
|
"learning_rate": 0.00012943490747413015, |
|
"loss": 0.4764, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 19.98, |
|
"eval_loss": 0.4509750008583069, |
|
"eval_runtime": 1.3011, |
|
"eval_samples_per_second": 142.182, |
|
"eval_steps_per_second": 9.223, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 20.05, |
|
"learning_rate": 0.00012927074566107863, |
|
"loss": 0.4764, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 20.12, |
|
"learning_rate": 0.00012910604503104647, |
|
"loss": 0.4763, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 20.12, |
|
"eval_loss": 0.45185399055480957, |
|
"eval_runtime": 1.4864, |
|
"eval_samples_per_second": 124.464, |
|
"eval_steps_per_second": 8.073, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 20.19, |
|
"learning_rate": 0.00012894080738517333, |
|
"loss": 0.4757, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 20.26, |
|
"learning_rate": 0.00012877503453047163, |
|
"loss": 0.4761, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 20.26, |
|
"eval_loss": 0.44523686170578003, |
|
"eval_runtime": 1.2461, |
|
"eval_samples_per_second": 148.467, |
|
"eval_steps_per_second": 9.63, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 20.33, |
|
"learning_rate": 0.00012860872827980677, |
|
"loss": 0.4757, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 20.4, |
|
"learning_rate": 0.00012844189045187722, |
|
"loss": 0.4761, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 20.4, |
|
"eval_loss": 0.44761815667152405, |
|
"eval_runtime": 1.3189, |
|
"eval_samples_per_second": 140.265, |
|
"eval_steps_per_second": 9.098, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 20.47, |
|
"learning_rate": 0.0001282745228711948, |
|
"loss": 0.4756, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 20.54, |
|
"learning_rate": 0.0001281066273680645, |
|
"loss": 0.4756, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 20.54, |
|
"eval_loss": 0.449351966381073, |
|
"eval_runtime": 1.3771, |
|
"eval_samples_per_second": 134.344, |
|
"eval_steps_per_second": 8.714, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 20.6, |
|
"learning_rate": 0.00012793820577856467, |
|
"loss": 0.4763, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 20.67, |
|
"learning_rate": 0.00012776925994452683, |
|
"loss": 0.4757, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 20.67, |
|
"eval_loss": 0.45438215136528015, |
|
"eval_runtime": 1.2596, |
|
"eval_samples_per_second": 146.871, |
|
"eval_steps_per_second": 9.527, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 20.74, |
|
"learning_rate": 0.00012759979171351554, |
|
"loss": 0.4759, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 20.81, |
|
"learning_rate": 0.0001274301434346256, |
|
"loss": 0.4762, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 20.81, |
|
"eval_loss": 0.44118747115135193, |
|
"eval_runtime": 1.2816, |
|
"eval_samples_per_second": 144.353, |
|
"eval_steps_per_second": 9.363, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 20.88, |
|
"learning_rate": 0.00012725963701070271, |
|
"loss": 0.4756, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 20.95, |
|
"learning_rate": 0.00012708861376296116, |
|
"loss": 0.4757, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 20.95, |
|
"eval_loss": 0.44587138295173645, |
|
"eval_runtime": 1.2831, |
|
"eval_samples_per_second": 144.186, |
|
"eval_steps_per_second": 9.353, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 21.02, |
|
"learning_rate": 0.00012691707556168372, |
|
"loss": 0.4758, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 21.09, |
|
"learning_rate": 0.00012674502428278476, |
|
"loss": 0.4749, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 21.09, |
|
"eval_loss": 0.45321565866470337, |
|
"eval_runtime": 1.3999, |
|
"eval_samples_per_second": 132.157, |
|
"eval_steps_per_second": 8.572, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 21.16, |
|
"learning_rate": 0.00012657246180778942, |
|
"loss": 0.4752, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 21.23, |
|
"learning_rate": 0.00012639939002381332, |
|
"loss": 0.4752, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 21.23, |
|
"eval_loss": 0.4477311074733734, |
|
"eval_runtime": 1.4069, |
|
"eval_samples_per_second": 131.493, |
|
"eval_steps_per_second": 8.529, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 21.3, |
|
"learning_rate": 0.0001262258108235418, |
|
"loss": 0.4747, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 21.37, |
|
"learning_rate": 0.00012605207477788834, |
|
"loss": 0.4749, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 21.37, |
|
"eval_loss": 0.43964967131614685, |
|
"eval_runtime": 1.4856, |
|
"eval_samples_per_second": 124.531, |
|
"eval_steps_per_second": 8.078, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 21.44, |
|
"learning_rate": 0.00012587748745058206, |
|
"loss": 0.4755, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 21.51, |
|
"learning_rate": 0.00012570274909192927, |
|
"loss": 0.4764, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 21.51, |
|
"eval_loss": 0.4465893805027008, |
|
"eval_runtime": 1.3187, |
|
"eval_samples_per_second": 140.292, |
|
"eval_steps_per_second": 9.1, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 21.58, |
|
"learning_rate": 0.00012552716125933724, |
|
"loss": 0.4754, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 21.65, |
|
"learning_rate": 0.0001253510755490038, |
|
"loss": 0.4753, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 21.65, |
|
"eval_loss": 0.4523191750049591, |
|
"eval_runtime": 1.3412, |
|
"eval_samples_per_second": 137.933, |
|
"eval_steps_per_second": 8.947, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 21.71, |
|
"learning_rate": 0.00012517484754357648, |
|
"loss": 0.4756, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 21.78, |
|
"learning_rate": 0.00012499777284623038, |
|
"loss": 0.4755, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 21.78, |
|
"eval_loss": 0.4581702947616577, |
|
"eval_runtime": 1.2823, |
|
"eval_samples_per_second": 144.268, |
|
"eval_steps_per_second": 9.358, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 21.85, |
|
"learning_rate": 0.00012482056168383577, |
|
"loss": 0.4757, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 21.92, |
|
"learning_rate": 0.00012464250572981418, |
|
"loss": 0.4749, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 21.92, |
|
"eval_loss": 0.45388588309288025, |
|
"eval_runtime": 1.433, |
|
"eval_samples_per_second": 129.098, |
|
"eval_steps_per_second": 8.374, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 21.99, |
|
"learning_rate": 0.00012446396157250224, |
|
"loss": 0.4748, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 22.06, |
|
"learning_rate": 0.00012428493116443032, |
|
"loss": 0.475, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 22.06, |
|
"eval_loss": 0.45385169982910156, |
|
"eval_runtime": 1.3333, |
|
"eval_samples_per_second": 138.754, |
|
"eval_steps_per_second": 9.0, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 22.13, |
|
"learning_rate": 0.00012410541646344625, |
|
"loss": 0.4751, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 22.2, |
|
"learning_rate": 0.00012392541943269405, |
|
"loss": 0.4747, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 22.2, |
|
"eval_loss": 0.4518774151802063, |
|
"eval_runtime": 1.3056, |
|
"eval_samples_per_second": 141.696, |
|
"eval_steps_per_second": 9.191, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 22.27, |
|
"learning_rate": 0.0001237449420405924, |
|
"loss": 0.4746, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 22.34, |
|
"learning_rate": 0.00012356398626081318, |
|
"loss": 0.4745, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 22.34, |
|
"eval_loss": 0.43704134225845337, |
|
"eval_runtime": 1.2831, |
|
"eval_samples_per_second": 144.177, |
|
"eval_steps_per_second": 9.352, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 22.41, |
|
"learning_rate": 0.00012338255407225979, |
|
"loss": 0.4744, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 22.48, |
|
"learning_rate": 0.00012320064745904554, |
|
"loss": 0.4748, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 22.48, |
|
"eval_loss": 0.4449314773082733, |
|
"eval_runtime": 1.3741, |
|
"eval_samples_per_second": 134.638, |
|
"eval_steps_per_second": 8.733, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 22.55, |
|
"learning_rate": 0.00012301826841047204, |
|
"loss": 0.474, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 22.62, |
|
"learning_rate": 0.00012283541892100733, |
|
"loss": 0.4743, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 22.62, |
|
"eval_loss": 0.44838273525238037, |
|
"eval_runtime": 1.3185, |
|
"eval_samples_per_second": 140.311, |
|
"eval_steps_per_second": 9.101, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 22.69, |
|
"learning_rate": 0.00012265210099026412, |
|
"loss": 0.4747, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 22.76, |
|
"learning_rate": 0.000122468316622978, |
|
"loss": 0.4745, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 22.76, |
|
"eval_loss": 0.44712451100349426, |
|
"eval_runtime": 1.4597, |
|
"eval_samples_per_second": 126.737, |
|
"eval_steps_per_second": 8.221, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 22.82, |
|
"learning_rate": 0.00012228406782898528, |
|
"loss": 0.4741, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 22.89, |
|
"learning_rate": 0.00012209935662320132, |
|
"loss": 0.4739, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 22.89, |
|
"eval_loss": 0.44801923632621765, |
|
"eval_runtime": 1.3259, |
|
"eval_samples_per_second": 139.524, |
|
"eval_steps_per_second": 9.05, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 22.96, |
|
"learning_rate": 0.00012191418502559822, |
|
"loss": 0.4743, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 23.03, |
|
"learning_rate": 0.00012172892677721339, |
|
"loss": 0.4746, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 23.03, |
|
"eval_loss": 0.4518602192401886, |
|
"eval_runtime": 1.3853, |
|
"eval_samples_per_second": 133.543, |
|
"eval_steps_per_second": 8.662, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 23.1, |
|
"learning_rate": 0.00012154433187517636, |
|
"loss": 0.4749, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 23.17, |
|
"learning_rate": 0.00012135779580511397, |
|
"loss": 0.4739, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 23.17, |
|
"eval_loss": 0.4477546811103821, |
|
"eval_runtime": 1.215, |
|
"eval_samples_per_second": 152.268, |
|
"eval_steps_per_second": 9.877, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 23.24, |
|
"learning_rate": 0.00012117080745282275, |
|
"loss": 0.4741, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 23.31, |
|
"learning_rate": 0.00012098336886317734, |
|
"loss": 0.4739, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 23.31, |
|
"eval_loss": 0.44968467950820923, |
|
"eval_runtime": 1.4145, |
|
"eval_samples_per_second": 130.789, |
|
"eval_steps_per_second": 8.484, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 23.38, |
|
"learning_rate": 0.0001207954820859762, |
|
"loss": 0.4734, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 23.45, |
|
"learning_rate": 0.00012060714917591897, |
|
"loss": 0.4738, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 23.45, |
|
"eval_loss": 0.4461642801761627, |
|
"eval_runtime": 1.3919, |
|
"eval_samples_per_second": 132.914, |
|
"eval_steps_per_second": 8.621, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 23.52, |
|
"learning_rate": 0.0001204183721925842, |
|
"loss": 0.4736, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 23.59, |
|
"learning_rate": 0.00012022915320040672, |
|
"loss": 0.474, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 23.59, |
|
"eval_loss": 0.4429852366447449, |
|
"eval_runtime": 1.4452, |
|
"eval_samples_per_second": 128.013, |
|
"eval_steps_per_second": 8.304, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 23.66, |
|
"learning_rate": 0.00012003949426865512, |
|
"loss": 0.4734, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 23.73, |
|
"learning_rate": 0.00011984939747140908, |
|
"loss": 0.4737, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 23.73, |
|
"eval_loss": 0.4483201205730438, |
|
"eval_runtime": 1.3789, |
|
"eval_samples_per_second": 134.168, |
|
"eval_steps_per_second": 8.703, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 23.8, |
|
"learning_rate": 0.00011965886488753674, |
|
"loss": 0.4735, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 23.87, |
|
"learning_rate": 0.00011946789860067191, |
|
"loss": 0.4737, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 23.87, |
|
"eval_loss": 0.45078667998313904, |
|
"eval_runtime": 1.407, |
|
"eval_samples_per_second": 131.488, |
|
"eval_steps_per_second": 8.529, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 23.93, |
|
"learning_rate": 0.0001192765006991913, |
|
"loss": 0.4733, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 0.00011908467327619169, |
|
"loss": 0.474, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 0.4439038932323456, |
|
"eval_runtime": 1.5717, |
|
"eval_samples_per_second": 117.704, |
|
"eval_steps_per_second": 7.635, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 24.07, |
|
"learning_rate": 0.00011889241842946705, |
|
"loss": 0.4734, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 24.14, |
|
"learning_rate": 0.00011869973826148558, |
|
"loss": 0.4729, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 24.14, |
|
"eval_loss": 0.44258496165275574, |
|
"eval_runtime": 1.4275, |
|
"eval_samples_per_second": 129.602, |
|
"eval_steps_per_second": 8.407, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 24.21, |
|
"learning_rate": 0.00011850663487936671, |
|
"loss": 0.4736, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 24.28, |
|
"learning_rate": 0.00011831311039485813, |
|
"loss": 0.4735, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 24.28, |
|
"eval_loss": 0.4433186650276184, |
|
"eval_runtime": 1.3374, |
|
"eval_samples_per_second": 138.324, |
|
"eval_steps_per_second": 8.972, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 24.35, |
|
"learning_rate": 0.00011811916692431257, |
|
"loss": 0.473, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 24.42, |
|
"learning_rate": 0.00011792480658866476, |
|
"loss": 0.4722, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 24.42, |
|
"eval_loss": 0.4482550621032715, |
|
"eval_runtime": 1.3965, |
|
"eval_samples_per_second": 132.478, |
|
"eval_steps_per_second": 8.593, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 24.49, |
|
"learning_rate": 0.0001177300315134082, |
|
"loss": 0.4731, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 24.56, |
|
"learning_rate": 0.00011753523461430789, |
|
"loss": 0.4728, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 24.56, |
|
"eval_loss": 0.44964930415153503, |
|
"eval_runtime": 1.2933, |
|
"eval_samples_per_second": 143.042, |
|
"eval_steps_per_second": 9.278, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 24.63, |
|
"learning_rate": 0.00011734002887616938, |
|
"loss": 0.4729, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 24.7, |
|
"learning_rate": 0.00011714402400936337, |
|
"loss": 0.4727, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 24.7, |
|
"eval_loss": 0.4473492503166199, |
|
"eval_runtime": 1.3972, |
|
"eval_samples_per_second": 132.412, |
|
"eval_steps_per_second": 8.589, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 24.77, |
|
"learning_rate": 0.00011694761294146178, |
|
"loss": 0.4729, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 24.84, |
|
"learning_rate": 0.00011675079782038459, |
|
"loss": 0.4729, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 24.84, |
|
"eval_loss": 0.440412700176239, |
|
"eval_runtime": 1.3521, |
|
"eval_samples_per_second": 136.82, |
|
"eval_steps_per_second": 8.875, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 24.91, |
|
"learning_rate": 0.00011655358079847044, |
|
"loss": 0.4734, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 24.98, |
|
"learning_rate": 0.00011635635966349406, |
|
"loss": 0.4722, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 24.98, |
|
"eval_loss": 0.4425950348377228, |
|
"eval_runtime": 1.4026, |
|
"eval_samples_per_second": 131.901, |
|
"eval_steps_per_second": 8.556, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 25.05, |
|
"learning_rate": 0.00011615834610748511, |
|
"loss": 0.4728, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 25.11, |
|
"learning_rate": 0.00011595993712959619, |
|
"loss": 0.4724, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 25.11, |
|
"eval_loss": 0.4478763937950134, |
|
"eval_runtime": 1.2447, |
|
"eval_samples_per_second": 148.632, |
|
"eval_steps_per_second": 9.641, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 25.18, |
|
"learning_rate": 0.00011576193088899439, |
|
"loss": 0.4728, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 25.25, |
|
"learning_rate": 0.00011556313791328178, |
|
"loss": 0.4739, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 25.25, |
|
"eval_loss": 0.44296661019325256, |
|
"eval_runtime": 1.3308, |
|
"eval_samples_per_second": 139.009, |
|
"eval_steps_per_second": 9.017, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 25.32, |
|
"learning_rate": 0.00011536355803243809, |
|
"loss": 0.4723, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 25.39, |
|
"learning_rate": 0.00011516359142138949, |
|
"loss": 0.4723, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 25.39, |
|
"eval_loss": 0.44180306792259216, |
|
"eval_runtime": 1.3053, |
|
"eval_samples_per_second": 141.73, |
|
"eval_steps_per_second": 9.193, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 25.46, |
|
"learning_rate": 0.00011496324026693882, |
|
"loss": 0.472, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 25.53, |
|
"learning_rate": 0.0001147625067600942, |
|
"loss": 0.4724, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 25.53, |
|
"eval_loss": 0.4371403157711029, |
|
"eval_runtime": 1.3813, |
|
"eval_samples_per_second": 133.927, |
|
"eval_steps_per_second": 8.687, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 25.6, |
|
"learning_rate": 0.00011456139309604507, |
|
"loss": 0.4719, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 25.67, |
|
"learning_rate": 0.00011435990147413828, |
|
"loss": 0.472, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 25.67, |
|
"eval_loss": 0.4456477761268616, |
|
"eval_runtime": 1.3621, |
|
"eval_samples_per_second": 135.819, |
|
"eval_steps_per_second": 8.81, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 25.74, |
|
"learning_rate": 0.00011415803409785392, |
|
"loss": 0.472, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 25.81, |
|
"learning_rate": 0.00011395579317478123, |
|
"loss": 0.4726, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 25.81, |
|
"eval_loss": 0.4419301748275757, |
|
"eval_runtime": 1.3745, |
|
"eval_samples_per_second": 134.595, |
|
"eval_steps_per_second": 8.73, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 25.88, |
|
"learning_rate": 0.00011375318091659465, |
|
"loss": 0.4725, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 25.95, |
|
"learning_rate": 0.00011355019953902926, |
|
"loss": 0.4721, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 25.95, |
|
"eval_loss": 0.44172403216362, |
|
"eval_runtime": 1.2569, |
|
"eval_samples_per_second": 147.182, |
|
"eval_steps_per_second": 9.547, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 26.02, |
|
"learning_rate": 0.00011334685126185694, |
|
"loss": 0.4723, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 26.09, |
|
"learning_rate": 0.00011314313830886184, |
|
"loss": 0.4722, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 26.09, |
|
"eval_loss": 0.44749096035957336, |
|
"eval_runtime": 1.3409, |
|
"eval_samples_per_second": 137.963, |
|
"eval_steps_per_second": 8.949, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 26.16, |
|
"learning_rate": 0.0001129390629078162, |
|
"loss": 0.4714, |
|
"step": 188500 |
|
}, |
|
{ |
|
"epoch": 26.22, |
|
"learning_rate": 0.00011273462729045592, |
|
"loss": 0.4715, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 26.22, |
|
"eval_loss": 0.43891334533691406, |
|
"eval_runtime": 1.3386, |
|
"eval_samples_per_second": 138.204, |
|
"eval_steps_per_second": 8.965, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 26.29, |
|
"learning_rate": 0.00011252983369245613, |
|
"loss": 0.4716, |
|
"step": 189500 |
|
}, |
|
{ |
|
"epoch": 26.36, |
|
"learning_rate": 0.00011232468435340683, |
|
"loss": 0.4717, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 26.36, |
|
"eval_loss": 0.4450508654117584, |
|
"eval_runtime": 1.2763, |
|
"eval_samples_per_second": 144.955, |
|
"eval_steps_per_second": 9.402, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 26.43, |
|
"learning_rate": 0.00011211959287375901, |
|
"loss": 0.4713, |
|
"step": 190500 |
|
}, |
|
{ |
|
"epoch": 26.5, |
|
"learning_rate": 0.00011191373948717242, |
|
"loss": 0.4716, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 26.5, |
|
"eval_loss": 0.4439684748649597, |
|
"eval_runtime": 1.3629, |
|
"eval_samples_per_second": 135.738, |
|
"eval_steps_per_second": 8.805, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 26.57, |
|
"learning_rate": 0.00011170753709704381, |
|
"loss": 0.471, |
|
"step": 191500 |
|
}, |
|
{ |
|
"epoch": 26.64, |
|
"learning_rate": 0.00011150098795836949, |
|
"loss": 0.4714, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 26.64, |
|
"eval_loss": 0.43994638323783875, |
|
"eval_runtime": 1.2645, |
|
"eval_samples_per_second": 146.299, |
|
"eval_steps_per_second": 9.49, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 26.71, |
|
"learning_rate": 0.00011129409432993779, |
|
"loss": 0.4713, |
|
"step": 192500 |
|
}, |
|
{ |
|
"epoch": 26.78, |
|
"learning_rate": 0.00011108685847430426, |
|
"loss": 0.4712, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 26.78, |
|
"eval_loss": 0.43981364369392395, |
|
"eval_runtime": 1.378, |
|
"eval_samples_per_second": 134.256, |
|
"eval_steps_per_second": 8.709, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 26.85, |
|
"learning_rate": 0.00011087969814717285, |
|
"loss": 0.472, |
|
"step": 193500 |
|
}, |
|
{ |
|
"epoch": 26.92, |
|
"learning_rate": 0.0001106717853128616, |
|
"loss": 0.4709, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 26.92, |
|
"eval_loss": 0.4423602223396301, |
|
"eval_runtime": 1.4176, |
|
"eval_samples_per_second": 130.501, |
|
"eval_steps_per_second": 8.465, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 26.99, |
|
"learning_rate": 0.00011046353705682016, |
|
"loss": 0.4714, |
|
"step": 194500 |
|
}, |
|
{ |
|
"epoch": 27.06, |
|
"learning_rate": 0.00011025495565641824, |
|
"loss": 0.4714, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 27.06, |
|
"eval_loss": 0.45326730608940125, |
|
"eval_runtime": 1.3206, |
|
"eval_samples_per_second": 140.087, |
|
"eval_steps_per_second": 9.087, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 27.13, |
|
"learning_rate": 0.00011004604339266847, |
|
"loss": 0.4713, |
|
"step": 195500 |
|
}, |
|
{ |
|
"epoch": 27.2, |
|
"learning_rate": 0.00010983722135828816, |
|
"loss": 0.4706, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 27.2, |
|
"eval_loss": 0.43939587473869324, |
|
"eval_runtime": 1.3732, |
|
"eval_samples_per_second": 134.718, |
|
"eval_steps_per_second": 8.738, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 27.27, |
|
"learning_rate": 0.00010962765487562423, |
|
"loss": 0.471, |
|
"step": 196500 |
|
}, |
|
{ |
|
"epoch": 27.33, |
|
"learning_rate": 0.00010941776438967333, |
|
"loss": 0.471, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 27.33, |
|
"eval_loss": 0.44356608390808105, |
|
"eval_runtime": 1.2936, |
|
"eval_samples_per_second": 143.016, |
|
"eval_steps_per_second": 9.277, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 27.4, |
|
"learning_rate": 0.00010920755219576423, |
|
"loss": 0.4709, |
|
"step": 197500 |
|
}, |
|
{ |
|
"epoch": 27.47, |
|
"learning_rate": 0.0001089970205927438, |
|
"loss": 0.4707, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 27.47, |
|
"eval_loss": 0.442117840051651, |
|
"eval_runtime": 1.2666, |
|
"eval_samples_per_second": 146.059, |
|
"eval_steps_per_second": 9.474, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 27.54, |
|
"learning_rate": 0.00010878659389531198, |
|
"loss": 0.4707, |
|
"step": 198500 |
|
}, |
|
{ |
|
"epoch": 27.61, |
|
"learning_rate": 0.0001085766989233304, |
|
"loss": 0.471, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 27.61, |
|
"eval_loss": 0.44586172699928284, |
|
"eval_runtime": 1.2641, |
|
"eval_samples_per_second": 146.347, |
|
"eval_steps_per_second": 9.493, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 27.68, |
|
"learning_rate": 0.00010836522541162288, |
|
"loss": 0.471, |
|
"step": 199500 |
|
}, |
|
{ |
|
"epoch": 27.75, |
|
"learning_rate": 0.00010815344170235474, |
|
"loss": 0.4707, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 27.75, |
|
"eval_loss": 0.4438597857952118, |
|
"eval_runtime": 1.3122, |
|
"eval_samples_per_second": 140.987, |
|
"eval_steps_per_second": 9.145, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 27.82, |
|
"learning_rate": 0.00010794135011155871, |
|
"loss": 0.4713, |
|
"step": 200500 |
|
}, |
|
{ |
|
"epoch": 27.89, |
|
"learning_rate": 0.00010772895295863448, |
|
"loss": 0.471, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 27.89, |
|
"eval_loss": 0.446729838848114, |
|
"eval_runtime": 1.2963, |
|
"eval_samples_per_second": 142.711, |
|
"eval_steps_per_second": 9.257, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 27.96, |
|
"learning_rate": 0.00010751625256632328, |
|
"loss": 0.4709, |
|
"step": 201500 |
|
}, |
|
{ |
|
"epoch": 28.03, |
|
"learning_rate": 0.00010730325126068249, |
|
"loss": 0.471, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 28.03, |
|
"eval_loss": 0.4438862204551697, |
|
"eval_runtime": 1.3617, |
|
"eval_samples_per_second": 135.862, |
|
"eval_steps_per_second": 8.813, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 28.1, |
|
"learning_rate": 0.00010708995137106029, |
|
"loss": 0.4704, |
|
"step": 202500 |
|
}, |
|
{ |
|
"epoch": 28.17, |
|
"learning_rate": 0.00010687635523007008, |
|
"loss": 0.4704, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 28.17, |
|
"eval_loss": 0.44452282786369324, |
|
"eval_runtime": 1.2694, |
|
"eval_samples_per_second": 145.738, |
|
"eval_steps_per_second": 9.453, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 28.24, |
|
"learning_rate": 0.00010666246517356506, |
|
"loss": 0.4701, |
|
"step": 203500 |
|
}, |
|
{ |
|
"epoch": 28.31, |
|
"learning_rate": 0.00010644828354061256, |
|
"loss": 0.4705, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 28.31, |
|
"eval_loss": 0.44292566180229187, |
|
"eval_runtime": 1.3369, |
|
"eval_samples_per_second": 138.378, |
|
"eval_steps_per_second": 8.976, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 28.38, |
|
"learning_rate": 0.0001062338126734687, |
|
"loss": 0.4702, |
|
"step": 204500 |
|
}, |
|
{ |
|
"epoch": 28.44, |
|
"learning_rate": 0.00010601905491755247, |
|
"loss": 0.4706, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 28.44, |
|
"eval_loss": 0.4382130801677704, |
|
"eval_runtime": 1.3584, |
|
"eval_samples_per_second": 136.194, |
|
"eval_steps_per_second": 8.834, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 28.51, |
|
"learning_rate": 0.00010580401262142026, |
|
"loss": 0.4703, |
|
"step": 205500 |
|
}, |
|
{ |
|
"epoch": 28.58, |
|
"learning_rate": 0.00010558868813674022, |
|
"loss": 0.4703, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 28.58, |
|
"eval_loss": 0.44251275062561035, |
|
"eval_runtime": 1.3909, |
|
"eval_samples_per_second": 133.01, |
|
"eval_steps_per_second": 8.628, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 28.65, |
|
"learning_rate": 0.00010537308381826636, |
|
"loss": 0.4695, |
|
"step": 206500 |
|
}, |
|
{ |
|
"epoch": 28.72, |
|
"learning_rate": 0.00010515763406275424, |
|
"loss": 0.4695, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 28.72, |
|
"eval_loss": 0.441448837518692, |
|
"eval_runtime": 1.299, |
|
"eval_samples_per_second": 142.42, |
|
"eval_steps_per_second": 9.238, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 28.79, |
|
"learning_rate": 0.00010494147770104224, |
|
"loss": 0.4697, |
|
"step": 207500 |
|
}, |
|
{ |
|
"epoch": 28.86, |
|
"learning_rate": 0.00010472504858332604, |
|
"loss": 0.4696, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 28.86, |
|
"eval_loss": 0.4405233561992645, |
|
"eval_runtime": 1.3279, |
|
"eval_samples_per_second": 139.321, |
|
"eval_steps_per_second": 9.037, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 28.93, |
|
"learning_rate": 0.00010450834907643986, |
|
"loss": 0.4697, |
|
"step": 208500 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"learning_rate": 0.00010429138155017476, |
|
"loss": 0.4696, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_loss": 0.4460256099700928, |
|
"eval_runtime": 1.1542, |
|
"eval_samples_per_second": 160.282, |
|
"eval_steps_per_second": 10.397, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 29.07, |
|
"learning_rate": 0.00010407414837725286, |
|
"loss": 0.4701, |
|
"step": 209500 |
|
}, |
|
{ |
|
"epoch": 29.14, |
|
"learning_rate": 0.00010385665193330129, |
|
"loss": 0.4701, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 29.14, |
|
"eval_loss": 0.4460484981536865, |
|
"eval_runtime": 1.3974, |
|
"eval_samples_per_second": 132.39, |
|
"eval_steps_per_second": 8.587, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 29.21, |
|
"learning_rate": 0.00010363889459682637, |
|
"loss": 0.4694, |
|
"step": 210500 |
|
}, |
|
{ |
|
"epoch": 29.28, |
|
"learning_rate": 0.00010342087874918744, |
|
"loss": 0.4696, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 29.28, |
|
"eval_loss": 0.4396804869174957, |
|
"eval_runtime": 1.3526, |
|
"eval_samples_per_second": 136.773, |
|
"eval_steps_per_second": 8.872, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 29.35, |
|
"learning_rate": 0.00010320260677457086, |
|
"loss": 0.4689, |
|
"step": 211500 |
|
}, |
|
{ |
|
"epoch": 29.42, |
|
"learning_rate": 0.00010298451836303776, |
|
"loss": 0.4693, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 29.42, |
|
"eval_loss": 0.4438728988170624, |
|
"eval_runtime": 1.3437, |
|
"eval_samples_per_second": 137.675, |
|
"eval_steps_per_second": 8.93, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 29.49, |
|
"learning_rate": 0.00010276574179851642, |
|
"loss": 0.4695, |
|
"step": 212500 |
|
}, |
|
{ |
|
"epoch": 29.55, |
|
"learning_rate": 0.00010254671627149016, |
|
"loss": 0.4694, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 29.55, |
|
"eval_loss": 0.4494737684726715, |
|
"eval_runtime": 1.3148, |
|
"eval_samples_per_second": 140.708, |
|
"eval_steps_per_second": 9.127, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 29.62, |
|
"learning_rate": 0.00010232744417718707, |
|
"loss": 0.4692, |
|
"step": 213500 |
|
}, |
|
{ |
|
"epoch": 29.69, |
|
"learning_rate": 0.00010210792791353163, |
|
"loss": 0.469, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 29.69, |
|
"eval_loss": 0.44658780097961426, |
|
"eval_runtime": 1.3457, |
|
"eval_samples_per_second": 137.477, |
|
"eval_steps_per_second": 8.917, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 29.76, |
|
"learning_rate": 0.00010188816988111854, |
|
"loss": 0.4693, |
|
"step": 214500 |
|
}, |
|
{ |
|
"epoch": 29.83, |
|
"learning_rate": 0.0001016681724831864, |
|
"loss": 0.4691, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 29.83, |
|
"eval_loss": 0.43355101346969604, |
|
"eval_runtime": 1.4163, |
|
"eval_samples_per_second": 130.619, |
|
"eval_steps_per_second": 8.473, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 29.9, |
|
"learning_rate": 0.00010144793812559155, |
|
"loss": 0.4691, |
|
"step": 215500 |
|
}, |
|
{ |
|
"epoch": 29.97, |
|
"learning_rate": 0.00010122791038707925, |
|
"loss": 0.4694, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 29.97, |
|
"eval_loss": 0.4376647174358368, |
|
"eval_runtime": 1.269, |
|
"eval_samples_per_second": 145.78, |
|
"eval_steps_per_second": 9.456, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 30.04, |
|
"learning_rate": 0.00010100765143119065, |
|
"loss": 0.4696, |
|
"step": 216500 |
|
}, |
|
{ |
|
"epoch": 30.11, |
|
"learning_rate": 0.00010078804782907829, |
|
"loss": 0.4698, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 30.11, |
|
"eval_loss": 0.43560221791267395, |
|
"eval_runtime": 1.4174, |
|
"eval_samples_per_second": 130.525, |
|
"eval_steps_per_second": 8.467, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 30.18, |
|
"learning_rate": 0.00010056689200396548, |
|
"loss": 0.4685, |
|
"step": 217500 |
|
}, |
|
{ |
|
"epoch": 30.25, |
|
"learning_rate": 0.00010034551126261683, |
|
"loss": 0.4689, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 30.25, |
|
"eval_loss": 0.43806299567222595, |
|
"eval_runtime": 1.3648, |
|
"eval_samples_per_second": 135.548, |
|
"eval_steps_per_second": 8.792, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 30.32, |
|
"learning_rate": 0.00010012390802601666, |
|
"loss": 0.4685, |
|
"step": 218500 |
|
}, |
|
{ |
|
"epoch": 30.39, |
|
"learning_rate": 9.990208471758243e-05, |
|
"loss": 0.4685, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 30.39, |
|
"eval_loss": 0.44310352206230164, |
|
"eval_runtime": 1.2698, |
|
"eval_samples_per_second": 145.698, |
|
"eval_steps_per_second": 9.451, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 30.46, |
|
"learning_rate": 9.968004376313833e-05, |
|
"loss": 0.469, |
|
"step": 219500 |
|
}, |
|
{ |
|
"epoch": 30.53, |
|
"learning_rate": 9.945778759088867e-05, |
|
"loss": 0.4688, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 30.53, |
|
"eval_loss": 0.4411117434501648, |
|
"eval_runtime": 1.4619, |
|
"eval_samples_per_second": 126.55, |
|
"eval_steps_per_second": 8.209, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 30.6, |
|
"learning_rate": 9.923531863139129e-05, |
|
"loss": 0.4687, |
|
"step": 220500 |
|
}, |
|
{ |
|
"epoch": 30.66, |
|
"learning_rate": 9.901263931753117e-05, |
|
"loss": 0.4687, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 30.66, |
|
"eval_loss": 0.444545179605484, |
|
"eval_runtime": 1.4274, |
|
"eval_samples_per_second": 129.604, |
|
"eval_steps_per_second": 8.407, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 30.73, |
|
"learning_rate": 9.878975208449357e-05, |
|
"loss": 0.4686, |
|
"step": 221500 |
|
}, |
|
{ |
|
"epoch": 30.8, |
|
"learning_rate": 9.856665936973753e-05, |
|
"loss": 0.4685, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 30.8, |
|
"eval_loss": 0.44319838285446167, |
|
"eval_runtime": 1.625, |
|
"eval_samples_per_second": 113.845, |
|
"eval_steps_per_second": 7.385, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 30.87, |
|
"learning_rate": 9.834336361296929e-05, |
|
"loss": 0.4682, |
|
"step": 222500 |
|
}, |
|
{ |
|
"epoch": 30.94, |
|
"learning_rate": 9.811986725611542e-05, |
|
"loss": 0.4687, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 30.94, |
|
"eval_loss": 0.43832239508628845, |
|
"eval_runtime": 1.4789, |
|
"eval_samples_per_second": 125.09, |
|
"eval_steps_per_second": 8.114, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 31.01, |
|
"learning_rate": 9.789617274329632e-05, |
|
"loss": 0.4685, |
|
"step": 223500 |
|
}, |
|
{ |
|
"epoch": 31.08, |
|
"learning_rate": 9.76722825207993e-05, |
|
"loss": 0.4681, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 31.08, |
|
"eval_loss": 0.4371243119239807, |
|
"eval_runtime": 1.2937, |
|
"eval_samples_per_second": 142.997, |
|
"eval_steps_per_second": 9.275, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 31.15, |
|
"learning_rate": 9.7448199037052e-05, |
|
"loss": 0.468, |
|
"step": 224500 |
|
}, |
|
{ |
|
"epoch": 31.22, |
|
"learning_rate": 9.722392474259546e-05, |
|
"loss": 0.4683, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 31.22, |
|
"eval_loss": 0.4383920133113861, |
|
"eval_runtime": 1.3579, |
|
"eval_samples_per_second": 136.242, |
|
"eval_steps_per_second": 8.837, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 31.29, |
|
"learning_rate": 9.699946209005747e-05, |
|
"loss": 0.4681, |
|
"step": 225500 |
|
}, |
|
{ |
|
"epoch": 31.36, |
|
"learning_rate": 9.677481353412562e-05, |
|
"loss": 0.4678, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 31.36, |
|
"eval_loss": 0.4395655393600464, |
|
"eval_runtime": 1.4128, |
|
"eval_samples_per_second": 130.944, |
|
"eval_steps_per_second": 8.494, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 31.43, |
|
"learning_rate": 9.65504313769716e-05, |
|
"loss": 0.4681, |
|
"step": 226500 |
|
}, |
|
{ |
|
"epoch": 31.5, |
|
"learning_rate": 9.632541874594076e-05, |
|
"loss": 0.4682, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 31.5, |
|
"eval_loss": 0.4387070834636688, |
|
"eval_runtime": 1.3852, |
|
"eval_samples_per_second": 133.557, |
|
"eval_steps_per_second": 8.663, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 31.57, |
|
"learning_rate": 9.610022758274608e-05, |
|
"loss": 0.4678, |
|
"step": 227500 |
|
}, |
|
{ |
|
"epoch": 31.64, |
|
"learning_rate": 9.587486035004206e-05, |
|
"loss": 0.4671, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 31.64, |
|
"eval_loss": 0.43819934129714966, |
|
"eval_runtime": 1.4407, |
|
"eval_samples_per_second": 128.406, |
|
"eval_steps_per_second": 8.329, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 31.71, |
|
"learning_rate": 9.564931951240863e-05, |
|
"loss": 0.4677, |
|
"step": 228500 |
|
}, |
|
{ |
|
"epoch": 31.77, |
|
"learning_rate": 9.542405912943224e-05, |
|
"loss": 0.4676, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 31.77, |
|
"eval_loss": 0.44104936718940735, |
|
"eval_runtime": 1.2764, |
|
"eval_samples_per_second": 144.942, |
|
"eval_steps_per_second": 9.402, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 31.84, |
|
"learning_rate": 9.519817881812249e-05, |
|
"loss": 0.4679, |
|
"step": 229500 |
|
}, |
|
{ |
|
"epoch": 31.91, |
|
"learning_rate": 9.497213230196413e-05, |
|
"loss": 0.4681, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 31.91, |
|
"eval_loss": 0.4391230046749115, |
|
"eval_runtime": 1.302, |
|
"eval_samples_per_second": 142.084, |
|
"eval_steps_per_second": 9.216, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 31.98, |
|
"learning_rate": 9.474592205296567e-05, |
|
"loss": 0.4674, |
|
"step": 230500 |
|
}, |
|
{ |
|
"epoch": 32.05, |
|
"learning_rate": 9.451955054492613e-05, |
|
"loss": 0.4676, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 32.05, |
|
"eval_loss": 0.4428912103176117, |
|
"eval_runtime": 1.3006, |
|
"eval_samples_per_second": 142.245, |
|
"eval_steps_per_second": 9.227, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 32.12, |
|
"learning_rate": 9.429347347081072e-05, |
|
"loss": 0.468, |
|
"step": 231500 |
|
}, |
|
{ |
|
"epoch": 32.19, |
|
"learning_rate": 9.406678718325199e-05, |
|
"loss": 0.4673, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 32.19, |
|
"eval_loss": 0.43948763608932495, |
|
"eval_runtime": 1.3886, |
|
"eval_samples_per_second": 133.23, |
|
"eval_steps_per_second": 8.642, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 32.26, |
|
"learning_rate": 9.383994706356235e-05, |
|
"loss": 0.4671, |
|
"step": 232500 |
|
}, |
|
{ |
|
"epoch": 32.33, |
|
"learning_rate": 9.361295559242895e-05, |
|
"loss": 0.4669, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 32.33, |
|
"eval_loss": 0.4389004409313202, |
|
"eval_runtime": 1.4112, |
|
"eval_samples_per_second": 131.092, |
|
"eval_steps_per_second": 8.503, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 32.4, |
|
"learning_rate": 9.338581525219419e-05, |
|
"loss": 0.4674, |
|
"step": 233500 |
|
}, |
|
{ |
|
"epoch": 32.47, |
|
"learning_rate": 9.315852852682843e-05, |
|
"loss": 0.4675, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 32.47, |
|
"eval_loss": 0.445227712392807, |
|
"eval_runtime": 1.2657, |
|
"eval_samples_per_second": 146.167, |
|
"eval_steps_per_second": 9.481, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 32.54, |
|
"learning_rate": 9.293109790190286e-05, |
|
"loss": 0.467, |
|
"step": 234500 |
|
}, |
|
{ |
|
"epoch": 32.61, |
|
"learning_rate": 9.270352586456241e-05, |
|
"loss": 0.4667, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 32.61, |
|
"eval_loss": 0.4395338296890259, |
|
"eval_runtime": 1.3986, |
|
"eval_samples_per_second": 132.275, |
|
"eval_steps_per_second": 8.58, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 32.68, |
|
"learning_rate": 9.247627046241149e-05, |
|
"loss": 0.4667, |
|
"step": 235500 |
|
}, |
|
{ |
|
"epoch": 32.75, |
|
"learning_rate": 9.224842333821526e-05, |
|
"loss": 0.4667, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 32.75, |
|
"eval_loss": 0.44597160816192627, |
|
"eval_runtime": 1.3803, |
|
"eval_samples_per_second": 134.024, |
|
"eval_steps_per_second": 8.693, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 32.82, |
|
"learning_rate": 9.202044226722378e-05, |
|
"loss": 0.467, |
|
"step": 236500 |
|
}, |
|
{ |
|
"epoch": 32.88, |
|
"learning_rate": 9.179232974260151e-05, |
|
"loss": 0.4672, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 32.88, |
|
"eval_loss": 0.4403511583805084, |
|
"eval_runtime": 1.285, |
|
"eval_samples_per_second": 143.964, |
|
"eval_steps_per_second": 9.338, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 32.95, |
|
"learning_rate": 9.156454486896012e-05, |
|
"loss": 0.4672, |
|
"step": 237500 |
|
}, |
|
{ |
|
"epoch": 33.02, |
|
"learning_rate": 9.133617717272672e-05, |
|
"loss": 0.4667, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 33.02, |
|
"eval_loss": 0.43722978234291077, |
|
"eval_runtime": 1.3529, |
|
"eval_samples_per_second": 136.739, |
|
"eval_steps_per_second": 8.87, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 33.09, |
|
"learning_rate": 9.110768550587597e-05, |
|
"loss": 0.4663, |
|
"step": 238500 |
|
}, |
|
{ |
|
"epoch": 33.16, |
|
"learning_rate": 9.087907236715611e-05, |
|
"loss": 0.4663, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 33.16, |
|
"eval_loss": 0.43622609972953796, |
|
"eval_runtime": 1.5302, |
|
"eval_samples_per_second": 120.902, |
|
"eval_steps_per_second": 7.842, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 33.23, |
|
"learning_rate": 9.065034025664387e-05, |
|
"loss": 0.4665, |
|
"step": 239500 |
|
}, |
|
{ |
|
"epoch": 33.3, |
|
"learning_rate": 9.042194948745301e-05, |
|
"loss": 0.4669, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 33.3, |
|
"eval_loss": 0.4427553713321686, |
|
"eval_runtime": 1.9547, |
|
"eval_samples_per_second": 94.644, |
|
"eval_steps_per_second": 6.139, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 33.37, |
|
"learning_rate": 9.019298716419978e-05, |
|
"loss": 0.4669, |
|
"step": 240500 |
|
}, |
|
{ |
|
"epoch": 33.44, |
|
"learning_rate": 8.996391337207199e-05, |
|
"loss": 0.4662, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 33.44, |
|
"eval_loss": 0.43700045347213745, |
|
"eval_runtime": 1.4983, |
|
"eval_samples_per_second": 123.472, |
|
"eval_steps_per_second": 8.009, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 33.51, |
|
"learning_rate": 8.973473061618393e-05, |
|
"loss": 0.467, |
|
"step": 241500 |
|
}, |
|
{ |
|
"epoch": 33.58, |
|
"learning_rate": 8.950635876843523e-05, |
|
"loss": 0.4662, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 33.58, |
|
"eval_loss": 0.43820101022720337, |
|
"eval_runtime": 1.8161, |
|
"eval_samples_per_second": 101.864, |
|
"eval_steps_per_second": 6.607, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 33.65, |
|
"learning_rate": 8.927696601591206e-05, |
|
"loss": 0.4661, |
|
"step": 242500 |
|
}, |
|
{ |
|
"epoch": 33.72, |
|
"learning_rate": 8.904747181197483e-05, |
|
"loss": 0.466, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 33.72, |
|
"eval_loss": 0.439488023519516, |
|
"eval_runtime": 1.9649, |
|
"eval_samples_per_second": 94.152, |
|
"eval_steps_per_second": 6.107, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 33.79, |
|
"learning_rate": 8.881787866633541e-05, |
|
"loss": 0.4663, |
|
"step": 243500 |
|
}, |
|
{ |
|
"epoch": 33.86, |
|
"learning_rate": 8.858818908978764e-05, |
|
"loss": 0.4661, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 33.86, |
|
"eval_loss": 0.4417674243450165, |
|
"eval_runtime": 1.9528, |
|
"eval_samples_per_second": 94.735, |
|
"eval_steps_per_second": 6.145, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 33.93, |
|
"learning_rate": 8.835840559417993e-05, |
|
"loss": 0.4658, |
|
"step": 244500 |
|
}, |
|
{ |
|
"epoch": 33.99, |
|
"learning_rate": 8.812853069238779e-05, |
|
"loss": 0.4663, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 33.99, |
|
"eval_loss": 0.4406869411468506, |
|
"eval_runtime": 1.9749, |
|
"eval_samples_per_second": 93.675, |
|
"eval_steps_per_second": 6.076, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 34.06, |
|
"learning_rate": 8.78985668982863e-05, |
|
"loss": 0.4664, |
|
"step": 245500 |
|
}, |
|
{ |
|
"epoch": 34.13, |
|
"learning_rate": 8.766851672672264e-05, |
|
"loss": 0.4661, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 34.13, |
|
"eval_loss": 0.4346267282962799, |
|
"eval_runtime": 1.3563, |
|
"eval_samples_per_second": 136.397, |
|
"eval_steps_per_second": 8.847, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 34.2, |
|
"learning_rate": 8.743838269348869e-05, |
|
"loss": 0.4657, |
|
"step": 246500 |
|
}, |
|
{ |
|
"epoch": 34.27, |
|
"learning_rate": 8.720816731529334e-05, |
|
"loss": 0.4652, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 34.27, |
|
"eval_loss": 0.4391646981239319, |
|
"eval_runtime": 1.4449, |
|
"eval_samples_per_second": 128.041, |
|
"eval_steps_per_second": 8.305, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 34.34, |
|
"learning_rate": 8.697787310973507e-05, |
|
"loss": 0.4651, |
|
"step": 247500 |
|
}, |
|
{ |
|
"epoch": 34.41, |
|
"learning_rate": 8.674750259527444e-05, |
|
"loss": 0.4662, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 34.41, |
|
"eval_loss": 0.4395732879638672, |
|
"eval_runtime": 1.2788, |
|
"eval_samples_per_second": 144.666, |
|
"eval_steps_per_second": 9.384, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 34.48, |
|
"learning_rate": 8.651798021207188e-05, |
|
"loss": 0.4663, |
|
"step": 248500 |
|
}, |
|
{ |
|
"epoch": 34.55, |
|
"learning_rate": 8.628746491855551e-05, |
|
"loss": 0.4655, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 34.55, |
|
"eval_loss": 0.4426644742488861, |
|
"eval_runtime": 1.3113, |
|
"eval_samples_per_second": 141.076, |
|
"eval_steps_per_second": 9.151, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 34.62, |
|
"learning_rate": 8.60568808663301e-05, |
|
"loss": 0.4651, |
|
"step": 249500 |
|
}, |
|
{ |
|
"epoch": 34.69, |
|
"learning_rate": 8.582623057702597e-05, |
|
"loss": 0.4657, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 34.69, |
|
"eval_loss": 0.44838571548461914, |
|
"eval_runtime": 1.312, |
|
"eval_samples_per_second": 141.003, |
|
"eval_steps_per_second": 9.146, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 34.76, |
|
"learning_rate": 8.559551657299767e-05, |
|
"loss": 0.4662, |
|
"step": 250500 |
|
}, |
|
{ |
|
"epoch": 34.83, |
|
"learning_rate": 8.536520298707998e-05, |
|
"loss": 0.4654, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 34.83, |
|
"eval_loss": 0.42675724625587463, |
|
"eval_runtime": 1.3077, |
|
"eval_samples_per_second": 141.473, |
|
"eval_steps_per_second": 9.177, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 34.9, |
|
"learning_rate": 8.513436923824351e-05, |
|
"loss": 0.4649, |
|
"step": 251500 |
|
}, |
|
{ |
|
"epoch": 34.97, |
|
"learning_rate": 8.490347934076768e-05, |
|
"loss": 0.4655, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 34.97, |
|
"eval_loss": 0.4383569657802582, |
|
"eval_runtime": 1.5065, |
|
"eval_samples_per_second": 122.799, |
|
"eval_steps_per_second": 7.965, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 35.04, |
|
"learning_rate": 8.467253581962737e-05, |
|
"loss": 0.4657, |
|
"step": 252500 |
|
}, |
|
{ |
|
"epoch": 35.1, |
|
"learning_rate": 8.444154120038393e-05, |
|
"loss": 0.4649, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 35.1, |
|
"eval_loss": 0.43828797340393066, |
|
"eval_runtime": 1.4173, |
|
"eval_samples_per_second": 130.53, |
|
"eval_steps_per_second": 8.467, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 35.17, |
|
"learning_rate": 8.421049800915756e-05, |
|
"loss": 0.4653, |
|
"step": 253500 |
|
}, |
|
{ |
|
"epoch": 35.24, |
|
"learning_rate": 8.397940877259951e-05, |
|
"loss": 0.465, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 35.24, |
|
"eval_loss": 0.4367583394050598, |
|
"eval_runtime": 1.4298, |
|
"eval_samples_per_second": 129.389, |
|
"eval_steps_per_second": 8.393, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 35.31, |
|
"learning_rate": 8.374827601786469e-05, |
|
"loss": 0.4646, |
|
"step": 254500 |
|
}, |
|
{ |
|
"epoch": 35.38, |
|
"learning_rate": 8.351802704586772e-05, |
|
"loss": 0.4648, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 35.38, |
|
"eval_loss": 0.4326806664466858, |
|
"eval_runtime": 1.2828, |
|
"eval_samples_per_second": 144.22, |
|
"eval_steps_per_second": 9.355, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 35.45, |
|
"learning_rate": 8.328681498693332e-05, |
|
"loss": 0.4648, |
|
"step": 255500 |
|
}, |
|
{ |
|
"epoch": 35.52, |
|
"learning_rate": 8.305556698391678e-05, |
|
"loss": 0.4647, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 35.52, |
|
"eval_loss": 0.44159042835235596, |
|
"eval_runtime": 1.4429, |
|
"eval_samples_per_second": 128.215, |
|
"eval_steps_per_second": 8.317, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 35.59, |
|
"learning_rate": 8.282428556570928e-05, |
|
"loss": 0.4643, |
|
"step": 256500 |
|
}, |
|
{ |
|
"epoch": 35.66, |
|
"learning_rate": 8.25929732615673e-05, |
|
"loss": 0.4652, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 35.66, |
|
"eval_loss": 0.438982754945755, |
|
"eval_runtime": 1.333, |
|
"eval_samples_per_second": 138.783, |
|
"eval_steps_per_second": 9.002, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 35.73, |
|
"learning_rate": 8.236163260108516e-05, |
|
"loss": 0.4645, |
|
"step": 257500 |
|
}, |
|
{ |
|
"epoch": 35.8, |
|
"learning_rate": 8.21302661141673e-05, |
|
"loss": 0.4646, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 35.8, |
|
"eval_loss": 0.4449821710586548, |
|
"eval_runtime": 1.5341, |
|
"eval_samples_per_second": 120.588, |
|
"eval_steps_per_second": 7.822, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 35.87, |
|
"learning_rate": 8.189887633100046e-05, |
|
"loss": 0.465, |
|
"step": 258500 |
|
}, |
|
{ |
|
"epoch": 35.94, |
|
"learning_rate": 8.166746578202634e-05, |
|
"loss": 0.4651, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 35.94, |
|
"eval_loss": 0.4354371428489685, |
|
"eval_runtime": 1.3878, |
|
"eval_samples_per_second": 133.304, |
|
"eval_steps_per_second": 8.647, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 36.01, |
|
"learning_rate": 8.14364998719983e-05, |
|
"loss": 0.4652, |
|
"step": 259500 |
|
}, |
|
{ |
|
"epoch": 36.08, |
|
"learning_rate": 8.120505541249758e-05, |
|
"loss": 0.4643, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 36.08, |
|
"eval_loss": 0.4472554922103882, |
|
"eval_runtime": 1.3524, |
|
"eval_samples_per_second": 136.792, |
|
"eval_steps_per_second": 8.873, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 36.15, |
|
"learning_rate": 8.097359777470398e-05, |
|
"loss": 0.4641, |
|
"step": 260500 |
|
}, |
|
{ |
|
"epoch": 36.21, |
|
"learning_rate": 8.074259243531422e-05, |
|
"loss": 0.464, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 36.21, |
|
"eval_loss": 0.4422769844532013, |
|
"eval_runtime": 1.3389, |
|
"eval_samples_per_second": 138.17, |
|
"eval_steps_per_second": 8.962, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 36.28, |
|
"learning_rate": 8.051111604830743e-05, |
|
"loss": 0.4645, |
|
"step": 261500 |
|
}, |
|
{ |
|
"epoch": 36.35, |
|
"learning_rate": 8.027963407181743e-05, |
|
"loss": 0.4638, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 36.35, |
|
"eval_loss": 0.4338819682598114, |
|
"eval_runtime": 1.6313, |
|
"eval_samples_per_second": 113.407, |
|
"eval_steps_per_second": 7.356, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 36.42, |
|
"learning_rate": 8.00486120087324e-05, |
|
"loss": 0.4648, |
|
"step": 262500 |
|
}, |
|
{ |
|
"epoch": 36.49, |
|
"learning_rate": 7.98171264461855e-05, |
|
"loss": 0.464, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 36.49, |
|
"eval_loss": 0.44382044672966003, |
|
"eval_runtime": 1.7404, |
|
"eval_samples_per_second": 106.298, |
|
"eval_steps_per_second": 6.895, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 36.56, |
|
"learning_rate": 7.958564288351447e-05, |
|
"loss": 0.4637, |
|
"step": 263500 |
|
}, |
|
{ |
|
"epoch": 36.63, |
|
"learning_rate": 7.935416385218652e-05, |
|
"loss": 0.464, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 36.63, |
|
"eval_loss": 0.43979740142822266, |
|
"eval_runtime": 1.747, |
|
"eval_samples_per_second": 105.895, |
|
"eval_steps_per_second": 6.869, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 36.7, |
|
"learning_rate": 7.912269188361918e-05, |
|
"loss": 0.4642, |
|
"step": 264500 |
|
}, |
|
{ |
|
"epoch": 36.77, |
|
"learning_rate": 7.889122950915287e-05, |
|
"loss": 0.4637, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 36.77, |
|
"eval_loss": 0.43520936369895935, |
|
"eval_runtime": 1.77, |
|
"eval_samples_per_second": 104.517, |
|
"eval_steps_per_second": 6.779, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 36.84, |
|
"learning_rate": 7.865977926002297e-05, |
|
"loss": 0.4641, |
|
"step": 265500 |
|
}, |
|
{ |
|
"epoch": 36.91, |
|
"learning_rate": 7.842834366733236e-05, |
|
"loss": 0.4641, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 36.91, |
|
"eval_loss": 0.4351678490638733, |
|
"eval_runtime": 1.7687, |
|
"eval_samples_per_second": 104.598, |
|
"eval_steps_per_second": 6.785, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 36.98, |
|
"learning_rate": 7.819738807999911e-05, |
|
"loss": 0.4633, |
|
"step": 266500 |
|
}, |
|
{ |
|
"epoch": 37.05, |
|
"learning_rate": 7.796598935086499e-05, |
|
"loss": 0.4651, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 37.05, |
|
"eval_loss": 0.43237701058387756, |
|
"eval_runtime": 1.2775, |
|
"eval_samples_per_second": 144.818, |
|
"eval_steps_per_second": 9.394, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 37.12, |
|
"learning_rate": 7.773461286534548e-05, |
|
"loss": 0.4634, |
|
"step": 267500 |
|
}, |
|
{ |
|
"epoch": 37.19, |
|
"learning_rate": 7.750326115373669e-05, |
|
"loss": 0.4637, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 37.19, |
|
"eval_loss": 0.43414822220802307, |
|
"eval_runtime": 1.2962, |
|
"eval_samples_per_second": 142.727, |
|
"eval_steps_per_second": 9.258, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 37.26, |
|
"learning_rate": 7.727193674606392e-05, |
|
"loss": 0.4631, |
|
"step": 268500 |
|
}, |
|
{ |
|
"epoch": 37.32, |
|
"learning_rate": 7.704064217205385e-05, |
|
"loss": 0.4633, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 37.32, |
|
"eval_loss": 0.4331218898296356, |
|
"eval_runtime": 1.268, |
|
"eval_samples_per_second": 145.901, |
|
"eval_steps_per_second": 9.464, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 37.39, |
|
"learning_rate": 7.680937996110681e-05, |
|
"loss": 0.4628, |
|
"step": 269500 |
|
}, |
|
{ |
|
"epoch": 37.46, |
|
"learning_rate": 7.657815264226939e-05, |
|
"loss": 0.4639, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 37.46, |
|
"eval_loss": 0.4391315281391144, |
|
"eval_runtime": 1.4376, |
|
"eval_samples_per_second": 128.688, |
|
"eval_steps_per_second": 8.347, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 37.53, |
|
"learning_rate": 7.634742508497605e-05, |
|
"loss": 0.4626, |
|
"step": 270500 |
|
}, |
|
{ |
|
"epoch": 37.6, |
|
"learning_rate": 7.611627505352226e-05, |
|
"loss": 0.463, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 37.6, |
|
"eval_loss": 0.43801698088645935, |
|
"eval_runtime": 1.5521, |
|
"eval_samples_per_second": 119.193, |
|
"eval_steps_per_second": 7.731, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 37.67, |
|
"learning_rate": 7.588516749386228e-05, |
|
"loss": 0.4633, |
|
"step": 271500 |
|
}, |
|
{ |
|
"epoch": 37.74, |
|
"learning_rate": 7.56541049333513e-05, |
|
"loss": 0.4635, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 37.74, |
|
"eval_loss": 0.43551453948020935, |
|
"eval_runtime": 1.5399, |
|
"eval_samples_per_second": 120.139, |
|
"eval_steps_per_second": 7.793, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 37.81, |
|
"learning_rate": 7.542308989885254e-05, |
|
"loss": 0.4628, |
|
"step": 272500 |
|
}, |
|
{ |
|
"epoch": 37.88, |
|
"learning_rate": 7.51921249167093e-05, |
|
"loss": 0.4631, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 37.88, |
|
"eval_loss": 0.4397287368774414, |
|
"eval_runtime": 1.6946, |
|
"eval_samples_per_second": 109.168, |
|
"eval_steps_per_second": 7.081, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 37.95, |
|
"learning_rate": 7.496121251271771e-05, |
|
"loss": 0.4629, |
|
"step": 273500 |
|
}, |
|
{ |
|
"epoch": 38.02, |
|
"learning_rate": 7.473035521209879e-05, |
|
"loss": 0.464, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 38.02, |
|
"eval_loss": 0.4335973262786865, |
|
"eval_runtime": 1.395, |
|
"eval_samples_per_second": 132.617, |
|
"eval_steps_per_second": 8.602, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 38.09, |
|
"learning_rate": 7.45000170796258e-05, |
|
"loss": 0.463, |
|
"step": 274500 |
|
}, |
|
{ |
|
"epoch": 38.16, |
|
"learning_rate": 7.426927743615492e-05, |
|
"loss": 0.4629, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 38.16, |
|
"eval_loss": 0.4339427947998047, |
|
"eval_runtime": 1.2162, |
|
"eval_samples_per_second": 152.116, |
|
"eval_steps_per_second": 9.867, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 38.23, |
|
"learning_rate": 7.403860046294785e-05, |
|
"loss": 0.4628, |
|
"step": 275500 |
|
}, |
|
{ |
|
"epoch": 38.3, |
|
"learning_rate": 7.380798868265099e-05, |
|
"loss": 0.4634, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 38.3, |
|
"eval_loss": 0.43547430634498596, |
|
"eval_runtime": 1.4273, |
|
"eval_samples_per_second": 129.617, |
|
"eval_steps_per_second": 8.408, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 38.37, |
|
"learning_rate": 7.357790563607352e-05, |
|
"loss": 0.4628, |
|
"step": 276500 |
|
}, |
|
{ |
|
"epoch": 38.43, |
|
"learning_rate": 7.334743166366936e-05, |
|
"loss": 0.4632, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 38.43, |
|
"eval_loss": 0.43882909417152405, |
|
"eval_runtime": 1.4222, |
|
"eval_samples_per_second": 130.082, |
|
"eval_steps_per_second": 8.438, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 38.5, |
|
"learning_rate": 7.311703044268671e-05, |
|
"loss": 0.4622, |
|
"step": 277500 |
|
}, |
|
{ |
|
"epoch": 38.57, |
|
"learning_rate": 7.28871650678614e-05, |
|
"loss": 0.4628, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 38.57, |
|
"eval_loss": 0.43408775329589844, |
|
"eval_runtime": 1.2936, |
|
"eval_samples_per_second": 143.016, |
|
"eval_steps_per_second": 9.277, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 38.64, |
|
"learning_rate": 7.265691674969816e-05, |
|
"loss": 0.4627, |
|
"step": 278500 |
|
}, |
|
{ |
|
"epoch": 38.71, |
|
"learning_rate": 7.242674873431688e-05, |
|
"loss": 0.4621, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 38.71, |
|
"eval_loss": 0.433734267950058, |
|
"eval_runtime": 1.3622, |
|
"eval_samples_per_second": 135.812, |
|
"eval_steps_per_second": 8.809, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 38.78, |
|
"learning_rate": 7.21966635387981e-05, |
|
"loss": 0.4624, |
|
"step": 279500 |
|
}, |
|
{ |
|
"epoch": 38.85, |
|
"learning_rate": 7.196712359219827e-05, |
|
"loss": 0.4626, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 38.85, |
|
"eval_loss": 0.43395352363586426, |
|
"eval_runtime": 1.294, |
|
"eval_samples_per_second": 142.964, |
|
"eval_steps_per_second": 9.273, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 38.92, |
|
"learning_rate": 7.173721140578366e-05, |
|
"loss": 0.4619, |
|
"step": 280500 |
|
}, |
|
{ |
|
"epoch": 38.99, |
|
"learning_rate": 7.150738957990125e-05, |
|
"loss": 0.462, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 38.99, |
|
"eval_loss": 0.4305775463581085, |
|
"eval_runtime": 1.3687, |
|
"eval_samples_per_second": 135.162, |
|
"eval_steps_per_second": 8.767, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 39.06, |
|
"learning_rate": 7.127766062784582e-05, |
|
"loss": 0.4622, |
|
"step": 281500 |
|
}, |
|
{ |
|
"epoch": 39.13, |
|
"learning_rate": 7.104940457387272e-05, |
|
"loss": 0.8286, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 39.13, |
|
"eval_loss": 0.45042213797569275, |
|
"eval_runtime": 1.3879, |
|
"eval_samples_per_second": 133.294, |
|
"eval_steps_per_second": 8.646, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 39.2, |
|
"learning_rate": 7.081986831039455e-05, |
|
"loss": 0.4675, |
|
"step": 282500 |
|
}, |
|
{ |
|
"epoch": 39.27, |
|
"learning_rate": 7.059043243936574e-05, |
|
"loss": 0.4624, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 39.27, |
|
"eval_loss": 0.43986353278160095, |
|
"eval_runtime": 1.3047, |
|
"eval_samples_per_second": 141.79, |
|
"eval_steps_per_second": 9.197, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 39.34, |
|
"learning_rate": 7.036109946986026e-05, |
|
"loss": 0.4622, |
|
"step": 283500 |
|
}, |
|
{ |
|
"epoch": 39.41, |
|
"learning_rate": 7.013187190982666e-05, |
|
"loss": 0.4621, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 39.41, |
|
"eval_loss": 0.4350989758968353, |
|
"eval_runtime": 1.3405, |
|
"eval_samples_per_second": 138.009, |
|
"eval_steps_per_second": 8.952, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 39.48, |
|
"learning_rate": 6.990275226606087e-05, |
|
"loss": 0.4625, |
|
"step": 284500 |
|
}, |
|
{ |
|
"epoch": 39.54, |
|
"learning_rate": 6.967374304417855e-05, |
|
"loss": 0.4622, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 39.54, |
|
"eval_loss": 0.43041324615478516, |
|
"eval_runtime": 1.3052, |
|
"eval_samples_per_second": 141.744, |
|
"eval_steps_per_second": 9.194, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 39.61, |
|
"learning_rate": 6.944484674858791e-05, |
|
"loss": 0.4618, |
|
"step": 285500 |
|
}, |
|
{ |
|
"epoch": 39.68, |
|
"learning_rate": 6.92160658824621e-05, |
|
"loss": 0.4619, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 39.68, |
|
"eval_loss": 0.4329047203063965, |
|
"eval_runtime": 1.3268, |
|
"eval_samples_per_second": 139.431, |
|
"eval_steps_per_second": 9.044, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 39.75, |
|
"learning_rate": 6.898740294771208e-05, |
|
"loss": 0.4609, |
|
"step": 286500 |
|
}, |
|
{ |
|
"epoch": 39.82, |
|
"learning_rate": 6.875886044495907e-05, |
|
"loss": 0.4618, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 39.82, |
|
"eval_loss": 0.42083120346069336, |
|
"eval_runtime": 1.2649, |
|
"eval_samples_per_second": 146.251, |
|
"eval_steps_per_second": 9.487, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 39.89, |
|
"learning_rate": 6.853044087350723e-05, |
|
"loss": 0.4621, |
|
"step": 287500 |
|
}, |
|
{ |
|
"epoch": 39.96, |
|
"learning_rate": 6.830214673131643e-05, |
|
"loss": 0.462, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 39.96, |
|
"eval_loss": 0.44138768315315247, |
|
"eval_runtime": 1.284, |
|
"eval_samples_per_second": 144.078, |
|
"eval_steps_per_second": 9.346, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 40.03, |
|
"learning_rate": 6.807398051497478e-05, |
|
"loss": 0.4619, |
|
"step": 288500 |
|
}, |
|
{ |
|
"epoch": 40.1, |
|
"learning_rate": 6.784594471967154e-05, |
|
"loss": 0.4615, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 40.1, |
|
"eval_loss": 0.4353402256965637, |
|
"eval_runtime": 1.4649, |
|
"eval_samples_per_second": 126.285, |
|
"eval_steps_per_second": 8.191, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 40.17, |
|
"learning_rate": 6.761804183916954e-05, |
|
"loss": 0.4616, |
|
"step": 289500 |
|
}, |
|
{ |
|
"epoch": 40.24, |
|
"learning_rate": 6.739027436577824e-05, |
|
"loss": 0.4614, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 40.24, |
|
"eval_loss": 0.4397503137588501, |
|
"eval_runtime": 1.241, |
|
"eval_samples_per_second": 149.068, |
|
"eval_steps_per_second": 9.669, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 40.31, |
|
"learning_rate": 6.716264479032627e-05, |
|
"loss": 0.4607, |
|
"step": 290500 |
|
}, |
|
{ |
|
"epoch": 40.38, |
|
"learning_rate": 6.693515560213409e-05, |
|
"loss": 0.4611, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 40.38, |
|
"eval_loss": 0.4370949864387512, |
|
"eval_runtime": 1.355, |
|
"eval_samples_per_second": 136.534, |
|
"eval_steps_per_second": 8.856, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 40.45, |
|
"learning_rate": 6.670780928898711e-05, |
|
"loss": 0.4615, |
|
"step": 291500 |
|
}, |
|
{ |
|
"epoch": 40.52, |
|
"learning_rate": 6.648106259228962e-05, |
|
"loss": 0.4608, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 40.52, |
|
"eval_loss": 0.4326191544532776, |
|
"eval_runtime": 1.3203, |
|
"eval_samples_per_second": 140.118, |
|
"eval_steps_per_second": 9.089, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 40.59, |
|
"learning_rate": 6.625400918814145e-05, |
|
"loss": 0.4604, |
|
"step": 292500 |
|
}, |
|
{ |
|
"epoch": 40.65, |
|
"learning_rate": 6.602710610794654e-05, |
|
"loss": 0.4611, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 40.65, |
|
"eval_loss": 0.4331806004047394, |
|
"eval_runtime": 1.3174, |
|
"eval_samples_per_second": 140.426, |
|
"eval_steps_per_second": 9.109, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 40.72, |
|
"learning_rate": 6.580035583308054e-05, |
|
"loss": 0.4613, |
|
"step": 293500 |
|
}, |
|
{ |
|
"epoch": 40.79, |
|
"learning_rate": 6.557376084324819e-05, |
|
"loss": 0.4614, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 40.79, |
|
"eval_loss": 0.4342820346355438, |
|
"eval_runtime": 1.384, |
|
"eval_samples_per_second": 133.668, |
|
"eval_steps_per_second": 8.67, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 40.86, |
|
"learning_rate": 6.534732361645596e-05, |
|
"loss": 0.4603, |
|
"step": 294500 |
|
}, |
|
{ |
|
"epoch": 40.93, |
|
"learning_rate": 6.512149902139615e-05, |
|
"loss": 0.4609, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 40.93, |
|
"eval_loss": 0.43060681223869324, |
|
"eval_runtime": 1.4594, |
|
"eval_samples_per_second": 126.766, |
|
"eval_steps_per_second": 8.223, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"learning_rate": 6.489538441987944e-05, |
|
"loss": 0.4618, |
|
"step": 295500 |
|
}, |
|
{ |
|
"epoch": 41.07, |
|
"learning_rate": 6.466943500001871e-05, |
|
"loss": 0.4608, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 41.07, |
|
"eval_loss": 0.43232059478759766, |
|
"eval_runtime": 1.4238, |
|
"eval_samples_per_second": 129.937, |
|
"eval_steps_per_second": 8.428, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 41.14, |
|
"learning_rate": 6.444365323276068e-05, |
|
"loss": 0.4605, |
|
"step": 296500 |
|
}, |
|
{ |
|
"epoch": 41.21, |
|
"learning_rate": 6.421804158721853e-05, |
|
"loss": 0.4608, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 41.21, |
|
"eval_loss": 0.43206915259361267, |
|
"eval_runtime": 1.493, |
|
"eval_samples_per_second": 123.915, |
|
"eval_steps_per_second": 8.038, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 41.28, |
|
"learning_rate": 6.399260253064511e-05, |
|
"loss": 0.4608, |
|
"step": 297500 |
|
}, |
|
{ |
|
"epoch": 41.35, |
|
"learning_rate": 6.376733852840578e-05, |
|
"loss": 0.4601, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 41.35, |
|
"eval_loss": 0.43304702639579773, |
|
"eval_runtime": 1.3148, |
|
"eval_samples_per_second": 140.708, |
|
"eval_steps_per_second": 9.127, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 41.42, |
|
"learning_rate": 6.35422520439516e-05, |
|
"loss": 0.4599, |
|
"step": 298500 |
|
}, |
|
{ |
|
"epoch": 41.49, |
|
"learning_rate": 6.331779517054821e-05, |
|
"loss": 0.4606, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 41.49, |
|
"eval_loss": 0.43612292408943176, |
|
"eval_runtime": 1.4452, |
|
"eval_samples_per_second": 128.012, |
|
"eval_steps_per_second": 8.304, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 41.56, |
|
"learning_rate": 6.309396926796281e-05, |
|
"loss": 0.4599, |
|
"step": 299500 |
|
}, |
|
{ |
|
"epoch": 41.63, |
|
"learning_rate": 6.286942898131997e-05, |
|
"loss": 0.4606, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 41.63, |
|
"eval_loss": 0.4367000162601471, |
|
"eval_runtime": 1.3451, |
|
"eval_samples_per_second": 137.539, |
|
"eval_steps_per_second": 8.921, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 41.7, |
|
"learning_rate": 6.264507603185725e-05, |
|
"loss": 0.4597, |
|
"step": 300500 |
|
}, |
|
{ |
|
"epoch": 41.76, |
|
"learning_rate": 6.242091287306253e-05, |
|
"loss": 0.46, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 41.76, |
|
"eval_loss": 0.43270692229270935, |
|
"eval_runtime": 1.2785, |
|
"eval_samples_per_second": 144.699, |
|
"eval_steps_per_second": 9.386, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 41.83, |
|
"learning_rate": 6.219694195634821e-05, |
|
"loss": 0.4603, |
|
"step": 301500 |
|
}, |
|
{ |
|
"epoch": 41.9, |
|
"learning_rate": 6.197316573102436e-05, |
|
"loss": 0.4596, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 41.9, |
|
"eval_loss": 0.43059635162353516, |
|
"eval_runtime": 1.4989, |
|
"eval_samples_per_second": 123.424, |
|
"eval_steps_per_second": 8.006, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 41.97, |
|
"learning_rate": 6.174958664427192e-05, |
|
"loss": 0.4603, |
|
"step": 302500 |
|
}, |
|
{ |
|
"epoch": 42.04, |
|
"learning_rate": 6.152620714111597e-05, |
|
"loss": 0.46, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 42.04, |
|
"eval_loss": 0.43519648909568787, |
|
"eval_runtime": 1.3175, |
|
"eval_samples_per_second": 140.413, |
|
"eval_steps_per_second": 9.108, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 42.11, |
|
"learning_rate": 6.130302966439894e-05, |
|
"loss": 0.4598, |
|
"step": 303500 |
|
}, |
|
{ |
|
"epoch": 42.18, |
|
"learning_rate": 6.1080056654754e-05, |
|
"loss": 0.46, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 42.18, |
|
"eval_loss": 0.4337790012359619, |
|
"eval_runtime": 1.2831, |
|
"eval_samples_per_second": 144.178, |
|
"eval_steps_per_second": 9.352, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 42.25, |
|
"learning_rate": 6.085729055057823e-05, |
|
"loss": 0.4601, |
|
"step": 304500 |
|
}, |
|
{ |
|
"epoch": 42.32, |
|
"learning_rate": 6.063473378800607e-05, |
|
"loss": 0.4597, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 42.32, |
|
"eval_loss": 0.43329939246177673, |
|
"eval_runtime": 1.2575, |
|
"eval_samples_per_second": 147.119, |
|
"eval_steps_per_second": 9.543, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 42.39, |
|
"learning_rate": 6.041283327788843e-05, |
|
"loss": 0.4593, |
|
"step": 305500 |
|
}, |
|
{ |
|
"epoch": 42.46, |
|
"learning_rate": 6.019203421060304e-05, |
|
"loss": 0.4596, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 42.46, |
|
"eval_loss": 0.4334431290626526, |
|
"eval_runtime": 1.3268, |
|
"eval_samples_per_second": 139.43, |
|
"eval_steps_per_second": 9.044, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 42.53, |
|
"learning_rate": 5.997011832389982e-05, |
|
"loss": 0.46, |
|
"step": 306500 |
|
}, |
|
{ |
|
"epoch": 42.6, |
|
"learning_rate": 5.974842148077354e-05, |
|
"loss": 0.4591, |
|
"step": 307000 |
|
}, |
|
{ |
|
"epoch": 42.6, |
|
"eval_loss": 0.4334283769130707, |
|
"eval_runtime": 1.4267, |
|
"eval_samples_per_second": 129.67, |
|
"eval_steps_per_second": 8.411, |
|
"step": 307000 |
|
}, |
|
{ |
|
"epoch": 42.67, |
|
"learning_rate": 5.952694610566536e-05, |
|
"loss": 0.4596, |
|
"step": 307500 |
|
}, |
|
{ |
|
"epoch": 42.74, |
|
"learning_rate": 5.930569462059449e-05, |
|
"loss": 0.4597, |
|
"step": 308000 |
|
}, |
|
{ |
|
"epoch": 42.74, |
|
"eval_loss": 0.4318722188472748, |
|
"eval_runtime": 1.2649, |
|
"eval_samples_per_second": 146.253, |
|
"eval_steps_per_second": 9.487, |
|
"step": 308000 |
|
}, |
|
{ |
|
"epoch": 42.81, |
|
"learning_rate": 5.9084669445131754e-05, |
|
"loss": 0.4597, |
|
"step": 308500 |
|
}, |
|
{ |
|
"epoch": 42.87, |
|
"learning_rate": 5.886387299637301e-05, |
|
"loss": 0.4586, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 42.87, |
|
"eval_loss": 0.4267691969871521, |
|
"eval_runtime": 1.3287, |
|
"eval_samples_per_second": 139.236, |
|
"eval_steps_per_second": 9.031, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 42.94, |
|
"learning_rate": 5.8643307688912876e-05, |
|
"loss": 0.4596, |
|
"step": 309500 |
|
}, |
|
{ |
|
"epoch": 43.01, |
|
"learning_rate": 5.8422975934818206e-05, |
|
"loss": 0.4593, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 43.01, |
|
"eval_loss": 0.4366314113140106, |
|
"eval_runtime": 1.3736, |
|
"eval_samples_per_second": 134.683, |
|
"eval_steps_per_second": 8.736, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 43.08, |
|
"learning_rate": 5.820288014360176e-05, |
|
"loss": 0.4592, |
|
"step": 310500 |
|
}, |
|
{ |
|
"epoch": 43.15, |
|
"learning_rate": 5.798302272219588e-05, |
|
"loss": 0.4591, |
|
"step": 311000 |
|
}, |
|
{ |
|
"epoch": 43.15, |
|
"eval_loss": 0.428290456533432, |
|
"eval_runtime": 1.2599, |
|
"eval_samples_per_second": 146.841, |
|
"eval_steps_per_second": 9.525, |
|
"step": 311000 |
|
}, |
|
{ |
|
"epoch": 43.22, |
|
"learning_rate": 5.776340607492607e-05, |
|
"loss": 0.4584, |
|
"step": 311500 |
|
}, |
|
{ |
|
"epoch": 43.29, |
|
"learning_rate": 5.754403260348481e-05, |
|
"loss": 0.4587, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 43.29, |
|
"eval_loss": 0.4289395809173584, |
|
"eval_runtime": 1.3295, |
|
"eval_samples_per_second": 139.154, |
|
"eval_steps_per_second": 9.026, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 43.36, |
|
"learning_rate": 5.732490470690522e-05, |
|
"loss": 0.4588, |
|
"step": 312500 |
|
}, |
|
{ |
|
"epoch": 43.43, |
|
"learning_rate": 5.7106024781534895e-05, |
|
"loss": 0.4594, |
|
"step": 313000 |
|
}, |
|
{ |
|
"epoch": 43.43, |
|
"eval_loss": 0.43324732780456543, |
|
"eval_runtime": 1.3614, |
|
"eval_samples_per_second": 135.891, |
|
"eval_steps_per_second": 8.815, |
|
"step": 313000 |
|
}, |
|
{ |
|
"epoch": 43.5, |
|
"learning_rate": 5.6887395221009625e-05, |
|
"loss": 0.4585, |
|
"step": 313500 |
|
}, |
|
{ |
|
"epoch": 43.57, |
|
"learning_rate": 5.666901841622724e-05, |
|
"loss": 0.459, |
|
"step": 314000 |
|
}, |
|
{ |
|
"epoch": 43.57, |
|
"eval_loss": 0.4326333999633789, |
|
"eval_runtime": 1.4268, |
|
"eval_samples_per_second": 129.664, |
|
"eval_steps_per_second": 8.411, |
|
"step": 314000 |
|
}, |
|
{ |
|
"epoch": 43.64, |
|
"learning_rate": 5.6451332742423816e-05, |
|
"loss": 0.459, |
|
"step": 314500 |
|
}, |
|
{ |
|
"epoch": 43.71, |
|
"learning_rate": 5.62334680933011e-05, |
|
"loss": 0.4586, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 43.71, |
|
"eval_loss": 0.4356025755405426, |
|
"eval_runtime": 1.3678, |
|
"eval_samples_per_second": 135.255, |
|
"eval_steps_per_second": 8.773, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 43.78, |
|
"learning_rate": 5.601586335116362e-05, |
|
"loss": 0.4586, |
|
"step": 315500 |
|
}, |
|
{ |
|
"epoch": 43.85, |
|
"learning_rate": 5.5798520895701915e-05, |
|
"loss": 0.4581, |
|
"step": 316000 |
|
}, |
|
{ |
|
"epoch": 43.85, |
|
"eval_loss": 0.427127480506897, |
|
"eval_runtime": 1.294, |
|
"eval_samples_per_second": 142.967, |
|
"eval_steps_per_second": 9.274, |
|
"step": 316000 |
|
}, |
|
{ |
|
"epoch": 43.92, |
|
"learning_rate": 5.5581876993609666e-05, |
|
"loss": 0.4587, |
|
"step": 316500 |
|
}, |
|
{ |
|
"epoch": 43.98, |
|
"learning_rate": 5.5365499057136866e-05, |
|
"loss": 0.4584, |
|
"step": 317000 |
|
}, |
|
{ |
|
"epoch": 43.98, |
|
"eval_loss": 0.43246105313301086, |
|
"eval_runtime": 1.3243, |
|
"eval_samples_per_second": 139.696, |
|
"eval_steps_per_second": 9.061, |
|
"step": 317000 |
|
}, |
|
{ |
|
"epoch": 44.05, |
|
"learning_rate": 5.514895662868006e-05, |
|
"loss": 0.4589, |
|
"step": 317500 |
|
}, |
|
{ |
|
"epoch": 44.12, |
|
"learning_rate": 5.493268596725206e-05, |
|
"loss": 0.4586, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 44.12, |
|
"eval_loss": 0.43503591418266296, |
|
"eval_runtime": 1.4646, |
|
"eval_samples_per_second": 126.317, |
|
"eval_steps_per_second": 8.194, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 44.19, |
|
"learning_rate": 5.471668943795419e-05, |
|
"loss": 0.4584, |
|
"step": 318500 |
|
}, |
|
{ |
|
"epoch": 44.26, |
|
"learning_rate": 5.4500969402889874e-05, |
|
"loss": 0.4584, |
|
"step": 319000 |
|
}, |
|
{ |
|
"epoch": 44.26, |
|
"eval_loss": 0.427314430475235, |
|
"eval_runtime": 1.4555, |
|
"eval_samples_per_second": 127.101, |
|
"eval_steps_per_second": 8.244, |
|
"step": 319000 |
|
}, |
|
{ |
|
"epoch": 44.33, |
|
"learning_rate": 5.428552822113888e-05, |
|
"loss": 0.4582, |
|
"step": 319500 |
|
}, |
|
{ |
|
"epoch": 44.4, |
|
"learning_rate": 5.4070368248731424e-05, |
|
"loss": 0.4576, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 44.4, |
|
"eval_loss": 0.4284280240535736, |
|
"eval_runtime": 1.3511, |
|
"eval_samples_per_second": 136.923, |
|
"eval_steps_per_second": 8.882, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 44.47, |
|
"learning_rate": 5.385549183862251e-05, |
|
"loss": 0.4582, |
|
"step": 320500 |
|
}, |
|
{ |
|
"epoch": 44.54, |
|
"learning_rate": 5.36413302347614e-05, |
|
"loss": 0.458, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 44.54, |
|
"eval_loss": 0.43313825130462646, |
|
"eval_runtime": 1.3515, |
|
"eval_samples_per_second": 136.88, |
|
"eval_steps_per_second": 8.879, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 44.61, |
|
"learning_rate": 5.342702741682713e-05, |
|
"loss": 0.4573, |
|
"step": 321500 |
|
}, |
|
{ |
|
"epoch": 44.68, |
|
"learning_rate": 5.3213015196663704e-05, |
|
"loss": 0.4581, |
|
"step": 322000 |
|
}, |
|
{ |
|
"epoch": 44.68, |
|
"eval_loss": 0.4263160824775696, |
|
"eval_runtime": 1.282, |
|
"eval_samples_per_second": 144.307, |
|
"eval_steps_per_second": 9.36, |
|
"step": 322000 |
|
}, |
|
{ |
|
"epoch": 44.75, |
|
"learning_rate": 5.2999295914674395e-05, |
|
"loss": 0.4579, |
|
"step": 322500 |
|
}, |
|
{ |
|
"epoch": 44.82, |
|
"learning_rate": 5.278629845983565e-05, |
|
"loss": 0.4579, |
|
"step": 323000 |
|
}, |
|
{ |
|
"epoch": 44.82, |
|
"eval_loss": 0.4283221960067749, |
|
"eval_runtime": 1.2519, |
|
"eval_samples_per_second": 147.773, |
|
"eval_steps_per_second": 9.585, |
|
"step": 323000 |
|
}, |
|
{ |
|
"epoch": 44.89, |
|
"learning_rate": 5.257317146501924e-05, |
|
"loss": 0.4578, |
|
"step": 323500 |
|
}, |
|
{ |
|
"epoch": 44.96, |
|
"learning_rate": 5.236034440560551e-05, |
|
"loss": 0.4583, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 44.96, |
|
"eval_loss": 0.43619590997695923, |
|
"eval_runtime": 1.2753, |
|
"eval_samples_per_second": 145.067, |
|
"eval_steps_per_second": 9.41, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 45.03, |
|
"learning_rate": 5.21478196090371e-05, |
|
"loss": 0.4582, |
|
"step": 324500 |
|
}, |
|
{ |
|
"epoch": 45.1, |
|
"learning_rate": 5.1936023534349374e-05, |
|
"loss": 0.4571, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 45.1, |
|
"eval_loss": 0.4329734742641449, |
|
"eval_runtime": 1.2904, |
|
"eval_samples_per_second": 143.362, |
|
"eval_steps_per_second": 9.299, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 45.16, |
|
"learning_rate": 5.1724109616422535e-05, |
|
"loss": 0.4576, |
|
"step": 325500 |
|
}, |
|
{ |
|
"epoch": 45.23, |
|
"learning_rate": 5.151250491910275e-05, |
|
"loss": 0.4566, |
|
"step": 326000 |
|
}, |
|
{ |
|
"epoch": 45.23, |
|
"eval_loss": 0.42997583746910095, |
|
"eval_runtime": 1.2531, |
|
"eval_samples_per_second": 147.64, |
|
"eval_steps_per_second": 9.577, |
|
"step": 326000 |
|
}, |
|
{ |
|
"epoch": 45.3, |
|
"learning_rate": 5.130121175646505e-05, |
|
"loss": 0.4573, |
|
"step": 326500 |
|
}, |
|
{ |
|
"epoch": 45.37, |
|
"learning_rate": 5.109065408306044e-05, |
|
"loss": 0.4572, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 45.37, |
|
"eval_loss": 0.4257906675338745, |
|
"eval_runtime": 1.3322, |
|
"eval_samples_per_second": 138.863, |
|
"eval_steps_per_second": 9.007, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 45.44, |
|
"learning_rate": 5.0879990283753815e-05, |
|
"loss": 0.4576, |
|
"step": 327500 |
|
}, |
|
{ |
|
"epoch": 45.51, |
|
"learning_rate": 5.066964493620802e-05, |
|
"loss": 0.4574, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 45.51, |
|
"eval_loss": 0.42002588510513306, |
|
"eval_runtime": 1.3087, |
|
"eval_samples_per_second": 141.356, |
|
"eval_steps_per_second": 9.169, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 45.58, |
|
"learning_rate": 5.046004006827927e-05, |
|
"loss": 0.4573, |
|
"step": 328500 |
|
}, |
|
{ |
|
"epoch": 45.65, |
|
"learning_rate": 5.025033787326889e-05, |
|
"loss": 0.4573, |
|
"step": 329000 |
|
}, |
|
{ |
|
"epoch": 45.65, |
|
"eval_loss": 0.4298509359359741, |
|
"eval_runtime": 1.2432, |
|
"eval_samples_per_second": 148.814, |
|
"eval_steps_per_second": 9.653, |
|
"step": 329000 |
|
}, |
|
{ |
|
"epoch": 45.72, |
|
"learning_rate": 5.004096101579722e-05, |
|
"loss": 0.4571, |
|
"step": 329500 |
|
}, |
|
{ |
|
"epoch": 45.79, |
|
"learning_rate": 4.983191178557604e-05, |
|
"loss": 0.4578, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 45.79, |
|
"eval_loss": 0.43188604712486267, |
|
"eval_runtime": 1.2251, |
|
"eval_samples_per_second": 151.012, |
|
"eval_steps_per_second": 9.795, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 45.86, |
|
"learning_rate": 4.962319246873419e-05, |
|
"loss": 0.4576, |
|
"step": 330500 |
|
}, |
|
{ |
|
"epoch": 45.93, |
|
"learning_rate": 4.9414805347792704e-05, |
|
"loss": 0.4576, |
|
"step": 331000 |
|
}, |
|
{ |
|
"epoch": 45.93, |
|
"eval_loss": 0.4352248013019562, |
|
"eval_runtime": 1.3596, |
|
"eval_samples_per_second": 136.073, |
|
"eval_steps_per_second": 8.826, |
|
"step": 331000 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"learning_rate": 4.920675270163976e-05, |
|
"loss": 0.4573, |
|
"step": 331500 |
|
}, |
|
{ |
|
"epoch": 46.07, |
|
"learning_rate": 4.899903680550578e-05, |
|
"loss": 0.4574, |
|
"step": 332000 |
|
}, |
|
{ |
|
"epoch": 46.07, |
|
"eval_loss": 0.4277680218219757, |
|
"eval_runtime": 1.304, |
|
"eval_samples_per_second": 141.867, |
|
"eval_steps_per_second": 9.202, |
|
"step": 332000 |
|
}, |
|
{ |
|
"epoch": 46.14, |
|
"learning_rate": 4.8791659930938476e-05, |
|
"loss": 0.4567, |
|
"step": 332500 |
|
}, |
|
{ |
|
"epoch": 46.21, |
|
"learning_rate": 4.858503807483613e-05, |
|
"loss": 0.4572, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 46.21, |
|
"eval_loss": 0.4325651526451111, |
|
"eval_runtime": 1.4573, |
|
"eval_samples_per_second": 126.944, |
|
"eval_steps_per_second": 8.234, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 46.27, |
|
"learning_rate": 4.837875839490377e-05, |
|
"loss": 0.4566, |
|
"step": 333500 |
|
}, |
|
{ |
|
"epoch": 46.34, |
|
"learning_rate": 4.817241078937126e-05, |
|
"loss": 0.4568, |
|
"step": 334000 |
|
}, |
|
{ |
|
"epoch": 46.34, |
|
"eval_loss": 0.42952632904052734, |
|
"eval_runtime": 1.348, |
|
"eval_samples_per_second": 137.239, |
|
"eval_steps_per_second": 8.902, |
|
"step": 334000 |
|
}, |
|
{ |
|
"epoch": 46.41, |
|
"learning_rate": 4.7966411245255116e-05, |
|
"loss": 0.4567, |
|
"step": 334500 |
|
}, |
|
{ |
|
"epoch": 46.48, |
|
"learning_rate": 4.776199486161653e-05, |
|
"loss": 0.4569, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 46.48, |
|
"eval_loss": 0.43004167079925537, |
|
"eval_runtime": 1.4336, |
|
"eval_samples_per_second": 129.047, |
|
"eval_steps_per_second": 8.371, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 46.55, |
|
"learning_rate": 4.755669607275862e-05, |
|
"loss": 0.4568, |
|
"step": 335500 |
|
}, |
|
{ |
|
"epoch": 46.62, |
|
"learning_rate": 4.735175207867456e-05, |
|
"loss": 0.4566, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 46.62, |
|
"eval_loss": 0.4332566261291504, |
|
"eval_runtime": 1.3133, |
|
"eval_samples_per_second": 140.869, |
|
"eval_steps_per_second": 9.137, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 46.69, |
|
"learning_rate": 4.7147165120599086e-05, |
|
"loss": 0.4559, |
|
"step": 336500 |
|
}, |
|
{ |
|
"epoch": 46.76, |
|
"learning_rate": 4.694293743586235e-05, |
|
"loss": 0.4567, |
|
"step": 337000 |
|
}, |
|
{ |
|
"epoch": 46.76, |
|
"eval_loss": 0.4261642396450043, |
|
"eval_runtime": 1.2437, |
|
"eval_samples_per_second": 148.753, |
|
"eval_steps_per_second": 9.649, |
|
"step": 337000 |
|
}, |
|
{ |
|
"epoch": 46.83, |
|
"learning_rate": 4.673907125786556e-05, |
|
"loss": 0.4567, |
|
"step": 337500 |
|
}, |
|
{ |
|
"epoch": 46.9, |
|
"learning_rate": 4.653597545645173e-05, |
|
"loss": 0.4564, |
|
"step": 338000 |
|
}, |
|
{ |
|
"epoch": 46.9, |
|
"eval_loss": 0.43535733222961426, |
|
"eval_runtime": 1.3354, |
|
"eval_samples_per_second": 138.536, |
|
"eval_steps_per_second": 8.986, |
|
"step": 338000 |
|
}, |
|
{ |
|
"epoch": 46.97, |
|
"learning_rate": 4.633324414988516e-05, |
|
"loss": 0.4601, |
|
"step": 338500 |
|
}, |
|
{ |
|
"epoch": 47.04, |
|
"learning_rate": 4.6130474375707824e-05, |
|
"loss": 0.4574, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 47.04, |
|
"eval_loss": 0.43565723299980164, |
|
"eval_runtime": 1.4317, |
|
"eval_samples_per_second": 129.219, |
|
"eval_steps_per_second": 8.382, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 47.11, |
|
"learning_rate": 4.592807499323636e-05, |
|
"loss": 0.4565, |
|
"step": 339500 |
|
}, |
|
{ |
|
"epoch": 47.18, |
|
"learning_rate": 4.572604821587795e-05, |
|
"loss": 0.4564, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 47.18, |
|
"eval_loss": 0.4307515025138855, |
|
"eval_runtime": 1.4736, |
|
"eval_samples_per_second": 125.541, |
|
"eval_steps_per_second": 8.143, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 47.25, |
|
"learning_rate": 4.552439625296505e-05, |
|
"loss": 0.4561, |
|
"step": 340500 |
|
}, |
|
{ |
|
"epoch": 47.32, |
|
"learning_rate": 4.5323121309731276e-05, |
|
"loss": 0.4554, |
|
"step": 341000 |
|
}, |
|
{ |
|
"epoch": 47.32, |
|
"eval_loss": 0.4350062906742096, |
|
"eval_runtime": 1.2761, |
|
"eval_samples_per_second": 144.972, |
|
"eval_steps_per_second": 9.404, |
|
"step": 341000 |
|
}, |
|
{ |
|
"epoch": 47.38, |
|
"learning_rate": 4.512222558728712e-05, |
|
"loss": 0.4561, |
|
"step": 341500 |
|
}, |
|
{ |
|
"epoch": 47.45, |
|
"learning_rate": 4.4921711282596053e-05, |
|
"loss": 0.456, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 47.45, |
|
"eval_loss": 0.4400256276130676, |
|
"eval_runtime": 1.4304, |
|
"eval_samples_per_second": 129.339, |
|
"eval_steps_per_second": 8.39, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 47.52, |
|
"learning_rate": 4.472158058845034e-05, |
|
"loss": 0.4563, |
|
"step": 342500 |
|
}, |
|
{ |
|
"epoch": 47.59, |
|
"learning_rate": 4.45218356934472e-05, |
|
"loss": 0.456, |
|
"step": 343000 |
|
}, |
|
{ |
|
"epoch": 47.59, |
|
"eval_loss": 0.4236604571342468, |
|
"eval_runtime": 1.4771, |
|
"eval_samples_per_second": 125.249, |
|
"eval_steps_per_second": 8.124, |
|
"step": 343000 |
|
}, |
|
{ |
|
"epoch": 47.66, |
|
"learning_rate": 4.432247878196481e-05, |
|
"loss": 0.4557, |
|
"step": 343500 |
|
}, |
|
{ |
|
"epoch": 47.73, |
|
"learning_rate": 4.412351203413846e-05, |
|
"loss": 0.4559, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 47.73, |
|
"eval_loss": 0.42355087399482727, |
|
"eval_runtime": 1.3018, |
|
"eval_samples_per_second": 142.115, |
|
"eval_steps_per_second": 9.218, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 47.8, |
|
"learning_rate": 4.3924937625836546e-05, |
|
"loss": 0.456, |
|
"step": 344500 |
|
}, |
|
{ |
|
"epoch": 47.87, |
|
"learning_rate": 4.3726757728637025e-05, |
|
"loss": 0.4559, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 47.87, |
|
"eval_loss": 0.43047869205474854, |
|
"eval_runtime": 1.2595, |
|
"eval_samples_per_second": 146.879, |
|
"eval_steps_per_second": 9.527, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 47.94, |
|
"learning_rate": 4.3528974509803495e-05, |
|
"loss": 0.4558, |
|
"step": 345500 |
|
}, |
|
{ |
|
"epoch": 48.01, |
|
"learning_rate": 4.333159013226154e-05, |
|
"loss": 0.4559, |
|
"step": 346000 |
|
}, |
|
{ |
|
"epoch": 48.01, |
|
"eval_loss": 0.42453786730766296, |
|
"eval_runtime": 1.4787, |
|
"eval_samples_per_second": 125.106, |
|
"eval_steps_per_second": 8.115, |
|
"step": 346000 |
|
}, |
|
{ |
|
"epoch": 48.08, |
|
"learning_rate": 4.3135000319700124e-05, |
|
"loss": 0.4559, |
|
"step": 346500 |
|
}, |
|
{ |
|
"epoch": 48.15, |
|
"learning_rate": 4.293841928759285e-05, |
|
"loss": 0.4549, |
|
"step": 347000 |
|
}, |
|
{ |
|
"epoch": 48.15, |
|
"eval_loss": 0.41822031140327454, |
|
"eval_runtime": 1.385, |
|
"eval_samples_per_second": 133.578, |
|
"eval_steps_per_second": 8.664, |
|
"step": 347000 |
|
}, |
|
{ |
|
"epoch": 48.22, |
|
"learning_rate": 4.2742243554994435e-05, |
|
"loss": 0.4553, |
|
"step": 347500 |
|
}, |
|
{ |
|
"epoch": 48.29, |
|
"learning_rate": 4.254647526725133e-05, |
|
"loss": 0.4556, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 48.29, |
|
"eval_loss": 0.432986855506897, |
|
"eval_runtime": 1.3432, |
|
"eval_samples_per_second": 137.734, |
|
"eval_steps_per_second": 8.934, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 48.36, |
|
"learning_rate": 4.235111656525411e-05, |
|
"loss": 0.4554, |
|
"step": 348500 |
|
}, |
|
{ |
|
"epoch": 48.43, |
|
"learning_rate": 4.215616958541425e-05, |
|
"loss": 0.4551, |
|
"step": 349000 |
|
}, |
|
{ |
|
"epoch": 48.43, |
|
"eval_loss": 0.4397284984588623, |
|
"eval_runtime": 1.4485, |
|
"eval_samples_per_second": 127.714, |
|
"eval_steps_per_second": 8.284, |
|
"step": 349000 |
|
}, |
|
{ |
|
"epoch": 48.49, |
|
"learning_rate": 4.1961636459640674e-05, |
|
"loss": 0.4549, |
|
"step": 349500 |
|
}, |
|
{ |
|
"epoch": 48.56, |
|
"learning_rate": 4.1767519315316486e-05, |
|
"loss": 0.455, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 48.56, |
|
"eval_loss": 0.42523685097694397, |
|
"eval_runtime": 1.2279, |
|
"eval_samples_per_second": 150.663, |
|
"eval_steps_per_second": 9.773, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 48.63, |
|
"learning_rate": 4.157382027527568e-05, |
|
"loss": 0.455, |
|
"step": 350500 |
|
}, |
|
{ |
|
"epoch": 48.7, |
|
"learning_rate": 4.1380541457779925e-05, |
|
"loss": 0.4548, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 48.7, |
|
"eval_loss": 0.4245500862598419, |
|
"eval_runtime": 1.3864, |
|
"eval_samples_per_second": 133.437, |
|
"eval_steps_per_second": 8.655, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 48.77, |
|
"learning_rate": 4.118807026656386e-05, |
|
"loss": 0.4549, |
|
"step": 351500 |
|
}, |
|
{ |
|
"epoch": 48.84, |
|
"learning_rate": 4.099563737954589e-05, |
|
"loss": 0.4551, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 48.84, |
|
"eval_loss": 0.4291282594203949, |
|
"eval_runtime": 1.4115, |
|
"eval_samples_per_second": 131.064, |
|
"eval_steps_per_second": 8.501, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 48.91, |
|
"learning_rate": 4.0803631037988514e-05, |
|
"loss": 0.4548, |
|
"step": 352500 |
|
}, |
|
{ |
|
"epoch": 48.98, |
|
"learning_rate": 4.0612053341642264e-05, |
|
"loss": 0.4554, |
|
"step": 353000 |
|
}, |
|
{ |
|
"epoch": 48.98, |
|
"eval_loss": 0.42864859104156494, |
|
"eval_runtime": 1.2975, |
|
"eval_samples_per_second": 142.581, |
|
"eval_steps_per_second": 9.248, |
|
"step": 353000 |
|
}, |
|
{ |
|
"epoch": 49.05, |
|
"learning_rate": 4.042090638557016e-05, |
|
"loss": 0.4556, |
|
"step": 353500 |
|
}, |
|
{ |
|
"epoch": 49.12, |
|
"learning_rate": 4.02301922601247e-05, |
|
"loss": 0.4547, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 49.12, |
|
"eval_loss": 0.4335607588291168, |
|
"eval_runtime": 1.4371, |
|
"eval_samples_per_second": 128.729, |
|
"eval_steps_per_second": 8.35, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 49.19, |
|
"learning_rate": 4.003991305092499e-05, |
|
"loss": 0.4544, |
|
"step": 354500 |
|
}, |
|
{ |
|
"epoch": 49.26, |
|
"learning_rate": 3.9850070838834005e-05, |
|
"loss": 0.4548, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 49.26, |
|
"eval_loss": 0.43242689967155457, |
|
"eval_runtime": 1.2661, |
|
"eval_samples_per_second": 146.119, |
|
"eval_steps_per_second": 9.478, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 49.33, |
|
"learning_rate": 3.9662181177343955e-05, |
|
"loss": 0.455, |
|
"step": 355500 |
|
}, |
|
{ |
|
"epoch": 49.4, |
|
"learning_rate": 3.947321564555882e-05, |
|
"loss": 0.4545, |
|
"step": 356000 |
|
}, |
|
{ |
|
"epoch": 49.4, |
|
"eval_loss": 0.42357033491134644, |
|
"eval_runtime": 1.2998, |
|
"eval_samples_per_second": 142.325, |
|
"eval_steps_per_second": 9.232, |
|
"step": 356000 |
|
}, |
|
{ |
|
"epoch": 49.47, |
|
"learning_rate": 3.928469330819442e-05, |
|
"loss": 0.4543, |
|
"step": 356500 |
|
}, |
|
{ |
|
"epoch": 49.54, |
|
"learning_rate": 3.9096991935330003e-05, |
|
"loss": 0.4547, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 49.54, |
|
"eval_loss": 0.4344870150089264, |
|
"eval_runtime": 1.4216, |
|
"eval_samples_per_second": 130.138, |
|
"eval_steps_per_second": 8.441, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 49.6, |
|
"learning_rate": 3.8909361270212906e-05, |
|
"loss": 0.4548, |
|
"step": 357500 |
|
}, |
|
{ |
|
"epoch": 49.67, |
|
"learning_rate": 3.872217996573768e-05, |
|
"loss": 0.4542, |
|
"step": 358000 |
|
}, |
|
{ |
|
"epoch": 49.67, |
|
"eval_loss": 0.4328906536102295, |
|
"eval_runtime": 1.3917, |
|
"eval_samples_per_second": 132.927, |
|
"eval_steps_per_second": 8.622, |
|
"step": 358000 |
|
}, |
|
{ |
|
"epoch": 49.74, |
|
"learning_rate": 3.853545006888908e-05, |
|
"loss": 0.4547, |
|
"step": 358500 |
|
}, |
|
{ |
|
"epoch": 49.81, |
|
"learning_rate": 3.834917362171529e-05, |
|
"loss": 0.4545, |
|
"step": 359000 |
|
}, |
|
{ |
|
"epoch": 49.81, |
|
"eval_loss": 0.4240635633468628, |
|
"eval_runtime": 1.384, |
|
"eval_samples_per_second": 133.67, |
|
"eval_steps_per_second": 8.67, |
|
"step": 359000 |
|
}, |
|
{ |
|
"epoch": 49.88, |
|
"learning_rate": 3.816335266130577e-05, |
|
"loss": 0.4544, |
|
"step": 359500 |
|
}, |
|
{ |
|
"epoch": 49.95, |
|
"learning_rate": 3.7977989219768646e-05, |
|
"loss": 0.4541, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 49.95, |
|
"eval_loss": 0.41770491003990173, |
|
"eval_runtime": 1.2864, |
|
"eval_samples_per_second": 143.813, |
|
"eval_steps_per_second": 9.328, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 50.02, |
|
"learning_rate": 3.779308532420894e-05, |
|
"loss": 0.4549, |
|
"step": 360500 |
|
}, |
|
{ |
|
"epoch": 50.09, |
|
"learning_rate": 3.760864299670598e-05, |
|
"loss": 0.454, |
|
"step": 361000 |
|
}, |
|
{ |
|
"epoch": 50.09, |
|
"eval_loss": 0.42438188195228577, |
|
"eval_runtime": 1.5248, |
|
"eval_samples_per_second": 121.326, |
|
"eval_steps_per_second": 7.87, |
|
"step": 361000 |
|
}, |
|
{ |
|
"epoch": 50.16, |
|
"learning_rate": 3.742466425429149e-05, |
|
"loss": 0.4544, |
|
"step": 361500 |
|
}, |
|
{ |
|
"epoch": 50.23, |
|
"learning_rate": 3.724115110892756e-05, |
|
"loss": 0.4538, |
|
"step": 362000 |
|
}, |
|
{ |
|
"epoch": 50.23, |
|
"eval_loss": 0.4190036952495575, |
|
"eval_runtime": 1.4148, |
|
"eval_samples_per_second": 130.758, |
|
"eval_steps_per_second": 8.482, |
|
"step": 362000 |
|
}, |
|
{ |
|
"epoch": 50.3, |
|
"learning_rate": 3.7058105567484526e-05, |
|
"loss": 0.4541, |
|
"step": 362500 |
|
}, |
|
{ |
|
"epoch": 50.37, |
|
"learning_rate": 3.6875529631719126e-05, |
|
"loss": 0.4535, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 50.37, |
|
"eval_loss": 0.43312662839889526, |
|
"eval_runtime": 1.4875, |
|
"eval_samples_per_second": 124.369, |
|
"eval_steps_per_second": 8.067, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 50.44, |
|
"learning_rate": 3.66934252982525e-05, |
|
"loss": 0.454, |
|
"step": 363500 |
|
}, |
|
{ |
|
"epoch": 50.51, |
|
"learning_rate": 3.6512157346060225e-05, |
|
"loss": 0.4545, |
|
"step": 364000 |
|
}, |
|
{ |
|
"epoch": 50.51, |
|
"eval_loss": 0.4252246022224426, |
|
"eval_runtime": 1.4584, |
|
"eval_samples_per_second": 126.855, |
|
"eval_steps_per_second": 8.228, |
|
"step": 364000 |
|
}, |
|
{ |
|
"epoch": 50.58, |
|
"learning_rate": 3.63313630695476e-05, |
|
"loss": 0.4541, |
|
"step": 364500 |
|
}, |
|
{ |
|
"epoch": 50.65, |
|
"learning_rate": 3.615068355683803e-05, |
|
"loss": 0.454, |
|
"step": 365000 |
|
}, |
|
{ |
|
"epoch": 50.65, |
|
"eval_loss": 0.43154436349868774, |
|
"eval_runtime": 1.6686, |
|
"eval_samples_per_second": 110.872, |
|
"eval_steps_per_second": 7.192, |
|
"step": 365000 |
|
}, |
|
{ |
|
"epoch": 50.71, |
|
"learning_rate": 3.597048357322779e-05, |
|
"loss": 0.4538, |
|
"step": 365500 |
|
}, |
|
{ |
|
"epoch": 50.78, |
|
"learning_rate": 3.579076508935504e-05, |
|
"loss": 0.4536, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 50.78, |
|
"eval_loss": 0.4300937056541443, |
|
"eval_runtime": 1.6992, |
|
"eval_samples_per_second": 108.874, |
|
"eval_steps_per_second": 7.062, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 50.85, |
|
"learning_rate": 3.5611530070592334e-05, |
|
"loss": 0.4536, |
|
"step": 366500 |
|
}, |
|
{ |
|
"epoch": 50.92, |
|
"learning_rate": 3.543278047702514e-05, |
|
"loss": 0.4534, |
|
"step": 367000 |
|
}, |
|
{ |
|
"epoch": 50.92, |
|
"eval_loss": 0.43572941422462463, |
|
"eval_runtime": 1.6472, |
|
"eval_samples_per_second": 112.315, |
|
"eval_steps_per_second": 7.285, |
|
"step": 367000 |
|
}, |
|
{ |
|
"epoch": 50.99, |
|
"learning_rate": 3.525451826343032e-05, |
|
"loss": 0.4536, |
|
"step": 367500 |
|
}, |
|
{ |
|
"epoch": 51.06, |
|
"learning_rate": 3.507710043537942e-05, |
|
"loss": 0.4537, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 51.06, |
|
"eval_loss": 0.43337032198905945, |
|
"eval_runtime": 1.2953, |
|
"eval_samples_per_second": 142.824, |
|
"eval_steps_per_second": 9.264, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 51.13, |
|
"learning_rate": 3.4899817840235556e-05, |
|
"loss": 0.4532, |
|
"step": 368500 |
|
}, |
|
{ |
|
"epoch": 51.2, |
|
"learning_rate": 3.4723028453458075e-05, |
|
"loss": 0.4535, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 51.2, |
|
"eval_loss": 0.4200479984283447, |
|
"eval_runtime": 1.3971, |
|
"eval_samples_per_second": 132.414, |
|
"eval_steps_per_second": 8.589, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 51.27, |
|
"learning_rate": 3.4546734208387424e-05, |
|
"loss": 0.453, |
|
"step": 369500 |
|
}, |
|
{ |
|
"epoch": 51.34, |
|
"learning_rate": 3.43709370329492e-05, |
|
"loss": 0.4538, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 51.34, |
|
"eval_loss": 0.42738237977027893, |
|
"eval_runtime": 1.2968, |
|
"eval_samples_per_second": 142.663, |
|
"eval_steps_per_second": 9.254, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 51.41, |
|
"learning_rate": 3.41956388496332e-05, |
|
"loss": 0.4536, |
|
"step": 370500 |
|
}, |
|
{ |
|
"epoch": 51.48, |
|
"learning_rate": 3.40208415754722e-05, |
|
"loss": 0.4536, |
|
"step": 371000 |
|
}, |
|
{ |
|
"epoch": 51.48, |
|
"eval_loss": 0.41781699657440186, |
|
"eval_runtime": 1.2907, |
|
"eval_samples_per_second": 143.329, |
|
"eval_steps_per_second": 9.297, |
|
"step": 371000 |
|
}, |
|
{ |
|
"epoch": 51.55, |
|
"learning_rate": 3.3846547122021226e-05, |
|
"loss": 0.4531, |
|
"step": 371500 |
|
}, |
|
{ |
|
"epoch": 51.62, |
|
"learning_rate": 3.367275739533648e-05, |
|
"loss": 0.4534, |
|
"step": 372000 |
|
}, |
|
{ |
|
"epoch": 51.62, |
|
"eval_loss": 0.4181431233882904, |
|
"eval_runtime": 1.3515, |
|
"eval_samples_per_second": 136.885, |
|
"eval_steps_per_second": 8.879, |
|
"step": 372000 |
|
}, |
|
{ |
|
"epoch": 51.69, |
|
"learning_rate": 3.349947429595459e-05, |
|
"loss": 0.4526, |
|
"step": 372500 |
|
}, |
|
{ |
|
"epoch": 51.76, |
|
"learning_rate": 3.3326699718871764e-05, |
|
"loss": 0.4533, |
|
"step": 373000 |
|
}, |
|
{ |
|
"epoch": 51.76, |
|
"eval_loss": 0.4211100935935974, |
|
"eval_runtime": 1.3352, |
|
"eval_samples_per_second": 138.561, |
|
"eval_steps_per_second": 8.988, |
|
"step": 373000 |
|
}, |
|
{ |
|
"epoch": 51.82, |
|
"learning_rate": 3.315443555352308e-05, |
|
"loss": 0.4525, |
|
"step": 373500 |
|
}, |
|
{ |
|
"epoch": 51.89, |
|
"learning_rate": 3.298268368376187e-05, |
|
"loss": 0.4535, |
|
"step": 374000 |
|
}, |
|
{ |
|
"epoch": 51.89, |
|
"eval_loss": 0.4289679527282715, |
|
"eval_runtime": 1.3822, |
|
"eval_samples_per_second": 133.849, |
|
"eval_steps_per_second": 8.682, |
|
"step": 374000 |
|
}, |
|
{ |
|
"epoch": 51.96, |
|
"learning_rate": 3.281144598783902e-05, |
|
"loss": 0.4529, |
|
"step": 374500 |
|
}, |
|
{ |
|
"epoch": 52.03, |
|
"learning_rate": 3.2641747125725525e-05, |
|
"loss": 0.4535, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 52.03, |
|
"eval_loss": 0.4201316237449646, |
|
"eval_runtime": 1.2705, |
|
"eval_samples_per_second": 145.613, |
|
"eval_steps_per_second": 9.445, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 52.1, |
|
"learning_rate": 3.247154027668326e-05, |
|
"loss": 0.4523, |
|
"step": 375500 |
|
}, |
|
{ |
|
"epoch": 52.17, |
|
"learning_rate": 3.2301853191261837e-05, |
|
"loss": 0.4526, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 52.17, |
|
"eval_loss": 0.4263089597225189, |
|
"eval_runtime": 1.255, |
|
"eval_samples_per_second": 147.413, |
|
"eval_steps_per_second": 9.562, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 52.24, |
|
"learning_rate": 3.21333633454771e-05, |
|
"loss": 0.4526, |
|
"step": 376500 |
|
}, |
|
{ |
|
"epoch": 52.31, |
|
"learning_rate": 3.196471925105066e-05, |
|
"loss": 0.4526, |
|
"step": 377000 |
|
}, |
|
{ |
|
"epoch": 52.31, |
|
"eval_loss": 0.4236815273761749, |
|
"eval_runtime": 1.3806, |
|
"eval_samples_per_second": 133.999, |
|
"eval_steps_per_second": 8.692, |
|
"step": 377000 |
|
}, |
|
{ |
|
"epoch": 52.38, |
|
"learning_rate": 3.1796600462758575e-05, |
|
"loss": 0.4532, |
|
"step": 377500 |
|
}, |
|
{ |
|
"epoch": 52.45, |
|
"learning_rate": 3.1629008819120935e-05, |
|
"loss": 0.4524, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 52.45, |
|
"eval_loss": 0.4254463016986847, |
|
"eval_runtime": 1.2654, |
|
"eval_samples_per_second": 146.204, |
|
"eval_steps_per_second": 9.483, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 52.52, |
|
"learning_rate": 3.146194615289314e-05, |
|
"loss": 0.4523, |
|
"step": 378500 |
|
}, |
|
{ |
|
"epoch": 52.59, |
|
"learning_rate": 3.129541429104573e-05, |
|
"loss": 0.4529, |
|
"step": 379000 |
|
}, |
|
{ |
|
"epoch": 52.59, |
|
"eval_loss": 0.42601755261421204, |
|
"eval_runtime": 1.4268, |
|
"eval_samples_per_second": 129.661, |
|
"eval_steps_per_second": 8.41, |
|
"step": 379000 |
|
}, |
|
{ |
|
"epoch": 52.66, |
|
"learning_rate": 3.1129415054744466e-05, |
|
"loss": 0.4525, |
|
"step": 379500 |
|
}, |
|
{ |
|
"epoch": 52.73, |
|
"learning_rate": 3.0963950259330426e-05, |
|
"loss": 0.4531, |
|
"step": 380000 |
|
}, |
|
{ |
|
"epoch": 52.73, |
|
"eval_loss": 0.4202404022216797, |
|
"eval_runtime": 1.4812, |
|
"eval_samples_per_second": 124.898, |
|
"eval_steps_per_second": 8.101, |
|
"step": 380000 |
|
}, |
|
{ |
|
"epoch": 52.8, |
|
"learning_rate": 3.0799021714300064e-05, |
|
"loss": 0.4528, |
|
"step": 380500 |
|
}, |
|
{ |
|
"epoch": 52.87, |
|
"learning_rate": 3.063463122328553e-05, |
|
"loss": 0.4523, |
|
"step": 381000 |
|
}, |
|
{ |
|
"epoch": 52.87, |
|
"eval_loss": 0.42233461141586304, |
|
"eval_runtime": 1.6606, |
|
"eval_samples_per_second": 111.404, |
|
"eval_steps_per_second": 7.226, |
|
"step": 381000 |
|
}, |
|
{ |
|
"epoch": 52.93, |
|
"learning_rate": 3.0470780584034914e-05, |
|
"loss": 0.4525, |
|
"step": 381500 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"learning_rate": 3.0307471588392577e-05, |
|
"loss": 0.4523, |
|
"step": 382000 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_loss": 0.4270618259906769, |
|
"eval_runtime": 1.4098, |
|
"eval_samples_per_second": 131.228, |
|
"eval_steps_per_second": 8.512, |
|
"step": 382000 |
|
}, |
|
{ |
|
"epoch": 53.07, |
|
"learning_rate": 3.0144706022279473e-05, |
|
"loss": 0.4523, |
|
"step": 382500 |
|
}, |
|
{ |
|
"epoch": 53.14, |
|
"learning_rate": 2.9982485665673786e-05, |
|
"loss": 0.4522, |
|
"step": 383000 |
|
}, |
|
{ |
|
"epoch": 53.14, |
|
"eval_loss": 0.4285798668861389, |
|
"eval_runtime": 1.3032, |
|
"eval_samples_per_second": 141.954, |
|
"eval_steps_per_second": 9.208, |
|
"step": 383000 |
|
}, |
|
{ |
|
"epoch": 53.21, |
|
"learning_rate": 2.9820812292591337e-05, |
|
"loss": 0.4524, |
|
"step": 383500 |
|
}, |
|
{ |
|
"epoch": 53.28, |
|
"learning_rate": 2.965968767106625e-05, |
|
"loss": 0.4524, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 53.28, |
|
"eval_loss": 0.4256191849708557, |
|
"eval_runtime": 1.223, |
|
"eval_samples_per_second": 151.266, |
|
"eval_steps_per_second": 9.812, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 53.35, |
|
"learning_rate": 2.9499113563131592e-05, |
|
"loss": 0.4522, |
|
"step": 384500 |
|
}, |
|
{ |
|
"epoch": 53.42, |
|
"learning_rate": 2.9339091724800015e-05, |
|
"loss": 0.4515, |
|
"step": 385000 |
|
}, |
|
{ |
|
"epoch": 53.42, |
|
"eval_loss": 0.42210718989372253, |
|
"eval_runtime": 1.2868, |
|
"eval_samples_per_second": 143.764, |
|
"eval_steps_per_second": 9.325, |
|
"step": 385000 |
|
}, |
|
{ |
|
"epoch": 53.49, |
|
"learning_rate": 2.9179623906044737e-05, |
|
"loss": 0.4518, |
|
"step": 385500 |
|
}, |
|
{ |
|
"epoch": 53.56, |
|
"learning_rate": 2.9020711850780284e-05, |
|
"loss": 0.4513, |
|
"step": 386000 |
|
}, |
|
{ |
|
"epoch": 53.56, |
|
"eval_loss": 0.425523579120636, |
|
"eval_runtime": 1.4209, |
|
"eval_samples_per_second": 130.197, |
|
"eval_steps_per_second": 8.445, |
|
"step": 386000 |
|
}, |
|
{ |
|
"epoch": 53.63, |
|
"learning_rate": 2.8862357296843427e-05, |
|
"loss": 0.452, |
|
"step": 386500 |
|
}, |
|
{ |
|
"epoch": 53.7, |
|
"learning_rate": 2.8704561975974218e-05, |
|
"loss": 0.452, |
|
"step": 387000 |
|
}, |
|
{ |
|
"epoch": 53.7, |
|
"eval_loss": 0.427028089761734, |
|
"eval_runtime": 1.5058, |
|
"eval_samples_per_second": 122.86, |
|
"eval_steps_per_second": 7.969, |
|
"step": 387000 |
|
}, |
|
{ |
|
"epoch": 53.77, |
|
"learning_rate": 2.854732761379696e-05, |
|
"loss": 0.4514, |
|
"step": 387500 |
|
}, |
|
{ |
|
"epoch": 53.84, |
|
"learning_rate": 2.839065592980148e-05, |
|
"loss": 0.4519, |
|
"step": 388000 |
|
}, |
|
{ |
|
"epoch": 53.84, |
|
"eval_loss": 0.42220446467399597, |
|
"eval_runtime": 1.6802, |
|
"eval_samples_per_second": 110.105, |
|
"eval_steps_per_second": 7.142, |
|
"step": 388000 |
|
}, |
|
{ |
|
"epoch": 53.91, |
|
"learning_rate": 2.82348602875107e-05, |
|
"loss": 0.4517, |
|
"step": 388500 |
|
}, |
|
{ |
|
"epoch": 53.98, |
|
"learning_rate": 2.8079317959818956e-05, |
|
"loss": 0.4518, |
|
"step": 389000 |
|
}, |
|
{ |
|
"epoch": 53.98, |
|
"eval_loss": 0.4233224391937256, |
|
"eval_runtime": 1.5975, |
|
"eval_samples_per_second": 115.803, |
|
"eval_steps_per_second": 7.512, |
|
"step": 389000 |
|
}, |
|
{ |
|
"epoch": 54.04, |
|
"learning_rate": 2.7924343428387574e-05, |
|
"loss": 0.4518, |
|
"step": 389500 |
|
}, |
|
{ |
|
"epoch": 54.11, |
|
"learning_rate": 2.7769938387993203e-05, |
|
"loss": 0.4513, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 54.11, |
|
"eval_loss": 0.42325669527053833, |
|
"eval_runtime": 1.4287, |
|
"eval_samples_per_second": 129.488, |
|
"eval_steps_per_second": 8.399, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 54.18, |
|
"learning_rate": 2.761641162374983e-05, |
|
"loss": 0.4512, |
|
"step": 390500 |
|
}, |
|
{ |
|
"epoch": 54.25, |
|
"learning_rate": 2.7463149477430953e-05, |
|
"loss": 0.4517, |
|
"step": 391000 |
|
}, |
|
{ |
|
"epoch": 54.25, |
|
"eval_loss": 0.4239380657672882, |
|
"eval_runtime": 1.4388, |
|
"eval_samples_per_second": 128.582, |
|
"eval_steps_per_second": 8.34, |
|
"step": 391000 |
|
}, |
|
{ |
|
"epoch": 54.32, |
|
"learning_rate": 2.731046186569221e-05, |
|
"loss": 0.4516, |
|
"step": 391500 |
|
}, |
|
{ |
|
"epoch": 54.39, |
|
"learning_rate": 2.71586541049571e-05, |
|
"loss": 0.4518, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 54.39, |
|
"eval_loss": 0.42732805013656616, |
|
"eval_runtime": 1.358, |
|
"eval_samples_per_second": 136.233, |
|
"eval_steps_per_second": 8.837, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 54.46, |
|
"learning_rate": 2.700711940798765e-05, |
|
"loss": 0.4515, |
|
"step": 392500 |
|
}, |
|
{ |
|
"epoch": 54.53, |
|
"learning_rate": 2.6856164232670142e-05, |
|
"loss": 0.4508, |
|
"step": 393000 |
|
}, |
|
{ |
|
"epoch": 54.53, |
|
"eval_loss": 0.42004072666168213, |
|
"eval_runtime": 1.2491, |
|
"eval_samples_per_second": 148.104, |
|
"eval_steps_per_second": 9.607, |
|
"step": 393000 |
|
}, |
|
{ |
|
"epoch": 54.6, |
|
"learning_rate": 2.67057902298262e-05, |
|
"loss": 0.4513, |
|
"step": 393500 |
|
}, |
|
{ |
|
"epoch": 54.67, |
|
"learning_rate": 2.6555999043921808e-05, |
|
"loss": 0.4511, |
|
"step": 394000 |
|
}, |
|
{ |
|
"epoch": 54.67, |
|
"eval_loss": 0.4236282408237457, |
|
"eval_runtime": 1.2986, |
|
"eval_samples_per_second": 142.456, |
|
"eval_steps_per_second": 9.24, |
|
"step": 394000 |
|
}, |
|
{ |
|
"epoch": 54.74, |
|
"learning_rate": 2.6406792313049402e-05, |
|
"loss": 0.4513, |
|
"step": 394500 |
|
}, |
|
{ |
|
"epoch": 54.81, |
|
"learning_rate": 2.625817166890986e-05, |
|
"loss": 0.4508, |
|
"step": 395000 |
|
}, |
|
{ |
|
"epoch": 54.81, |
|
"eval_loss": 0.41928112506866455, |
|
"eval_runtime": 1.2191, |
|
"eval_samples_per_second": 151.752, |
|
"eval_steps_per_second": 9.843, |
|
"step": 395000 |
|
}, |
|
{ |
|
"epoch": 54.88, |
|
"learning_rate": 2.6110138736794743e-05, |
|
"loss": 0.4518, |
|
"step": 395500 |
|
}, |
|
{ |
|
"epoch": 54.95, |
|
"learning_rate": 2.5962989433545918e-05, |
|
"loss": 0.4507, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 54.95, |
|
"eval_loss": 0.4292617440223694, |
|
"eval_runtime": 1.3922, |
|
"eval_samples_per_second": 132.879, |
|
"eval_steps_per_second": 8.619, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 55.02, |
|
"learning_rate": 2.581613559213658e-05, |
|
"loss": 0.4517, |
|
"step": 396500 |
|
}, |
|
{ |
|
"epoch": 55.09, |
|
"learning_rate": 2.56698742967874e-05, |
|
"loss": 0.4508, |
|
"step": 397000 |
|
}, |
|
{ |
|
"epoch": 55.09, |
|
"eval_loss": 0.4187283515930176, |
|
"eval_runtime": 1.3865, |
|
"eval_samples_per_second": 133.432, |
|
"eval_steps_per_second": 8.655, |
|
"step": 397000 |
|
}, |
|
{ |
|
"epoch": 55.15, |
|
"learning_rate": 2.5524207146988447e-05, |
|
"loss": 0.4504, |
|
"step": 397500 |
|
}, |
|
{ |
|
"epoch": 55.22, |
|
"learning_rate": 2.537913573573237e-05, |
|
"loss": 0.4504, |
|
"step": 398000 |
|
}, |
|
{ |
|
"epoch": 55.22, |
|
"eval_loss": 0.4283367991447449, |
|
"eval_runtime": 1.2871, |
|
"eval_samples_per_second": 143.738, |
|
"eval_steps_per_second": 9.324, |
|
"step": 398000 |
|
}, |
|
{ |
|
"epoch": 55.29, |
|
"learning_rate": 2.52349500004877e-05, |
|
"loss": 0.4513, |
|
"step": 398500 |
|
}, |
|
{ |
|
"epoch": 55.36, |
|
"learning_rate": 2.509107361983578e-05, |
|
"loss": 0.4512, |
|
"step": 399000 |
|
}, |
|
{ |
|
"epoch": 55.36, |
|
"eval_loss": 0.4239468276500702, |
|
"eval_runtime": 1.3492, |
|
"eval_samples_per_second": 137.123, |
|
"eval_steps_per_second": 8.894, |
|
"step": 399000 |
|
}, |
|
{ |
|
"epoch": 55.43, |
|
"learning_rate": 2.4947797714405616e-05, |
|
"loss": 0.4508, |
|
"step": 399500 |
|
}, |
|
{ |
|
"epoch": 55.5, |
|
"learning_rate": 2.480512385103948e-05, |
|
"loss": 0.4504, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 55.5, |
|
"eval_loss": 0.4269140362739563, |
|
"eval_runtime": 1.4306, |
|
"eval_samples_per_second": 129.315, |
|
"eval_steps_per_second": 8.388, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 55.57, |
|
"learning_rate": 2.4663337127089114e-05, |
|
"loss": 0.4511, |
|
"step": 400500 |
|
}, |
|
{ |
|
"epoch": 55.64, |
|
"learning_rate": 2.4521870810167605e-05, |
|
"loss": 0.4506, |
|
"step": 401000 |
|
}, |
|
{ |
|
"epoch": 55.64, |
|
"eval_loss": 0.42908209562301636, |
|
"eval_runtime": 1.3786, |
|
"eval_samples_per_second": 134.192, |
|
"eval_steps_per_second": 8.704, |
|
"step": 401000 |
|
}, |
|
{ |
|
"epoch": 55.71, |
|
"learning_rate": 2.4381011193178528e-05, |
|
"loss": 0.4501, |
|
"step": 401500 |
|
}, |
|
{ |
|
"epoch": 55.78, |
|
"learning_rate": 2.424075981654012e-05, |
|
"loss": 0.4504, |
|
"step": 402000 |
|
}, |
|
{ |
|
"epoch": 55.78, |
|
"eval_loss": 0.42375433444976807, |
|
"eval_runtime": 1.4548, |
|
"eval_samples_per_second": 127.164, |
|
"eval_steps_per_second": 8.248, |
|
"step": 402000 |
|
}, |
|
{ |
|
"epoch": 55.85, |
|
"learning_rate": 2.4101396887653254e-05, |
|
"loss": 0.4504, |
|
"step": 402500 |
|
}, |
|
{ |
|
"epoch": 55.92, |
|
"learning_rate": 2.3962365362225434e-05, |
|
"loss": 0.4503, |
|
"step": 403000 |
|
}, |
|
{ |
|
"epoch": 55.92, |
|
"eval_loss": 0.42001548409461975, |
|
"eval_runtime": 1.3313, |
|
"eval_samples_per_second": 138.963, |
|
"eval_steps_per_second": 9.014, |
|
"step": 403000 |
|
}, |
|
{ |
|
"epoch": 55.99, |
|
"learning_rate": 2.3823946655392004e-05, |
|
"loss": 0.4513, |
|
"step": 403500 |
|
}, |
|
{ |
|
"epoch": 56.06, |
|
"learning_rate": 2.3686142280877743e-05, |
|
"loss": 0.4506, |
|
"step": 404000 |
|
}, |
|
{ |
|
"epoch": 56.06, |
|
"eval_loss": 0.4185764193534851, |
|
"eval_runtime": 1.4515, |
|
"eval_samples_per_second": 127.452, |
|
"eval_steps_per_second": 8.267, |
|
"step": 404000 |
|
}, |
|
{ |
|
"epoch": 56.13, |
|
"learning_rate": 2.3549227507153627e-05, |
|
"loss": 0.4498, |
|
"step": 404500 |
|
}, |
|
{ |
|
"epoch": 56.2, |
|
"learning_rate": 2.3412927603158495e-05, |
|
"loss": 0.4507, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 56.2, |
|
"eval_loss": 0.4259674847126007, |
|
"eval_runtime": 1.4534, |
|
"eval_samples_per_second": 127.292, |
|
"eval_steps_per_second": 8.257, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 56.26, |
|
"learning_rate": 2.3276972762387238e-05, |
|
"loss": 0.4507, |
|
"step": 405500 |
|
}, |
|
{ |
|
"epoch": 56.33, |
|
"learning_rate": 2.3141638235554014e-05, |
|
"loss": 0.4504, |
|
"step": 406000 |
|
}, |
|
{ |
|
"epoch": 56.33, |
|
"eval_loss": 0.4188092350959778, |
|
"eval_runtime": 1.3766, |
|
"eval_samples_per_second": 134.394, |
|
"eval_steps_per_second": 8.717, |
|
"step": 406000 |
|
}, |
|
{ |
|
"epoch": 56.4, |
|
"learning_rate": 2.3006925502655538e-05, |
|
"loss": 0.4499, |
|
"step": 406500 |
|
}, |
|
{ |
|
"epoch": 56.47, |
|
"learning_rate": 2.287283603688867e-05, |
|
"loss": 0.4503, |
|
"step": 407000 |
|
}, |
|
{ |
|
"epoch": 56.47, |
|
"eval_loss": 0.42306798696517944, |
|
"eval_runtime": 1.4461, |
|
"eval_samples_per_second": 127.93, |
|
"eval_steps_per_second": 8.298, |
|
"step": 407000 |
|
}, |
|
{ |
|
"epoch": 56.54, |
|
"learning_rate": 2.273937130463433e-05, |
|
"loss": 0.4502, |
|
"step": 407500 |
|
}, |
|
{ |
|
"epoch": 56.61, |
|
"learning_rate": 2.260653276544143e-05, |
|
"loss": 0.4498, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 56.61, |
|
"eval_loss": 0.4147878587245941, |
|
"eval_runtime": 1.3663, |
|
"eval_samples_per_second": 135.406, |
|
"eval_steps_per_second": 8.783, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 56.68, |
|
"learning_rate": 2.2474321872010953e-05, |
|
"loss": 0.4508, |
|
"step": 408500 |
|
}, |
|
{ |
|
"epoch": 56.75, |
|
"learning_rate": 2.234274007018007e-05, |
|
"loss": 0.4499, |
|
"step": 409000 |
|
}, |
|
{ |
|
"epoch": 56.75, |
|
"eval_loss": 0.4182484447956085, |
|
"eval_runtime": 1.4788, |
|
"eval_samples_per_second": 125.098, |
|
"eval_steps_per_second": 8.114, |
|
"step": 409000 |
|
}, |
|
{ |
|
"epoch": 56.82, |
|
"learning_rate": 2.2212050071226316e-05, |
|
"loss": 0.4494, |
|
"step": 409500 |
|
}, |
|
{ |
|
"epoch": 56.89, |
|
"learning_rate": 2.2081729497221867e-05, |
|
"loss": 0.4498, |
|
"step": 410000 |
|
}, |
|
{ |
|
"epoch": 56.89, |
|
"eval_loss": 0.42287564277648926, |
|
"eval_runtime": 1.4156, |
|
"eval_samples_per_second": 130.687, |
|
"eval_steps_per_second": 8.477, |
|
"step": 410000 |
|
}, |
|
{ |
|
"epoch": 56.96, |
|
"learning_rate": 2.1952301049460503e-05, |
|
"loss": 0.4499, |
|
"step": 410500 |
|
}, |
|
{ |
|
"epoch": 57.03, |
|
"learning_rate": 2.1823247392531053e-05, |
|
"loss": 0.4501, |
|
"step": 411000 |
|
}, |
|
{ |
|
"epoch": 57.03, |
|
"eval_loss": 0.42520666122436523, |
|
"eval_runtime": 1.4416, |
|
"eval_samples_per_second": 128.331, |
|
"eval_steps_per_second": 8.324, |
|
"step": 411000 |
|
}, |
|
{ |
|
"epoch": 57.1, |
|
"learning_rate": 2.1694829947247343e-05, |
|
"loss": 0.4496, |
|
"step": 411500 |
|
}, |
|
{ |
|
"epoch": 57.17, |
|
"learning_rate": 2.156705011796204e-05, |
|
"loss": 0.4497, |
|
"step": 412000 |
|
}, |
|
{ |
|
"epoch": 57.17, |
|
"eval_loss": 0.4220258593559265, |
|
"eval_runtime": 1.3143, |
|
"eval_samples_per_second": 140.763, |
|
"eval_steps_per_second": 9.131, |
|
"step": 412000 |
|
}, |
|
{ |
|
"epoch": 57.24, |
|
"learning_rate": 2.1439909302054865e-05, |
|
"loss": 0.4495, |
|
"step": 412500 |
|
}, |
|
{ |
|
"epoch": 57.31, |
|
"learning_rate": 2.13134088899174e-05, |
|
"loss": 0.45, |
|
"step": 413000 |
|
}, |
|
{ |
|
"epoch": 57.31, |
|
"eval_loss": 0.4181089699268341, |
|
"eval_runtime": 1.3009, |
|
"eval_samples_per_second": 142.205, |
|
"eval_steps_per_second": 9.224, |
|
"step": 413000 |
|
}, |
|
{ |
|
"epoch": 57.37, |
|
"learning_rate": 2.1187801340768273e-05, |
|
"loss": 0.4495, |
|
"step": 413500 |
|
}, |
|
{ |
|
"epoch": 57.44, |
|
"learning_rate": 2.106258459162047e-05, |
|
"loss": 0.4497, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 57.44, |
|
"eval_loss": 0.42704877257347107, |
|
"eval_runtime": 1.5278, |
|
"eval_samples_per_second": 121.087, |
|
"eval_steps_per_second": 7.854, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 57.51, |
|
"learning_rate": 2.0938012372604977e-05, |
|
"loss": 0.4494, |
|
"step": 414500 |
|
}, |
|
{ |
|
"epoch": 57.58, |
|
"learning_rate": 2.081408604602362e-05, |
|
"loss": 0.4497, |
|
"step": 415000 |
|
}, |
|
{ |
|
"epoch": 57.58, |
|
"eval_loss": 0.4207887351512909, |
|
"eval_runtime": 1.466, |
|
"eval_samples_per_second": 126.195, |
|
"eval_steps_per_second": 8.186, |
|
"step": 415000 |
|
}, |
|
{ |
|
"epoch": 57.65, |
|
"learning_rate": 2.0691052878422197e-05, |
|
"loss": 0.4498, |
|
"step": 415500 |
|
}, |
|
{ |
|
"epoch": 57.72, |
|
"learning_rate": 2.0568421096813818e-05, |
|
"loss": 0.4499, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 57.72, |
|
"eval_loss": 0.42236852645874023, |
|
"eval_runtime": 1.4433, |
|
"eval_samples_per_second": 128.175, |
|
"eval_steps_per_second": 8.314, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 57.79, |
|
"learning_rate": 2.0446439249430564e-05, |
|
"loss": 0.4494, |
|
"step": 416500 |
|
}, |
|
{ |
|
"epoch": 57.86, |
|
"learning_rate": 2.0325108670246337e-05, |
|
"loss": 0.4496, |
|
"step": 417000 |
|
}, |
|
{ |
|
"epoch": 57.86, |
|
"eval_loss": 0.42070433497428894, |
|
"eval_runtime": 1.4057, |
|
"eval_samples_per_second": 131.609, |
|
"eval_steps_per_second": 8.537, |
|
"step": 417000 |
|
}, |
|
{ |
|
"epoch": 57.93, |
|
"learning_rate": 2.0204430686112928e-05, |
|
"loss": 0.4492, |
|
"step": 417500 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"learning_rate": 2.0084406616745435e-05, |
|
"loss": 0.4494, |
|
"step": 418000 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_loss": 0.4267515540122986, |
|
"eval_runtime": 1.3575, |
|
"eval_samples_per_second": 136.277, |
|
"eval_steps_per_second": 8.84, |
|
"step": 418000 |
|
}, |
|
{ |
|
"epoch": 58.07, |
|
"learning_rate": 1.9965275857606217e-05, |
|
"loss": 0.45, |
|
"step": 418500 |
|
}, |
|
{ |
|
"epoch": 58.14, |
|
"learning_rate": 1.984656223393361e-05, |
|
"loss": 0.4499, |
|
"step": 419000 |
|
}, |
|
{ |
|
"epoch": 58.14, |
|
"eval_loss": 0.4240288734436035, |
|
"eval_runtime": 1.564, |
|
"eval_samples_per_second": 118.29, |
|
"eval_steps_per_second": 7.673, |
|
"step": 419000 |
|
}, |
|
{ |
|
"epoch": 58.21, |
|
"learning_rate": 1.9728506438618975e-05, |
|
"loss": 0.4499, |
|
"step": 419500 |
|
}, |
|
{ |
|
"epoch": 58.28, |
|
"learning_rate": 1.9611109762701606e-05, |
|
"loss": 0.4495, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 58.28, |
|
"eval_loss": 0.4293997287750244, |
|
"eval_runtime": 1.3004, |
|
"eval_samples_per_second": 142.261, |
|
"eval_steps_per_second": 9.228, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 58.35, |
|
"learning_rate": 1.94943734900127e-05, |
|
"loss": 0.4492, |
|
"step": 420500 |
|
}, |
|
{ |
|
"epoch": 58.42, |
|
"learning_rate": 1.937853038514574e-05, |
|
"loss": 0.4487, |
|
"step": 421000 |
|
}, |
|
{ |
|
"epoch": 58.42, |
|
"eval_loss": 0.4207267463207245, |
|
"eval_runtime": 1.3489, |
|
"eval_samples_per_second": 137.154, |
|
"eval_steps_per_second": 8.896, |
|
"step": 421000 |
|
}, |
|
{ |
|
"epoch": 58.48, |
|
"learning_rate": 1.9263117414344867e-05, |
|
"loss": 0.4484, |
|
"step": 421500 |
|
}, |
|
{ |
|
"epoch": 58.55, |
|
"learning_rate": 1.9148368652361025e-05, |
|
"loss": 0.4495, |
|
"step": 422000 |
|
}, |
|
{ |
|
"epoch": 58.55, |
|
"eval_loss": 0.4246247708797455, |
|
"eval_runtime": 1.4028, |
|
"eval_samples_per_second": 131.881, |
|
"eval_steps_per_second": 8.554, |
|
"step": 422000 |
|
}, |
|
{ |
|
"epoch": 58.62, |
|
"learning_rate": 1.903428535406832e-05, |
|
"loss": 0.4493, |
|
"step": 422500 |
|
}, |
|
{ |
|
"epoch": 58.69, |
|
"learning_rate": 1.8920868767063384e-05, |
|
"loss": 0.4491, |
|
"step": 423000 |
|
}, |
|
{ |
|
"epoch": 58.69, |
|
"eval_loss": 0.42133408784866333, |
|
"eval_runtime": 1.3492, |
|
"eval_samples_per_second": 137.118, |
|
"eval_steps_per_second": 8.894, |
|
"step": 423000 |
|
}, |
|
{ |
|
"epoch": 58.76, |
|
"learning_rate": 1.880812013165191e-05, |
|
"loss": 0.4486, |
|
"step": 423500 |
|
}, |
|
{ |
|
"epoch": 58.83, |
|
"learning_rate": 1.8696040680834842e-05, |
|
"loss": 0.4492, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 58.83, |
|
"eval_loss": 0.4240739643573761, |
|
"eval_runtime": 1.3437, |
|
"eval_samples_per_second": 137.682, |
|
"eval_steps_per_second": 8.931, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 58.9, |
|
"learning_rate": 1.8584631640295125e-05, |
|
"loss": 0.4486, |
|
"step": 424500 |
|
}, |
|
{ |
|
"epoch": 58.97, |
|
"learning_rate": 1.84741150321165e-05, |
|
"loss": 0.4486, |
|
"step": 425000 |
|
}, |
|
{ |
|
"epoch": 58.97, |
|
"eval_loss": 0.4246968924999237, |
|
"eval_runtime": 1.4975, |
|
"eval_samples_per_second": 123.537, |
|
"eval_steps_per_second": 8.013, |
|
"step": 425000 |
|
}, |
|
{ |
|
"epoch": 59.04, |
|
"learning_rate": 1.8364049112957962e-05, |
|
"loss": 0.4489, |
|
"step": 425500 |
|
}, |
|
{ |
|
"epoch": 59.11, |
|
"learning_rate": 1.8254657234683382e-05, |
|
"loss": 0.4485, |
|
"step": 426000 |
|
}, |
|
{ |
|
"epoch": 59.11, |
|
"eval_loss": 0.4162987768650055, |
|
"eval_runtime": 1.4233, |
|
"eval_samples_per_second": 129.976, |
|
"eval_steps_per_second": 8.431, |
|
"step": 426000 |
|
}, |
|
{ |
|
"epoch": 59.18, |
|
"learning_rate": 1.8145940593584787e-05, |
|
"loss": 0.4488, |
|
"step": 426500 |
|
}, |
|
{ |
|
"epoch": 59.25, |
|
"learning_rate": 1.8037900378569963e-05, |
|
"loss": 0.4489, |
|
"step": 427000 |
|
}, |
|
{ |
|
"epoch": 59.25, |
|
"eval_loss": 0.4239360988140106, |
|
"eval_runtime": 1.3783, |
|
"eval_samples_per_second": 134.225, |
|
"eval_steps_per_second": 8.706, |
|
"step": 427000 |
|
}, |
|
{ |
|
"epoch": 59.32, |
|
"learning_rate": 1.7930537771149402e-05, |
|
"loss": 0.4489, |
|
"step": 427500 |
|
}, |
|
{ |
|
"epoch": 59.39, |
|
"learning_rate": 1.7824066634873994e-05, |
|
"loss": 0.4483, |
|
"step": 428000 |
|
}, |
|
{ |
|
"epoch": 59.39, |
|
"eval_loss": 0.4239516854286194, |
|
"eval_runtime": 1.2618, |
|
"eval_samples_per_second": 146.616, |
|
"eval_steps_per_second": 9.51, |
|
"step": 428000 |
|
}, |
|
{ |
|
"epoch": 59.46, |
|
"learning_rate": 1.7718061396463565e-05, |
|
"loss": 0.4486, |
|
"step": 428500 |
|
}, |
|
{ |
|
"epoch": 59.53, |
|
"learning_rate": 1.761294723111157e-05, |
|
"loss": 0.4491, |
|
"step": 429000 |
|
}, |
|
{ |
|
"epoch": 59.53, |
|
"eval_loss": 0.42144644260406494, |
|
"eval_runtime": 1.3607, |
|
"eval_samples_per_second": 135.957, |
|
"eval_steps_per_second": 8.819, |
|
"step": 429000 |
|
}, |
|
{ |
|
"epoch": 59.59, |
|
"learning_rate": 1.750830398945445e-05, |
|
"loss": 0.448, |
|
"step": 429500 |
|
}, |
|
{ |
|
"epoch": 59.66, |
|
"learning_rate": 1.7404344146972867e-05, |
|
"loss": 0.4485, |
|
"step": 430000 |
|
}, |
|
{ |
|
"epoch": 59.66, |
|
"eval_loss": 0.4285011887550354, |
|
"eval_runtime": 1.3489, |
|
"eval_samples_per_second": 137.154, |
|
"eval_steps_per_second": 8.896, |
|
"step": 430000 |
|
}, |
|
{ |
|
"epoch": 59.73, |
|
"learning_rate": 1.730106884055495e-05, |
|
"loss": 0.449, |
|
"step": 430500 |
|
}, |
|
{ |
|
"epoch": 59.8, |
|
"learning_rate": 1.7198479199602952e-05, |
|
"loss": 0.449, |
|
"step": 431000 |
|
}, |
|
{ |
|
"epoch": 59.8, |
|
"eval_loss": 0.42652076482772827, |
|
"eval_runtime": 1.388, |
|
"eval_samples_per_second": 133.284, |
|
"eval_steps_per_second": 8.645, |
|
"step": 431000 |
|
}, |
|
{ |
|
"epoch": 59.87, |
|
"learning_rate": 1.7096576346020748e-05, |
|
"loss": 0.4488, |
|
"step": 431500 |
|
}, |
|
{ |
|
"epoch": 59.94, |
|
"learning_rate": 1.699536139420159e-05, |
|
"loss": 0.4484, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 59.94, |
|
"eval_loss": 0.41880765557289124, |
|
"eval_runtime": 1.4677, |
|
"eval_samples_per_second": 126.046, |
|
"eval_steps_per_second": 8.176, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 60.01, |
|
"learning_rate": 1.6895035814539804e-05, |
|
"loss": 0.449, |
|
"step": 432500 |
|
}, |
|
{ |
|
"epoch": 60.08, |
|
"learning_rate": 1.6795198598015313e-05, |
|
"loss": 0.4484, |
|
"step": 433000 |
|
}, |
|
{ |
|
"epoch": 60.08, |
|
"eval_loss": 0.4175633490085602, |
|
"eval_runtime": 1.3456, |
|
"eval_samples_per_second": 137.481, |
|
"eval_steps_per_second": 8.918, |
|
"step": 433000 |
|
}, |
|
{ |
|
"epoch": 60.15, |
|
"learning_rate": 1.6696052579072644e-05, |
|
"loss": 0.4473, |
|
"step": 433500 |
|
}, |
|
{ |
|
"epoch": 60.22, |
|
"learning_rate": 1.6597598841956794e-05, |
|
"loss": 0.4488, |
|
"step": 434000 |
|
}, |
|
{ |
|
"epoch": 60.22, |
|
"eval_loss": 0.4199577867984772, |
|
"eval_runtime": 1.3012, |
|
"eval_samples_per_second": 142.172, |
|
"eval_steps_per_second": 9.222, |
|
"step": 434000 |
|
}, |
|
{ |
|
"epoch": 60.29, |
|
"learning_rate": 1.6500033291415622e-05, |
|
"loss": 0.448, |
|
"step": 434500 |
|
}, |
|
{ |
|
"epoch": 60.36, |
|
"learning_rate": 1.640296595047672e-05, |
|
"loss": 0.448, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 60.36, |
|
"eval_loss": 0.4116031229496002, |
|
"eval_runtime": 1.4238, |
|
"eval_samples_per_second": 129.935, |
|
"eval_steps_per_second": 8.428, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 60.43, |
|
"learning_rate": 1.6306594096513022e-05, |
|
"loss": 0.4484, |
|
"step": 435500 |
|
}, |
|
{ |
|
"epoch": 60.5, |
|
"learning_rate": 1.621091878343171e-05, |
|
"loss": 0.4477, |
|
"step": 436000 |
|
}, |
|
{ |
|
"epoch": 60.5, |
|
"eval_loss": 0.42147019505500793, |
|
"eval_runtime": 1.2453, |
|
"eval_samples_per_second": 148.563, |
|
"eval_steps_per_second": 9.636, |
|
"step": 436000 |
|
}, |
|
{ |
|
"epoch": 60.57, |
|
"learning_rate": 1.6115941057522653e-05, |
|
"loss": 0.4485, |
|
"step": 436500 |
|
}, |
|
{ |
|
"epoch": 60.64, |
|
"learning_rate": 1.6021661957447076e-05, |
|
"loss": 0.4484, |
|
"step": 437000 |
|
}, |
|
{ |
|
"epoch": 60.64, |
|
"eval_loss": 0.4203680157661438, |
|
"eval_runtime": 1.4005, |
|
"eval_samples_per_second": 132.091, |
|
"eval_steps_per_second": 8.568, |
|
"step": 437000 |
|
}, |
|
{ |
|
"epoch": 60.7, |
|
"learning_rate": 1.5928082514226098e-05, |
|
"loss": 0.4487, |
|
"step": 437500 |
|
}, |
|
{ |
|
"epoch": 60.77, |
|
"learning_rate": 1.5835388808800246e-05, |
|
"loss": 0.448, |
|
"step": 438000 |
|
}, |
|
{ |
|
"epoch": 60.77, |
|
"eval_loss": 0.40932518243789673, |
|
"eval_runtime": 1.3707, |
|
"eval_samples_per_second": 134.964, |
|
"eval_steps_per_second": 8.754, |
|
"step": 438000 |
|
}, |
|
{ |
|
"epoch": 60.84, |
|
"learning_rate": 1.5743210337334882e-05, |
|
"loss": 0.448, |
|
"step": 438500 |
|
}, |
|
{ |
|
"epoch": 60.91, |
|
"learning_rate": 1.5651734567826426e-05, |
|
"loss": 0.4479, |
|
"step": 439000 |
|
}, |
|
{ |
|
"epoch": 60.91, |
|
"eval_loss": 0.41809263825416565, |
|
"eval_runtime": 1.2925, |
|
"eval_samples_per_second": 143.136, |
|
"eval_steps_per_second": 9.285, |
|
"step": 439000 |
|
}, |
|
{ |
|
"epoch": 60.98, |
|
"learning_rate": 1.556096250063923e-05, |
|
"loss": 0.448, |
|
"step": 439500 |
|
}, |
|
{ |
|
"epoch": 61.05, |
|
"learning_rate": 1.547089512844209e-05, |
|
"loss": 0.4481, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 61.05, |
|
"eval_loss": 0.4232490360736847, |
|
"eval_runtime": 1.3629, |
|
"eval_samples_per_second": 135.739, |
|
"eval_steps_per_second": 8.805, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 61.12, |
|
"learning_rate": 1.538153343619732e-05, |
|
"loss": 0.4478, |
|
"step": 440500 |
|
}, |
|
{ |
|
"epoch": 61.19, |
|
"learning_rate": 1.5293055005330644e-05, |
|
"loss": 0.4477, |
|
"step": 441000 |
|
}, |
|
{ |
|
"epoch": 61.19, |
|
"eval_loss": 0.4201977252960205, |
|
"eval_runtime": 1.4241, |
|
"eval_samples_per_second": 129.905, |
|
"eval_steps_per_second": 8.426, |
|
"step": 441000 |
|
}, |
|
{ |
|
"epoch": 61.26, |
|
"learning_rate": 1.5205106180782385e-05, |
|
"loss": 0.4482, |
|
"step": 441500 |
|
}, |
|
{ |
|
"epoch": 61.33, |
|
"learning_rate": 1.511786594281187e-05, |
|
"loss": 0.4478, |
|
"step": 442000 |
|
}, |
|
{ |
|
"epoch": 61.33, |
|
"eval_loss": 0.41671010851860046, |
|
"eval_runtime": 1.351, |
|
"eval_samples_per_second": 136.94, |
|
"eval_steps_per_second": 8.883, |
|
"step": 442000 |
|
}, |
|
{ |
|
"epoch": 61.4, |
|
"learning_rate": 1.5031335245464367e-05, |
|
"loss": 0.4485, |
|
"step": 442500 |
|
}, |
|
{ |
|
"epoch": 61.47, |
|
"learning_rate": 1.4945856899331104e-05, |
|
"loss": 0.4481, |
|
"step": 443000 |
|
}, |
|
{ |
|
"epoch": 61.47, |
|
"eval_loss": 0.4173124432563782, |
|
"eval_runtime": 1.4874, |
|
"eval_samples_per_second": 124.381, |
|
"eval_steps_per_second": 8.068, |
|
"step": 443000 |
|
}, |
|
{ |
|
"epoch": 61.54, |
|
"learning_rate": 1.48610842948986e-05, |
|
"loss": 0.4483, |
|
"step": 443500 |
|
}, |
|
{ |
|
"epoch": 61.61, |
|
"learning_rate": 1.477685025624781e-05, |
|
"loss": 0.4483, |
|
"step": 444000 |
|
}, |
|
{ |
|
"epoch": 61.61, |
|
"eval_loss": 0.41581404209136963, |
|
"eval_runtime": 1.6578, |
|
"eval_samples_per_second": 111.595, |
|
"eval_steps_per_second": 7.239, |
|
"step": 444000 |
|
}, |
|
{ |
|
"epoch": 61.68, |
|
"learning_rate": 1.4693159969167797e-05, |
|
"loss": 0.4483, |
|
"step": 444500 |
|
}, |
|
{ |
|
"epoch": 61.75, |
|
"learning_rate": 1.4610183867227295e-05, |
|
"loss": 0.4473, |
|
"step": 445000 |
|
}, |
|
{ |
|
"epoch": 61.75, |
|
"eval_loss": 0.41741910576820374, |
|
"eval_runtime": 1.7118, |
|
"eval_samples_per_second": 108.076, |
|
"eval_steps_per_second": 7.01, |
|
"step": 445000 |
|
}, |
|
{ |
|
"epoch": 61.81, |
|
"learning_rate": 1.4527922857839624e-05, |
|
"loss": 0.4478, |
|
"step": 445500 |
|
}, |
|
{ |
|
"epoch": 61.88, |
|
"learning_rate": 1.4446377840598036e-05, |
|
"loss": 0.4474, |
|
"step": 446000 |
|
}, |
|
{ |
|
"epoch": 61.88, |
|
"eval_loss": 0.4265962243080139, |
|
"eval_runtime": 1.6624, |
|
"eval_samples_per_second": 111.288, |
|
"eval_steps_per_second": 7.219, |
|
"step": 446000 |
|
}, |
|
{ |
|
"epoch": 61.95, |
|
"learning_rate": 1.4365549707265762e-05, |
|
"loss": 0.4477, |
|
"step": 446500 |
|
}, |
|
{ |
|
"epoch": 62.02, |
|
"learning_rate": 1.4285439341766323e-05, |
|
"loss": 0.4477, |
|
"step": 447000 |
|
}, |
|
{ |
|
"epoch": 62.02, |
|
"eval_loss": 0.424228310585022, |
|
"eval_runtime": 1.5115, |
|
"eval_samples_per_second": 122.398, |
|
"eval_steps_per_second": 7.939, |
|
"step": 447000 |
|
}, |
|
{ |
|
"epoch": 62.09, |
|
"learning_rate": 1.420604762017384e-05, |
|
"loss": 0.4476, |
|
"step": 447500 |
|
}, |
|
{ |
|
"epoch": 62.16, |
|
"learning_rate": 1.4127375410703455e-05, |
|
"loss": 0.4476, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 62.16, |
|
"eval_loss": 0.42402705550193787, |
|
"eval_runtime": 1.3286, |
|
"eval_samples_per_second": 139.247, |
|
"eval_steps_per_second": 9.032, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 62.23, |
|
"learning_rate": 1.4049423573701852e-05, |
|
"loss": 0.4475, |
|
"step": 448500 |
|
}, |
|
{ |
|
"epoch": 62.3, |
|
"learning_rate": 1.3972192961637821e-05, |
|
"loss": 0.4478, |
|
"step": 449000 |
|
}, |
|
{ |
|
"epoch": 62.3, |
|
"eval_loss": 0.42862966656684875, |
|
"eval_runtime": 1.3196, |
|
"eval_samples_per_second": 140.197, |
|
"eval_steps_per_second": 9.094, |
|
"step": 449000 |
|
}, |
|
{ |
|
"epoch": 62.37, |
|
"learning_rate": 1.3895684419092996e-05, |
|
"loss": 0.4475, |
|
"step": 449500 |
|
}, |
|
{ |
|
"epoch": 62.44, |
|
"learning_rate": 1.3819898782752499e-05, |
|
"loss": 0.4474, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 62.44, |
|
"eval_loss": 0.4294210374355316, |
|
"eval_runtime": 2.0421, |
|
"eval_samples_per_second": 90.595, |
|
"eval_steps_per_second": 5.876, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 62.51, |
|
"learning_rate": 1.3744836881395923e-05, |
|
"loss": 0.4479, |
|
"step": 450500 |
|
}, |
|
{ |
|
"epoch": 62.58, |
|
"learning_rate": 1.3670647486930825e-05, |
|
"loss": 0.4482, |
|
"step": 451000 |
|
}, |
|
{ |
|
"epoch": 62.58, |
|
"eval_loss": 0.41438260674476624, |
|
"eval_runtime": 1.3839, |
|
"eval_samples_per_second": 133.675, |
|
"eval_steps_per_second": 8.671, |
|
"step": 451000 |
|
}, |
|
{ |
|
"epoch": 62.65, |
|
"learning_rate": 1.3597034058669625e-05, |
|
"loss": 0.4476, |
|
"step": 451500 |
|
}, |
|
{ |
|
"epoch": 62.72, |
|
"learning_rate": 1.3524146802605277e-05, |
|
"loss": 0.4471, |
|
"step": 452000 |
|
}, |
|
{ |
|
"epoch": 62.72, |
|
"eval_loss": 0.4316233992576599, |
|
"eval_runtime": 1.385, |
|
"eval_samples_per_second": 133.575, |
|
"eval_steps_per_second": 8.664, |
|
"step": 452000 |
|
}, |
|
{ |
|
"epoch": 62.79, |
|
"learning_rate": 1.3451986515821102e-05, |
|
"loss": 0.4468, |
|
"step": 452500 |
|
}, |
|
{ |
|
"epoch": 62.86, |
|
"learning_rate": 1.3380553987450471e-05, |
|
"loss": 0.448, |
|
"step": 453000 |
|
}, |
|
{ |
|
"epoch": 62.86, |
|
"eval_loss": 0.4228358864784241, |
|
"eval_runtime": 1.383, |
|
"eval_samples_per_second": 133.771, |
|
"eval_steps_per_second": 8.677, |
|
"step": 453000 |
|
}, |
|
{ |
|
"epoch": 62.92, |
|
"learning_rate": 1.3309849998668048e-05, |
|
"loss": 0.4476, |
|
"step": 453500 |
|
}, |
|
{ |
|
"epoch": 62.99, |
|
"learning_rate": 1.3239875322681331e-05, |
|
"loss": 0.4474, |
|
"step": 454000 |
|
}, |
|
{ |
|
"epoch": 62.99, |
|
"eval_loss": 0.42418819665908813, |
|
"eval_runtime": 1.4256, |
|
"eval_samples_per_second": 129.767, |
|
"eval_steps_per_second": 8.417, |
|
"step": 454000 |
|
}, |
|
{ |
|
"epoch": 63.06, |
|
"learning_rate": 1.3170630724722182e-05, |
|
"loss": 0.4477, |
|
"step": 454500 |
|
}, |
|
{ |
|
"epoch": 63.13, |
|
"learning_rate": 1.3102116962038392e-05, |
|
"loss": 0.447, |
|
"step": 455000 |
|
}, |
|
{ |
|
"epoch": 63.13, |
|
"eval_loss": 0.4230823516845703, |
|
"eval_runtime": 1.4363, |
|
"eval_samples_per_second": 128.803, |
|
"eval_steps_per_second": 8.355, |
|
"step": 455000 |
|
}, |
|
{ |
|
"epoch": 63.2, |
|
"learning_rate": 1.3034469617626461e-05, |
|
"loss": 0.4473, |
|
"step": 455500 |
|
}, |
|
{ |
|
"epoch": 63.27, |
|
"learning_rate": 1.2967418299873526e-05, |
|
"loss": 0.4475, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 63.27, |
|
"eval_loss": 0.4234711825847626, |
|
"eval_runtime": 1.3311, |
|
"eval_samples_per_second": 138.983, |
|
"eval_steps_per_second": 9.015, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 63.34, |
|
"learning_rate": 1.29011000396945e-05, |
|
"loss": 0.4475, |
|
"step": 456500 |
|
}, |
|
{ |
|
"epoch": 63.41, |
|
"learning_rate": 1.2835515562335245e-05, |
|
"loss": 0.4475, |
|
"step": 457000 |
|
}, |
|
{ |
|
"epoch": 63.41, |
|
"eval_loss": 0.42788252234458923, |
|
"eval_runtime": 1.3825, |
|
"eval_samples_per_second": 133.816, |
|
"eval_steps_per_second": 8.68, |
|
"step": 457000 |
|
}, |
|
{ |
|
"epoch": 63.48, |
|
"learning_rate": 1.277079455146798e-05, |
|
"loss": 0.4477, |
|
"step": 457500 |
|
}, |
|
{ |
|
"epoch": 63.55, |
|
"learning_rate": 1.2706678312259012e-05, |
|
"loss": 0.4476, |
|
"step": 458000 |
|
}, |
|
{ |
|
"epoch": 63.55, |
|
"eval_loss": 0.42295461893081665, |
|
"eval_runtime": 1.4126, |
|
"eval_samples_per_second": 130.962, |
|
"eval_steps_per_second": 8.495, |
|
"step": 458000 |
|
}, |
|
{ |
|
"epoch": 63.62, |
|
"learning_rate": 1.2643297982034681e-05, |
|
"loss": 0.4469, |
|
"step": 458500 |
|
}, |
|
{ |
|
"epoch": 63.69, |
|
"learning_rate": 1.2580654253912125e-05, |
|
"loss": 0.4464, |
|
"step": 459000 |
|
}, |
|
{ |
|
"epoch": 63.69, |
|
"eval_loss": 0.4144819974899292, |
|
"eval_runtime": 1.3008, |
|
"eval_samples_per_second": 142.217, |
|
"eval_steps_per_second": 9.225, |
|
"step": 459000 |
|
}, |
|
{ |
|
"epoch": 63.76, |
|
"learning_rate": 1.251874781295312e-05, |
|
"loss": 0.4468, |
|
"step": 459500 |
|
}, |
|
{ |
|
"epoch": 63.83, |
|
"learning_rate": 1.2457579336156563e-05, |
|
"loss": 0.4467, |
|
"step": 460000 |
|
}, |
|
{ |
|
"epoch": 63.83, |
|
"eval_loss": 0.42297813296318054, |
|
"eval_runtime": 1.5334, |
|
"eval_samples_per_second": 120.645, |
|
"eval_steps_per_second": 7.826, |
|
"step": 460000 |
|
}, |
|
{ |
|
"epoch": 63.9, |
|
"learning_rate": 1.2397149492451143e-05, |
|
"loss": 0.4472, |
|
"step": 460500 |
|
}, |
|
{ |
|
"epoch": 63.97, |
|
"learning_rate": 1.2337458942687934e-05, |
|
"loss": 0.4465, |
|
"step": 461000 |
|
}, |
|
{ |
|
"epoch": 63.97, |
|
"eval_loss": 0.42076122760772705, |
|
"eval_runtime": 1.481, |
|
"eval_samples_per_second": 124.918, |
|
"eval_steps_per_second": 8.103, |
|
"step": 461000 |
|
}, |
|
{ |
|
"epoch": 64.03, |
|
"learning_rate": 1.2278508339633242e-05, |
|
"loss": 0.4481, |
|
"step": 461500 |
|
}, |
|
{ |
|
"epoch": 64.1, |
|
"learning_rate": 1.2220298327961394e-05, |
|
"loss": 0.4466, |
|
"step": 462000 |
|
}, |
|
{ |
|
"epoch": 64.1, |
|
"eval_loss": 0.42430469393730164, |
|
"eval_runtime": 1.3654, |
|
"eval_samples_per_second": 135.494, |
|
"eval_steps_per_second": 8.789, |
|
"step": 462000 |
|
}, |
|
{ |
|
"epoch": 64.17, |
|
"learning_rate": 1.216282954424777e-05, |
|
"loss": 0.4469, |
|
"step": 462500 |
|
}, |
|
{ |
|
"epoch": 64.24, |
|
"learning_rate": 1.2106102616961766e-05, |
|
"loss": 0.447, |
|
"step": 463000 |
|
}, |
|
{ |
|
"epoch": 64.24, |
|
"eval_loss": 0.42202654480934143, |
|
"eval_runtime": 1.4124, |
|
"eval_samples_per_second": 130.982, |
|
"eval_steps_per_second": 8.496, |
|
"step": 463000 |
|
}, |
|
{ |
|
"epoch": 64.31, |
|
"learning_rate": 1.2050118166459983e-05, |
|
"loss": 0.4469, |
|
"step": 463500 |
|
}, |
|
{ |
|
"epoch": 64.38, |
|
"learning_rate": 1.199498654569664e-05, |
|
"loss": 0.4473, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 64.38, |
|
"eval_loss": 0.4253380000591278, |
|
"eval_runtime": 1.5206, |
|
"eval_samples_per_second": 121.661, |
|
"eval_steps_per_second": 7.891, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 64.45, |
|
"learning_rate": 1.1940487389364166e-05, |
|
"loss": 0.4468, |
|
"step": 464500 |
|
}, |
|
{ |
|
"epoch": 64.52, |
|
"learning_rate": 1.1886732520957502e-05, |
|
"loss": 0.4471, |
|
"step": 465000 |
|
}, |
|
{ |
|
"epoch": 64.52, |
|
"eval_loss": 0.41941824555397034, |
|
"eval_runtime": 1.3857, |
|
"eval_samples_per_second": 133.507, |
|
"eval_steps_per_second": 8.66, |
|
"step": 465000 |
|
}, |
|
{ |
|
"epoch": 64.59, |
|
"learning_rate": 1.1833722528331292e-05, |
|
"loss": 0.4471, |
|
"step": 465500 |
|
}, |
|
{ |
|
"epoch": 64.66, |
|
"learning_rate": 1.1781665563635293e-05, |
|
"loss": 0.447, |
|
"step": 466000 |
|
}, |
|
{ |
|
"epoch": 64.66, |
|
"eval_loss": 0.4262169599533081, |
|
"eval_runtime": 1.4244, |
|
"eval_samples_per_second": 129.876, |
|
"eval_steps_per_second": 8.424, |
|
"step": 466000 |
|
}, |
|
{ |
|
"epoch": 64.73, |
|
"learning_rate": 1.1730144068308223e-05, |
|
"loss": 0.4466, |
|
"step": 466500 |
|
}, |
|
{ |
|
"epoch": 64.8, |
|
"learning_rate": 1.167936916118783e-05, |
|
"loss": 0.447, |
|
"step": 467000 |
|
}, |
|
{ |
|
"epoch": 64.8, |
|
"eval_loss": 0.4245114028453827, |
|
"eval_runtime": 1.37, |
|
"eval_samples_per_second": 135.041, |
|
"eval_steps_per_second": 8.759, |
|
"step": 467000 |
|
}, |
|
{ |
|
"epoch": 64.87, |
|
"learning_rate": 1.1629341397540343e-05, |
|
"loss": 0.4466, |
|
"step": 467500 |
|
}, |
|
{ |
|
"epoch": 64.94, |
|
"learning_rate": 1.1580061324461396e-05, |
|
"loss": 0.4468, |
|
"step": 468000 |
|
}, |
|
{ |
|
"epoch": 64.94, |
|
"eval_loss": 0.41434457898139954, |
|
"eval_runtime": 1.7979, |
|
"eval_samples_per_second": 102.898, |
|
"eval_steps_per_second": 6.674, |
|
"step": 468000 |
|
}, |
|
{ |
|
"epoch": 65.01, |
|
"learning_rate": 1.1531529480869956e-05, |
|
"loss": 0.4471, |
|
"step": 468500 |
|
}, |
|
{ |
|
"epoch": 65.08, |
|
"learning_rate": 1.1483746397502495e-05, |
|
"loss": 0.4463, |
|
"step": 469000 |
|
}, |
|
{ |
|
"epoch": 65.08, |
|
"eval_loss": 0.4186690151691437, |
|
"eval_runtime": 1.3076, |
|
"eval_samples_per_second": 141.477, |
|
"eval_steps_per_second": 9.177, |
|
"step": 469000 |
|
}, |
|
{ |
|
"epoch": 65.14, |
|
"learning_rate": 1.1436712596907158e-05, |
|
"loss": 0.4465, |
|
"step": 469500 |
|
}, |
|
{ |
|
"epoch": 65.21, |
|
"learning_rate": 1.1390428593438056e-05, |
|
"loss": 0.4465, |
|
"step": 470000 |
|
}, |
|
{ |
|
"epoch": 65.21, |
|
"eval_loss": 0.41854894161224365, |
|
"eval_runtime": 1.6737, |
|
"eval_samples_per_second": 110.532, |
|
"eval_steps_per_second": 7.17, |
|
"step": 470000 |
|
}, |
|
{ |
|
"epoch": 65.28, |
|
"learning_rate": 1.1344894893249622e-05, |
|
"loss": 0.447, |
|
"step": 470500 |
|
}, |
|
{ |
|
"epoch": 65.35, |
|
"learning_rate": 1.130020081046253e-05, |
|
"loss": 0.4465, |
|
"step": 471000 |
|
}, |
|
{ |
|
"epoch": 65.35, |
|
"eval_loss": 0.4243544340133667, |
|
"eval_runtime": 1.8118, |
|
"eval_samples_per_second": 102.111, |
|
"eval_steps_per_second": 6.623, |
|
"step": 471000 |
|
}, |
|
{ |
|
"epoch": 65.42, |
|
"learning_rate": 1.1256255015520584e-05, |
|
"loss": 0.4465, |
|
"step": 471500 |
|
}, |
|
{ |
|
"epoch": 65.49, |
|
"learning_rate": 1.1212972171983502e-05, |
|
"loss": 0.4467, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 65.49, |
|
"eval_loss": 0.4200620949268341, |
|
"eval_runtime": 1.7012, |
|
"eval_samples_per_second": 108.746, |
|
"eval_steps_per_second": 7.054, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 65.56, |
|
"learning_rate": 1.1170441572362081e-05, |
|
"loss": 0.4466, |
|
"step": 472500 |
|
}, |
|
{ |
|
"epoch": 65.63, |
|
"learning_rate": 1.1128663681764173e-05, |
|
"loss": 0.4465, |
|
"step": 473000 |
|
}, |
|
{ |
|
"epoch": 65.63, |
|
"eval_loss": 0.41595694422721863, |
|
"eval_runtime": 1.6375, |
|
"eval_samples_per_second": 112.976, |
|
"eval_steps_per_second": 7.328, |
|
"step": 473000 |
|
}, |
|
{ |
|
"epoch": 65.7, |
|
"learning_rate": 1.1087638957066064e-05, |
|
"loss": 0.4471, |
|
"step": 473500 |
|
}, |
|
{ |
|
"epoch": 65.77, |
|
"learning_rate": 1.104736784690758e-05, |
|
"loss": 0.4467, |
|
"step": 474000 |
|
}, |
|
{ |
|
"epoch": 65.77, |
|
"eval_loss": 0.4272841215133667, |
|
"eval_runtime": 1.8167, |
|
"eval_samples_per_second": 101.832, |
|
"eval_steps_per_second": 6.605, |
|
"step": 474000 |
|
}, |
|
{ |
|
"epoch": 65.84, |
|
"learning_rate": 1.1007929072962149e-05, |
|
"loss": 0.4465, |
|
"step": 474500 |
|
}, |
|
{ |
|
"epoch": 65.91, |
|
"learning_rate": 1.0969164995431939e-05, |
|
"loss": 0.4465, |
|
"step": 475000 |
|
}, |
|
{ |
|
"epoch": 65.91, |
|
"eval_loss": 0.4183247685432434, |
|
"eval_runtime": 1.9109, |
|
"eval_samples_per_second": 96.811, |
|
"eval_steps_per_second": 6.28, |
|
"step": 475000 |
|
}, |
|
{ |
|
"epoch": 65.98, |
|
"learning_rate": 1.0931155828053629e-05, |
|
"loss": 0.4468, |
|
"step": 475500 |
|
}, |
|
{ |
|
"epoch": 66.05, |
|
"learning_rate": 1.0893901986489377e-05, |
|
"loss": 0.4467, |
|
"step": 476000 |
|
}, |
|
{ |
|
"epoch": 66.05, |
|
"eval_loss": 0.4227275848388672, |
|
"eval_runtime": 1.3989, |
|
"eval_samples_per_second": 132.248, |
|
"eval_steps_per_second": 8.578, |
|
"step": 476000 |
|
}, |
|
{ |
|
"epoch": 66.12, |
|
"learning_rate": 1.0857403878141225e-05, |
|
"loss": 0.4458, |
|
"step": 476500 |
|
}, |
|
{ |
|
"epoch": 66.19, |
|
"learning_rate": 1.082166190214664e-05, |
|
"loss": 0.4469, |
|
"step": 477000 |
|
}, |
|
{ |
|
"epoch": 66.19, |
|
"eval_loss": 0.41657933592796326, |
|
"eval_runtime": 1.3422, |
|
"eval_samples_per_second": 137.83, |
|
"eval_steps_per_second": 8.94, |
|
"step": 477000 |
|
}, |
|
{ |
|
"epoch": 66.26, |
|
"learning_rate": 1.078667644937414e-05, |
|
"loss": 0.4469, |
|
"step": 477500 |
|
}, |
|
{ |
|
"epoch": 66.32, |
|
"learning_rate": 1.0752447902419035e-05, |
|
"loss": 0.4467, |
|
"step": 478000 |
|
}, |
|
{ |
|
"epoch": 66.32, |
|
"eval_loss": 0.4198838472366333, |
|
"eval_runtime": 1.2904, |
|
"eval_samples_per_second": 143.372, |
|
"eval_steps_per_second": 9.3, |
|
"step": 478000 |
|
}, |
|
{ |
|
"epoch": 66.39, |
|
"learning_rate": 1.071904282212263e-05, |
|
"loss": 0.4465, |
|
"step": 478500 |
|
}, |
|
{ |
|
"epoch": 66.46, |
|
"learning_rate": 1.0686327685822393e-05, |
|
"loss": 0.4464, |
|
"step": 479000 |
|
}, |
|
{ |
|
"epoch": 66.46, |
|
"eval_loss": 0.41814476251602173, |
|
"eval_runtime": 1.2937, |
|
"eval_samples_per_second": 142.996, |
|
"eval_steps_per_second": 9.275, |
|
"step": 479000 |
|
}, |
|
{ |
|
"epoch": 66.53, |
|
"learning_rate": 1.0654370552737557e-05, |
|
"loss": 0.446, |
|
"step": 479500 |
|
}, |
|
{ |
|
"epoch": 66.6, |
|
"learning_rate": 1.0623171772346204e-05, |
|
"loss": 0.4463, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 66.6, |
|
"eval_loss": 0.4216901659965515, |
|
"eval_runtime": 1.2778, |
|
"eval_samples_per_second": 144.775, |
|
"eval_steps_per_second": 9.391, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 66.67, |
|
"learning_rate": 1.0592791808607091e-05, |
|
"loss": 0.4466, |
|
"step": 480500 |
|
}, |
|
{ |
|
"epoch": 66.74, |
|
"learning_rate": 1.0563109230480105e-05, |
|
"loss": 0.4464, |
|
"step": 481000 |
|
}, |
|
{ |
|
"epoch": 66.74, |
|
"eval_loss": 0.4157991111278534, |
|
"eval_runtime": 1.2484, |
|
"eval_samples_per_second": 148.187, |
|
"eval_steps_per_second": 9.612, |
|
"step": 481000 |
|
}, |
|
{ |
|
"epoch": 66.81, |
|
"learning_rate": 1.0534186003065792e-05, |
|
"loss": 0.4469, |
|
"step": 481500 |
|
}, |
|
{ |
|
"epoch": 66.88, |
|
"learning_rate": 1.050602244266395e-05, |
|
"loss": 0.4468, |
|
"step": 482000 |
|
}, |
|
{ |
|
"epoch": 66.88, |
|
"eval_loss": 0.4190557599067688, |
|
"eval_runtime": 1.405, |
|
"eval_samples_per_second": 131.674, |
|
"eval_steps_per_second": 8.541, |
|
"step": 482000 |
|
}, |
|
{ |
|
"epoch": 66.95, |
|
"learning_rate": 1.0478618857266753e-05, |
|
"loss": 0.4457, |
|
"step": 482500 |
|
}, |
|
{ |
|
"epoch": 67.02, |
|
"learning_rate": 1.0451975546555428e-05, |
|
"loss": 0.447, |
|
"step": 483000 |
|
}, |
|
{ |
|
"epoch": 67.02, |
|
"eval_loss": 0.4247581362724304, |
|
"eval_runtime": 1.4501, |
|
"eval_samples_per_second": 127.577, |
|
"eval_steps_per_second": 8.275, |
|
"step": 483000 |
|
}, |
|
{ |
|
"epoch": 67.09, |
|
"learning_rate": 1.0426092801896943e-05, |
|
"loss": 0.4458, |
|
"step": 483500 |
|
}, |
|
{ |
|
"epoch": 67.16, |
|
"learning_rate": 1.0400970906340845e-05, |
|
"loss": 0.4465, |
|
"step": 484000 |
|
}, |
|
{ |
|
"epoch": 67.16, |
|
"eval_loss": 0.42342451214790344, |
|
"eval_runtime": 1.4065, |
|
"eval_samples_per_second": 131.531, |
|
"eval_steps_per_second": 8.532, |
|
"step": 484000 |
|
}, |
|
{ |
|
"epoch": 67.23, |
|
"learning_rate": 1.0376658096379588e-05, |
|
"loss": 0.4459, |
|
"step": 484500 |
|
}, |
|
{ |
|
"epoch": 67.3, |
|
"learning_rate": 1.0353057191851014e-05, |
|
"loss": 0.4463, |
|
"step": 485000 |
|
}, |
|
{ |
|
"epoch": 67.3, |
|
"eval_loss": 0.4237620234489441, |
|
"eval_runtime": 1.4335, |
|
"eval_samples_per_second": 129.058, |
|
"eval_steps_per_second": 8.371, |
|
"step": 485000 |
|
}, |
|
{ |
|
"epoch": 67.37, |
|
"learning_rate": 1.033021793513056e-05, |
|
"loss": 0.4463, |
|
"step": 485500 |
|
}, |
|
{ |
|
"epoch": 67.43, |
|
"learning_rate": 1.030814057598469e-05, |
|
"loss": 0.446, |
|
"step": 486000 |
|
}, |
|
{ |
|
"epoch": 67.43, |
|
"eval_loss": 0.41616541147232056, |
|
"eval_runtime": 1.3014, |
|
"eval_samples_per_second": 142.159, |
|
"eval_steps_per_second": 9.221, |
|
"step": 486000 |
|
}, |
|
{ |
|
"epoch": 67.5, |
|
"learning_rate": 1.0286867225517075e-05, |
|
"loss": 0.4464, |
|
"step": 486500 |
|
}, |
|
{ |
|
"epoch": 67.57, |
|
"learning_rate": 1.0266312852517802e-05, |
|
"loss": 0.4462, |
|
"step": 487000 |
|
}, |
|
{ |
|
"epoch": 67.57, |
|
"eval_loss": 0.4202038645744324, |
|
"eval_runtime": 1.3496, |
|
"eval_samples_per_second": 137.08, |
|
"eval_steps_per_second": 8.892, |
|
"step": 487000 |
|
}, |
|
{ |
|
"epoch": 67.64, |
|
"learning_rate": 1.0246521075948845e-05, |
|
"loss": 0.4466, |
|
"step": 487500 |
|
}, |
|
{ |
|
"epoch": 67.71, |
|
"learning_rate": 1.0227492112249914e-05, |
|
"loss": 0.4462, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 67.71, |
|
"eval_loss": 0.41774439811706543, |
|
"eval_runtime": 1.2813, |
|
"eval_samples_per_second": 144.38, |
|
"eval_steps_per_second": 9.365, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 67.78, |
|
"learning_rate": 1.020922616951869e-05, |
|
"loss": 0.4458, |
|
"step": 488500 |
|
}, |
|
{ |
|
"epoch": 67.85, |
|
"learning_rate": 1.019172344750861e-05, |
|
"loss": 0.4455, |
|
"step": 489000 |
|
}, |
|
{ |
|
"epoch": 67.85, |
|
"eval_loss": 0.4227893650531769, |
|
"eval_runtime": 1.4104, |
|
"eval_samples_per_second": 131.172, |
|
"eval_steps_per_second": 8.508, |
|
"step": 489000 |
|
}, |
|
{ |
|
"epoch": 67.92, |
|
"learning_rate": 1.017498413762663e-05, |
|
"loss": 0.4461, |
|
"step": 489500 |
|
}, |
|
{ |
|
"epoch": 67.99, |
|
"learning_rate": 1.0159039612175048e-05, |
|
"loss": 0.4463, |
|
"step": 490000 |
|
}, |
|
{ |
|
"epoch": 67.99, |
|
"eval_loss": 0.41463714838027954, |
|
"eval_runtime": 1.3969, |
|
"eval_samples_per_second": 132.434, |
|
"eval_steps_per_second": 8.59, |
|
"step": 490000 |
|
}, |
|
{ |
|
"epoch": 68.06, |
|
"learning_rate": 1.0143826139665367e-05, |
|
"loss": 0.4464, |
|
"step": 490500 |
|
}, |
|
{ |
|
"epoch": 68.13, |
|
"learning_rate": 1.0129376603081064e-05, |
|
"loss": 0.4454, |
|
"step": 491000 |
|
}, |
|
{ |
|
"epoch": 68.13, |
|
"eval_loss": 0.4189724624156952, |
|
"eval_runtime": 1.4324, |
|
"eval_samples_per_second": 129.152, |
|
"eval_steps_per_second": 8.377, |
|
"step": 491000 |
|
}, |
|
{ |
|
"epoch": 68.2, |
|
"learning_rate": 1.0115691160439967e-05, |
|
"loss": 0.4468, |
|
"step": 491500 |
|
}, |
|
{ |
|
"epoch": 68.27, |
|
"learning_rate": 1.010276996140389e-05, |
|
"loss": 0.446, |
|
"step": 492000 |
|
}, |
|
{ |
|
"epoch": 68.27, |
|
"eval_loss": 0.42191821336746216, |
|
"eval_runtime": 1.3157, |
|
"eval_samples_per_second": 140.609, |
|
"eval_steps_per_second": 9.121, |
|
"step": 492000 |
|
}, |
|
{ |
|
"epoch": 68.34, |
|
"learning_rate": 1.0090613147276983e-05, |
|
"loss": 0.447, |
|
"step": 492500 |
|
}, |
|
{ |
|
"epoch": 68.41, |
|
"learning_rate": 1.0079220851004224e-05, |
|
"loss": 0.4461, |
|
"step": 493000 |
|
}, |
|
{ |
|
"epoch": 68.41, |
|
"eval_loss": 0.4250451922416687, |
|
"eval_runtime": 1.5611, |
|
"eval_samples_per_second": 118.504, |
|
"eval_steps_per_second": 7.687, |
|
"step": 493000 |
|
}, |
|
{ |
|
"epoch": 68.48, |
|
"learning_rate": 1.0068593197169942e-05, |
|
"loss": 0.4466, |
|
"step": 493500 |
|
}, |
|
{ |
|
"epoch": 68.54, |
|
"learning_rate": 1.0058749264484569e-05, |
|
"loss": 0.4462, |
|
"step": 494000 |
|
}, |
|
{ |
|
"epoch": 68.54, |
|
"eval_loss": 0.4171859323978424, |
|
"eval_runtime": 1.7206, |
|
"eval_samples_per_second": 107.521, |
|
"eval_steps_per_second": 6.974, |
|
"step": 494000 |
|
}, |
|
{ |
|
"epoch": 68.61, |
|
"learning_rate": 1.0049649705995797e-05, |
|
"loss": 0.4455, |
|
"step": 494500 |
|
}, |
|
{ |
|
"epoch": 68.68, |
|
"learning_rate": 1.0041315113330795e-05, |
|
"loss": 0.4464, |
|
"step": 495000 |
|
}, |
|
{ |
|
"epoch": 68.68, |
|
"eval_loss": 0.41223111748695374, |
|
"eval_runtime": 1.6922, |
|
"eval_samples_per_second": 109.323, |
|
"eval_steps_per_second": 7.091, |
|
"step": 495000 |
|
}, |
|
{ |
|
"epoch": 68.75, |
|
"learning_rate": 1.003374557763534e-05, |
|
"loss": 0.4462, |
|
"step": 495500 |
|
}, |
|
{ |
|
"epoch": 68.82, |
|
"learning_rate": 1.0026941181688666e-05, |
|
"loss": 0.4459, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 68.82, |
|
"eval_loss": 0.41784965991973877, |
|
"eval_runtime": 1.7276, |
|
"eval_samples_per_second": 107.084, |
|
"eval_steps_per_second": 6.946, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 68.89, |
|
"learning_rate": 1.0020913314537103e-05, |
|
"loss": 0.4455, |
|
"step": 496500 |
|
}, |
|
{ |
|
"epoch": 68.96, |
|
"learning_rate": 1.001563788233432e-05, |
|
"loss": 0.4459, |
|
"step": 497000 |
|
}, |
|
{ |
|
"epoch": 68.96, |
|
"eval_loss": 0.4094962477684021, |
|
"eval_runtime": 1.7269, |
|
"eval_samples_per_second": 107.131, |
|
"eval_steps_per_second": 6.949, |
|
"step": 497000 |
|
}, |
|
{ |
|
"epoch": 69.03, |
|
"learning_rate": 1.0011127787903172e-05, |
|
"loss": 0.4458, |
|
"step": 497500 |
|
}, |
|
{ |
|
"epoch": 69.1, |
|
"learning_rate": 1.0007383080565327e-05, |
|
"loss": 0.4458, |
|
"step": 498000 |
|
}, |
|
{ |
|
"epoch": 69.1, |
|
"eval_loss": 0.41240301728248596, |
|
"eval_runtime": 1.4516, |
|
"eval_samples_per_second": 127.441, |
|
"eval_steps_per_second": 8.266, |
|
"step": 498000 |
|
}, |
|
{ |
|
"epoch": 69.17, |
|
"learning_rate": 1.0004403801272306e-05, |
|
"loss": 0.4462, |
|
"step": 498500 |
|
}, |
|
{ |
|
"epoch": 69.24, |
|
"learning_rate": 1.000218998260502e-05, |
|
"loss": 0.4458, |
|
"step": 499000 |
|
}, |
|
{ |
|
"epoch": 69.24, |
|
"eval_loss": 0.4181687533855438, |
|
"eval_runtime": 1.4502, |
|
"eval_samples_per_second": 127.573, |
|
"eval_steps_per_second": 8.275, |
|
"step": 499000 |
|
}, |
|
{ |
|
"epoch": 69.31, |
|
"learning_rate": 1.0000743781475326e-05, |
|
"loss": 0.4461, |
|
"step": 499500 |
|
}, |
|
{ |
|
"epoch": 69.38, |
|
"learning_rate": 1.000006002205894e-05, |
|
"loss": 0.4458, |
|
"step": 500000 |
|
}, |
|
{ |
|
"epoch": 69.38, |
|
"eval_loss": 0.4177255928516388, |
|
"eval_runtime": 1.3131, |
|
"eval_samples_per_second": 140.889, |
|
"eval_steps_per_second": 9.139, |
|
"step": 500000 |
|
}, |
|
{ |
|
"epoch": 69.38, |
|
"step": 500000, |
|
"total_flos": 3.686314695252681e+22, |
|
"train_loss": 0.46582102978515627, |
|
"train_runtime": 361543.3816, |
|
"train_samples_per_second": 354.038, |
|
"train_steps_per_second": 1.383 |
|
} |
|
], |
|
"max_steps": 500000, |
|
"num_train_epochs": 70, |
|
"total_flos": 3.686314695252681e+22, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|