|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 28.997333333333334, |
|
"eval_steps": 500, |
|
"global_step": 5437, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.5333333333333333, |
|
"grad_norm": 0.275871217250824, |
|
"learning_rate": 3e-05, |
|
"loss": 1.8545, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.9973333333333333, |
|
"eval_accuracy": 0.6037399103139014, |
|
"eval_loss": 1.7123007774353027, |
|
"eval_runtime": 6.2976, |
|
"eval_samples_per_second": 79.395, |
|
"eval_steps_per_second": 10.004, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.9973333333333333, |
|
"eval_exact_match": 14.4, |
|
"eval_f1": 22.843015873015876, |
|
"eval_qa_bleu": 9.471299126615676, |
|
"eval_qa_exact_match": 0.112, |
|
"eval_recite_bleu": 12.267251092702137, |
|
"eval_recite_exact_match": 0.0, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 1.0666666666666667, |
|
"grad_norm": 0.32765188813209534, |
|
"learning_rate": 3e-05, |
|
"loss": 1.7408, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.3532409369945526, |
|
"learning_rate": 3e-05, |
|
"loss": 1.7063, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6058565022421525, |
|
"eval_loss": 1.69453763961792, |
|
"eval_runtime": 6.5928, |
|
"eval_samples_per_second": 75.841, |
|
"eval_steps_per_second": 9.556, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_exact_match": 14.4, |
|
"eval_f1": 23.29731601731603, |
|
"eval_qa_bleu": 9.4408132575913, |
|
"eval_qa_exact_match": 0.11, |
|
"eval_recite_bleu": 13.027794116434837, |
|
"eval_recite_exact_match": 0.0, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 2.1333333333333333, |
|
"grad_norm": 0.3777780532836914, |
|
"learning_rate": 3e-05, |
|
"loss": 1.7094, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.6666666666666665, |
|
"grad_norm": 0.3971036374568939, |
|
"learning_rate": 3e-05, |
|
"loss": 1.6702, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.997333333333333, |
|
"eval_accuracy": 0.6065919282511211, |
|
"eval_loss": 1.6851236820220947, |
|
"eval_runtime": 6.3104, |
|
"eval_samples_per_second": 79.234, |
|
"eval_steps_per_second": 9.984, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 2.997333333333333, |
|
"eval_exact_match": 14.0, |
|
"eval_f1": 23.306825396825417, |
|
"eval_qa_bleu": 8.683248465366908, |
|
"eval_qa_exact_match": 0.11, |
|
"eval_recite_bleu": 12.39336052958371, |
|
"eval_recite_exact_match": 0.006, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"grad_norm": 0.4782978594303131, |
|
"learning_rate": 3e-05, |
|
"loss": 1.6574, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.7333333333333334, |
|
"grad_norm": 0.52582186460495, |
|
"learning_rate": 3e-05, |
|
"loss": 1.6356, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6074977578475337, |
|
"eval_loss": 1.682567834854126, |
|
"eval_runtime": 6.65, |
|
"eval_samples_per_second": 75.188, |
|
"eval_steps_per_second": 9.474, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_exact_match": 15.0, |
|
"eval_f1": 23.366825396825416, |
|
"eval_qa_bleu": 9.05994637010322, |
|
"eval_qa_exact_match": 0.114, |
|
"eval_recite_bleu": 13.060415058384661, |
|
"eval_recite_exact_match": 0.006, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 4.266666666666667, |
|
"grad_norm": 0.6418241262435913, |
|
"learning_rate": 3e-05, |
|
"loss": 1.6005, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"grad_norm": 0.6805307269096375, |
|
"learning_rate": 3e-05, |
|
"loss": 1.5775, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 4.997333333333334, |
|
"eval_accuracy": 0.6071031390134529, |
|
"eval_loss": 1.69106125831604, |
|
"eval_runtime": 5.6598, |
|
"eval_samples_per_second": 88.343, |
|
"eval_steps_per_second": 11.131, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 4.997333333333334, |
|
"eval_exact_match": 15.2, |
|
"eval_f1": 23.38539682539684, |
|
"eval_qa_bleu": 11.854591678140883, |
|
"eval_qa_exact_match": 0.112, |
|
"eval_recite_bleu": 12.694062454678122, |
|
"eval_recite_exact_match": 0.004, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 5.333333333333333, |
|
"grad_norm": 0.8362048864364624, |
|
"learning_rate": 3e-05, |
|
"loss": 1.5173, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.866666666666667, |
|
"grad_norm": 0.8521270155906677, |
|
"learning_rate": 3e-05, |
|
"loss": 1.529, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6069417040358744, |
|
"eval_loss": 1.7066757678985596, |
|
"eval_runtime": 6.2922, |
|
"eval_samples_per_second": 79.464, |
|
"eval_steps_per_second": 10.012, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_exact_match": 15.2, |
|
"eval_f1": 24.04682539682541, |
|
"eval_qa_bleu": 12.150293484705452, |
|
"eval_qa_exact_match": 0.106, |
|
"eval_recite_bleu": 12.732083161926754, |
|
"eval_recite_exact_match": 0.002, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"grad_norm": 0.9372308254241943, |
|
"learning_rate": 3e-05, |
|
"loss": 1.4613, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 6.933333333333334, |
|
"grad_norm": 1.0236693620681763, |
|
"learning_rate": 3e-05, |
|
"loss": 1.457, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 6.997333333333334, |
|
"eval_accuracy": 0.6055515695067265, |
|
"eval_loss": 1.72792649269104, |
|
"eval_runtime": 6.3278, |
|
"eval_samples_per_second": 79.016, |
|
"eval_steps_per_second": 9.956, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 6.997333333333334, |
|
"eval_exact_match": 15.0, |
|
"eval_f1": 23.28492063492065, |
|
"eval_qa_bleu": 9.849658422779212, |
|
"eval_qa_exact_match": 0.114, |
|
"eval_recite_bleu": 12.3997433702838, |
|
"eval_recite_exact_match": 0.006, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 7.466666666666667, |
|
"grad_norm": 1.0773049592971802, |
|
"learning_rate": 3e-05, |
|
"loss": 1.3996, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 1.0705862045288086, |
|
"learning_rate": 3e-05, |
|
"loss": 1.3907, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6046188340807175, |
|
"eval_loss": 1.7511730194091797, |
|
"eval_runtime": 5.5895, |
|
"eval_samples_per_second": 89.453, |
|
"eval_steps_per_second": 11.271, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_exact_match": 13.0, |
|
"eval_f1": 19.93238095238096, |
|
"eval_qa_bleu": 11.93942669425287, |
|
"eval_qa_exact_match": 0.098, |
|
"eval_recite_bleu": 11.949414001720045, |
|
"eval_recite_exact_match": 0.006, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 8.533333333333333, |
|
"grad_norm": 1.2424529790878296, |
|
"learning_rate": 3e-05, |
|
"loss": 1.3309, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 8.997333333333334, |
|
"eval_accuracy": 0.6025022421524664, |
|
"eval_loss": 1.7774019241333008, |
|
"eval_runtime": 6.4013, |
|
"eval_samples_per_second": 78.109, |
|
"eval_steps_per_second": 9.842, |
|
"step": 1687 |
|
}, |
|
{ |
|
"epoch": 8.997333333333334, |
|
"eval_exact_match": 13.6, |
|
"eval_f1": 21.089523809523815, |
|
"eval_qa_bleu": 10.094627644241111, |
|
"eval_qa_exact_match": 0.094, |
|
"eval_recite_bleu": 12.197681191541987, |
|
"eval_recite_exact_match": 0.006, |
|
"step": 1687 |
|
}, |
|
{ |
|
"epoch": 9.066666666666666, |
|
"grad_norm": 1.535229206085205, |
|
"learning_rate": 3e-05, |
|
"loss": 1.3336, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"grad_norm": 1.4936225414276123, |
|
"learning_rate": 3e-05, |
|
"loss": 1.2841, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.6012645739910314, |
|
"eval_loss": 1.804250717163086, |
|
"eval_runtime": 6.385, |
|
"eval_samples_per_second": 78.309, |
|
"eval_steps_per_second": 9.867, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_exact_match": 13.2, |
|
"eval_f1": 21.172832722832737, |
|
"eval_qa_bleu": 9.841075001407239, |
|
"eval_qa_exact_match": 0.096, |
|
"eval_recite_bleu": 11.691398764835105, |
|
"eval_recite_exact_match": 0.004, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 10.133333333333333, |
|
"grad_norm": 1.6687729358673096, |
|
"learning_rate": 3e-05, |
|
"loss": 1.2562, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 10.666666666666666, |
|
"grad_norm": 1.437538504600525, |
|
"learning_rate": 3e-05, |
|
"loss": 1.2308, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 10.997333333333334, |
|
"eval_accuracy": 0.6000717488789238, |
|
"eval_loss": 1.8528188467025757, |
|
"eval_runtime": 6.6644, |
|
"eval_samples_per_second": 75.026, |
|
"eval_steps_per_second": 9.453, |
|
"step": 2062 |
|
}, |
|
{ |
|
"epoch": 10.997333333333334, |
|
"eval_exact_match": 12.6, |
|
"eval_f1": 20.865213675213692, |
|
"eval_qa_bleu": 7.784433449171618, |
|
"eval_qa_exact_match": 0.108, |
|
"eval_recite_bleu": 12.305413068460787, |
|
"eval_recite_exact_match": 0.008, |
|
"step": 2062 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"grad_norm": 1.708260416984558, |
|
"learning_rate": 3e-05, |
|
"loss": 1.2011, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 11.733333333333333, |
|
"grad_norm": 1.7026830911636353, |
|
"learning_rate": 3e-05, |
|
"loss": 1.1722, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.5987892376681614, |
|
"eval_loss": 1.8851206302642822, |
|
"eval_runtime": 5.5764, |
|
"eval_samples_per_second": 89.663, |
|
"eval_steps_per_second": 11.298, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_exact_match": 13.0, |
|
"eval_f1": 21.17809523809525, |
|
"eval_qa_bleu": 11.19937598521782, |
|
"eval_qa_exact_match": 0.096, |
|
"eval_recite_bleu": 12.500534642121808, |
|
"eval_recite_exact_match": 0.01, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 12.266666666666667, |
|
"grad_norm": 1.6281794309616089, |
|
"learning_rate": 3e-05, |
|
"loss": 1.151, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"grad_norm": 1.686490535736084, |
|
"learning_rate": 3e-05, |
|
"loss": 1.1354, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 12.997333333333334, |
|
"eval_accuracy": 0.5979910313901345, |
|
"eval_loss": 1.9114351272583008, |
|
"eval_runtime": 5.5718, |
|
"eval_samples_per_second": 89.737, |
|
"eval_steps_per_second": 11.307, |
|
"step": 2437 |
|
}, |
|
{ |
|
"epoch": 12.997333333333334, |
|
"eval_exact_match": 13.8, |
|
"eval_f1": 21.856666666666673, |
|
"eval_qa_bleu": 10.206087008658956, |
|
"eval_qa_exact_match": 0.116, |
|
"eval_recite_bleu": 12.669262328097592, |
|
"eval_recite_exact_match": 0.008, |
|
"step": 2437 |
|
}, |
|
{ |
|
"epoch": 13.333333333333334, |
|
"grad_norm": 1.8666125535964966, |
|
"learning_rate": 3e-05, |
|
"loss": 1.097, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 13.866666666666667, |
|
"grad_norm": 1.9091932773590088, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0793, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.5960986547085202, |
|
"eval_loss": 1.9584920406341553, |
|
"eval_runtime": 6.3056, |
|
"eval_samples_per_second": 79.294, |
|
"eval_steps_per_second": 9.991, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_exact_match": 10.6, |
|
"eval_f1": 18.697936507936518, |
|
"eval_qa_bleu": 6.4581555766931436, |
|
"eval_qa_exact_match": 0.082, |
|
"eval_recite_bleu": 12.843526576123885, |
|
"eval_recite_exact_match": 0.01, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"grad_norm": 2.042712688446045, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0376, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 14.933333333333334, |
|
"grad_norm": 1.8875784873962402, |
|
"learning_rate": 3e-05, |
|
"loss": 1.037, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 14.997333333333334, |
|
"eval_accuracy": 0.5948161434977578, |
|
"eval_loss": 1.9966908693313599, |
|
"eval_runtime": 6.0322, |
|
"eval_samples_per_second": 82.888, |
|
"eval_steps_per_second": 10.444, |
|
"step": 2812 |
|
}, |
|
{ |
|
"epoch": 14.997333333333334, |
|
"eval_exact_match": 12.8, |
|
"eval_f1": 20.438917748917753, |
|
"eval_qa_bleu": 10.19364322169534, |
|
"eval_qa_exact_match": 0.104, |
|
"eval_recite_bleu": 12.69742056629334, |
|
"eval_recite_exact_match": 0.012, |
|
"step": 2812 |
|
}, |
|
{ |
|
"epoch": 15.466666666666667, |
|
"grad_norm": 2.027426242828369, |
|
"learning_rate": 3e-05, |
|
"loss": 0.9844, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 2.0708699226379395, |
|
"learning_rate": 3e-05, |
|
"loss": 0.9901, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.5934080717488789, |
|
"eval_loss": 2.0336477756500244, |
|
"eval_runtime": 5.9811, |
|
"eval_samples_per_second": 83.597, |
|
"eval_steps_per_second": 10.533, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_exact_match": 12.4, |
|
"eval_f1": 20.427142857142865, |
|
"eval_qa_bleu": 9.13217752730269, |
|
"eval_qa_exact_match": 0.096, |
|
"eval_recite_bleu": 12.688250071816965, |
|
"eval_recite_exact_match": 0.014, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 16.533333333333335, |
|
"grad_norm": 1.995875358581543, |
|
"learning_rate": 3e-05, |
|
"loss": 0.9316, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 16.997333333333334, |
|
"eval_accuracy": 0.591372197309417, |
|
"eval_loss": 2.088000535964966, |
|
"eval_runtime": 5.9631, |
|
"eval_samples_per_second": 83.849, |
|
"eval_steps_per_second": 10.565, |
|
"step": 3187 |
|
}, |
|
{ |
|
"epoch": 16.997333333333334, |
|
"eval_exact_match": 11.4, |
|
"eval_f1": 18.528571428571443, |
|
"eval_qa_bleu": 7.115393148941145, |
|
"eval_qa_exact_match": 0.084, |
|
"eval_recite_bleu": 12.219544702526637, |
|
"eval_recite_exact_match": 0.014, |
|
"step": 3187 |
|
}, |
|
{ |
|
"epoch": 17.066666666666666, |
|
"grad_norm": 2.623262643814087, |
|
"learning_rate": 3e-05, |
|
"loss": 0.9354, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 17.6, |
|
"grad_norm": 2.4125397205352783, |
|
"learning_rate": 3e-05, |
|
"loss": 0.8802, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.5900627802690583, |
|
"eval_loss": 2.1439507007598877, |
|
"eval_runtime": 5.8546, |
|
"eval_samples_per_second": 85.404, |
|
"eval_steps_per_second": 10.761, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_exact_match": 10.8, |
|
"eval_f1": 17.508095238095244, |
|
"eval_qa_bleu": 7.769457934505562, |
|
"eval_qa_exact_match": 0.09, |
|
"eval_recite_bleu": 12.155079189562132, |
|
"eval_recite_exact_match": 0.016, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 18.133333333333333, |
|
"grad_norm": 2.3584065437316895, |
|
"learning_rate": 3e-05, |
|
"loss": 0.8913, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 18.666666666666668, |
|
"grad_norm": 2.130357265472412, |
|
"learning_rate": 3e-05, |
|
"loss": 0.8382, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 18.997333333333334, |
|
"eval_accuracy": 0.5892645739910314, |
|
"eval_loss": 2.171532154083252, |
|
"eval_runtime": 6.6257, |
|
"eval_samples_per_second": 75.464, |
|
"eval_steps_per_second": 9.508, |
|
"step": 3562 |
|
}, |
|
{ |
|
"epoch": 18.997333333333334, |
|
"eval_exact_match": 13.0, |
|
"eval_f1": 18.838095238095242, |
|
"eval_qa_bleu": 7.682885911703284, |
|
"eval_qa_exact_match": 0.106, |
|
"eval_recite_bleu": 12.350855746597011, |
|
"eval_recite_exact_match": 0.014, |
|
"step": 3562 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"grad_norm": 2.6397740840911865, |
|
"learning_rate": 3e-05, |
|
"loss": 0.8248, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 19.733333333333334, |
|
"grad_norm": 3.049431085586548, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7962, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.5878834080717489, |
|
"eval_loss": 2.2236711978912354, |
|
"eval_runtime": 6.764, |
|
"eval_samples_per_second": 73.921, |
|
"eval_steps_per_second": 9.314, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_exact_match": 12.2, |
|
"eval_f1": 18.373809523809534, |
|
"eval_qa_bleu": 8.448974659181921, |
|
"eval_qa_exact_match": 0.096, |
|
"eval_recite_bleu": 11.298504528713615, |
|
"eval_recite_exact_match": 0.014, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 20.266666666666666, |
|
"grad_norm": 2.647123098373413, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7797, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 20.8, |
|
"grad_norm": 2.798877239227295, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7553, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 20.997333333333334, |
|
"eval_accuracy": 0.5860896860986547, |
|
"eval_loss": 2.295654535293579, |
|
"eval_runtime": 5.5847, |
|
"eval_samples_per_second": 89.531, |
|
"eval_steps_per_second": 11.281, |
|
"step": 3937 |
|
}, |
|
{ |
|
"epoch": 20.997333333333334, |
|
"eval_exact_match": 10.2, |
|
"eval_f1": 16.601428571428578, |
|
"eval_qa_bleu": 7.90021052694059, |
|
"eval_qa_exact_match": 0.08, |
|
"eval_recite_bleu": 11.716166611151118, |
|
"eval_recite_exact_match": 0.01, |
|
"step": 3937 |
|
}, |
|
{ |
|
"epoch": 21.333333333333332, |
|
"grad_norm": 2.7208638191223145, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7306, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 21.866666666666667, |
|
"grad_norm": 2.9887781143188477, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7238, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.5850941704035875, |
|
"eval_loss": 2.331202507019043, |
|
"eval_runtime": 6.6871, |
|
"eval_samples_per_second": 74.771, |
|
"eval_steps_per_second": 9.421, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_exact_match": 10.8, |
|
"eval_f1": 17.400692640692647, |
|
"eval_qa_bleu": 4.77963863209427, |
|
"eval_qa_exact_match": 0.084, |
|
"eval_recite_bleu": 11.404518212072734, |
|
"eval_recite_exact_match": 0.014, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 22.4, |
|
"grad_norm": 2.706368923187256, |
|
"learning_rate": 3e-05, |
|
"loss": 0.6813, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 22.933333333333334, |
|
"grad_norm": 2.850799798965454, |
|
"learning_rate": 3e-05, |
|
"loss": 0.676, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 22.997333333333334, |
|
"eval_accuracy": 0.583237668161435, |
|
"eval_loss": 2.4042670726776123, |
|
"eval_runtime": 6.5747, |
|
"eval_samples_per_second": 76.049, |
|
"eval_steps_per_second": 9.582, |
|
"step": 4312 |
|
}, |
|
{ |
|
"epoch": 22.997333333333334, |
|
"eval_exact_match": 8.6, |
|
"eval_f1": 16.314285714285724, |
|
"eval_qa_bleu": 7.466573197185606, |
|
"eval_qa_exact_match": 0.064, |
|
"eval_recite_bleu": 11.568247151151132, |
|
"eval_recite_exact_match": 0.014, |
|
"step": 4312 |
|
}, |
|
{ |
|
"epoch": 23.466666666666665, |
|
"grad_norm": 2.95532488822937, |
|
"learning_rate": 3e-05, |
|
"loss": 0.632, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"grad_norm": 3.36663556098938, |
|
"learning_rate": 3e-05, |
|
"loss": 0.644, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.5824125560538117, |
|
"eval_loss": 2.443950891494751, |
|
"eval_runtime": 5.7069, |
|
"eval_samples_per_second": 87.614, |
|
"eval_steps_per_second": 11.039, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_exact_match": 8.8, |
|
"eval_f1": 15.491948051948057, |
|
"eval_qa_bleu": 4.531789460915551, |
|
"eval_qa_exact_match": 0.076, |
|
"eval_recite_bleu": 10.65729884465588, |
|
"eval_recite_exact_match": 0.014, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 24.533333333333335, |
|
"grad_norm": 3.074622631072998, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5939, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 24.997333333333334, |
|
"eval_accuracy": 0.5818385650224215, |
|
"eval_loss": 2.5127182006835938, |
|
"eval_runtime": 5.6588, |
|
"eval_samples_per_second": 88.357, |
|
"eval_steps_per_second": 11.133, |
|
"step": 4687 |
|
}, |
|
{ |
|
"epoch": 24.997333333333334, |
|
"eval_exact_match": 9.0, |
|
"eval_f1": 16.361587301587303, |
|
"eval_qa_bleu": 7.691728089554307, |
|
"eval_qa_exact_match": 0.068, |
|
"eval_recite_bleu": 11.016738678559781, |
|
"eval_recite_exact_match": 0.008, |
|
"step": 4687 |
|
}, |
|
{ |
|
"epoch": 25.066666666666666, |
|
"grad_norm": 2.702502727508545, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5904, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 25.6, |
|
"grad_norm": 3.233320951461792, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5551, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.5809596412556054, |
|
"eval_loss": 2.538985252380371, |
|
"eval_runtime": 6.6838, |
|
"eval_samples_per_second": 74.808, |
|
"eval_steps_per_second": 9.426, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_exact_match": 9.2, |
|
"eval_f1": 16.819884559884564, |
|
"eval_qa_bleu": 4.658892987257927, |
|
"eval_qa_exact_match": 0.078, |
|
"eval_recite_bleu": 10.853797139069188, |
|
"eval_recite_exact_match": 0.012, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 26.133333333333333, |
|
"grad_norm": 3.280097246170044, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5503, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 26.666666666666668, |
|
"grad_norm": 3.1984827518463135, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5163, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 26.997333333333334, |
|
"eval_accuracy": 0.579829596412556, |
|
"eval_loss": 2.5808944702148438, |
|
"eval_runtime": 6.6339, |
|
"eval_samples_per_second": 75.37, |
|
"eval_steps_per_second": 9.497, |
|
"step": 5062 |
|
}, |
|
{ |
|
"epoch": 26.997333333333334, |
|
"eval_exact_match": 8.0, |
|
"eval_f1": 16.641197691197693, |
|
"eval_qa_bleu": 5.575351494624609, |
|
"eval_qa_exact_match": 0.066, |
|
"eval_recite_bleu": 11.099361911324253, |
|
"eval_recite_exact_match": 0.01, |
|
"step": 5062 |
|
}, |
|
{ |
|
"epoch": 27.2, |
|
"grad_norm": 3.5599937438964844, |
|
"learning_rate": 3e-05, |
|
"loss": 0.52, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 27.733333333333334, |
|
"grad_norm": 2.9382145404815674, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4892, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.5788609865470852, |
|
"eval_loss": 2.6670191287994385, |
|
"eval_runtime": 6.0585, |
|
"eval_samples_per_second": 82.529, |
|
"eval_steps_per_second": 10.399, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_exact_match": 8.6, |
|
"eval_f1": 15.144920634920638, |
|
"eval_qa_bleu": 5.387088284739065, |
|
"eval_qa_exact_match": 0.074, |
|
"eval_recite_bleu": 11.457740165981269, |
|
"eval_recite_exact_match": 0.012, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 28.266666666666666, |
|
"grad_norm": 4.097602367401123, |
|
"learning_rate": 3e-05, |
|
"loss": 0.47, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 28.8, |
|
"grad_norm": 3.9295685291290283, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4669, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 28.997333333333334, |
|
"eval_accuracy": 0.578609865470852, |
|
"eval_loss": 2.669480085372925, |
|
"eval_runtime": 6.6992, |
|
"eval_samples_per_second": 74.636, |
|
"eval_steps_per_second": 9.404, |
|
"step": 5437 |
|
}, |
|
{ |
|
"epoch": 28.997333333333334, |
|
"eval_exact_match": 9.2, |
|
"eval_f1": 16.65718614718615, |
|
"eval_qa_bleu": 4.637892076000507, |
|
"eval_qa_exact_match": 0.074, |
|
"eval_recite_bleu": 11.77754451628722, |
|
"eval_recite_exact_match": 0.012, |
|
"step": 5437 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 9350, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.000442259286262e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|