|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 24.997333333333334, |
|
"eval_steps": 500, |
|
"global_step": 4687, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.5333333333333333, |
|
"grad_norm": 0.271308958530426, |
|
"learning_rate": 5e-05, |
|
"loss": 1.8262, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.9973333333333333, |
|
"eval_accuracy": 0.6047174887892377, |
|
"eval_loss": 1.7027028799057007, |
|
"eval_runtime": 6.6338, |
|
"eval_samples_per_second": 75.372, |
|
"eval_steps_per_second": 9.497, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.9973333333333333, |
|
"eval_exact_match": 14.6, |
|
"eval_f1": 23.665714285714298, |
|
"eval_qa_bleu": 10.24028390049623, |
|
"eval_qa_exact_match": 0.114, |
|
"eval_recite_bleu": 12.525534079250063, |
|
"eval_recite_exact_match": 0.0, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 1.0666666666666667, |
|
"grad_norm": 0.301651269197464, |
|
"learning_rate": 5e-05, |
|
"loss": 1.7424, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.35949045419692993, |
|
"learning_rate": 5e-05, |
|
"loss": 1.6978, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6068699551569506, |
|
"eval_loss": 1.6845285892486572, |
|
"eval_runtime": 5.7524, |
|
"eval_samples_per_second": 86.92, |
|
"eval_steps_per_second": 10.952, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_exact_match": 14.6, |
|
"eval_f1": 23.74190476190477, |
|
"eval_qa_bleu": 9.70432698897927, |
|
"eval_qa_exact_match": 0.114, |
|
"eval_recite_bleu": 13.014822566317982, |
|
"eval_recite_exact_match": 0.006, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 2.1333333333333333, |
|
"grad_norm": 0.41301965713500977, |
|
"learning_rate": 5e-05, |
|
"loss": 1.694, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.6666666666666665, |
|
"grad_norm": 0.5057485699653625, |
|
"learning_rate": 5e-05, |
|
"loss": 1.6324, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.997333333333333, |
|
"eval_accuracy": 0.6079551569506726, |
|
"eval_loss": 1.6851012706756592, |
|
"eval_runtime": 5.5314, |
|
"eval_samples_per_second": 90.394, |
|
"eval_steps_per_second": 11.39, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 2.997333333333333, |
|
"eval_exact_match": 14.2, |
|
"eval_f1": 23.29190476190477, |
|
"eval_qa_bleu": 10.724307586340888, |
|
"eval_qa_exact_match": 0.104, |
|
"eval_recite_bleu": 12.83531066630853, |
|
"eval_recite_exact_match": 0.004, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"grad_norm": 0.6363334655761719, |
|
"learning_rate": 5e-05, |
|
"loss": 1.6063, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.7333333333333334, |
|
"grad_norm": 0.6466453671455383, |
|
"learning_rate": 5e-05, |
|
"loss": 1.5562, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6071121076233184, |
|
"eval_loss": 1.6969172954559326, |
|
"eval_runtime": 6.2612, |
|
"eval_samples_per_second": 79.857, |
|
"eval_steps_per_second": 10.062, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_exact_match": 14.0, |
|
"eval_f1": 21.34333333333334, |
|
"eval_qa_bleu": 10.250340106635626, |
|
"eval_qa_exact_match": 0.084, |
|
"eval_recite_bleu": 12.741830585824024, |
|
"eval_recite_exact_match": 0.004, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 4.266666666666667, |
|
"grad_norm": 0.8162885904312134, |
|
"learning_rate": 5e-05, |
|
"loss": 1.5052, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"grad_norm": 0.8799439072608948, |
|
"learning_rate": 5e-05, |
|
"loss": 1.469, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 4.997333333333334, |
|
"eval_accuracy": 0.6059282511210762, |
|
"eval_loss": 1.7216957807540894, |
|
"eval_runtime": 6.3012, |
|
"eval_samples_per_second": 79.35, |
|
"eval_steps_per_second": 9.998, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 4.997333333333334, |
|
"eval_exact_match": 12.6, |
|
"eval_f1": 21.041428571428575, |
|
"eval_qa_bleu": 8.239984727774104, |
|
"eval_qa_exact_match": 0.094, |
|
"eval_recite_bleu": 12.327373697720192, |
|
"eval_recite_exact_match": 0.002, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 5.333333333333333, |
|
"grad_norm": 0.9834449291229248, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3845, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.866666666666667, |
|
"grad_norm": 0.9377098679542542, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3927, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6047354260089686, |
|
"eval_loss": 1.7564114332199097, |
|
"eval_runtime": 6.3447, |
|
"eval_samples_per_second": 78.806, |
|
"eval_steps_per_second": 9.93, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_exact_match": 13.4, |
|
"eval_f1": 21.70317460317461, |
|
"eval_qa_bleu": 10.522554916579404, |
|
"eval_qa_exact_match": 0.104, |
|
"eval_recite_bleu": 12.383604385812577, |
|
"eval_recite_exact_match": 0.006, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"grad_norm": 1.1172072887420654, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2924, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 6.933333333333334, |
|
"grad_norm": 1.0980473756790161, |
|
"learning_rate": 5e-05, |
|
"loss": 1.286, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 6.997333333333334, |
|
"eval_accuracy": 0.6026995515695067, |
|
"eval_loss": 1.7977122068405151, |
|
"eval_runtime": 5.9021, |
|
"eval_samples_per_second": 84.716, |
|
"eval_steps_per_second": 10.674, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 6.997333333333334, |
|
"eval_exact_match": 12.0, |
|
"eval_f1": 20.07142857142858, |
|
"eval_qa_bleu": 9.89685444999411, |
|
"eval_qa_exact_match": 0.102, |
|
"eval_recite_bleu": 12.555118604776576, |
|
"eval_recite_exact_match": 0.008, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 7.466666666666667, |
|
"grad_norm": 1.1534159183502197, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1956, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 1.1209707260131836, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1891, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6010403587443947, |
|
"eval_loss": 1.840914249420166, |
|
"eval_runtime": 6.3016, |
|
"eval_samples_per_second": 79.345, |
|
"eval_steps_per_second": 9.997, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_exact_match": 12.8, |
|
"eval_f1": 21.598730158730167, |
|
"eval_qa_bleu": 9.863863368837224, |
|
"eval_qa_exact_match": 0.104, |
|
"eval_recite_bleu": 12.09730367063489, |
|
"eval_recite_exact_match": 0.01, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 8.533333333333333, |
|
"grad_norm": 1.3646217584609985, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0861, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 8.997333333333334, |
|
"eval_accuracy": 0.5981255605381166, |
|
"eval_loss": 1.9223005771636963, |
|
"eval_runtime": 5.5092, |
|
"eval_samples_per_second": 90.757, |
|
"eval_steps_per_second": 11.435, |
|
"step": 1687 |
|
}, |
|
{ |
|
"epoch": 8.997333333333334, |
|
"eval_exact_match": 14.8, |
|
"eval_f1": 22.160000000000004, |
|
"eval_qa_bleu": 8.357776258354589, |
|
"eval_qa_exact_match": 0.116, |
|
"eval_recite_bleu": 12.76267532884902, |
|
"eval_recite_exact_match": 0.01, |
|
"step": 1687 |
|
}, |
|
{ |
|
"epoch": 9.066666666666666, |
|
"grad_norm": 1.6948473453521729, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0935, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"grad_norm": 1.6024364233016968, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0127, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.5973632286995516, |
|
"eval_loss": 1.9414385557174683, |
|
"eval_runtime": 5.5824, |
|
"eval_samples_per_second": 89.568, |
|
"eval_steps_per_second": 11.286, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_exact_match": 12.4, |
|
"eval_f1": 20.75746031746032, |
|
"eval_qa_bleu": 8.760424886550503, |
|
"eval_qa_exact_match": 0.088, |
|
"eval_recite_bleu": 12.754935192029142, |
|
"eval_recite_exact_match": 0.016, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 10.133333333333333, |
|
"grad_norm": 1.6970032453536987, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9788, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 10.666666666666666, |
|
"grad_norm": 1.559332251548767, |
|
"learning_rate": 5e-05, |
|
"loss": 0.926, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 10.997333333333334, |
|
"eval_accuracy": 0.5945201793721973, |
|
"eval_loss": 2.0455663204193115, |
|
"eval_runtime": 5.5259, |
|
"eval_samples_per_second": 90.484, |
|
"eval_steps_per_second": 11.401, |
|
"step": 2062 |
|
}, |
|
{ |
|
"epoch": 10.997333333333334, |
|
"eval_exact_match": 13.0, |
|
"eval_f1": 20.56190476190476, |
|
"eval_qa_bleu": 11.437402190353293, |
|
"eval_qa_exact_match": 0.106, |
|
"eval_recite_bleu": 13.600786696850818, |
|
"eval_recite_exact_match": 0.022, |
|
"step": 2062 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"grad_norm": 1.667855143547058, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8872, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 11.733333333333333, |
|
"grad_norm": 1.6040037870407104, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8347, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.5923946188340807, |
|
"eval_loss": 2.116677761077881, |
|
"eval_runtime": 6.4038, |
|
"eval_samples_per_second": 78.078, |
|
"eval_steps_per_second": 9.838, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_exact_match": 13.6, |
|
"eval_f1": 20.470981240981256, |
|
"eval_qa_bleu": 8.968260765274712, |
|
"eval_qa_exact_match": 0.106, |
|
"eval_recite_bleu": 12.793280320213494, |
|
"eval_recite_exact_match": 0.014, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 12.266666666666667, |
|
"grad_norm": 1.794092059135437, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8032, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"grad_norm": 1.8774526119232178, |
|
"learning_rate": 5e-05, |
|
"loss": 0.7736, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 12.997333333333334, |
|
"eval_accuracy": 0.5902690582959641, |
|
"eval_loss": 2.179492950439453, |
|
"eval_runtime": 5.9945, |
|
"eval_samples_per_second": 83.409, |
|
"eval_steps_per_second": 10.51, |
|
"step": 2437 |
|
}, |
|
{ |
|
"epoch": 12.997333333333334, |
|
"eval_exact_match": 12.8, |
|
"eval_f1": 20.780476190476197, |
|
"eval_qa_bleu": 11.176823944321667, |
|
"eval_qa_exact_match": 0.094, |
|
"eval_recite_bleu": 13.292972390103905, |
|
"eval_recite_exact_match": 0.02, |
|
"step": 2437 |
|
}, |
|
{ |
|
"epoch": 13.333333333333334, |
|
"grad_norm": 1.9102168083190918, |
|
"learning_rate": 5e-05, |
|
"loss": 0.7168, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 13.866666666666667, |
|
"grad_norm": 1.9183365106582642, |
|
"learning_rate": 5e-05, |
|
"loss": 0.6903, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.5875874439461883, |
|
"eval_loss": 2.274404287338257, |
|
"eval_runtime": 6.2502, |
|
"eval_samples_per_second": 79.998, |
|
"eval_steps_per_second": 10.08, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_exact_match": 11.2, |
|
"eval_f1": 18.26571428571429, |
|
"eval_qa_bleu": 10.347091943612007, |
|
"eval_qa_exact_match": 0.082, |
|
"eval_recite_bleu": 12.720454352082847, |
|
"eval_recite_exact_match": 0.014, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"grad_norm": 2.326387405395508, |
|
"learning_rate": 5e-05, |
|
"loss": 0.6312, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 14.933333333333334, |
|
"grad_norm": 2.1555840969085693, |
|
"learning_rate": 5e-05, |
|
"loss": 0.6267, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 14.997333333333334, |
|
"eval_accuracy": 0.5870582959641255, |
|
"eval_loss": 2.338937997817993, |
|
"eval_runtime": 6.643, |
|
"eval_samples_per_second": 75.267, |
|
"eval_steps_per_second": 9.484, |
|
"step": 2812 |
|
}, |
|
{ |
|
"epoch": 14.997333333333334, |
|
"eval_exact_match": 10.6, |
|
"eval_f1": 18.311471861471865, |
|
"eval_qa_bleu": 9.56012066081651, |
|
"eval_qa_exact_match": 0.086, |
|
"eval_recite_bleu": 12.790292247765784, |
|
"eval_recite_exact_match": 0.022, |
|
"step": 2812 |
|
}, |
|
{ |
|
"epoch": 15.466666666666667, |
|
"grad_norm": 1.8364830017089844, |
|
"learning_rate": 5e-05, |
|
"loss": 0.5582, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 1.9223560094833374, |
|
"learning_rate": 5e-05, |
|
"loss": 0.5673, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.585067264573991, |
|
"eval_loss": 2.4404428005218506, |
|
"eval_runtime": 6.2254, |
|
"eval_samples_per_second": 80.316, |
|
"eval_steps_per_second": 10.12, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_exact_match": 11.4, |
|
"eval_f1": 18.553809523809527, |
|
"eval_qa_bleu": 7.054735448183399, |
|
"eval_qa_exact_match": 0.088, |
|
"eval_recite_bleu": 12.41180854100743, |
|
"eval_recite_exact_match": 0.018, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 16.533333333333335, |
|
"grad_norm": 2.403259515762329, |
|
"learning_rate": 5e-05, |
|
"loss": 0.4886, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 16.997333333333334, |
|
"eval_accuracy": 0.5844035874439462, |
|
"eval_loss": 2.5225181579589844, |
|
"eval_runtime": 5.9008, |
|
"eval_samples_per_second": 84.734, |
|
"eval_steps_per_second": 10.677, |
|
"step": 3187 |
|
}, |
|
{ |
|
"epoch": 16.997333333333334, |
|
"eval_exact_match": 10.6, |
|
"eval_f1": 17.309567099567108, |
|
"eval_qa_bleu": 7.6942420610670945, |
|
"eval_qa_exact_match": 0.086, |
|
"eval_recite_bleu": 11.974979252669918, |
|
"eval_recite_exact_match": 0.016, |
|
"step": 3187 |
|
}, |
|
{ |
|
"epoch": 17.066666666666666, |
|
"grad_norm": 2.1279492378234863, |
|
"learning_rate": 5e-05, |
|
"loss": 0.4985, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 17.6, |
|
"grad_norm": 2.377486228942871, |
|
"learning_rate": 5e-05, |
|
"loss": 0.4357, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.5838026905829596, |
|
"eval_loss": 2.590667724609375, |
|
"eval_runtime": 5.984, |
|
"eval_samples_per_second": 83.556, |
|
"eval_steps_per_second": 10.528, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_exact_match": 10.8, |
|
"eval_f1": 18.150000000000006, |
|
"eval_qa_bleu": 10.081752461945443, |
|
"eval_qa_exact_match": 0.078, |
|
"eval_recite_bleu": 12.17977433983274, |
|
"eval_recite_exact_match": 0.018, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 18.133333333333333, |
|
"grad_norm": 2.425731897354126, |
|
"learning_rate": 5e-05, |
|
"loss": 0.4424, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 18.666666666666668, |
|
"grad_norm": 2.4585390090942383, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3873, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 18.997333333333334, |
|
"eval_accuracy": 0.5823228699551569, |
|
"eval_loss": 2.6423120498657227, |
|
"eval_runtime": 5.5641, |
|
"eval_samples_per_second": 89.861, |
|
"eval_steps_per_second": 11.323, |
|
"step": 3562 |
|
}, |
|
{ |
|
"epoch": 18.997333333333334, |
|
"eval_exact_match": 11.0, |
|
"eval_f1": 18.006349206349213, |
|
"eval_qa_bleu": 8.890762850928647, |
|
"eval_qa_exact_match": 0.092, |
|
"eval_recite_bleu": 12.489755382776915, |
|
"eval_recite_exact_match": 0.022, |
|
"step": 3562 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"grad_norm": 2.00604510307312, |
|
"learning_rate": 5e-05, |
|
"loss": 0.383, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 19.733333333333334, |
|
"grad_norm": 2.393627405166626, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3497, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.5803587443946189, |
|
"eval_loss": 2.7260048389434814, |
|
"eval_runtime": 6.546, |
|
"eval_samples_per_second": 76.382, |
|
"eval_steps_per_second": 9.624, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_exact_match": 9.6, |
|
"eval_f1": 15.837460317460323, |
|
"eval_qa_bleu": 7.780402070269215, |
|
"eval_qa_exact_match": 0.078, |
|
"eval_recite_bleu": 12.344573606229106, |
|
"eval_recite_exact_match": 0.024, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 20.266666666666666, |
|
"grad_norm": 2.1227269172668457, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3321, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 20.8, |
|
"grad_norm": 2.2578742504119873, |
|
"learning_rate": 5e-05, |
|
"loss": 0.314, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 20.997333333333334, |
|
"eval_accuracy": 0.5801883408071749, |
|
"eval_loss": 2.820314884185791, |
|
"eval_runtime": 5.6037, |
|
"eval_samples_per_second": 89.227, |
|
"eval_steps_per_second": 11.243, |
|
"step": 3937 |
|
}, |
|
{ |
|
"epoch": 20.997333333333334, |
|
"eval_exact_match": 12.4, |
|
"eval_f1": 17.80337662337662, |
|
"eval_qa_bleu": 9.273311361506353, |
|
"eval_qa_exact_match": 0.096, |
|
"eval_recite_bleu": 12.39691014320001, |
|
"eval_recite_exact_match": 0.018, |
|
"step": 3937 |
|
}, |
|
{ |
|
"epoch": 21.333333333333332, |
|
"grad_norm": 1.9079784154891968, |
|
"learning_rate": 5e-05, |
|
"loss": 0.2951, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 21.866666666666667, |
|
"grad_norm": 2.5200765132904053, |
|
"learning_rate": 5e-05, |
|
"loss": 0.2893, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.5794260089686099, |
|
"eval_loss": 2.905937433242798, |
|
"eval_runtime": 5.8733, |
|
"eval_samples_per_second": 85.132, |
|
"eval_steps_per_second": 10.727, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_exact_match": 10.2, |
|
"eval_f1": 16.468253968253975, |
|
"eval_qa_bleu": 6.31129540696892, |
|
"eval_qa_exact_match": 0.084, |
|
"eval_recite_bleu": 12.11260232701576, |
|
"eval_recite_exact_match": 0.022, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 22.4, |
|
"grad_norm": 1.9616879224777222, |
|
"learning_rate": 5e-05, |
|
"loss": 0.2584, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 22.933333333333334, |
|
"grad_norm": 1.854027271270752, |
|
"learning_rate": 5e-05, |
|
"loss": 0.2583, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 22.997333333333334, |
|
"eval_accuracy": 0.578914798206278, |
|
"eval_loss": 2.9786081314086914, |
|
"eval_runtime": 6.2893, |
|
"eval_samples_per_second": 79.5, |
|
"eval_steps_per_second": 10.017, |
|
"step": 4312 |
|
}, |
|
{ |
|
"epoch": 22.997333333333334, |
|
"eval_exact_match": 10.2, |
|
"eval_f1": 16.564126984126986, |
|
"eval_qa_bleu": 6.2299815854348815, |
|
"eval_qa_exact_match": 0.086, |
|
"eval_recite_bleu": 11.81704787803335, |
|
"eval_recite_exact_match": 0.024, |
|
"step": 4312 |
|
}, |
|
{ |
|
"epoch": 23.466666666666665, |
|
"grad_norm": 1.8527085781097412, |
|
"learning_rate": 5e-05, |
|
"loss": 0.2289, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"grad_norm": 2.258077621459961, |
|
"learning_rate": 5e-05, |
|
"loss": 0.2382, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.5789596412556054, |
|
"eval_loss": 3.0398380756378174, |
|
"eval_runtime": 6.5231, |
|
"eval_samples_per_second": 76.651, |
|
"eval_steps_per_second": 9.658, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_exact_match": 10.2, |
|
"eval_f1": 16.815281385281384, |
|
"eval_qa_bleu": 6.817434246718267, |
|
"eval_qa_exact_match": 0.082, |
|
"eval_recite_bleu": 12.918682221726373, |
|
"eval_recite_exact_match": 0.022, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 24.533333333333335, |
|
"grad_norm": 2.0298190116882324, |
|
"learning_rate": 5e-05, |
|
"loss": 0.2051, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 24.997333333333334, |
|
"eval_accuracy": 0.5780627802690583, |
|
"eval_loss": 3.1147310733795166, |
|
"eval_runtime": 6.2317, |
|
"eval_samples_per_second": 80.236, |
|
"eval_steps_per_second": 10.11, |
|
"step": 4687 |
|
}, |
|
{ |
|
"epoch": 24.997333333333334, |
|
"eval_exact_match": 10.8, |
|
"eval_f1": 17.1636507936508, |
|
"eval_qa_bleu": 8.438045873055385, |
|
"eval_qa_exact_match": 0.09, |
|
"eval_recite_bleu": 12.425198158877508, |
|
"eval_recite_exact_match": 0.028, |
|
"step": 4687 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 9350, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.721636404352778e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|