|
{ |
|
"best_metric": 0.8533899487562311, |
|
"best_model_checkpoint": "./results/checkpoint-1200", |
|
"epoch": 69.0, |
|
"eval_steps": 500, |
|
"global_step": 1656, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 4.4191670417785645, |
|
"learning_rate": 4.9500000000000004e-05, |
|
"loss": 1.1717, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_f1": 0.5559615812547036, |
|
"eval_loss": 0.852466344833374, |
|
"eval_runtime": 0.2518, |
|
"eval_samples_per_second": 865.826, |
|
"eval_steps_per_second": 15.887, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 6.028530597686768, |
|
"learning_rate": 4.9e-05, |
|
"loss": 0.714, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_f1": 0.7746990836466595, |
|
"eval_loss": 0.6411612033843994, |
|
"eval_runtime": 0.2498, |
|
"eval_samples_per_second": 872.757, |
|
"eval_steps_per_second": 16.014, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 6.396097660064697, |
|
"learning_rate": 4.85e-05, |
|
"loss": 0.4376, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_f1": 0.7904679173662312, |
|
"eval_loss": 0.578528881072998, |
|
"eval_runtime": 0.2466, |
|
"eval_samples_per_second": 883.847, |
|
"eval_steps_per_second": 16.217, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 13.205650329589844, |
|
"learning_rate": 4.8e-05, |
|
"loss": 0.262, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_f1": 0.8032839961396288, |
|
"eval_loss": 0.6008332967758179, |
|
"eval_runtime": 0.2457, |
|
"eval_samples_per_second": 887.351, |
|
"eval_steps_per_second": 16.282, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 9.777926445007324, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.1384, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_f1": 0.7854101778464291, |
|
"eval_loss": 0.6125118732452393, |
|
"eval_runtime": 0.2485, |
|
"eval_samples_per_second": 877.32, |
|
"eval_steps_per_second": 16.098, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 4.165589332580566, |
|
"learning_rate": 4.7e-05, |
|
"loss": 0.0982, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_f1": 0.7672888039973402, |
|
"eval_loss": 0.7653169631958008, |
|
"eval_runtime": 0.2569, |
|
"eval_samples_per_second": 848.674, |
|
"eval_steps_per_second": 15.572, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 3.7239115238189697, |
|
"learning_rate": 4.6500000000000005e-05, |
|
"loss": 0.0657, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_f1": 0.8147995774570269, |
|
"eval_loss": 0.7002198100090027, |
|
"eval_runtime": 0.2474, |
|
"eval_samples_per_second": 881.237, |
|
"eval_steps_per_second": 16.169, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.3135942220687866, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.0395, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_f1": 0.8087986139563679, |
|
"eval_loss": 0.7771649956703186, |
|
"eval_runtime": 0.244, |
|
"eval_samples_per_second": 893.39, |
|
"eval_steps_per_second": 16.392, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.4529080092906952, |
|
"learning_rate": 4.55e-05, |
|
"loss": 0.0214, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_f1": 0.8392522702480626, |
|
"eval_loss": 0.7208238244056702, |
|
"eval_runtime": 0.2501, |
|
"eval_samples_per_second": 871.738, |
|
"eval_steps_per_second": 15.995, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 8.327765464782715, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.0237, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_f1": 0.8424386724386725, |
|
"eval_loss": 0.7019856572151184, |
|
"eval_runtime": 0.2469, |
|
"eval_samples_per_second": 883.057, |
|
"eval_steps_per_second": 16.203, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 2.6359758377075195, |
|
"learning_rate": 4.4500000000000004e-05, |
|
"loss": 0.0159, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_f1": 0.8074501800720288, |
|
"eval_loss": 0.9096614122390747, |
|
"eval_runtime": 0.2471, |
|
"eval_samples_per_second": 882.066, |
|
"eval_steps_per_second": 16.185, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 18.37872886657715, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.0322, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_f1": 0.7848399290035054, |
|
"eval_loss": 1.0800021886825562, |
|
"eval_runtime": 0.2488, |
|
"eval_samples_per_second": 876.232, |
|
"eval_steps_per_second": 16.078, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 13.307744026184082, |
|
"learning_rate": 4.35e-05, |
|
"loss": 0.029, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_f1": 0.798355010551357, |
|
"eval_loss": 1.0241613388061523, |
|
"eval_runtime": 0.2493, |
|
"eval_samples_per_second": 874.372, |
|
"eval_steps_per_second": 16.044, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 0.41816067695617676, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.0121, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_f1": 0.8120306519736268, |
|
"eval_loss": 1.022652268409729, |
|
"eval_runtime": 0.2454, |
|
"eval_samples_per_second": 888.194, |
|
"eval_steps_per_second": 16.297, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 10.153714179992676, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.0082, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_f1": 0.7906564533144493, |
|
"eval_loss": 1.096178650856018, |
|
"eval_runtime": 0.2461, |
|
"eval_samples_per_second": 885.781, |
|
"eval_steps_per_second": 16.253, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 0.026447944343090057, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.0035, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_f1": 0.7900391731356858, |
|
"eval_loss": 1.2207499742507935, |
|
"eval_runtime": 0.2461, |
|
"eval_samples_per_second": 885.87, |
|
"eval_steps_per_second": 16.254, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 0.008760841563344002, |
|
"learning_rate": 4.15e-05, |
|
"loss": 0.0064, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_f1": 0.7941827325696338, |
|
"eval_loss": 1.120672345161438, |
|
"eval_runtime": 0.2456, |
|
"eval_samples_per_second": 887.604, |
|
"eval_steps_per_second": 16.286, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 0.2175062894821167, |
|
"learning_rate": 4.1e-05, |
|
"loss": 0.0089, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_f1": 0.8121032920020557, |
|
"eval_loss": 1.1158227920532227, |
|
"eval_runtime": 0.2466, |
|
"eval_samples_per_second": 884.124, |
|
"eval_steps_per_second": 16.222, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 0.026284487918019295, |
|
"learning_rate": 4.05e-05, |
|
"loss": 0.0011, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_f1": 0.8106351612230018, |
|
"eval_loss": 1.1229100227355957, |
|
"eval_runtime": 0.2466, |
|
"eval_samples_per_second": 883.852, |
|
"eval_steps_per_second": 16.217, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 7.1518473625183105, |
|
"learning_rate": 4e-05, |
|
"loss": 0.0036, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_f1": 0.8215225082145855, |
|
"eval_loss": 1.0700623989105225, |
|
"eval_runtime": 0.2447, |
|
"eval_samples_per_second": 890.866, |
|
"eval_steps_per_second": 16.346, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"grad_norm": 0.030479425564408302, |
|
"learning_rate": 3.9500000000000005e-05, |
|
"loss": 0.0038, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_f1": 0.79625307224709, |
|
"eval_loss": 1.1593633890151978, |
|
"eval_runtime": 0.2451, |
|
"eval_samples_per_second": 889.358, |
|
"eval_steps_per_second": 16.319, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"grad_norm": 0.8992727994918823, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"loss": 0.013, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_f1": 0.8198287286484007, |
|
"eval_loss": 1.127288579940796, |
|
"eval_runtime": 0.2483, |
|
"eval_samples_per_second": 878.117, |
|
"eval_steps_per_second": 16.112, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"grad_norm": 0.008566158823668957, |
|
"learning_rate": 3.85e-05, |
|
"loss": 0.0051, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_f1": 0.8245210270785887, |
|
"eval_loss": 1.0870376825332642, |
|
"eval_runtime": 0.2503, |
|
"eval_samples_per_second": 870.992, |
|
"eval_steps_per_second": 15.982, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"grad_norm": 0.0786171406507492, |
|
"learning_rate": 3.8e-05, |
|
"loss": 0.0037, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_f1": 0.8210009216488835, |
|
"eval_loss": 1.1669812202453613, |
|
"eval_runtime": 0.2462, |
|
"eval_samples_per_second": 885.624, |
|
"eval_steps_per_second": 16.25, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"grad_norm": 0.02009885385632515, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.0035, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_f1": 0.823341564899699, |
|
"eval_loss": 1.1029468774795532, |
|
"eval_runtime": 0.246, |
|
"eval_samples_per_second": 886.238, |
|
"eval_steps_per_second": 16.261, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"grad_norm": 0.10086794197559357, |
|
"learning_rate": 3.7e-05, |
|
"loss": 0.0051, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_f1": 0.8138320306415473, |
|
"eval_loss": 1.1733108758926392, |
|
"eval_runtime": 0.2405, |
|
"eval_samples_per_second": 906.567, |
|
"eval_steps_per_second": 16.634, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"grad_norm": 24.029403686523438, |
|
"learning_rate": 3.65e-05, |
|
"loss": 0.0032, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_f1": 0.8293087494328629, |
|
"eval_loss": 1.1072059869766235, |
|
"eval_runtime": 0.244, |
|
"eval_samples_per_second": 893.364, |
|
"eval_steps_per_second": 16.392, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"grad_norm": 19.329952239990234, |
|
"learning_rate": 3.6e-05, |
|
"loss": 0.024, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_f1": 0.7828629987222324, |
|
"eval_loss": 1.3776278495788574, |
|
"eval_runtime": 0.2463, |
|
"eval_samples_per_second": 884.934, |
|
"eval_steps_per_second": 16.237, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"grad_norm": 0.0337708480656147, |
|
"learning_rate": 3.55e-05, |
|
"loss": 0.0097, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_f1": 0.8245008772063647, |
|
"eval_loss": 1.161023736000061, |
|
"eval_runtime": 0.246, |
|
"eval_samples_per_second": 886.218, |
|
"eval_steps_per_second": 16.261, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"grad_norm": 0.05386161431670189, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.0093, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_f1": 0.8331553578084018, |
|
"eval_loss": 1.0987571477890015, |
|
"eval_runtime": 0.2499, |
|
"eval_samples_per_second": 872.378, |
|
"eval_steps_per_second": 16.007, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"grad_norm": 0.011382571421563625, |
|
"learning_rate": 3.45e-05, |
|
"loss": 0.0048, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_f1": 0.8253722493039259, |
|
"eval_loss": 1.1277176141738892, |
|
"eval_runtime": 0.2469, |
|
"eval_samples_per_second": 882.906, |
|
"eval_steps_per_second": 16.2, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"grad_norm": 0.0046822689473629, |
|
"learning_rate": 3.4000000000000007e-05, |
|
"loss": 0.0041, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_f1": 0.8187101207450439, |
|
"eval_loss": 1.1826363801956177, |
|
"eval_runtime": 0.2504, |
|
"eval_samples_per_second": 870.593, |
|
"eval_steps_per_second": 15.974, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"grad_norm": 0.003356009954586625, |
|
"learning_rate": 3.35e-05, |
|
"loss": 0.0033, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_f1": 0.8126497250026661, |
|
"eval_loss": 1.1944386959075928, |
|
"eval_runtime": 0.2453, |
|
"eval_samples_per_second": 888.767, |
|
"eval_steps_per_second": 16.308, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"grad_norm": 0.005327207036316395, |
|
"learning_rate": 3.3e-05, |
|
"loss": 0.0087, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_f1": 0.8491252629565882, |
|
"eval_loss": 1.03484046459198, |
|
"eval_runtime": 0.2487, |
|
"eval_samples_per_second": 876.726, |
|
"eval_steps_per_second": 16.087, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"grad_norm": 0.003193259472027421, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.0056, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_f1": 0.831069428937251, |
|
"eval_loss": 1.0799349546432495, |
|
"eval_runtime": 0.2433, |
|
"eval_samples_per_second": 895.888, |
|
"eval_steps_per_second": 16.438, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"grad_norm": 0.0025981140788644552, |
|
"learning_rate": 3.2000000000000005e-05, |
|
"loss": 0.0056, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_f1": 0.8005370004504828, |
|
"eval_loss": 1.2397780418395996, |
|
"eval_runtime": 0.2558, |
|
"eval_samples_per_second": 852.208, |
|
"eval_steps_per_second": 15.637, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"grad_norm": 0.004587420262396336, |
|
"learning_rate": 3.15e-05, |
|
"loss": 0.0043, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_f1": 0.7960422975955538, |
|
"eval_loss": 1.3035740852355957, |
|
"eval_runtime": 0.2431, |
|
"eval_samples_per_second": 896.654, |
|
"eval_steps_per_second": 16.452, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"grad_norm": 0.0024126123171299696, |
|
"learning_rate": 3.1e-05, |
|
"loss": 0.004, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_f1": 0.8348215283066768, |
|
"eval_loss": 1.1289474964141846, |
|
"eval_runtime": 0.2451, |
|
"eval_samples_per_second": 889.465, |
|
"eval_steps_per_second": 16.32, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"grad_norm": 0.002631419338285923, |
|
"learning_rate": 3.05e-05, |
|
"loss": 0.0002, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_f1": 0.8470719620834265, |
|
"eval_loss": 1.0962085723876953, |
|
"eval_runtime": 0.2555, |
|
"eval_samples_per_second": 853.196, |
|
"eval_steps_per_second": 15.655, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 0.0036367273423820734, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0002, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_f1": 0.8475274660470973, |
|
"eval_loss": 1.1172648668289185, |
|
"eval_runtime": 0.2665, |
|
"eval_samples_per_second": 818.091, |
|
"eval_steps_per_second": 15.011, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"grad_norm": 0.002203166950494051, |
|
"learning_rate": 2.95e-05, |
|
"loss": 0.0002, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_f1": 0.8463311286594244, |
|
"eval_loss": 1.1128673553466797, |
|
"eval_runtime": 0.2476, |
|
"eval_samples_per_second": 880.33, |
|
"eval_steps_per_second": 16.153, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"grad_norm": 0.0019745519384741783, |
|
"learning_rate": 2.9e-05, |
|
"loss": 0.0002, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_f1": 0.8524668435013261, |
|
"eval_loss": 1.1187357902526855, |
|
"eval_runtime": 0.2433, |
|
"eval_samples_per_second": 896.105, |
|
"eval_steps_per_second": 16.442, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"grad_norm": 0.0018844620790332556, |
|
"learning_rate": 2.8499999999999998e-05, |
|
"loss": 0.0002, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_f1": 0.8524668435013261, |
|
"eval_loss": 1.1086227893829346, |
|
"eval_runtime": 0.2535, |
|
"eval_samples_per_second": 860.088, |
|
"eval_steps_per_second": 15.781, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"grad_norm": 0.0016616833163425326, |
|
"learning_rate": 2.8000000000000003e-05, |
|
"loss": 0.0002, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_f1": 0.8524668435013261, |
|
"eval_loss": 1.1084070205688477, |
|
"eval_runtime": 0.2467, |
|
"eval_samples_per_second": 883.486, |
|
"eval_steps_per_second": 16.211, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"grad_norm": 0.0017370691057294607, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.0002, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_f1": 0.8524668435013261, |
|
"eval_loss": 1.1120808124542236, |
|
"eval_runtime": 0.2544, |
|
"eval_samples_per_second": 856.909, |
|
"eval_steps_per_second": 15.723, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"grad_norm": 0.01709812693297863, |
|
"learning_rate": 2.7000000000000002e-05, |
|
"loss": 0.0018, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_f1": 0.8230951784833656, |
|
"eval_loss": 1.150782585144043, |
|
"eval_runtime": 0.2507, |
|
"eval_samples_per_second": 869.395, |
|
"eval_steps_per_second": 15.952, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"grad_norm": 0.0014819600619375706, |
|
"learning_rate": 2.6500000000000004e-05, |
|
"loss": 0.0033, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_f1": 0.8482902683102467, |
|
"eval_loss": 1.1178399324417114, |
|
"eval_runtime": 0.251, |
|
"eval_samples_per_second": 868.684, |
|
"eval_steps_per_second": 15.939, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"grad_norm": 0.001657757442444563, |
|
"learning_rate": 2.6000000000000002e-05, |
|
"loss": 0.0012, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_f1": 0.8485012469416702, |
|
"eval_loss": 1.1701571941375732, |
|
"eval_runtime": 0.2457, |
|
"eval_samples_per_second": 887.403, |
|
"eval_steps_per_second": 16.283, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"grad_norm": 0.001472759060561657, |
|
"learning_rate": 2.5500000000000003e-05, |
|
"loss": 0.0001, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_f1": 0.8471931986968837, |
|
"eval_loss": 1.1576160192489624, |
|
"eval_runtime": 0.2492, |
|
"eval_samples_per_second": 874.869, |
|
"eval_steps_per_second": 16.053, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"grad_norm": 0.0018886495381593704, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.0001, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_f1": 0.8533899487562311, |
|
"eval_loss": 1.1580414772033691, |
|
"eval_runtime": 0.2453, |
|
"eval_samples_per_second": 888.766, |
|
"eval_steps_per_second": 16.308, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"grad_norm": 0.0017091715708374977, |
|
"learning_rate": 2.45e-05, |
|
"loss": 0.0001, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_f1": 0.8533899487562311, |
|
"eval_loss": 1.157599687576294, |
|
"eval_runtime": 0.2534, |
|
"eval_samples_per_second": 860.384, |
|
"eval_steps_per_second": 15.787, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"grad_norm": 0.0013613449409604073, |
|
"learning_rate": 2.4e-05, |
|
"loss": 0.0001, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_f1": 0.8533899487562311, |
|
"eval_loss": 1.155985713005066, |
|
"eval_runtime": 0.2572, |
|
"eval_samples_per_second": 847.53, |
|
"eval_steps_per_second": 15.551, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"grad_norm": 0.001274469424970448, |
|
"learning_rate": 2.35e-05, |
|
"loss": 0.0001, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_f1": 0.8533899487562311, |
|
"eval_loss": 1.1571167707443237, |
|
"eval_runtime": 0.2412, |
|
"eval_samples_per_second": 903.632, |
|
"eval_steps_per_second": 16.58, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"grad_norm": 0.0013310050126165152, |
|
"learning_rate": 2.3000000000000003e-05, |
|
"loss": 0.0001, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_f1": 0.8533899487562311, |
|
"eval_loss": 1.1600090265274048, |
|
"eval_runtime": 0.2473, |
|
"eval_samples_per_second": 881.514, |
|
"eval_steps_per_second": 16.175, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"grad_norm": 0.0015337098157033324, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.0001, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_f1": 0.8426159574795251, |
|
"eval_loss": 1.1735517978668213, |
|
"eval_runtime": 0.2491, |
|
"eval_samples_per_second": 875.11, |
|
"eval_steps_per_second": 16.057, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"grad_norm": 0.008407847955822945, |
|
"learning_rate": 2.2000000000000003e-05, |
|
"loss": 0.0001, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_f1": 0.8426159574795251, |
|
"eval_loss": 1.1661510467529297, |
|
"eval_runtime": 0.2486, |
|
"eval_samples_per_second": 876.938, |
|
"eval_steps_per_second": 16.091, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"grad_norm": 0.0011946760350838304, |
|
"learning_rate": 2.15e-05, |
|
"loss": 0.0001, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_f1": 0.8475855910205873, |
|
"eval_loss": 1.1671065092086792, |
|
"eval_runtime": 0.2535, |
|
"eval_samples_per_second": 859.876, |
|
"eval_steps_per_second": 15.778, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"grad_norm": 0.0011800089851021767, |
|
"learning_rate": 2.1e-05, |
|
"loss": 0.0001, |
|
"step": 1392 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_f1": 0.8475855910205873, |
|
"eval_loss": 1.1681970357894897, |
|
"eval_runtime": 0.2494, |
|
"eval_samples_per_second": 874.117, |
|
"eval_steps_per_second": 16.039, |
|
"step": 1392 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"grad_norm": 0.0018475407268851995, |
|
"learning_rate": 2.05e-05, |
|
"loss": 0.0001, |
|
"step": 1416 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_f1": 0.8475855910205873, |
|
"eval_loss": 1.1666558980941772, |
|
"eval_runtime": 0.2484, |
|
"eval_samples_per_second": 877.516, |
|
"eval_steps_per_second": 16.101, |
|
"step": 1416 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"grad_norm": 0.0015070955269038677, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0001, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_f1": 0.8475855910205873, |
|
"eval_loss": 1.1691235303878784, |
|
"eval_runtime": 0.2503, |
|
"eval_samples_per_second": 870.922, |
|
"eval_steps_per_second": 15.98, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"grad_norm": 0.001129466574639082, |
|
"learning_rate": 1.9500000000000003e-05, |
|
"loss": 0.0001, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_f1": 0.8475855910205873, |
|
"eval_loss": 1.1708507537841797, |
|
"eval_runtime": 0.2483, |
|
"eval_samples_per_second": 878.104, |
|
"eval_steps_per_second": 16.112, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"grad_norm": 0.001501582097262144, |
|
"learning_rate": 1.9e-05, |
|
"loss": 0.0001, |
|
"step": 1488 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_f1": 0.8475855910205873, |
|
"eval_loss": 1.1724032163619995, |
|
"eval_runtime": 0.2483, |
|
"eval_samples_per_second": 877.807, |
|
"eval_steps_per_second": 16.107, |
|
"step": 1488 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"grad_norm": 0.0010985672706738114, |
|
"learning_rate": 1.85e-05, |
|
"loss": 0.0001, |
|
"step": 1512 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_f1": 0.8475855910205873, |
|
"eval_loss": 1.1738693714141846, |
|
"eval_runtime": 0.2494, |
|
"eval_samples_per_second": 874.122, |
|
"eval_steps_per_second": 16.039, |
|
"step": 1512 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"grad_norm": 0.08045843243598938, |
|
"learning_rate": 1.8e-05, |
|
"loss": 0.0001, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_f1": 0.8533899487562311, |
|
"eval_loss": 1.1738698482513428, |
|
"eval_runtime": 0.2562, |
|
"eval_samples_per_second": 850.855, |
|
"eval_steps_per_second": 15.612, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"grad_norm": 0.002004158915951848, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.0001, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_f1": 0.8471931986968837, |
|
"eval_loss": 1.1787022352218628, |
|
"eval_runtime": 0.2428, |
|
"eval_samples_per_second": 897.779, |
|
"eval_steps_per_second": 16.473, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"grad_norm": 0.001049485057592392, |
|
"learning_rate": 1.7000000000000003e-05, |
|
"loss": 0.0001, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_f1": 0.8471931986968837, |
|
"eval_loss": 1.180769443511963, |
|
"eval_runtime": 0.2525, |
|
"eval_samples_per_second": 863.258, |
|
"eval_steps_per_second": 15.84, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"grad_norm": 0.0010408489033579826, |
|
"learning_rate": 1.65e-05, |
|
"loss": 0.0001, |
|
"step": 1608 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_f1": 0.8471931986968837, |
|
"eval_loss": 1.1815507411956787, |
|
"eval_runtime": 0.2505, |
|
"eval_samples_per_second": 870.186, |
|
"eval_steps_per_second": 15.967, |
|
"step": 1608 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"grad_norm": 0.0012064232723787427, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 0.0001, |
|
"step": 1632 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_f1": 0.8471931986968837, |
|
"eval_loss": 1.182210922241211, |
|
"eval_runtime": 0.2583, |
|
"eval_samples_per_second": 844.028, |
|
"eval_steps_per_second": 15.487, |
|
"step": 1632 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"grad_norm": 0.0014143523294478655, |
|
"learning_rate": 1.55e-05, |
|
"loss": 0.0001, |
|
"step": 1656 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_f1": 0.8524668435013261, |
|
"eval_loss": 1.1818993091583252, |
|
"eval_runtime": 0.2452, |
|
"eval_samples_per_second": 888.962, |
|
"eval_steps_per_second": 16.311, |
|
"step": 1656 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2400, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2551863638350032.0, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|