|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"global_step": 2188, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.997714808043876e-05, |
|
"loss": 0.701, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9017367458866546e-05, |
|
"loss": 0.5623, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_accuracy": 0.9112, |
|
"eval_f1": 0.9178386380458919, |
|
"eval_loss": 0.3157382309436798, |
|
"eval_precision": 0.8539944903581267, |
|
"eval_recall": 0.992, |
|
"eval_runtime": 31.9687, |
|
"eval_samples_per_second": 117.302, |
|
"eval_steps_per_second": 14.671, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.8034734917733096e-05, |
|
"loss": 0.4041, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_accuracy": 0.9682666666666667, |
|
"eval_f1": 0.9688563203349909, |
|
"eval_loss": 0.10907813906669617, |
|
"eval_precision": 0.9511819116135662, |
|
"eval_recall": 0.9872, |
|
"eval_runtime": 31.8725, |
|
"eval_samples_per_second": 117.656, |
|
"eval_steps_per_second": 14.715, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.705210237659964e-05, |
|
"loss": 0.0811, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_accuracy": 0.9829333333333333, |
|
"eval_f1": 0.9830777366472766, |
|
"eval_loss": 0.11184453219175339, |
|
"eval_precision": 0.9748295752490823, |
|
"eval_recall": 0.9914666666666667, |
|
"eval_runtime": 32.1826, |
|
"eval_samples_per_second": 116.523, |
|
"eval_steps_per_second": 14.573, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.606946983546618e-05, |
|
"loss": 0.1455, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.9826666666666667, |
|
"eval_f1": 0.9828360179561658, |
|
"eval_loss": 0.08915094286203384, |
|
"eval_precision": 0.9733263598326359, |
|
"eval_recall": 0.9925333333333334, |
|
"eval_runtime": 31.7933, |
|
"eval_samples_per_second": 117.949, |
|
"eval_steps_per_second": 14.752, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.5086837294332726e-05, |
|
"loss": 0.1159, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.9722666666666666, |
|
"eval_f1": 0.9729448491155047, |
|
"eval_loss": 0.15372106432914734, |
|
"eval_precision": 0.9497206703910615, |
|
"eval_recall": 0.9973333333333333, |
|
"eval_runtime": 31.823, |
|
"eval_samples_per_second": 117.839, |
|
"eval_steps_per_second": 14.738, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.410420475319927e-05, |
|
"loss": 0.1902, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_accuracy": 0.9765333333333334, |
|
"eval_f1": 0.9769874476987447, |
|
"eval_loss": 0.11824183911085129, |
|
"eval_precision": 0.9584402257567983, |
|
"eval_recall": 0.9962666666666666, |
|
"eval_runtime": 31.879, |
|
"eval_samples_per_second": 117.632, |
|
"eval_steps_per_second": 14.712, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.312157221206581e-05, |
|
"loss": 0.0948, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.9917333333333334, |
|
"eval_f1": 0.9917443408788282, |
|
"eval_loss": 0.04853309690952301, |
|
"eval_precision": 0.9904255319148936, |
|
"eval_recall": 0.9930666666666667, |
|
"eval_runtime": 32.1725, |
|
"eval_samples_per_second": 116.559, |
|
"eval_steps_per_second": 14.578, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.213893967093236e-05, |
|
"loss": 0.0947, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.9589333333333333, |
|
"eval_f1": 0.960431654676259, |
|
"eval_loss": 0.21628336608409882, |
|
"eval_precision": 0.9266236985622212, |
|
"eval_recall": 0.9968, |
|
"eval_runtime": 31.9931, |
|
"eval_samples_per_second": 117.213, |
|
"eval_steps_per_second": 14.659, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.1156307129798905e-05, |
|
"loss": 0.1513, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_accuracy": 0.9885333333333334, |
|
"eval_f1": 0.9884315308044122, |
|
"eval_loss": 0.056088127195835114, |
|
"eval_precision": 0.99728555917481, |
|
"eval_recall": 0.9797333333333333, |
|
"eval_runtime": 31.868, |
|
"eval_samples_per_second": 117.673, |
|
"eval_steps_per_second": 14.717, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.017367458866545e-05, |
|
"loss": 0.2024, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.9770666666666666, |
|
"eval_f1": 0.9774869109947643, |
|
"eval_loss": 0.11541605740785599, |
|
"eval_precision": 0.9598971722365038, |
|
"eval_recall": 0.9957333333333334, |
|
"eval_runtime": 31.7697, |
|
"eval_samples_per_second": 118.037, |
|
"eval_steps_per_second": 14.763, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.919104204753199e-05, |
|
"loss": 0.0442, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_accuracy": 0.9872, |
|
"eval_f1": 0.9871931696905016, |
|
"eval_loss": 0.06453149765729904, |
|
"eval_precision": 0.987720234917245, |
|
"eval_recall": 0.9866666666666667, |
|
"eval_runtime": 31.8692, |
|
"eval_samples_per_second": 117.668, |
|
"eval_steps_per_second": 14.716, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.820840950639854e-05, |
|
"loss": 0.0859, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.9792, |
|
"eval_f1": 0.9794736842105262, |
|
"eval_loss": 0.11312653124332428, |
|
"eval_precision": 0.9667532467532467, |
|
"eval_recall": 0.9925333333333334, |
|
"eval_runtime": 31.8306, |
|
"eval_samples_per_second": 117.811, |
|
"eval_steps_per_second": 14.734, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.7225776965265085e-05, |
|
"loss": 0.0802, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_accuracy": 0.9866666666666667, |
|
"eval_f1": 0.9867654843832716, |
|
"eval_loss": 0.07340351492166519, |
|
"eval_precision": 0.9795060430898581, |
|
"eval_recall": 0.9941333333333333, |
|
"eval_runtime": 31.9433, |
|
"eval_samples_per_second": 117.396, |
|
"eval_steps_per_second": 14.682, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.624314442413163e-05, |
|
"loss": 0.0754, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_accuracy": 0.9909333333333333, |
|
"eval_f1": 0.9909718534253851, |
|
"eval_loss": 0.05534309148788452, |
|
"eval_precision": 0.9867794817556849, |
|
"eval_recall": 0.9952, |
|
"eval_runtime": 31.9035, |
|
"eval_samples_per_second": 117.542, |
|
"eval_steps_per_second": 14.701, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.526051188299818e-05, |
|
"loss": 0.0446, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_accuracy": 0.9802666666666666, |
|
"eval_f1": 0.980546792849632, |
|
"eval_loss": 0.10309968888759613, |
|
"eval_precision": 0.966822187662001, |
|
"eval_recall": 0.9946666666666667, |
|
"eval_runtime": 31.8495, |
|
"eval_samples_per_second": 117.741, |
|
"eval_steps_per_second": 14.725, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.4277879341864715e-05, |
|
"loss": 0.2247, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_accuracy": 0.9837333333333333, |
|
"eval_f1": 0.9839262187088275, |
|
"eval_loss": 0.05565320700407028, |
|
"eval_precision": 0.9723958333333333, |
|
"eval_recall": 0.9957333333333334, |
|
"eval_runtime": 31.8106, |
|
"eval_samples_per_second": 117.885, |
|
"eval_steps_per_second": 14.744, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.329524680073126e-05, |
|
"loss": 0.0198, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.972, |
|
"eval_f1": 0.9726491273769211, |
|
"eval_loss": 0.19524268805980682, |
|
"eval_precision": 0.9506109979633401, |
|
"eval_recall": 0.9957333333333334, |
|
"eval_runtime": 31.8384, |
|
"eval_samples_per_second": 117.782, |
|
"eval_steps_per_second": 14.731, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.231261425959781e-05, |
|
"loss": 0.144, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_accuracy": 0.9845333333333334, |
|
"eval_f1": 0.984688489968321, |
|
"eval_loss": 0.07750081270933151, |
|
"eval_precision": 0.9749085206481966, |
|
"eval_recall": 0.9946666666666667, |
|
"eval_runtime": 31.9146, |
|
"eval_samples_per_second": 117.501, |
|
"eval_steps_per_second": 14.695, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.132998171846435e-05, |
|
"loss": 0.1193, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.988, |
|
"eval_f1": 0.9879194630872483, |
|
"eval_loss": 0.04342731088399887, |
|
"eval_precision": 0.9945945945945946, |
|
"eval_recall": 0.9813333333333333, |
|
"eval_runtime": 32.0746, |
|
"eval_samples_per_second": 116.915, |
|
"eval_steps_per_second": 14.622, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.03473491773309e-05, |
|
"loss": 0.0813, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_accuracy": 0.9773333333333334, |
|
"eval_f1": 0.9777661522364636, |
|
"eval_loss": 0.13094328343868256, |
|
"eval_precision": 0.9594455852156057, |
|
"eval_recall": 0.9968, |
|
"eval_runtime": 31.8439, |
|
"eval_samples_per_second": 117.762, |
|
"eval_steps_per_second": 14.728, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.9364716636197442e-05, |
|
"loss": 0.1484, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_accuracy": 0.9893333333333333, |
|
"eval_f1": 0.989406779661017, |
|
"eval_loss": 0.04147784784436226, |
|
"eval_precision": 0.9826407154129405, |
|
"eval_recall": 0.9962666666666666, |
|
"eval_runtime": 32.1496, |
|
"eval_samples_per_second": 116.642, |
|
"eval_steps_per_second": 14.588, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.838208409506399e-05, |
|
"loss": 0.0575, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_accuracy": 0.9936, |
|
"eval_f1": 0.9935897435897435, |
|
"eval_loss": 0.0342133492231369, |
|
"eval_precision": 0.9951845906902087, |
|
"eval_recall": 0.992, |
|
"eval_runtime": 31.8831, |
|
"eval_samples_per_second": 117.617, |
|
"eval_steps_per_second": 14.71, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.739945155393053e-05, |
|
"loss": 0.0377, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_accuracy": 0.984, |
|
"eval_f1": 0.9842022116903633, |
|
"eval_loss": 0.08944225311279297, |
|
"eval_precision": 0.9719188767550702, |
|
"eval_recall": 0.9968, |
|
"eval_runtime": 31.8105, |
|
"eval_samples_per_second": 117.886, |
|
"eval_steps_per_second": 14.744, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.641681901279708e-05, |
|
"loss": 0.023, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_accuracy": 0.9666666666666667, |
|
"eval_f1": 0.9676752004137574, |
|
"eval_loss": 0.2229950875043869, |
|
"eval_precision": 0.9392570281124498, |
|
"eval_recall": 0.9978666666666667, |
|
"eval_runtime": 32.2928, |
|
"eval_samples_per_second": 116.125, |
|
"eval_steps_per_second": 14.523, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.5434186471663625e-05, |
|
"loss": 0.0147, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_accuracy": 0.9922666666666666, |
|
"eval_f1": 0.9922892847646902, |
|
"eval_loss": 0.054798923432826996, |
|
"eval_precision": 0.9893955461293743, |
|
"eval_recall": 0.9952, |
|
"eval_runtime": 31.9316, |
|
"eval_samples_per_second": 117.439, |
|
"eval_steps_per_second": 14.688, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.4451553930530165e-05, |
|
"loss": 0.0904, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_accuracy": 0.9946666666666667, |
|
"eval_f1": 0.9946638207043758, |
|
"eval_loss": 0.029427310451865196, |
|
"eval_precision": 0.9951948745328351, |
|
"eval_recall": 0.9941333333333333, |
|
"eval_runtime": 31.7699, |
|
"eval_samples_per_second": 118.036, |
|
"eval_steps_per_second": 14.762, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.346892138939671e-05, |
|
"loss": 0.0578, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_accuracy": 0.9922666666666666, |
|
"eval_f1": 0.992281075326058, |
|
"eval_loss": 0.03940477967262268, |
|
"eval_precision": 0.9904357066950054, |
|
"eval_recall": 0.9941333333333333, |
|
"eval_runtime": 31.7813, |
|
"eval_samples_per_second": 117.994, |
|
"eval_steps_per_second": 14.757, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.2486288848263255e-05, |
|
"loss": 0.0049, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_accuracy": 0.9837333333333333, |
|
"eval_f1": 0.9839515916863982, |
|
"eval_loss": 0.10390568524599075, |
|
"eval_precision": 0.9709241952232607, |
|
"eval_recall": 0.9973333333333333, |
|
"eval_runtime": 31.8349, |
|
"eval_samples_per_second": 117.795, |
|
"eval_steps_per_second": 14.732, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.1503656307129798e-05, |
|
"loss": 0.0008, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_accuracy": 0.9930666666666667, |
|
"eval_f1": 0.9930851063829788, |
|
"eval_loss": 0.04775088652968407, |
|
"eval_precision": 0.9904509283819629, |
|
"eval_recall": 0.9957333333333334, |
|
"eval_runtime": 31.8926, |
|
"eval_samples_per_second": 117.582, |
|
"eval_steps_per_second": 14.706, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.0521023765996345e-05, |
|
"loss": 0.0437, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_accuracy": 0.9848, |
|
"eval_f1": 0.9849802371541502, |
|
"eval_loss": 0.10180956870317459, |
|
"eval_precision": 0.9734375, |
|
"eval_recall": 0.9968, |
|
"eval_runtime": 31.9165, |
|
"eval_samples_per_second": 117.494, |
|
"eval_steps_per_second": 14.695, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.953839122486289e-05, |
|
"loss": 0.0482, |
|
"step": 1333 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_accuracy": 0.9952, |
|
"eval_f1": 0.995187165775401, |
|
"eval_loss": 0.03259880468249321, |
|
"eval_precision": 0.9978552278820375, |
|
"eval_recall": 0.9925333333333334, |
|
"eval_runtime": 31.8461, |
|
"eval_samples_per_second": 117.754, |
|
"eval_steps_per_second": 14.727, |
|
"step": 1333 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8555758683729435e-05, |
|
"loss": 0.0513, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_accuracy": 0.9941333333333333, |
|
"eval_f1": 0.9941302027748132, |
|
"eval_loss": 0.03475691378116608, |
|
"eval_precision": 0.99466097170315, |
|
"eval_recall": 0.9936, |
|
"eval_runtime": 31.7619, |
|
"eval_samples_per_second": 118.066, |
|
"eval_steps_per_second": 14.766, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.7573126142595978e-05, |
|
"loss": 0.0184, |
|
"step": 1419 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_accuracy": 0.9888, |
|
"eval_f1": 0.9888830068819482, |
|
"eval_loss": 0.07543797791004181, |
|
"eval_precision": 0.9816079873883342, |
|
"eval_recall": 0.9962666666666666, |
|
"eval_runtime": 31.8663, |
|
"eval_samples_per_second": 117.679, |
|
"eval_steps_per_second": 14.718, |
|
"step": 1419 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.659049360146252e-05, |
|
"loss": 0.0327, |
|
"step": 1462 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_accuracy": 0.9914666666666667, |
|
"eval_f1": 0.9914984059511159, |
|
"eval_loss": 0.05370575934648514, |
|
"eval_precision": 0.9878242456326098, |
|
"eval_recall": 0.9952, |
|
"eval_runtime": 31.7637, |
|
"eval_samples_per_second": 118.059, |
|
"eval_steps_per_second": 14.765, |
|
"step": 1462 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.5607861060329068e-05, |
|
"loss": 0.0218, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_accuracy": 0.988, |
|
"eval_f1": 0.9880920878539297, |
|
"eval_loss": 0.07749391347169876, |
|
"eval_precision": 0.9805672268907563, |
|
"eval_recall": 0.9957333333333334, |
|
"eval_runtime": 32.0308, |
|
"eval_samples_per_second": 117.075, |
|
"eval_steps_per_second": 14.642, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.4625228519195613e-05, |
|
"loss": 0.0592, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_accuracy": 0.9906666666666667, |
|
"eval_f1": 0.9907137171663571, |
|
"eval_loss": 0.05098994821310043, |
|
"eval_precision": 0.9857444561774024, |
|
"eval_recall": 0.9957333333333334, |
|
"eval_runtime": 32.1011, |
|
"eval_samples_per_second": 116.818, |
|
"eval_steps_per_second": 14.61, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.3642595978062158e-05, |
|
"loss": 0.0506, |
|
"step": 1591 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_accuracy": 0.9928, |
|
"eval_f1": 0.9928172386272945, |
|
"eval_loss": 0.040217798203229904, |
|
"eval_precision": 0.9904458598726115, |
|
"eval_recall": 0.9952, |
|
"eval_runtime": 32.4765, |
|
"eval_samples_per_second": 115.468, |
|
"eval_steps_per_second": 14.441, |
|
"step": 1591 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.2659963436928701e-05, |
|
"loss": 0.0228, |
|
"step": 1634 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_accuracy": 0.9946666666666667, |
|
"eval_f1": 0.9946695095948828, |
|
"eval_loss": 0.031395528465509415, |
|
"eval_precision": 0.9941395844432606, |
|
"eval_recall": 0.9952, |
|
"eval_runtime": 32.1532, |
|
"eval_samples_per_second": 116.629, |
|
"eval_steps_per_second": 14.586, |
|
"step": 1634 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.1677330895795248e-05, |
|
"loss": 0.0297, |
|
"step": 1677 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_accuracy": 0.9928, |
|
"eval_f1": 0.9928210582291943, |
|
"eval_loss": 0.044785238802433014, |
|
"eval_precision": 0.9899257688229056, |
|
"eval_recall": 0.9957333333333334, |
|
"eval_runtime": 32.1967, |
|
"eval_samples_per_second": 116.472, |
|
"eval_steps_per_second": 14.567, |
|
"step": 1677 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.0694698354661791e-05, |
|
"loss": 0.0138, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_accuracy": 0.9853333333333333, |
|
"eval_f1": 0.9855072463768116, |
|
"eval_loss": 0.09062495082616806, |
|
"eval_precision": 0.9739583333333334, |
|
"eval_recall": 0.9973333333333333, |
|
"eval_runtime": 32.0852, |
|
"eval_samples_per_second": 116.876, |
|
"eval_steps_per_second": 14.617, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.712065813528338e-06, |
|
"loss": 0.0949, |
|
"step": 1763 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_accuracy": 0.9941333333333333, |
|
"eval_f1": 0.994136460554371, |
|
"eval_loss": 0.028302613645792007, |
|
"eval_precision": 0.9936068193926478, |
|
"eval_recall": 0.9946666666666667, |
|
"eval_runtime": 32.2498, |
|
"eval_samples_per_second": 116.28, |
|
"eval_steps_per_second": 14.543, |
|
"step": 1763 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.729433272394881e-06, |
|
"loss": 0.0614, |
|
"step": 1806 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_accuracy": 0.9941333333333333, |
|
"eval_f1": 0.9941333333333333, |
|
"eval_loss": 0.029561299830675125, |
|
"eval_precision": 0.9941333333333333, |
|
"eval_recall": 0.9941333333333333, |
|
"eval_runtime": 32.2034, |
|
"eval_samples_per_second": 116.447, |
|
"eval_steps_per_second": 14.564, |
|
"step": 1806 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.746800731261426e-06, |
|
"loss": 0.0012, |
|
"step": 1849 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_accuracy": 0.9936, |
|
"eval_f1": 0.9936068193926478, |
|
"eval_loss": 0.03323618695139885, |
|
"eval_precision": 0.9925492283129325, |
|
"eval_recall": 0.9946666666666667, |
|
"eval_runtime": 32.2063, |
|
"eval_samples_per_second": 116.437, |
|
"eval_steps_per_second": 14.562, |
|
"step": 1849 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 6.764168190127972e-06, |
|
"loss": 0.015, |
|
"step": 1892 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_accuracy": 0.9944, |
|
"eval_f1": 0.9943955164131306, |
|
"eval_loss": 0.03202632814645767, |
|
"eval_precision": 0.9951923076923077, |
|
"eval_recall": 0.9936, |
|
"eval_runtime": 32.1602, |
|
"eval_samples_per_second": 116.604, |
|
"eval_steps_per_second": 14.583, |
|
"step": 1892 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5.781535648994515e-06, |
|
"loss": 0.0266, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_accuracy": 0.9922666666666666, |
|
"eval_f1": 0.9922933829391443, |
|
"eval_loss": 0.042477842420339584, |
|
"eval_precision": 0.9888771186440678, |
|
"eval_recall": 0.9957333333333334, |
|
"eval_runtime": 32.2356, |
|
"eval_samples_per_second": 116.331, |
|
"eval_steps_per_second": 14.549, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.798903107861061e-06, |
|
"loss": 0.0292, |
|
"step": 1978 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.9901333333333333, |
|
"eval_f1": 0.9901934799893983, |
|
"eval_loss": 0.04828348755836487, |
|
"eval_precision": 0.9841938883034773, |
|
"eval_recall": 0.9962666666666666, |
|
"eval_runtime": 32.2496, |
|
"eval_samples_per_second": 116.281, |
|
"eval_steps_per_second": 14.543, |
|
"step": 1978 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.816270566727605e-06, |
|
"loss": 0.0141, |
|
"step": 2021 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_accuracy": 0.9890666666666666, |
|
"eval_f1": 0.9891505689335803, |
|
"eval_loss": 0.052797310054302216, |
|
"eval_precision": 0.9816176470588235, |
|
"eval_recall": 0.9968, |
|
"eval_runtime": 32.2638, |
|
"eval_samples_per_second": 116.229, |
|
"eval_steps_per_second": 14.536, |
|
"step": 2021 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.83363802559415e-06, |
|
"loss": 0.0006, |
|
"step": 2064 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_accuracy": 0.9909333333333333, |
|
"eval_f1": 0.9909862142099681, |
|
"eval_loss": 0.04819910600781441, |
|
"eval_precision": 0.985239852398524, |
|
"eval_recall": 0.9968, |
|
"eval_runtime": 32.596, |
|
"eval_samples_per_second": 115.045, |
|
"eval_steps_per_second": 14.388, |
|
"step": 2064 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.851005484460695e-06, |
|
"loss": 0.0225, |
|
"step": 2107 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_accuracy": 0.9922666666666666, |
|
"eval_f1": 0.9922933829391443, |
|
"eval_loss": 0.03804012015461922, |
|
"eval_precision": 0.9888771186440678, |
|
"eval_recall": 0.9957333333333334, |
|
"eval_runtime": 32.1606, |
|
"eval_samples_per_second": 116.602, |
|
"eval_steps_per_second": 14.583, |
|
"step": 2107 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.683729433272396e-07, |
|
"loss": 0.0199, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_accuracy": 0.9925333333333334, |
|
"eval_f1": 0.9925492283129324, |
|
"eval_loss": 0.036869850009679794, |
|
"eval_precision": 0.9904407859798194, |
|
"eval_recall": 0.9946666666666667, |
|
"eval_runtime": 32.2379, |
|
"eval_samples_per_second": 116.323, |
|
"eval_steps_per_second": 14.548, |
|
"step": 2150 |
|
} |
|
], |
|
"max_steps": 2188, |
|
"num_train_epochs": 1, |
|
"total_flos": 4604443468800000.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|