{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 2188, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.997714808043876e-05, "loss": 0.701, "step": 1 }, { "epoch": 0.02, "learning_rate": 4.9017367458866546e-05, "loss": 0.5623, "step": 43 }, { "epoch": 0.02, "eval_accuracy": 0.9112, "eval_f1": 0.9178386380458919, "eval_loss": 0.3157382309436798, "eval_precision": 0.8539944903581267, "eval_recall": 0.992, "eval_runtime": 31.9687, "eval_samples_per_second": 117.302, "eval_steps_per_second": 14.671, "step": 43 }, { "epoch": 0.04, "learning_rate": 4.8034734917733096e-05, "loss": 0.4041, "step": 86 }, { "epoch": 0.04, "eval_accuracy": 0.9682666666666667, "eval_f1": 0.9688563203349909, "eval_loss": 0.10907813906669617, "eval_precision": 0.9511819116135662, "eval_recall": 0.9872, "eval_runtime": 31.8725, "eval_samples_per_second": 117.656, "eval_steps_per_second": 14.715, "step": 86 }, { "epoch": 0.06, "learning_rate": 4.705210237659964e-05, "loss": 0.0811, "step": 129 }, { "epoch": 0.06, "eval_accuracy": 0.9829333333333333, "eval_f1": 0.9830777366472766, "eval_loss": 0.11184453219175339, "eval_precision": 0.9748295752490823, "eval_recall": 0.9914666666666667, "eval_runtime": 32.1826, "eval_samples_per_second": 116.523, "eval_steps_per_second": 14.573, "step": 129 }, { "epoch": 0.08, "learning_rate": 4.606946983546618e-05, "loss": 0.1455, "step": 172 }, { "epoch": 0.08, "eval_accuracy": 0.9826666666666667, "eval_f1": 0.9828360179561658, "eval_loss": 0.08915094286203384, "eval_precision": 0.9733263598326359, "eval_recall": 0.9925333333333334, "eval_runtime": 31.7933, "eval_samples_per_second": 117.949, "eval_steps_per_second": 14.752, "step": 172 }, { "epoch": 0.1, "learning_rate": 4.5086837294332726e-05, "loss": 0.1159, "step": 215 }, { "epoch": 0.1, "eval_accuracy": 0.9722666666666666, "eval_f1": 0.9729448491155047, "eval_loss": 0.15372106432914734, "eval_precision": 0.9497206703910615, "eval_recall": 0.9973333333333333, "eval_runtime": 31.823, "eval_samples_per_second": 117.839, "eval_steps_per_second": 14.738, "step": 215 }, { "epoch": 0.12, "learning_rate": 4.410420475319927e-05, "loss": 0.1902, "step": 258 }, { "epoch": 0.12, "eval_accuracy": 0.9765333333333334, "eval_f1": 0.9769874476987447, "eval_loss": 0.11824183911085129, "eval_precision": 0.9584402257567983, "eval_recall": 0.9962666666666666, "eval_runtime": 31.879, "eval_samples_per_second": 117.632, "eval_steps_per_second": 14.712, "step": 258 }, { "epoch": 0.14, "learning_rate": 4.312157221206581e-05, "loss": 0.0948, "step": 301 }, { "epoch": 0.14, "eval_accuracy": 0.9917333333333334, "eval_f1": 0.9917443408788282, "eval_loss": 0.04853309690952301, "eval_precision": 0.9904255319148936, "eval_recall": 0.9930666666666667, "eval_runtime": 32.1725, "eval_samples_per_second": 116.559, "eval_steps_per_second": 14.578, "step": 301 }, { "epoch": 0.16, "learning_rate": 4.213893967093236e-05, "loss": 0.0947, "step": 344 }, { "epoch": 0.16, "eval_accuracy": 0.9589333333333333, "eval_f1": 0.960431654676259, "eval_loss": 0.21628336608409882, "eval_precision": 0.9266236985622212, "eval_recall": 0.9968, "eval_runtime": 31.9931, "eval_samples_per_second": 117.213, "eval_steps_per_second": 14.659, "step": 344 }, { "epoch": 0.18, "learning_rate": 4.1156307129798905e-05, "loss": 0.1513, "step": 387 }, { "epoch": 0.18, "eval_accuracy": 0.9885333333333334, "eval_f1": 0.9884315308044122, "eval_loss": 0.056088127195835114, "eval_precision": 0.99728555917481, "eval_recall": 0.9797333333333333, "eval_runtime": 31.868, "eval_samples_per_second": 117.673, "eval_steps_per_second": 14.717, "step": 387 }, { "epoch": 0.2, "learning_rate": 4.017367458866545e-05, "loss": 0.2024, "step": 430 }, { "epoch": 0.2, "eval_accuracy": 0.9770666666666666, "eval_f1": 0.9774869109947643, "eval_loss": 0.11541605740785599, "eval_precision": 0.9598971722365038, "eval_recall": 0.9957333333333334, "eval_runtime": 31.7697, "eval_samples_per_second": 118.037, "eval_steps_per_second": 14.763, "step": 430 }, { "epoch": 0.22, "learning_rate": 3.919104204753199e-05, "loss": 0.0442, "step": 473 }, { "epoch": 0.22, "eval_accuracy": 0.9872, "eval_f1": 0.9871931696905016, "eval_loss": 0.06453149765729904, "eval_precision": 0.987720234917245, "eval_recall": 0.9866666666666667, "eval_runtime": 31.8692, "eval_samples_per_second": 117.668, "eval_steps_per_second": 14.716, "step": 473 }, { "epoch": 0.24, "learning_rate": 3.820840950639854e-05, "loss": 0.0859, "step": 516 }, { "epoch": 0.24, "eval_accuracy": 0.9792, "eval_f1": 0.9794736842105262, "eval_loss": 0.11312653124332428, "eval_precision": 0.9667532467532467, "eval_recall": 0.9925333333333334, "eval_runtime": 31.8306, "eval_samples_per_second": 117.811, "eval_steps_per_second": 14.734, "step": 516 }, { "epoch": 0.26, "learning_rate": 3.7225776965265085e-05, "loss": 0.0802, "step": 559 }, { "epoch": 0.26, "eval_accuracy": 0.9866666666666667, "eval_f1": 0.9867654843832716, "eval_loss": 0.07340351492166519, "eval_precision": 0.9795060430898581, "eval_recall": 0.9941333333333333, "eval_runtime": 31.9433, "eval_samples_per_second": 117.396, "eval_steps_per_second": 14.682, "step": 559 }, { "epoch": 0.28, "learning_rate": 3.624314442413163e-05, "loss": 0.0754, "step": 602 }, { "epoch": 0.28, "eval_accuracy": 0.9909333333333333, "eval_f1": 0.9909718534253851, "eval_loss": 0.05534309148788452, "eval_precision": 0.9867794817556849, "eval_recall": 0.9952, "eval_runtime": 31.9035, "eval_samples_per_second": 117.542, "eval_steps_per_second": 14.701, "step": 602 }, { "epoch": 0.29, "learning_rate": 3.526051188299818e-05, "loss": 0.0446, "step": 645 }, { "epoch": 0.29, "eval_accuracy": 0.9802666666666666, "eval_f1": 0.980546792849632, "eval_loss": 0.10309968888759613, "eval_precision": 0.966822187662001, "eval_recall": 0.9946666666666667, "eval_runtime": 31.8495, "eval_samples_per_second": 117.741, "eval_steps_per_second": 14.725, "step": 645 }, { "epoch": 0.31, "learning_rate": 3.4277879341864715e-05, "loss": 0.2247, "step": 688 }, { "epoch": 0.31, "eval_accuracy": 0.9837333333333333, "eval_f1": 0.9839262187088275, "eval_loss": 0.05565320700407028, "eval_precision": 0.9723958333333333, "eval_recall": 0.9957333333333334, "eval_runtime": 31.8106, "eval_samples_per_second": 117.885, "eval_steps_per_second": 14.744, "step": 688 }, { "epoch": 0.33, "learning_rate": 3.329524680073126e-05, "loss": 0.0198, "step": 731 }, { "epoch": 0.33, "eval_accuracy": 0.972, "eval_f1": 0.9726491273769211, "eval_loss": 0.19524268805980682, "eval_precision": 0.9506109979633401, "eval_recall": 0.9957333333333334, "eval_runtime": 31.8384, "eval_samples_per_second": 117.782, "eval_steps_per_second": 14.731, "step": 731 }, { "epoch": 0.35, "learning_rate": 3.231261425959781e-05, "loss": 0.144, "step": 774 }, { "epoch": 0.35, "eval_accuracy": 0.9845333333333334, "eval_f1": 0.984688489968321, "eval_loss": 0.07750081270933151, "eval_precision": 0.9749085206481966, "eval_recall": 0.9946666666666667, "eval_runtime": 31.9146, "eval_samples_per_second": 117.501, "eval_steps_per_second": 14.695, "step": 774 }, { "epoch": 0.37, "learning_rate": 3.132998171846435e-05, "loss": 0.1193, "step": 817 }, { "epoch": 0.37, "eval_accuracy": 0.988, "eval_f1": 0.9879194630872483, "eval_loss": 0.04342731088399887, "eval_precision": 0.9945945945945946, "eval_recall": 0.9813333333333333, "eval_runtime": 32.0746, "eval_samples_per_second": 116.915, "eval_steps_per_second": 14.622, "step": 817 }, { "epoch": 0.39, "learning_rate": 3.03473491773309e-05, "loss": 0.0813, "step": 860 }, { "epoch": 0.39, "eval_accuracy": 0.9773333333333334, "eval_f1": 0.9777661522364636, "eval_loss": 0.13094328343868256, "eval_precision": 0.9594455852156057, "eval_recall": 0.9968, "eval_runtime": 31.8439, "eval_samples_per_second": 117.762, "eval_steps_per_second": 14.728, "step": 860 }, { "epoch": 0.41, "learning_rate": 2.9364716636197442e-05, "loss": 0.1484, "step": 903 }, { "epoch": 0.41, "eval_accuracy": 0.9893333333333333, "eval_f1": 0.989406779661017, "eval_loss": 0.04147784784436226, "eval_precision": 0.9826407154129405, "eval_recall": 0.9962666666666666, "eval_runtime": 32.1496, "eval_samples_per_second": 116.642, "eval_steps_per_second": 14.588, "step": 903 }, { "epoch": 0.43, "learning_rate": 2.838208409506399e-05, "loss": 0.0575, "step": 946 }, { "epoch": 0.43, "eval_accuracy": 0.9936, "eval_f1": 0.9935897435897435, "eval_loss": 0.0342133492231369, "eval_precision": 0.9951845906902087, "eval_recall": 0.992, "eval_runtime": 31.8831, "eval_samples_per_second": 117.617, "eval_steps_per_second": 14.71, "step": 946 }, { "epoch": 0.45, "learning_rate": 2.739945155393053e-05, "loss": 0.0377, "step": 989 }, { "epoch": 0.45, "eval_accuracy": 0.984, "eval_f1": 0.9842022116903633, "eval_loss": 0.08944225311279297, "eval_precision": 0.9719188767550702, "eval_recall": 0.9968, "eval_runtime": 31.8105, "eval_samples_per_second": 117.886, "eval_steps_per_second": 14.744, "step": 989 }, { "epoch": 0.47, "learning_rate": 2.641681901279708e-05, "loss": 0.023, "step": 1032 }, { "epoch": 0.47, "eval_accuracy": 0.9666666666666667, "eval_f1": 0.9676752004137574, "eval_loss": 0.2229950875043869, "eval_precision": 0.9392570281124498, "eval_recall": 0.9978666666666667, "eval_runtime": 32.2928, "eval_samples_per_second": 116.125, "eval_steps_per_second": 14.523, "step": 1032 }, { "epoch": 0.49, "learning_rate": 2.5434186471663625e-05, "loss": 0.0147, "step": 1075 }, { "epoch": 0.49, "eval_accuracy": 0.9922666666666666, "eval_f1": 0.9922892847646902, "eval_loss": 0.054798923432826996, "eval_precision": 0.9893955461293743, "eval_recall": 0.9952, "eval_runtime": 31.9316, "eval_samples_per_second": 117.439, "eval_steps_per_second": 14.688, "step": 1075 }, { "epoch": 0.51, "learning_rate": 2.4451553930530165e-05, "loss": 0.0904, "step": 1118 }, { "epoch": 0.51, "eval_accuracy": 0.9946666666666667, "eval_f1": 0.9946638207043758, "eval_loss": 0.029427310451865196, "eval_precision": 0.9951948745328351, "eval_recall": 0.9941333333333333, "eval_runtime": 31.7699, "eval_samples_per_second": 118.036, "eval_steps_per_second": 14.762, "step": 1118 }, { "epoch": 0.53, "learning_rate": 2.346892138939671e-05, "loss": 0.0578, "step": 1161 }, { "epoch": 0.53, "eval_accuracy": 0.9922666666666666, "eval_f1": 0.992281075326058, "eval_loss": 0.03940477967262268, "eval_precision": 0.9904357066950054, "eval_recall": 0.9941333333333333, "eval_runtime": 31.7813, "eval_samples_per_second": 117.994, "eval_steps_per_second": 14.757, "step": 1161 }, { "epoch": 0.55, "learning_rate": 2.2486288848263255e-05, "loss": 0.0049, "step": 1204 }, { "epoch": 0.55, "eval_accuracy": 0.9837333333333333, "eval_f1": 0.9839515916863982, "eval_loss": 0.10390568524599075, "eval_precision": 0.9709241952232607, "eval_recall": 0.9973333333333333, "eval_runtime": 31.8349, "eval_samples_per_second": 117.795, "eval_steps_per_second": 14.732, "step": 1204 }, { "epoch": 0.57, "learning_rate": 2.1503656307129798e-05, "loss": 0.0008, "step": 1247 }, { "epoch": 0.57, "eval_accuracy": 0.9930666666666667, "eval_f1": 0.9930851063829788, "eval_loss": 0.04775088652968407, "eval_precision": 0.9904509283819629, "eval_recall": 0.9957333333333334, "eval_runtime": 31.8926, "eval_samples_per_second": 117.582, "eval_steps_per_second": 14.706, "step": 1247 }, { "epoch": 0.59, "learning_rate": 2.0521023765996345e-05, "loss": 0.0437, "step": 1290 }, { "epoch": 0.59, "eval_accuracy": 0.9848, "eval_f1": 0.9849802371541502, "eval_loss": 0.10180956870317459, "eval_precision": 0.9734375, "eval_recall": 0.9968, "eval_runtime": 31.9165, "eval_samples_per_second": 117.494, "eval_steps_per_second": 14.695, "step": 1290 }, { "epoch": 0.61, "learning_rate": 1.953839122486289e-05, "loss": 0.0482, "step": 1333 }, { "epoch": 0.61, "eval_accuracy": 0.9952, "eval_f1": 0.995187165775401, "eval_loss": 0.03259880468249321, "eval_precision": 0.9978552278820375, "eval_recall": 0.9925333333333334, "eval_runtime": 31.8461, "eval_samples_per_second": 117.754, "eval_steps_per_second": 14.727, "step": 1333 }, { "epoch": 0.63, "learning_rate": 1.8555758683729435e-05, "loss": 0.0513, "step": 1376 }, { "epoch": 0.63, "eval_accuracy": 0.9941333333333333, "eval_f1": 0.9941302027748132, "eval_loss": 0.03475691378116608, "eval_precision": 0.99466097170315, "eval_recall": 0.9936, "eval_runtime": 31.7619, "eval_samples_per_second": 118.066, "eval_steps_per_second": 14.766, "step": 1376 }, { "epoch": 0.65, "learning_rate": 1.7573126142595978e-05, "loss": 0.0184, "step": 1419 }, { "epoch": 0.65, "eval_accuracy": 0.9888, "eval_f1": 0.9888830068819482, "eval_loss": 0.07543797791004181, "eval_precision": 0.9816079873883342, "eval_recall": 0.9962666666666666, "eval_runtime": 31.8663, "eval_samples_per_second": 117.679, "eval_steps_per_second": 14.718, "step": 1419 }, { "epoch": 0.67, "learning_rate": 1.659049360146252e-05, "loss": 0.0327, "step": 1462 }, { "epoch": 0.67, "eval_accuracy": 0.9914666666666667, "eval_f1": 0.9914984059511159, "eval_loss": 0.05370575934648514, "eval_precision": 0.9878242456326098, "eval_recall": 0.9952, "eval_runtime": 31.7637, "eval_samples_per_second": 118.059, "eval_steps_per_second": 14.765, "step": 1462 }, { "epoch": 0.69, "learning_rate": 1.5607861060329068e-05, "loss": 0.0218, "step": 1505 }, { "epoch": 0.69, "eval_accuracy": 0.988, "eval_f1": 0.9880920878539297, "eval_loss": 0.07749391347169876, "eval_precision": 0.9805672268907563, "eval_recall": 0.9957333333333334, "eval_runtime": 32.0308, "eval_samples_per_second": 117.075, "eval_steps_per_second": 14.642, "step": 1505 }, { "epoch": 0.71, "learning_rate": 1.4625228519195613e-05, "loss": 0.0592, "step": 1548 }, { "epoch": 0.71, "eval_accuracy": 0.9906666666666667, "eval_f1": 0.9907137171663571, "eval_loss": 0.05098994821310043, "eval_precision": 0.9857444561774024, "eval_recall": 0.9957333333333334, "eval_runtime": 32.1011, "eval_samples_per_second": 116.818, "eval_steps_per_second": 14.61, "step": 1548 }, { "epoch": 0.73, "learning_rate": 1.3642595978062158e-05, "loss": 0.0506, "step": 1591 }, { "epoch": 0.73, "eval_accuracy": 0.9928, "eval_f1": 0.9928172386272945, "eval_loss": 0.040217798203229904, "eval_precision": 0.9904458598726115, "eval_recall": 0.9952, "eval_runtime": 32.4765, "eval_samples_per_second": 115.468, "eval_steps_per_second": 14.441, "step": 1591 }, { "epoch": 0.75, "learning_rate": 1.2659963436928701e-05, "loss": 0.0228, "step": 1634 }, { "epoch": 0.75, "eval_accuracy": 0.9946666666666667, "eval_f1": 0.9946695095948828, "eval_loss": 0.031395528465509415, "eval_precision": 0.9941395844432606, "eval_recall": 0.9952, "eval_runtime": 32.1532, "eval_samples_per_second": 116.629, "eval_steps_per_second": 14.586, "step": 1634 }, { "epoch": 0.77, "learning_rate": 1.1677330895795248e-05, "loss": 0.0297, "step": 1677 }, { "epoch": 0.77, "eval_accuracy": 0.9928, "eval_f1": 0.9928210582291943, "eval_loss": 0.044785238802433014, "eval_precision": 0.9899257688229056, "eval_recall": 0.9957333333333334, "eval_runtime": 32.1967, "eval_samples_per_second": 116.472, "eval_steps_per_second": 14.567, "step": 1677 }, { "epoch": 0.79, "learning_rate": 1.0694698354661791e-05, "loss": 0.0138, "step": 1720 }, { "epoch": 0.79, "eval_accuracy": 0.9853333333333333, "eval_f1": 0.9855072463768116, "eval_loss": 0.09062495082616806, "eval_precision": 0.9739583333333334, "eval_recall": 0.9973333333333333, "eval_runtime": 32.0852, "eval_samples_per_second": 116.876, "eval_steps_per_second": 14.617, "step": 1720 }, { "epoch": 0.81, "learning_rate": 9.712065813528338e-06, "loss": 0.0949, "step": 1763 }, { "epoch": 0.81, "eval_accuracy": 0.9941333333333333, "eval_f1": 0.994136460554371, "eval_loss": 0.028302613645792007, "eval_precision": 0.9936068193926478, "eval_recall": 0.9946666666666667, "eval_runtime": 32.2498, "eval_samples_per_second": 116.28, "eval_steps_per_second": 14.543, "step": 1763 }, { "epoch": 0.83, "learning_rate": 8.729433272394881e-06, "loss": 0.0614, "step": 1806 }, { "epoch": 0.83, "eval_accuracy": 0.9941333333333333, "eval_f1": 0.9941333333333333, "eval_loss": 0.029561299830675125, "eval_precision": 0.9941333333333333, "eval_recall": 0.9941333333333333, "eval_runtime": 32.2034, "eval_samples_per_second": 116.447, "eval_steps_per_second": 14.564, "step": 1806 }, { "epoch": 0.85, "learning_rate": 7.746800731261426e-06, "loss": 0.0012, "step": 1849 }, { "epoch": 0.85, "eval_accuracy": 0.9936, "eval_f1": 0.9936068193926478, "eval_loss": 0.03323618695139885, "eval_precision": 0.9925492283129325, "eval_recall": 0.9946666666666667, "eval_runtime": 32.2063, "eval_samples_per_second": 116.437, "eval_steps_per_second": 14.562, "step": 1849 }, { "epoch": 0.86, "learning_rate": 6.764168190127972e-06, "loss": 0.015, "step": 1892 }, { "epoch": 0.86, "eval_accuracy": 0.9944, "eval_f1": 0.9943955164131306, "eval_loss": 0.03202632814645767, "eval_precision": 0.9951923076923077, "eval_recall": 0.9936, "eval_runtime": 32.1602, "eval_samples_per_second": 116.604, "eval_steps_per_second": 14.583, "step": 1892 }, { "epoch": 0.88, "learning_rate": 5.781535648994515e-06, "loss": 0.0266, "step": 1935 }, { "epoch": 0.88, "eval_accuracy": 0.9922666666666666, "eval_f1": 0.9922933829391443, "eval_loss": 0.042477842420339584, "eval_precision": 0.9888771186440678, "eval_recall": 0.9957333333333334, "eval_runtime": 32.2356, "eval_samples_per_second": 116.331, "eval_steps_per_second": 14.549, "step": 1935 }, { "epoch": 0.9, "learning_rate": 4.798903107861061e-06, "loss": 0.0292, "step": 1978 }, { "epoch": 0.9, "eval_accuracy": 0.9901333333333333, "eval_f1": 0.9901934799893983, "eval_loss": 0.04828348755836487, "eval_precision": 0.9841938883034773, "eval_recall": 0.9962666666666666, "eval_runtime": 32.2496, "eval_samples_per_second": 116.281, "eval_steps_per_second": 14.543, "step": 1978 }, { "epoch": 0.92, "learning_rate": 3.816270566727605e-06, "loss": 0.0141, "step": 2021 }, { "epoch": 0.92, "eval_accuracy": 0.9890666666666666, "eval_f1": 0.9891505689335803, "eval_loss": 0.052797310054302216, "eval_precision": 0.9816176470588235, "eval_recall": 0.9968, "eval_runtime": 32.2638, "eval_samples_per_second": 116.229, "eval_steps_per_second": 14.536, "step": 2021 }, { "epoch": 0.94, "learning_rate": 2.83363802559415e-06, "loss": 0.0006, "step": 2064 }, { "epoch": 0.94, "eval_accuracy": 0.9909333333333333, "eval_f1": 0.9909862142099681, "eval_loss": 0.04819910600781441, "eval_precision": 0.985239852398524, "eval_recall": 0.9968, "eval_runtime": 32.596, "eval_samples_per_second": 115.045, "eval_steps_per_second": 14.388, "step": 2064 }, { "epoch": 0.96, "learning_rate": 1.851005484460695e-06, "loss": 0.0225, "step": 2107 }, { "epoch": 0.96, "eval_accuracy": 0.9922666666666666, "eval_f1": 0.9922933829391443, "eval_loss": 0.03804012015461922, "eval_precision": 0.9888771186440678, "eval_recall": 0.9957333333333334, "eval_runtime": 32.1606, "eval_samples_per_second": 116.602, "eval_steps_per_second": 14.583, "step": 2107 }, { "epoch": 0.98, "learning_rate": 8.683729433272396e-07, "loss": 0.0199, "step": 2150 }, { "epoch": 0.98, "eval_accuracy": 0.9925333333333334, "eval_f1": 0.9925492283129324, "eval_loss": 0.036869850009679794, "eval_precision": 0.9904407859798194, "eval_recall": 0.9946666666666667, "eval_runtime": 32.2379, "eval_samples_per_second": 116.323, "eval_steps_per_second": 14.548, "step": 2150 } ], "max_steps": 2188, "num_train_epochs": 1, "total_flos": 4604443468800000.0, "trial_name": null, "trial_params": null }