{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 2625, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.998095238095239e-05, "loss": 0.0837, "step": 1 }, { "epoch": 0.02, "learning_rate": 4.900952380952381e-05, "loss": 3.1832, "step": 52 }, { "epoch": 0.02, "eval_accuracy": 0.5, "eval_f1": 0.0, "eval_loss": 1.1445605754852295, "eval_precision": 0.0, "eval_recall": 0.0, "eval_runtime": 367.3994, "eval_samples_per_second": 12.248, "eval_steps_per_second": 1.532, "step": 52 }, { "epoch": 0.04, "learning_rate": 4.8019047619047617e-05, "loss": 0.5495, "step": 104 }, { "epoch": 0.04, "eval_accuracy": 0.9688888888888889, "eval_f1": 0.9681528662420383, "eval_loss": 0.3199825882911682, "eval_precision": 0.9916123019571296, "eval_recall": 0.9457777777777778, "eval_runtime": 366.8654, "eval_samples_per_second": 12.266, "eval_steps_per_second": 1.535, "step": 104 }, { "epoch": 0.06, "learning_rate": 4.702857142857143e-05, "loss": 0.5137, "step": 156 }, { "epoch": 0.06, "eval_accuracy": 0.9622222222222222, "eval_f1": 0.9608114338404794, "eval_loss": 0.14260223507881165, "eval_precision": 0.9980842911877394, "eval_recall": 0.9262222222222222, "eval_runtime": 366.5855, "eval_samples_per_second": 12.275, "eval_steps_per_second": 1.536, "step": 156 }, { "epoch": 0.08, "learning_rate": 4.6038095238095244e-05, "loss": 0.8596, "step": 208 }, { "epoch": 0.08, "eval_accuracy": 0.5, "eval_f1": 0.6666666666666666, "eval_loss": 1.3752027750015259, "eval_precision": 0.5, "eval_recall": 1.0, "eval_runtime": 366.5836, "eval_samples_per_second": 12.276, "eval_steps_per_second": 1.536, "step": 208 }, { "epoch": 0.1, "learning_rate": 4.504761904761905e-05, "loss": 0.5534, "step": 260 }, { "epoch": 0.1, "eval_accuracy": 0.9573333333333334, "eval_f1": 0.9563239308462239, "eval_loss": 0.4955243170261383, "eval_precision": 0.9794967381174278, "eval_recall": 0.9342222222222222, "eval_runtime": 366.7821, "eval_samples_per_second": 12.269, "eval_steps_per_second": 1.535, "step": 260 }, { "epoch": 0.12, "learning_rate": 4.405714285714286e-05, "loss": 0.2591, "step": 312 }, { "epoch": 0.12, "eval_accuracy": 0.9713333333333334, "eval_f1": 0.9705142857142858, "eval_loss": 0.19041332602500916, "eval_precision": 0.9990588235294118, "eval_recall": 0.9435555555555556, "eval_runtime": 366.5597, "eval_samples_per_second": 12.276, "eval_steps_per_second": 1.536, "step": 312 }, { "epoch": 0.14, "learning_rate": 4.3066666666666665e-05, "loss": 0.1978, "step": 364 }, { "epoch": 0.14, "eval_accuracy": 0.9668888888888889, "eval_f1": 0.9657864523536166, "eval_loss": 0.16762977838516235, "eval_precision": 0.9990498812351544, "eval_recall": 0.9346666666666666, "eval_runtime": 366.5739, "eval_samples_per_second": 12.276, "eval_steps_per_second": 1.536, "step": 364 }, { "epoch": 0.16, "learning_rate": 4.207619047619048e-05, "loss": 0.1662, "step": 416 }, { "epoch": 0.16, "eval_accuracy": 0.9686666666666667, "eval_f1": 0.9676531314521679, "eval_loss": 0.20070703327655792, "eval_precision": 1.0, "eval_recall": 0.9373333333333334, "eval_runtime": 366.9725, "eval_samples_per_second": 12.262, "eval_steps_per_second": 1.534, "step": 416 }, { "epoch": 0.18, "learning_rate": 4.1085714285714286e-05, "loss": 0.3206, "step": 468 }, { "epoch": 0.18, "eval_accuracy": 0.968, "eval_f1": 0.9669421487603307, "eval_loss": 0.18063302338123322, "eval_precision": 1.0, "eval_recall": 0.936, "eval_runtime": 371.3928, "eval_samples_per_second": 12.117, "eval_steps_per_second": 1.516, "step": 468 }, { "epoch": 0.2, "learning_rate": 4.00952380952381e-05, "loss": 0.2036, "step": 520 }, { "epoch": 0.2, "eval_accuracy": 0.9628888888888889, "eval_f1": 0.9625308503477674, "eval_loss": 0.5867714881896973, "eval_precision": 0.9719075668328047, "eval_recall": 0.9533333333333334, "eval_runtime": 366.8218, "eval_samples_per_second": 12.268, "eval_steps_per_second": 1.535, "step": 520 }, { "epoch": 0.22, "learning_rate": 3.910476190476191e-05, "loss": 0.4246, "step": 572 }, { "epoch": 0.22, "eval_accuracy": 0.9682222222222222, "eval_f1": 0.9675663415740532, "eval_loss": 0.14041858911514282, "eval_precision": 0.9879573876794813, "eval_recall": 0.948, "eval_runtime": 366.7155, "eval_samples_per_second": 12.271, "eval_steps_per_second": 1.535, "step": 572 }, { "epoch": 0.24, "learning_rate": 3.8114285714285714e-05, "loss": 0.1938, "step": 624 }, { "epoch": 0.24, "eval_accuracy": 0.9688888888888889, "eval_f1": 0.9679046309032554, "eval_loss": 0.12066192179918289, "eval_precision": 0.9995265151515151, "eval_recall": 0.9382222222222222, "eval_runtime": 367.3124, "eval_samples_per_second": 12.251, "eval_steps_per_second": 1.533, "step": 624 }, { "epoch": 0.26, "learning_rate": 3.712380952380953e-05, "loss": 0.1131, "step": 676 }, { "epoch": 0.26, "eval_accuracy": 0.9702222222222222, "eval_f1": 0.9693363844393592, "eval_loss": 0.15112678706645966, "eval_precision": 0.9990566037735849, "eval_recall": 0.9413333333333334, "eval_runtime": 366.4976, "eval_samples_per_second": 12.278, "eval_steps_per_second": 1.536, "step": 676 }, { "epoch": 0.28, "learning_rate": 3.6133333333333335e-05, "loss": 0.1786, "step": 728 }, { "epoch": 0.28, "eval_accuracy": 0.9717777777777777, "eval_f1": 0.9710376282782212, "eval_loss": 0.1380281001329422, "eval_precision": 0.9971896955503513, "eval_recall": 0.9462222222222222, "eval_runtime": 366.0457, "eval_samples_per_second": 12.294, "eval_steps_per_second": 1.538, "step": 728 }, { "epoch": 0.3, "learning_rate": 3.514285714285714e-05, "loss": 0.1303, "step": 780 }, { "epoch": 0.3, "eval_accuracy": 0.9717777777777777, "eval_f1": 0.9709846927119031, "eval_loss": 0.14563770592212677, "eval_precision": 0.999059708509638, "eval_recall": 0.9444444444444444, "eval_runtime": 366.3569, "eval_samples_per_second": 12.283, "eval_steps_per_second": 1.537, "step": 780 }, { "epoch": 0.32, "learning_rate": 3.415238095238095e-05, "loss": 0.1851, "step": 832 }, { "epoch": 0.32, "eval_accuracy": 0.9711111111111111, "eval_f1": 0.970360237118103, "eval_loss": 0.20535722374916077, "eval_precision": 0.9962546816479401, "eval_recall": 0.9457777777777778, "eval_runtime": 366.4431, "eval_samples_per_second": 12.28, "eval_steps_per_second": 1.536, "step": 832 }, { "epoch": 0.34, "learning_rate": 3.316190476190476e-05, "loss": 0.1857, "step": 884 }, { "epoch": 0.34, "eval_accuracy": 0.9722222222222222, "eval_f1": 0.9714807209673739, "eval_loss": 0.12345914542675018, "eval_precision": 0.9981247069854665, "eval_recall": 0.9462222222222222, "eval_runtime": 366.3331, "eval_samples_per_second": 12.284, "eval_steps_per_second": 1.537, "step": 884 }, { "epoch": 0.36, "learning_rate": 3.217142857142858e-05, "loss": 0.2037, "step": 936 }, { "epoch": 0.36, "eval_accuracy": 0.9548888888888889, "eval_f1": 0.9527797162130728, "eval_loss": 0.19388167560100555, "eval_precision": 0.9995119570522206, "eval_recall": 0.9102222222222223, "eval_runtime": 368.1148, "eval_samples_per_second": 12.224, "eval_steps_per_second": 1.529, "step": 936 }, { "epoch": 0.38, "learning_rate": 3.1180952380952384e-05, "loss": 0.133, "step": 988 }, { "epoch": 0.38, "eval_accuracy": 0.9591111111111111, "eval_f1": 0.9585772174696083, "eval_loss": 0.2834174335002899, "eval_precision": 0.9712591240875912, "eval_recall": 0.9462222222222222, "eval_runtime": 366.4123, "eval_samples_per_second": 12.281, "eval_steps_per_second": 1.537, "step": 988 }, { "epoch": 0.4, "learning_rate": 3.019047619047619e-05, "loss": 0.105, "step": 1040 }, { "epoch": 0.4, "eval_accuracy": 0.9693333333333334, "eval_f1": 0.9684065934065934, "eval_loss": 0.13286447525024414, "eval_precision": 0.9985835694050992, "eval_recall": 0.94, "eval_runtime": 372.0042, "eval_samples_per_second": 12.097, "eval_steps_per_second": 1.513, "step": 1040 }, { "epoch": 0.42, "learning_rate": 2.9199999999999998e-05, "loss": 0.2736, "step": 1092 }, { "epoch": 0.42, "eval_accuracy": 0.9504444444444444, "eval_f1": 0.9502343226958269, "eval_loss": 0.2549838125705719, "eval_precision": 0.9542805916629314, "eval_recall": 0.9462222222222222, "eval_runtime": 366.5595, "eval_samples_per_second": 12.276, "eval_steps_per_second": 1.536, "step": 1092 }, { "epoch": 0.44, "learning_rate": 2.8209523809523812e-05, "loss": 0.2565, "step": 1144 }, { "epoch": 0.44, "eval_accuracy": 0.9693333333333334, "eval_f1": 0.9683921209344939, "eval_loss": 0.16569776833057404, "eval_precision": 0.999054820415879, "eval_recall": 0.9395555555555556, "eval_runtime": 366.5097, "eval_samples_per_second": 12.278, "eval_steps_per_second": 1.536, "step": 1144 }, { "epoch": 0.46, "learning_rate": 2.7219047619047623e-05, "loss": 0.0888, "step": 1196 }, { "epoch": 0.46, "eval_accuracy": 0.9717777777777777, "eval_f1": 0.9709979447362412, "eval_loss": 0.14255712926387787, "eval_precision": 0.9985908877407234, "eval_recall": 0.9448888888888889, "eval_runtime": 366.3675, "eval_samples_per_second": 12.283, "eval_steps_per_second": 1.537, "step": 1196 }, { "epoch": 0.48, "learning_rate": 2.622857142857143e-05, "loss": 0.1704, "step": 1248 }, { "epoch": 0.48, "eval_accuracy": 0.9717777777777777, "eval_f1": 0.9709714285714286, "eval_loss": 0.13756924867630005, "eval_precision": 0.9995294117647059, "eval_recall": 0.944, "eval_runtime": 366.5849, "eval_samples_per_second": 12.275, "eval_steps_per_second": 1.536, "step": 1248 }, { "epoch": 0.5, "learning_rate": 2.523809523809524e-05, "loss": 0.0877, "step": 1300 }, { "epoch": 0.5, "eval_accuracy": 0.9715555555555555, "eval_f1": 0.9707895937927886, "eval_loss": 0.12067598104476929, "eval_precision": 0.9976547842401501, "eval_recall": 0.9453333333333334, "eval_runtime": 366.3594, "eval_samples_per_second": 12.283, "eval_steps_per_second": 1.537, "step": 1300 }, { "epoch": 0.52, "learning_rate": 2.424761904761905e-05, "loss": 0.1013, "step": 1352 }, { "epoch": 0.52, "eval_accuracy": 0.9711111111111111, "eval_f1": 0.9702653247941445, "eval_loss": 0.10261368751525879, "eval_precision": 0.9995287464655985, "eval_recall": 0.9426666666666667, "eval_runtime": 367.9823, "eval_samples_per_second": 12.229, "eval_steps_per_second": 1.53, "step": 1352 }, { "epoch": 0.53, "learning_rate": 2.3257142857142858e-05, "loss": 0.1281, "step": 1404 }, { "epoch": 0.53, "eval_accuracy": 0.9697777777777777, "eval_f1": 0.968878718535469, "eval_loss": 0.19041745364665985, "eval_precision": 0.9985849056603774, "eval_recall": 0.9408888888888889, "eval_runtime": 368.2198, "eval_samples_per_second": 12.221, "eval_steps_per_second": 1.529, "step": 1404 }, { "epoch": 0.55, "learning_rate": 2.2266666666666668e-05, "loss": 0.1385, "step": 1456 }, { "epoch": 0.55, "eval_accuracy": 0.9704444444444444, "eval_f1": 0.9695443095946874, "eval_loss": 0.1330152004957199, "eval_precision": 1.0, "eval_recall": 0.9408888888888889, "eval_runtime": 366.4259, "eval_samples_per_second": 12.281, "eval_steps_per_second": 1.536, "step": 1456 }, { "epoch": 0.57, "learning_rate": 2.127619047619048e-05, "loss": 0.0947, "step": 1508 }, { "epoch": 0.57, "eval_accuracy": 0.9722222222222222, "eval_f1": 0.9714285714285714, "eval_loss": 0.16506431996822357, "eval_precision": 1.0, "eval_recall": 0.9444444444444444, "eval_runtime": 366.4491, "eval_samples_per_second": 12.28, "eval_steps_per_second": 1.536, "step": 1508 }, { "epoch": 0.59, "learning_rate": 2.0285714285714286e-05, "loss": 0.1062, "step": 1560 }, { "epoch": 0.59, "eval_accuracy": 0.9728888888888889, "eval_f1": 0.9721333942439471, "eval_loss": 0.08876548707485199, "eval_precision": 1.0, "eval_recall": 0.9457777777777778, "eval_runtime": 366.4995, "eval_samples_per_second": 12.278, "eval_steps_per_second": 1.536, "step": 1560 }, { "epoch": 0.61, "learning_rate": 1.9295238095238096e-05, "loss": 0.0974, "step": 1612 }, { "epoch": 0.61, "eval_accuracy": 0.9726666666666667, "eval_f1": 0.9719113952957296, "eval_loss": 0.1103716641664505, "eval_precision": 0.9995302959135745, "eval_recall": 0.9457777777777778, "eval_runtime": 367.3196, "eval_samples_per_second": 12.251, "eval_steps_per_second": 1.533, "step": 1612 }, { "epoch": 0.63, "learning_rate": 1.8304761904761906e-05, "loss": 0.1549, "step": 1664 }, { "epoch": 0.63, "eval_accuracy": 0.9362222222222222, "eval_f1": 0.9375951293759512, "eval_loss": 1.4429972171783447, "eval_precision": 0.9178373776074925, "eval_recall": 0.9582222222222222, "eval_runtime": 366.4448, "eval_samples_per_second": 12.28, "eval_steps_per_second": 1.536, "step": 1664 }, { "epoch": 0.65, "learning_rate": 1.7314285714285717e-05, "loss": 0.1223, "step": 1716 }, { "epoch": 0.65, "eval_accuracy": 0.9717777777777777, "eval_f1": 0.9710244125028521, "eval_loss": 0.07695630192756653, "eval_precision": 0.9976558837318331, "eval_recall": 0.9457777777777778, "eval_runtime": 366.6321, "eval_samples_per_second": 12.274, "eval_steps_per_second": 1.536, "step": 1716 }, { "epoch": 0.67, "learning_rate": 1.6323809523809524e-05, "loss": 0.0686, "step": 1768 }, { "epoch": 0.67, "eval_accuracy": 0.9704444444444444, "eval_f1": 0.9697107720337054, "eval_loss": 0.15851092338562012, "eval_precision": 0.9943951424567959, "eval_recall": 0.9462222222222222, "eval_runtime": 366.4624, "eval_samples_per_second": 12.28, "eval_steps_per_second": 1.536, "step": 1768 }, { "epoch": 0.69, "learning_rate": 1.5333333333333334e-05, "loss": 0.156, "step": 1820 }, { "epoch": 0.69, "eval_accuracy": 0.9646666666666667, "eval_f1": 0.9633724948168625, "eval_loss": 0.2488366961479187, "eval_precision": 1.0, "eval_recall": 0.9293333333333333, "eval_runtime": 369.4407, "eval_samples_per_second": 12.181, "eval_steps_per_second": 1.524, "step": 1820 }, { "epoch": 0.71, "learning_rate": 1.4342857142857143e-05, "loss": 0.1607, "step": 1872 }, { "epoch": 0.71, "eval_accuracy": 0.9726666666666667, "eval_f1": 0.971898560657985, "eval_loss": 0.10330852121114731, "eval_precision": 1.0, "eval_recall": 0.9453333333333334, "eval_runtime": 367.7786, "eval_samples_per_second": 12.236, "eval_steps_per_second": 1.531, "step": 1872 }, { "epoch": 0.73, "learning_rate": 1.3352380952380952e-05, "loss": 0.0813, "step": 1924 }, { "epoch": 0.73, "eval_accuracy": 0.9726666666666667, "eval_f1": 0.971898560657985, "eval_loss": 0.11011069267988205, "eval_precision": 1.0, "eval_recall": 0.9453333333333334, "eval_runtime": 366.3291, "eval_samples_per_second": 12.284, "eval_steps_per_second": 1.537, "step": 1924 }, { "epoch": 0.75, "learning_rate": 1.2361904761904762e-05, "loss": 0.0902, "step": 1976 }, { "epoch": 0.75, "eval_accuracy": 0.9726666666666667, "eval_f1": 0.971898560657985, "eval_loss": 0.09210823476314545, "eval_precision": 1.0, "eval_recall": 0.9453333333333334, "eval_runtime": 366.51, "eval_samples_per_second": 12.278, "eval_steps_per_second": 1.536, "step": 1976 }, { "epoch": 0.77, "learning_rate": 1.1371428571428571e-05, "loss": 0.1138, "step": 2028 }, { "epoch": 0.77, "eval_accuracy": 0.9728888888888889, "eval_f1": 0.9721333942439471, "eval_loss": 0.12518948316574097, "eval_precision": 1.0, "eval_recall": 0.9457777777777778, "eval_runtime": 367.2547, "eval_samples_per_second": 12.253, "eval_steps_per_second": 1.533, "step": 2028 }, { "epoch": 0.79, "learning_rate": 1.0380952380952382e-05, "loss": 0.1131, "step": 2080 }, { "epoch": 0.79, "eval_accuracy": 0.9728888888888889, "eval_f1": 0.9721333942439471, "eval_loss": 0.07795156538486481, "eval_precision": 1.0, "eval_recall": 0.9457777777777778, "eval_runtime": 366.7002, "eval_samples_per_second": 12.272, "eval_steps_per_second": 1.535, "step": 2080 }, { "epoch": 0.81, "learning_rate": 9.39047619047619e-06, "loss": 0.1169, "step": 2132 }, { "epoch": 0.81, "eval_accuracy": 0.9728888888888889, "eval_f1": 0.9721333942439471, "eval_loss": 0.0857003927230835, "eval_precision": 1.0, "eval_recall": 0.9457777777777778, "eval_runtime": 366.5193, "eval_samples_per_second": 12.278, "eval_steps_per_second": 1.536, "step": 2132 }, { "epoch": 0.83, "learning_rate": 8.400000000000001e-06, "loss": 0.0624, "step": 2184 }, { "epoch": 0.83, "eval_accuracy": 0.9728888888888889, "eval_f1": 0.9721333942439471, "eval_loss": 0.11906697601079941, "eval_precision": 1.0, "eval_recall": 0.9457777777777778, "eval_runtime": 366.4052, "eval_samples_per_second": 12.281, "eval_steps_per_second": 1.537, "step": 2184 }, { "epoch": 0.85, "learning_rate": 7.40952380952381e-06, "loss": 0.0936, "step": 2236 }, { "epoch": 0.85, "eval_accuracy": 0.9728888888888889, "eval_f1": 0.9721333942439471, "eval_loss": 0.1168401688337326, "eval_precision": 1.0, "eval_recall": 0.9457777777777778, "eval_runtime": 368.3489, "eval_samples_per_second": 12.217, "eval_steps_per_second": 1.528, "step": 2236 }, { "epoch": 0.87, "learning_rate": 6.419047619047619e-06, "loss": 0.1124, "step": 2288 }, { "epoch": 0.87, "eval_accuracy": 0.9728888888888889, "eval_f1": 0.9721333942439471, "eval_loss": 0.08958863466978073, "eval_precision": 1.0, "eval_recall": 0.9457777777777778, "eval_runtime": 367.9579, "eval_samples_per_second": 12.23, "eval_steps_per_second": 1.53, "step": 2288 }, { "epoch": 0.89, "learning_rate": 5.428571428571429e-06, "loss": 0.0519, "step": 2340 }, { "epoch": 0.89, "eval_accuracy": 0.9728888888888889, "eval_f1": 0.9721333942439471, "eval_loss": 0.10539975017309189, "eval_precision": 1.0, "eval_recall": 0.9457777777777778, "eval_runtime": 367.9097, "eval_samples_per_second": 12.231, "eval_steps_per_second": 1.53, "step": 2340 }, { "epoch": 0.91, "learning_rate": 4.4380952380952385e-06, "loss": 0.1278, "step": 2392 }, { "epoch": 0.91, "eval_accuracy": 0.9728888888888889, "eval_f1": 0.9721333942439471, "eval_loss": 0.11540067195892334, "eval_precision": 1.0, "eval_recall": 0.9457777777777778, "eval_runtime": 367.0779, "eval_samples_per_second": 12.259, "eval_steps_per_second": 1.534, "step": 2392 }, { "epoch": 0.93, "learning_rate": 3.4476190476190472e-06, "loss": 0.1412, "step": 2444 }, { "epoch": 0.93, "eval_accuracy": 0.9728888888888889, "eval_f1": 0.9721333942439471, "eval_loss": 0.09740398824214935, "eval_precision": 1.0, "eval_recall": 0.9457777777777778, "eval_runtime": 366.6614, "eval_samples_per_second": 12.273, "eval_steps_per_second": 1.535, "step": 2444 }, { "epoch": 0.95, "learning_rate": 2.4571428571428573e-06, "loss": 0.0472, "step": 2496 }, { "epoch": 0.95, "eval_accuracy": 0.9728888888888889, "eval_f1": 0.9721333942439471, "eval_loss": 0.11149758845567703, "eval_precision": 1.0, "eval_recall": 0.9457777777777778, "eval_runtime": 366.4328, "eval_samples_per_second": 12.281, "eval_steps_per_second": 1.536, "step": 2496 }, { "epoch": 0.97, "learning_rate": 1.4666666666666667e-06, "loss": 0.128, "step": 2548 }, { "epoch": 0.97, "eval_accuracy": 0.9728888888888889, "eval_f1": 0.9721333942439471, "eval_loss": 0.09608805179595947, "eval_precision": 1.0, "eval_recall": 0.9457777777777778, "eval_runtime": 367.1973, "eval_samples_per_second": 12.255, "eval_steps_per_second": 1.533, "step": 2548 }, { "epoch": 0.99, "learning_rate": 4.761904761904763e-07, "loss": 0.0462, "step": 2600 }, { "epoch": 0.99, "eval_accuracy": 0.9728888888888889, "eval_f1": 0.9721333942439471, "eval_loss": 0.10537552833557129, "eval_precision": 1.0, "eval_recall": 0.9457777777777778, "eval_runtime": 366.8901, "eval_samples_per_second": 12.265, "eval_steps_per_second": 1.535, "step": 2600 } ], "max_steps": 2625, "num_train_epochs": 1, "total_flos": 7.7969900961792e+16, "trial_name": null, "trial_params": null }