{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 1750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.9971428571428576e-05, "loss": 18.0539, "step": 1 }, { "epoch": 0.02, "learning_rate": 4.9e-05, "loss": 4.1309, "step": 35 }, { "epoch": 0.02, "eval_accuracy": 0.8826666666666667, "eval_f1": 0.8778625954198473, "eval_loss": 0.34147411584854126, "eval_precision": 0.9153400868306801, "eval_recall": 0.8433333333333334, "eval_runtime": 463.3794, "eval_samples_per_second": 6.474, "eval_steps_per_second": 0.809, "step": 35 }, { "epoch": 0.04, "learning_rate": 4.8e-05, "loss": 0.6366, "step": 70 }, { "epoch": 0.04, "eval_accuracy": 0.9403333333333334, "eval_f1": 0.9423881557772771, "eval_loss": 0.139574334025383, "eval_precision": 0.911014312383323, "eval_recall": 0.976, "eval_runtime": 463.7246, "eval_samples_per_second": 6.469, "eval_steps_per_second": 0.809, "step": 70 }, { "epoch": 0.06, "learning_rate": 4.7e-05, "loss": 0.8166, "step": 105 }, { "epoch": 0.06, "eval_accuracy": 0.8373333333333334, "eval_f1": 0.8576429404900816, "eval_loss": 0.8452138900756836, "eval_precision": 0.7624481327800829, "eval_recall": 0.98, "eval_runtime": 463.2546, "eval_samples_per_second": 6.476, "eval_steps_per_second": 0.809, "step": 105 }, { "epoch": 0.08, "learning_rate": 4.600000000000001e-05, "loss": 1.8141, "step": 140 }, { "epoch": 0.08, "eval_accuracy": 0.8703333333333333, "eval_f1": 0.8838459241564646, "eval_loss": 2.6052372455596924, "eval_precision": 0.8004326663061114, "eval_recall": 0.9866666666666667, "eval_runtime": 463.8217, "eval_samples_per_second": 6.468, "eval_steps_per_second": 0.809, "step": 140 }, { "epoch": 0.1, "learning_rate": 4.5e-05, "loss": 0.994, "step": 175 }, { "epoch": 0.1, "eval_accuracy": 0.8933333333333333, "eval_f1": 0.9029714978775015, "eval_loss": 0.5267955660820007, "eval_precision": 0.8281423804226918, "eval_recall": 0.9926666666666667, "eval_runtime": 463.9594, "eval_samples_per_second": 6.466, "eval_steps_per_second": 0.808, "step": 175 }, { "epoch": 0.12, "learning_rate": 4.4000000000000006e-05, "loss": 0.3827, "step": 210 }, { "epoch": 0.12, "eval_accuracy": 0.9343333333333333, "eval_f1": 0.9345297441010303, "eval_loss": 0.296316534280777, "eval_precision": 0.9317428760768721, "eval_recall": 0.9373333333333334, "eval_runtime": 463.5088, "eval_samples_per_second": 6.472, "eval_steps_per_second": 0.809, "step": 210 }, { "epoch": 0.14, "learning_rate": 4.3e-05, "loss": 0.5048, "step": 245 }, { "epoch": 0.14, "eval_accuracy": 0.9526666666666667, "eval_f1": 0.9523489932885906, "eval_loss": 0.2187061905860901, "eval_precision": 0.9587837837837838, "eval_recall": 0.946, "eval_runtime": 467.9247, "eval_samples_per_second": 6.411, "eval_steps_per_second": 0.801, "step": 245 }, { "epoch": 0.16, "learning_rate": 4.2e-05, "loss": 0.2841, "step": 280 }, { "epoch": 0.16, "eval_accuracy": 0.8916666666666667, "eval_f1": 0.8787765759045133, "eval_loss": 1.027541160583496, "eval_precision": 0.9974597798475868, "eval_recall": 0.7853333333333333, "eval_runtime": 463.3965, "eval_samples_per_second": 6.474, "eval_steps_per_second": 0.809, "step": 280 }, { "epoch": 0.18, "learning_rate": 4.1e-05, "loss": 0.3962, "step": 315 }, { "epoch": 0.18, "eval_accuracy": 0.9496666666666667, "eval_f1": 0.948656919415165, "eval_loss": 0.3296962380409241, "eval_precision": 0.9680777238029147, "eval_recall": 0.93, "eval_runtime": 463.326, "eval_samples_per_second": 6.475, "eval_steps_per_second": 0.809, "step": 315 }, { "epoch": 0.2, "learning_rate": 4e-05, "loss": 0.488, "step": 350 }, { "epoch": 0.2, "eval_accuracy": 0.9443333333333334, "eval_f1": 0.9429839535677706, "eval_loss": 0.4798208773136139, "eval_precision": 0.966410076976907, "eval_recall": 0.9206666666666666, "eval_runtime": 463.3936, "eval_samples_per_second": 6.474, "eval_steps_per_second": 0.809, "step": 350 }, { "epoch": 0.22, "learning_rate": 3.9000000000000006e-05, "loss": 0.4094, "step": 385 }, { "epoch": 0.22, "eval_accuracy": 0.9096666666666666, "eval_f1": 0.9166922840454964, "eval_loss": 0.5374864339828491, "eval_precision": 0.8505419281232174, "eval_recall": 0.994, "eval_runtime": 463.4888, "eval_samples_per_second": 6.473, "eval_steps_per_second": 0.809, "step": 385 }, { "epoch": 0.24, "learning_rate": 3.8e-05, "loss": 0.2203, "step": 420 }, { "epoch": 0.24, "eval_accuracy": 0.957, "eval_f1": 0.958130477117819, "eval_loss": 0.18050691485404968, "eval_precision": 0.9335863377609108, "eval_recall": 0.984, "eval_runtime": 463.3488, "eval_samples_per_second": 6.475, "eval_steps_per_second": 0.809, "step": 420 }, { "epoch": 0.26, "learning_rate": 3.7e-05, "loss": 0.2526, "step": 455 }, { "epoch": 0.26, "eval_accuracy": 0.9566666666666667, "eval_f1": 0.9579288025889968, "eval_loss": 0.32805779576301575, "eval_precision": 0.9308176100628931, "eval_recall": 0.9866666666666667, "eval_runtime": 464.3941, "eval_samples_per_second": 6.46, "eval_steps_per_second": 0.808, "step": 455 }, { "epoch": 0.28, "learning_rate": 3.6e-05, "loss": 0.1888, "step": 490 }, { "epoch": 0.28, "eval_accuracy": 0.972, "eval_f1": 0.9723502304147466, "eval_loss": 0.15383633971214294, "eval_precision": 0.9603381014304291, "eval_recall": 0.9846666666666667, "eval_runtime": 463.4989, "eval_samples_per_second": 6.473, "eval_steps_per_second": 0.809, "step": 490 }, { "epoch": 0.3, "learning_rate": 3.5e-05, "loss": 0.1859, "step": 525 }, { "epoch": 0.3, "eval_accuracy": 0.9783333333333334, "eval_f1": 0.9781659388646288, "eval_loss": 0.11581222712993622, "eval_precision": 0.985781990521327, "eval_recall": 0.9706666666666667, "eval_runtime": 463.5881, "eval_samples_per_second": 6.471, "eval_steps_per_second": 0.809, "step": 525 }, { "epoch": 0.32, "learning_rate": 3.4000000000000007e-05, "loss": 0.1007, "step": 560 }, { "epoch": 0.32, "eval_accuracy": 0.9753333333333334, "eval_f1": 0.9755129053606882, "eval_loss": 0.14892521500587463, "eval_precision": 0.9684625492772667, "eval_recall": 0.9826666666666667, "eval_runtime": 463.553, "eval_samples_per_second": 6.472, "eval_steps_per_second": 0.809, "step": 560 }, { "epoch": 0.34, "learning_rate": 3.3e-05, "loss": 0.1065, "step": 595 }, { "epoch": 0.34, "eval_accuracy": 0.9726666666666667, "eval_f1": 0.9720327421555252, "eval_loss": 0.15965215861797333, "eval_precision": 0.9951117318435754, "eval_recall": 0.95, "eval_runtime": 463.6429, "eval_samples_per_second": 6.47, "eval_steps_per_second": 0.809, "step": 595 }, { "epoch": 0.36, "learning_rate": 3.2000000000000005e-05, "loss": 0.2868, "step": 630 }, { "epoch": 0.36, "eval_accuracy": 0.9806666666666667, "eval_f1": 0.9807180851063829, "eval_loss": 0.08715511113405228, "eval_precision": 0.9781167108753316, "eval_recall": 0.9833333333333333, "eval_runtime": 463.4149, "eval_samples_per_second": 6.474, "eval_steps_per_second": 0.809, "step": 630 }, { "epoch": 0.38, "learning_rate": 3.1e-05, "loss": 0.1818, "step": 665 }, { "epoch": 0.38, "eval_accuracy": 0.9796666666666667, "eval_f1": 0.9795781720790091, "eval_loss": 0.12202201038599014, "eval_precision": 0.9838601210490922, "eval_recall": 0.9753333333333334, "eval_runtime": 464.4798, "eval_samples_per_second": 6.459, "eval_steps_per_second": 0.807, "step": 665 }, { "epoch": 0.4, "learning_rate": 3e-05, "loss": 0.3238, "step": 700 }, { "epoch": 0.4, "eval_accuracy": 0.9786666666666667, "eval_f1": 0.9784221173297369, "eval_loss": 0.16317808628082275, "eval_precision": 0.9897680763983628, "eval_recall": 0.9673333333333334, "eval_runtime": 463.2126, "eval_samples_per_second": 6.477, "eval_steps_per_second": 0.81, "step": 700 }, { "epoch": 0.42, "learning_rate": 2.9e-05, "loss": 0.125, "step": 735 }, { "epoch": 0.42, "eval_accuracy": 0.9796666666666667, "eval_f1": 0.9796054831160146, "eval_loss": 0.1086646318435669, "eval_precision": 0.9825620389000671, "eval_recall": 0.9766666666666667, "eval_runtime": 464.4048, "eval_samples_per_second": 6.46, "eval_steps_per_second": 0.807, "step": 735 }, { "epoch": 0.44, "learning_rate": 2.8000000000000003e-05, "loss": 0.1361, "step": 770 }, { "epoch": 0.44, "eval_accuracy": 0.976, "eval_f1": 0.9754935330156569, "eval_loss": 0.15513776242733002, "eval_precision": 0.9965229485396384, "eval_recall": 0.9553333333333334, "eval_runtime": 463.6268, "eval_samples_per_second": 6.471, "eval_steps_per_second": 0.809, "step": 770 }, { "epoch": 0.46, "learning_rate": 2.7000000000000002e-05, "loss": 0.1276, "step": 805 }, { "epoch": 0.46, "eval_accuracy": 0.9683333333333334, "eval_f1": 0.9673875729488499, "eval_loss": 0.20085427165031433, "eval_precision": 0.997169143665959, "eval_recall": 0.9393333333333334, "eval_runtime": 463.7219, "eval_samples_per_second": 6.469, "eval_steps_per_second": 0.809, "step": 805 }, { "epoch": 0.48, "learning_rate": 2.6000000000000002e-05, "loss": 0.1618, "step": 840 }, { "epoch": 0.48, "eval_accuracy": 0.9766666666666667, "eval_f1": 0.9762066621346024, "eval_loss": 0.12355328351259232, "eval_precision": 0.9958391123439667, "eval_recall": 0.9573333333333334, "eval_runtime": 464.5376, "eval_samples_per_second": 6.458, "eval_steps_per_second": 0.807, "step": 840 }, { "epoch": 0.5, "learning_rate": 2.5e-05, "loss": 0.2574, "step": 875 }, { "epoch": 0.5, "eval_accuracy": 0.9806666666666667, "eval_f1": 0.9804054054054054, "eval_loss": 0.126968115568161, "eval_precision": 0.9938356164383562, "eval_recall": 0.9673333333333334, "eval_runtime": 465.883, "eval_samples_per_second": 6.439, "eval_steps_per_second": 0.805, "step": 875 }, { "epoch": 0.52, "learning_rate": 2.4e-05, "loss": 0.1482, "step": 910 }, { "epoch": 0.52, "eval_accuracy": 0.9576666666666667, "eval_f1": 0.9558874609239321, "eval_loss": 0.2774529755115509, "eval_precision": 0.9978245105148659, "eval_recall": 0.9173333333333333, "eval_runtime": 465.4264, "eval_samples_per_second": 6.446, "eval_steps_per_second": 0.806, "step": 910 }, { "epoch": 0.54, "learning_rate": 2.3000000000000003e-05, "loss": 0.1156, "step": 945 }, { "epoch": 0.54, "eval_accuracy": 0.9856666666666667, "eval_f1": 0.9855849815621857, "eval_loss": 0.1439618021249771, "eval_precision": 0.9912339851652057, "eval_recall": 0.98, "eval_runtime": 466.4591, "eval_samples_per_second": 6.431, "eval_steps_per_second": 0.804, "step": 945 }, { "epoch": 0.56, "learning_rate": 2.2000000000000003e-05, "loss": 0.2393, "step": 980 }, { "epoch": 0.56, "eval_accuracy": 0.9406666666666667, "eval_f1": 0.9369688385269122, "eval_loss": 0.37739551067352295, "eval_precision": 0.9992447129909365, "eval_recall": 0.882, "eval_runtime": 465.7379, "eval_samples_per_second": 6.441, "eval_steps_per_second": 0.805, "step": 980 }, { "epoch": 0.58, "learning_rate": 2.1e-05, "loss": 0.2364, "step": 1015 }, { "epoch": 0.58, "eval_accuracy": 0.984, "eval_f1": 0.9838601210490923, "eval_loss": 0.0981753021478653, "eval_precision": 0.9925373134328358, "eval_recall": 0.9753333333333334, "eval_runtime": 464.4225, "eval_samples_per_second": 6.46, "eval_steps_per_second": 0.807, "step": 1015 }, { "epoch": 0.6, "learning_rate": 2e-05, "loss": 0.1246, "step": 1050 }, { "epoch": 0.6, "eval_accuracy": 0.985, "eval_f1": 0.9848637739656912, "eval_loss": 0.08204901963472366, "eval_precision": 0.9938900203665988, "eval_recall": 0.976, "eval_runtime": 466.5569, "eval_samples_per_second": 6.43, "eval_steps_per_second": 0.804, "step": 1050 }, { "epoch": 0.62, "learning_rate": 1.9e-05, "loss": 0.1411, "step": 1085 }, { "epoch": 0.62, "eval_accuracy": 0.9873333333333333, "eval_f1": 0.9872824631860776, "eval_loss": 0.05377618223428726, "eval_precision": 0.991263440860215, "eval_recall": 0.9833333333333333, "eval_runtime": 464.6089, "eval_samples_per_second": 6.457, "eval_steps_per_second": 0.807, "step": 1085 }, { "epoch": 0.64, "learning_rate": 1.8e-05, "loss": 0.2055, "step": 1120 }, { "epoch": 0.64, "eval_accuracy": 0.9736666666666667, "eval_f1": 0.9730099077553809, "eval_loss": 0.15216030180454254, "eval_precision": 0.9978976874562018, "eval_recall": 0.9493333333333334, "eval_runtime": 465.4747, "eval_samples_per_second": 6.445, "eval_steps_per_second": 0.806, "step": 1120 }, { "epoch": 0.66, "learning_rate": 1.7000000000000003e-05, "loss": 0.3018, "step": 1155 }, { "epoch": 0.66, "eval_accuracy": 0.9813333333333333, "eval_f1": 0.9815059445178336, "eval_loss": 0.07627255469560623, "eval_precision": 0.9725130890052356, "eval_recall": 0.9906666666666667, "eval_runtime": 464.4389, "eval_samples_per_second": 6.459, "eval_steps_per_second": 0.807, "step": 1155 }, { "epoch": 0.68, "learning_rate": 1.6000000000000003e-05, "loss": 0.1702, "step": 1190 }, { "epoch": 0.68, "eval_accuracy": 0.9873333333333333, "eval_f1": 0.9873586161011311, "eval_loss": 0.07290682196617126, "eval_precision": 0.9853917662682603, "eval_recall": 0.9893333333333333, "eval_runtime": 464.5835, "eval_samples_per_second": 6.457, "eval_steps_per_second": 0.807, "step": 1190 }, { "epoch": 0.7, "learning_rate": 1.5e-05, "loss": 0.2085, "step": 1225 }, { "epoch": 0.7, "eval_accuracy": 0.986, "eval_f1": 0.9859060402684564, "eval_loss": 0.08615541458129883, "eval_precision": 0.9925675675675676, "eval_recall": 0.9793333333333333, "eval_runtime": 464.5251, "eval_samples_per_second": 6.458, "eval_steps_per_second": 0.807, "step": 1225 }, { "epoch": 0.72, "learning_rate": 1.4000000000000001e-05, "loss": 0.0899, "step": 1260 }, { "epoch": 0.72, "eval_accuracy": 0.987, "eval_f1": 0.9869782971619365, "eval_loss": 0.07593820989131927, "eval_precision": 0.988628762541806, "eval_recall": 0.9853333333333333, "eval_runtime": 464.4867, "eval_samples_per_second": 6.459, "eval_steps_per_second": 0.807, "step": 1260 }, { "epoch": 0.74, "learning_rate": 1.3000000000000001e-05, "loss": 0.212, "step": 1295 }, { "epoch": 0.74, "eval_accuracy": 0.9846666666666667, "eval_f1": 0.9847277556440903, "eval_loss": 0.08726092427968979, "eval_precision": 0.9808201058201058, "eval_recall": 0.9886666666666667, "eval_runtime": 463.8528, "eval_samples_per_second": 6.468, "eval_steps_per_second": 0.808, "step": 1295 }, { "epoch": 0.76, "learning_rate": 1.2e-05, "loss": 0.0459, "step": 1330 }, { "epoch": 0.76, "eval_accuracy": 0.981, "eval_f1": 0.9807237064592493, "eval_loss": 0.11619190126657486, "eval_precision": 0.9951956074124915, "eval_recall": 0.9666666666666667, "eval_runtime": 463.7918, "eval_samples_per_second": 6.468, "eval_steps_per_second": 0.809, "step": 1330 }, { "epoch": 0.78, "learning_rate": 1.1000000000000001e-05, "loss": 0.2035, "step": 1365 }, { "epoch": 0.78, "eval_accuracy": 0.9876666666666667, "eval_f1": 0.9875797247398456, "eval_loss": 0.07956338673830032, "eval_precision": 0.9945909398242055, "eval_recall": 0.9806666666666667, "eval_runtime": 464.4206, "eval_samples_per_second": 6.46, "eval_steps_per_second": 0.807, "step": 1365 }, { "epoch": 0.8, "learning_rate": 1e-05, "loss": 0.0942, "step": 1400 }, { "epoch": 0.8, "eval_accuracy": 0.979, "eval_f1": 0.979269496544916, "eval_loss": 0.09173166751861572, "eval_precision": 0.9668615984405458, "eval_recall": 0.992, "eval_runtime": 464.6027, "eval_samples_per_second": 6.457, "eval_steps_per_second": 0.807, "step": 1400 }, { "epoch": 0.82, "learning_rate": 9e-06, "loss": 0.161, "step": 1435 }, { "epoch": 0.82, "eval_accuracy": 0.9873333333333333, "eval_f1": 0.9872397582269979, "eval_loss": 0.06276000291109085, "eval_precision": 0.9945872801082544, "eval_recall": 0.98, "eval_runtime": 463.3979, "eval_samples_per_second": 6.474, "eval_steps_per_second": 0.809, "step": 1435 }, { "epoch": 0.84, "learning_rate": 8.000000000000001e-06, "loss": 0.0365, "step": 1470 }, { "epoch": 0.84, "eval_accuracy": 0.9843333333333333, "eval_f1": 0.9841162554917202, "eval_loss": 0.08324441313743591, "eval_precision": 0.997943797121316, "eval_recall": 0.9706666666666667, "eval_runtime": 463.6189, "eval_samples_per_second": 6.471, "eval_steps_per_second": 0.809, "step": 1470 }, { "epoch": 0.86, "learning_rate": 7.000000000000001e-06, "loss": 0.0508, "step": 1505 }, { "epoch": 0.86, "eval_accuracy": 0.9843333333333333, "eval_f1": 0.9841162554917202, "eval_loss": 0.09412873536348343, "eval_precision": 0.997943797121316, "eval_recall": 0.9706666666666667, "eval_runtime": 466.9343, "eval_samples_per_second": 6.425, "eval_steps_per_second": 0.803, "step": 1505 }, { "epoch": 0.88, "learning_rate": 6e-06, "loss": 0.0597, "step": 1540 }, { "epoch": 0.88, "eval_accuracy": 0.9873333333333333, "eval_f1": 0.9872994652406418, "eval_loss": 0.05775593966245651, "eval_precision": 0.989946380697051, "eval_recall": 0.9846666666666667, "eval_runtime": 464.2967, "eval_samples_per_second": 6.461, "eval_steps_per_second": 0.808, "step": 1540 }, { "epoch": 0.9, "learning_rate": 5e-06, "loss": 0.0055, "step": 1575 }, { "epoch": 0.9, "eval_accuracy": 0.9863333333333333, "eval_f1": 0.9861813279406809, "eval_loss": 0.07944045215845108, "eval_precision": 0.9972733469665985, "eval_recall": 0.9753333333333334, "eval_runtime": 464.3973, "eval_samples_per_second": 6.46, "eval_steps_per_second": 0.807, "step": 1575 }, { "epoch": 0.92, "learning_rate": 4.000000000000001e-06, "loss": 0.0681, "step": 1610 }, { "epoch": 0.92, "eval_accuracy": 0.9873333333333333, "eval_f1": 0.9872139973082099, "eval_loss": 0.07313308119773865, "eval_precision": 0.9966032608695652, "eval_recall": 0.978, "eval_runtime": 464.8711, "eval_samples_per_second": 6.453, "eval_steps_per_second": 0.807, "step": 1610 }, { "epoch": 0.94, "learning_rate": 3e-06, "loss": 0.0978, "step": 1645 }, { "epoch": 0.94, "eval_accuracy": 0.9883333333333333, "eval_f1": 0.9883138564273791, "eval_loss": 0.058573223650455475, "eval_precision": 0.9899665551839465, "eval_recall": 0.9866666666666667, "eval_runtime": 465.0949, "eval_samples_per_second": 6.45, "eval_steps_per_second": 0.806, "step": 1645 }, { "epoch": 0.96, "learning_rate": 2.0000000000000003e-06, "loss": 0.1392, "step": 1680 }, { "epoch": 0.96, "eval_accuracy": 0.9883333333333333, "eval_f1": 0.9883060474440362, "eval_loss": 0.055864058434963226, "eval_precision": 0.9906229068988613, "eval_recall": 0.986, "eval_runtime": 464.3364, "eval_samples_per_second": 6.461, "eval_steps_per_second": 0.808, "step": 1680 }, { "epoch": 0.98, "learning_rate": 1.0000000000000002e-06, "loss": 0.0432, "step": 1715 }, { "epoch": 0.98, "eval_accuracy": 0.9883333333333333, "eval_f1": 0.9883060474440362, "eval_loss": 0.055420782417058945, "eval_precision": 0.9906229068988613, "eval_recall": 0.986, "eval_runtime": 464.6017, "eval_samples_per_second": 6.457, "eval_steps_per_second": 0.807, "step": 1715 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 0.0006, "step": 1750 }, { "epoch": 1.0, "eval_accuracy": 0.988, "eval_f1": 0.9879679144385026, "eval_loss": 0.05567142367362976, "eval_precision": 0.9906166219839142, "eval_recall": 0.9853333333333333, "eval_runtime": 464.5424, "eval_samples_per_second": 6.458, "eval_steps_per_second": 0.807, "step": 1750 } ], "max_steps": 1750, "num_train_epochs": 1, "total_flos": 1.0151220215808e+17, "trial_name": null, "trial_params": null }