{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 1750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.9971428571428576e-05, "loss": 20.0833, "step": 1 }, { "epoch": 0.02, "learning_rate": 4.9e-05, "loss": 7.9373, "step": 35 }, { "epoch": 0.02, "eval_accuracy": 0.614, "eval_f1": 0.7190684133915575, "eval_loss": 2.977444648742676, "eval_precision": 0.5652173913043478, "eval_recall": 0.988, "eval_runtime": 80.901, "eval_samples_per_second": 37.082, "eval_steps_per_second": 4.635, "step": 35 }, { "epoch": 0.04, "learning_rate": 4.8e-05, "loss": 1.0358, "step": 70 }, { "epoch": 0.04, "eval_accuracy": 0.6373333333333333, "eval_f1": 0.7332025502697401, "eval_loss": 3.09529972076416, "eval_precision": 0.5799069045771916, "eval_recall": 0.9966666666666667, "eval_runtime": 82.2367, "eval_samples_per_second": 36.48, "eval_steps_per_second": 4.56, "step": 70 }, { "epoch": 0.06, "learning_rate": 4.7e-05, "loss": 1.8177, "step": 105 }, { "epoch": 0.06, "eval_accuracy": 0.8706666666666667, "eval_f1": 0.8636683063949402, "eval_loss": 0.640018105506897, "eval_precision": 0.913075780089153, "eval_recall": 0.8193333333333334, "eval_runtime": 81.9619, "eval_samples_per_second": 36.602, "eval_steps_per_second": 4.575, "step": 105 }, { "epoch": 0.08, "learning_rate": 4.600000000000001e-05, "loss": 0.7211, "step": 140 }, { "epoch": 0.08, "eval_accuracy": 0.821, "eval_f1": 0.811777076761304, "eval_loss": 1.0340842008590698, "eval_precision": 0.8558758314855875, "eval_recall": 0.772, "eval_runtime": 82.2896, "eval_samples_per_second": 36.457, "eval_steps_per_second": 4.557, "step": 140 }, { "epoch": 0.1, "learning_rate": 4.5e-05, "loss": 1.4119, "step": 175 }, { "epoch": 0.1, "eval_accuracy": 0.751, "eval_f1": 0.6828025477707006, "eval_loss": 0.7746813893318176, "eval_precision": 0.9403508771929825, "eval_recall": 0.536, "eval_runtime": 82.2817, "eval_samples_per_second": 36.46, "eval_steps_per_second": 4.558, "step": 175 }, { "epoch": 0.12, "learning_rate": 4.4000000000000006e-05, "loss": 0.8471, "step": 210 }, { "epoch": 0.12, "eval_accuracy": 0.854, "eval_f1": 0.8643122676579925, "eval_loss": 0.6877764463424683, "eval_precision": 0.8072916666666666, "eval_recall": 0.93, "eval_runtime": 81.2562, "eval_samples_per_second": 36.92, "eval_steps_per_second": 4.615, "step": 210 }, { "epoch": 0.14, "learning_rate": 4.3e-05, "loss": 1.3883, "step": 245 }, { "epoch": 0.14, "eval_accuracy": 0.7186666666666667, "eval_f1": 0.6114180478821363, "eval_loss": 2.117485284805298, "eval_precision": 0.9880952380952381, "eval_recall": 0.44266666666666665, "eval_runtime": 81.2535, "eval_samples_per_second": 36.921, "eval_steps_per_second": 4.615, "step": 245 }, { "epoch": 0.16, "learning_rate": 4.2e-05, "loss": 0.9955, "step": 280 }, { "epoch": 0.16, "eval_accuracy": 0.853, "eval_f1": 0.8333962976955043, "eval_loss": 0.9773063063621521, "eval_precision": 0.9616390584132519, "eval_recall": 0.7353333333333333, "eval_runtime": 82.2707, "eval_samples_per_second": 36.465, "eval_steps_per_second": 4.558, "step": 280 }, { "epoch": 0.18, "learning_rate": 4.1e-05, "loss": 0.9704, "step": 315 }, { "epoch": 0.18, "eval_accuracy": 0.8093333333333333, "eval_f1": 0.8361970217640321, "eval_loss": 0.7936842441558838, "eval_precision": 0.7329317269076305, "eval_recall": 0.9733333333333334, "eval_runtime": 81.186, "eval_samples_per_second": 36.952, "eval_steps_per_second": 4.619, "step": 315 }, { "epoch": 0.2, "learning_rate": 4e-05, "loss": 1.3156, "step": 350 }, { "epoch": 0.2, "eval_accuracy": 0.7793333333333333, "eval_f1": 0.8049499116087213, "eval_loss": 1.306660771369934, "eval_precision": 0.7212249208025343, "eval_recall": 0.9106666666666666, "eval_runtime": 81.1695, "eval_samples_per_second": 36.96, "eval_steps_per_second": 4.62, "step": 350 }, { "epoch": 0.22, "learning_rate": 3.9000000000000006e-05, "loss": 0.9042, "step": 385 }, { "epoch": 0.22, "eval_accuracy": 0.7506666666666667, "eval_f1": 0.6978998384491114, "eval_loss": 0.8157205581665039, "eval_precision": 0.8852459016393442, "eval_recall": 0.576, "eval_runtime": 81.9836, "eval_samples_per_second": 36.593, "eval_steps_per_second": 4.574, "step": 385 }, { "epoch": 0.24, "learning_rate": 3.8e-05, "loss": 0.6122, "step": 420 }, { "epoch": 0.24, "eval_accuracy": 0.861, "eval_f1": 0.8741322064594024, "eval_loss": 0.6521239280700684, "eval_precision": 0.7986762272476559, "eval_recall": 0.9653333333333334, "eval_runtime": 81.1912, "eval_samples_per_second": 36.95, "eval_steps_per_second": 4.619, "step": 420 }, { "epoch": 0.26, "learning_rate": 3.7e-05, "loss": 0.9266, "step": 455 }, { "epoch": 0.26, "eval_accuracy": 0.7576666666666667, "eval_f1": 0.8008764721993973, "eval_loss": 1.0884476900100708, "eval_precision": 0.6796838679683868, "eval_recall": 0.9746666666666667, "eval_runtime": 81.1743, "eval_samples_per_second": 36.958, "eval_steps_per_second": 4.62, "step": 455 }, { "epoch": 0.28, "learning_rate": 3.6e-05, "loss": 0.5418, "step": 490 }, { "epoch": 0.28, "eval_accuracy": 0.8626666666666667, "eval_f1": 0.8731527093596059, "eval_loss": 0.594514012336731, "eval_precision": 0.8112128146453089, "eval_recall": 0.9453333333333334, "eval_runtime": 81.6996, "eval_samples_per_second": 36.72, "eval_steps_per_second": 4.59, "step": 490 }, { "epoch": 0.3, "learning_rate": 3.5e-05, "loss": 0.4564, "step": 525 }, { "epoch": 0.3, "eval_accuracy": 0.9256666666666666, "eval_f1": 0.9236039739636861, "eval_loss": 0.2299291044473648, "eval_precision": 0.9499647639182522, "eval_recall": 0.8986666666666666, "eval_runtime": 81.1873, "eval_samples_per_second": 36.952, "eval_steps_per_second": 4.619, "step": 525 }, { "epoch": 0.32, "learning_rate": 3.4000000000000007e-05, "loss": 0.427, "step": 560 }, { "epoch": 0.32, "eval_accuracy": 0.9156666666666666, "eval_f1": 0.9154694286668894, "eval_loss": 0.36590951681137085, "eval_precision": 0.9176155391828533, "eval_recall": 0.9133333333333333, "eval_runtime": 81.3624, "eval_samples_per_second": 36.872, "eval_steps_per_second": 4.609, "step": 560 }, { "epoch": 0.34, "learning_rate": 3.3e-05, "loss": 0.7026, "step": 595 }, { "epoch": 0.34, "eval_accuracy": 0.9166666666666666, "eval_f1": 0.9150815217391304, "eval_loss": 0.509623646736145, "eval_precision": 0.932825484764543, "eval_recall": 0.898, "eval_runtime": 81.284, "eval_samples_per_second": 36.908, "eval_steps_per_second": 4.613, "step": 595 }, { "epoch": 0.36, "learning_rate": 3.2000000000000005e-05, "loss": 0.7019, "step": 630 }, { "epoch": 0.36, "eval_accuracy": 0.9066666666666666, "eval_f1": 0.9068529607451763, "eval_loss": 0.4853467643260956, "eval_precision": 0.9050464807436919, "eval_recall": 0.9086666666666666, "eval_runtime": 81.2065, "eval_samples_per_second": 36.943, "eval_steps_per_second": 4.618, "step": 630 }, { "epoch": 0.38, "learning_rate": 3.1e-05, "loss": 0.5116, "step": 665 }, { "epoch": 0.38, "eval_accuracy": 0.9003333333333333, "eval_f1": 0.9047467346288627, "eval_loss": 0.4890832006931305, "eval_precision": 0.8663819402074435, "eval_recall": 0.9466666666666667, "eval_runtime": 81.3158, "eval_samples_per_second": 36.893, "eval_steps_per_second": 4.612, "step": 665 }, { "epoch": 0.4, "learning_rate": 3e-05, "loss": 0.4625, "step": 700 }, { "epoch": 0.4, "eval_accuracy": 0.9276666666666666, "eval_f1": 0.92340275326509, "eval_loss": 0.391652911901474, "eval_precision": 0.981245311327832, "eval_recall": 0.872, "eval_runtime": 81.251, "eval_samples_per_second": 36.923, "eval_steps_per_second": 4.615, "step": 700 }, { "epoch": 0.42, "learning_rate": 2.9e-05, "loss": 0.455, "step": 735 }, { "epoch": 0.42, "eval_accuracy": 0.89, "eval_f1": 0.9003623188405797, "eval_loss": 0.5772602558135986, "eval_precision": 0.8228476821192053, "eval_recall": 0.994, "eval_runtime": 81.2335, "eval_samples_per_second": 36.931, "eval_steps_per_second": 4.616, "step": 735 }, { "epoch": 0.44, "learning_rate": 2.8000000000000003e-05, "loss": 0.3816, "step": 770 }, { "epoch": 0.44, "eval_accuracy": 0.929, "eval_f1": 0.9305510270622758, "eval_loss": 0.32315143942832947, "eval_precision": 0.9106573069559668, "eval_recall": 0.9513333333333334, "eval_runtime": 81.1544, "eval_samples_per_second": 36.967, "eval_steps_per_second": 4.621, "step": 770 }, { "epoch": 0.46, "learning_rate": 2.7000000000000002e-05, "loss": 0.3276, "step": 805 }, { "epoch": 0.46, "eval_accuracy": 0.894, "eval_f1": 0.9034608378870674, "eval_loss": 0.8502470850944519, "eval_precision": 0.8294314381270903, "eval_recall": 0.992, "eval_runtime": 81.1738, "eval_samples_per_second": 36.958, "eval_steps_per_second": 4.62, "step": 805 }, { "epoch": 0.48, "learning_rate": 2.6000000000000002e-05, "loss": 0.8028, "step": 840 }, { "epoch": 0.48, "eval_accuracy": 0.8066666666666666, "eval_f1": 0.761904761904762, "eval_loss": 1.2200298309326172, "eval_precision": 0.9914529914529915, "eval_recall": 0.6186666666666667, "eval_runtime": 81.3151, "eval_samples_per_second": 36.894, "eval_steps_per_second": 4.612, "step": 840 }, { "epoch": 0.5, "learning_rate": 2.5e-05, "loss": 0.4308, "step": 875 }, { "epoch": 0.5, "eval_accuracy": 0.9213333333333333, "eval_f1": 0.9263880224578914, "eval_loss": 0.36110782623291016, "eval_precision": 0.8704572098475967, "eval_recall": 0.99, "eval_runtime": 81.936, "eval_samples_per_second": 36.614, "eval_steps_per_second": 4.577, "step": 875 }, { "epoch": 0.52, "learning_rate": 2.4e-05, "loss": 0.3567, "step": 910 }, { "epoch": 0.52, "eval_accuracy": 0.908, "eval_f1": 0.9000724112961622, "eval_loss": 0.6623110771179199, "eval_precision": 0.9849445324881141, "eval_recall": 0.8286666666666667, "eval_runtime": 81.4083, "eval_samples_per_second": 36.851, "eval_steps_per_second": 4.606, "step": 910 }, { "epoch": 0.54, "learning_rate": 2.3000000000000003e-05, "loss": 0.4286, "step": 945 }, { "epoch": 0.54, "eval_accuracy": 0.9216666666666666, "eval_f1": 0.9264475743348982, "eval_loss": 0.3584176301956177, "eval_precision": 0.8731563421828908, "eval_recall": 0.9866666666666667, "eval_runtime": 81.0909, "eval_samples_per_second": 36.996, "eval_steps_per_second": 4.624, "step": 945 }, { "epoch": 0.56, "learning_rate": 2.2000000000000003e-05, "loss": 0.3559, "step": 980 }, { "epoch": 0.56, "eval_accuracy": 0.944, "eval_f1": 0.945736434108527, "eval_loss": 0.21195654571056366, "eval_precision": 0.9172932330827067, "eval_recall": 0.976, "eval_runtime": 81.1261, "eval_samples_per_second": 36.979, "eval_steps_per_second": 4.622, "step": 980 }, { "epoch": 0.58, "learning_rate": 2.1e-05, "loss": 0.3043, "step": 1015 }, { "epoch": 0.58, "eval_accuracy": 0.923, "eval_f1": 0.9262216544235068, "eval_loss": 0.23412927985191345, "eval_precision": 0.8890251379521765, "eval_recall": 0.9666666666666667, "eval_runtime": 81.1527, "eval_samples_per_second": 36.967, "eval_steps_per_second": 4.621, "step": 1015 }, { "epoch": 0.6, "learning_rate": 2e-05, "loss": 0.207, "step": 1050 }, { "epoch": 0.6, "eval_accuracy": 0.9186666666666666, "eval_f1": 0.9122302158273381, "eval_loss": 0.4979284703731537, "eval_precision": 0.990625, "eval_recall": 0.8453333333333334, "eval_runtime": 81.616, "eval_samples_per_second": 36.757, "eval_steps_per_second": 4.595, "step": 1050 }, { "epoch": 0.62, "learning_rate": 1.9e-05, "loss": 0.4594, "step": 1085 }, { "epoch": 0.62, "eval_accuracy": 0.9316666666666666, "eval_f1": 0.9276385457112601, "eval_loss": 0.33773142099380493, "eval_precision": 0.9857464366091523, "eval_recall": 0.876, "eval_runtime": 81.8837, "eval_samples_per_second": 36.637, "eval_steps_per_second": 4.58, "step": 1085 }, { "epoch": 0.64, "learning_rate": 1.8e-05, "loss": 0.3886, "step": 1120 }, { "epoch": 0.64, "eval_accuracy": 0.952, "eval_f1": 0.9511533242876526, "eval_loss": 0.21586619317531586, "eval_precision": 0.9682320441988951, "eval_recall": 0.9346666666666666, "eval_runtime": 81.9925, "eval_samples_per_second": 36.589, "eval_steps_per_second": 4.574, "step": 1120 }, { "epoch": 0.66, "learning_rate": 1.7000000000000003e-05, "loss": 0.3025, "step": 1155 }, { "epoch": 0.66, "eval_accuracy": 0.9516666666666667, "eval_f1": 0.9511620074099023, "eval_loss": 0.1657862514257431, "eval_precision": 0.9611980939414567, "eval_recall": 0.9413333333333334, "eval_runtime": 81.6092, "eval_samples_per_second": 36.761, "eval_steps_per_second": 4.595, "step": 1155 }, { "epoch": 0.68, "learning_rate": 1.6000000000000003e-05, "loss": 0.3561, "step": 1190 }, { "epoch": 0.68, "eval_accuracy": 0.926, "eval_f1": 0.9304075235109718, "eval_loss": 0.2859382629394531, "eval_precision": 0.8781065088757396, "eval_recall": 0.9893333333333333, "eval_runtime": 81.4491, "eval_samples_per_second": 36.833, "eval_steps_per_second": 4.604, "step": 1190 }, { "epoch": 0.7, "learning_rate": 1.5e-05, "loss": 0.2377, "step": 1225 }, { "epoch": 0.7, "eval_accuracy": 0.9123333333333333, "eval_f1": 0.9175290059579806, "eval_loss": 0.6120531558990479, "eval_precision": 0.8661930136175252, "eval_recall": 0.9753333333333334, "eval_runtime": 81.2586, "eval_samples_per_second": 36.919, "eval_steps_per_second": 4.615, "step": 1225 }, { "epoch": 0.72, "learning_rate": 1.4000000000000001e-05, "loss": 0.5469, "step": 1260 }, { "epoch": 0.72, "eval_accuracy": 0.9583333333333334, "eval_f1": 0.9584855529724343, "eval_loss": 0.179586261510849, "eval_precision": 0.9549966909331569, "eval_recall": 0.962, "eval_runtime": 81.2049, "eval_samples_per_second": 36.944, "eval_steps_per_second": 4.618, "step": 1260 }, { "epoch": 0.74, "learning_rate": 1.3000000000000001e-05, "loss": 0.1699, "step": 1295 }, { "epoch": 0.74, "eval_accuracy": 0.946, "eval_f1": 0.94375, "eval_loss": 0.24616345763206482, "eval_precision": 0.9847826086956522, "eval_recall": 0.906, "eval_runtime": 83.3002, "eval_samples_per_second": 36.014, "eval_steps_per_second": 4.502, "step": 1295 }, { "epoch": 0.76, "learning_rate": 1.2e-05, "loss": 0.2279, "step": 1330 }, { "epoch": 0.76, "eval_accuracy": 0.9576666666666667, "eval_f1": 0.9585644371941273, "eval_loss": 0.19657239317893982, "eval_precision": 0.9386581469648563, "eval_recall": 0.9793333333333333, "eval_runtime": 84.7312, "eval_samples_per_second": 35.406, "eval_steps_per_second": 4.426, "step": 1330 }, { "epoch": 0.78, "learning_rate": 1.1000000000000001e-05, "loss": 0.1383, "step": 1365 }, { "epoch": 0.78, "eval_accuracy": 0.9373333333333334, "eval_f1": 0.9336626676076217, "eval_loss": 0.4285222887992859, "eval_precision": 0.9917541229385307, "eval_recall": 0.882, "eval_runtime": 81.4307, "eval_samples_per_second": 36.841, "eval_steps_per_second": 4.605, "step": 1365 }, { "epoch": 0.8, "learning_rate": 1e-05, "loss": 0.2622, "step": 1400 }, { "epoch": 0.8, "eval_accuracy": 0.9606666666666667, "eval_f1": 0.9603227975790181, "eval_loss": 0.22786445915699005, "eval_precision": 0.9687924016282226, "eval_recall": 0.952, "eval_runtime": 81.1895, "eval_samples_per_second": 36.951, "eval_steps_per_second": 4.619, "step": 1400 }, { "epoch": 0.82, "learning_rate": 9e-06, "loss": 0.1573, "step": 1435 }, { "epoch": 0.82, "eval_accuracy": 0.9636666666666667, "eval_f1": 0.9632873021219266, "eval_loss": 0.1860896348953247, "eval_precision": 0.9734513274336283, "eval_recall": 0.9533333333333334, "eval_runtime": 81.6749, "eval_samples_per_second": 36.731, "eval_steps_per_second": 4.591, "step": 1435 }, { "epoch": 0.84, "learning_rate": 8.000000000000001e-06, "loss": 0.1174, "step": 1470 }, { "epoch": 0.84, "eval_accuracy": 0.9636666666666667, "eval_f1": 0.9634350888963435, "eval_loss": 0.1978105902671814, "eval_precision": 0.9696151249155975, "eval_recall": 0.9573333333333334, "eval_runtime": 81.6791, "eval_samples_per_second": 36.729, "eval_steps_per_second": 4.591, "step": 1470 }, { "epoch": 0.86, "learning_rate": 7.000000000000001e-06, "loss": 0.1827, "step": 1505 }, { "epoch": 0.86, "eval_accuracy": 0.9623333333333334, "eval_f1": 0.962219993313273, "eval_loss": 0.215500146150589, "eval_precision": 0.9651240778001341, "eval_recall": 0.9593333333333334, "eval_runtime": 81.1055, "eval_samples_per_second": 36.989, "eval_steps_per_second": 4.624, "step": 1505 }, { "epoch": 0.88, "learning_rate": 6e-06, "loss": 0.2172, "step": 1540 }, { "epoch": 0.88, "eval_accuracy": 0.962, "eval_f1": 0.9615643964935942, "eval_loss": 0.2241593450307846, "eval_precision": 0.9727148703956344, "eval_recall": 0.9506666666666667, "eval_runtime": 81.5602, "eval_samples_per_second": 36.783, "eval_steps_per_second": 4.598, "step": 1540 }, { "epoch": 0.9, "learning_rate": 5e-06, "loss": 0.2261, "step": 1575 }, { "epoch": 0.9, "eval_accuracy": 0.9643333333333334, "eval_f1": 0.9644636333444038, "eval_loss": 0.19370576739311218, "eval_precision": 0.9609530112508272, "eval_recall": 0.968, "eval_runtime": 81.2706, "eval_samples_per_second": 36.914, "eval_steps_per_second": 4.614, "step": 1575 }, { "epoch": 0.92, "learning_rate": 4.000000000000001e-06, "loss": 0.1769, "step": 1610 }, { "epoch": 0.92, "eval_accuracy": 0.9576666666666667, "eval_f1": 0.9567291311754687, "eval_loss": 0.26763275265693665, "eval_precision": 0.978397212543554, "eval_recall": 0.936, "eval_runtime": 81.1748, "eval_samples_per_second": 36.957, "eval_steps_per_second": 4.62, "step": 1610 }, { "epoch": 0.94, "learning_rate": 3e-06, "loss": 0.4139, "step": 1645 }, { "epoch": 0.94, "eval_accuracy": 0.9673333333333334, "eval_f1": 0.9674634794156707, "eval_loss": 0.1558542400598526, "eval_precision": 0.9636243386243386, "eval_recall": 0.9713333333333334, "eval_runtime": 81.3051, "eval_samples_per_second": 36.898, "eval_steps_per_second": 4.612, "step": 1645 }, { "epoch": 0.96, "learning_rate": 2.0000000000000003e-06, "loss": 0.2209, "step": 1680 }, { "epoch": 0.96, "eval_accuracy": 0.9706666666666667, "eval_f1": 0.9705488621151273, "eval_loss": 0.1362292617559433, "eval_precision": 0.9744623655913979, "eval_recall": 0.9666666666666667, "eval_runtime": 81.1766, "eval_samples_per_second": 36.956, "eval_steps_per_second": 4.62, "step": 1680 }, { "epoch": 0.98, "learning_rate": 1.0000000000000002e-06, "loss": 0.1792, "step": 1715 }, { "epoch": 0.98, "eval_accuracy": 0.9696666666666667, "eval_f1": 0.9696363029696363, "eval_loss": 0.13592535257339478, "eval_precision": 0.9706078824315297, "eval_recall": 0.9686666666666667, "eval_runtime": 81.8567, "eval_samples_per_second": 36.649, "eval_steps_per_second": 4.581, "step": 1715 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 0.1015, "step": 1750 }, { "epoch": 1.0, "eval_accuracy": 0.9696666666666667, "eval_f1": 0.9696160267111853, "eval_loss": 0.1375236213207245, "eval_precision": 0.9712374581939799, "eval_recall": 0.968, "eval_runtime": 81.3216, "eval_samples_per_second": 36.891, "eval_steps_per_second": 4.611, "step": 1750 } ], "max_steps": 1750, "num_train_epochs": 1, "total_flos": 1.300198588416e+16, "trial_name": null, "trial_params": null }