{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 2625, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.998095238095239e-05, "loss": 2.5699, "step": 1 }, { "epoch": 0.02, "learning_rate": 4.900952380952381e-05, "loss": 3.6256, "step": 52 }, { "epoch": 0.02, "eval_accuracy": 0.9404444444444444, "eval_f1": 0.9367327667610953, "eval_loss": 0.4405118525028229, "eval_precision": 0.998992950654582, "eval_recall": 0.8817777777777778, "eval_runtime": 119.9754, "eval_samples_per_second": 37.508, "eval_steps_per_second": 4.693, "step": 52 }, { "epoch": 0.04, "learning_rate": 4.8019047619047617e-05, "loss": 1.3465, "step": 104 }, { "epoch": 0.04, "eval_accuracy": 0.9548888888888889, "eval_f1": 0.9544332210998877, "eval_loss": 0.4884180724620819, "eval_precision": 0.964172335600907, "eval_recall": 0.9448888888888889, "eval_runtime": 120.2589, "eval_samples_per_second": 37.419, "eval_steps_per_second": 4.682, "step": 104 }, { "epoch": 0.06, "learning_rate": 4.702857142857143e-05, "loss": 1.3567, "step": 156 }, { "epoch": 0.06, "eval_accuracy": 0.8906666666666667, "eval_f1": 0.8772455089820359, "eval_loss": 0.8629839420318604, "eval_precision": 1.0, "eval_recall": 0.7813333333333333, "eval_runtime": 120.0666, "eval_samples_per_second": 37.479, "eval_steps_per_second": 4.689, "step": 156 }, { "epoch": 0.08, "learning_rate": 4.6038095238095244e-05, "loss": 0.7091, "step": 208 }, { "epoch": 0.08, "eval_accuracy": 0.9717777777777777, "eval_f1": 0.9710244125028521, "eval_loss": 0.32004401087760925, "eval_precision": 0.9976558837318331, "eval_recall": 0.9457777777777778, "eval_runtime": 120.0448, "eval_samples_per_second": 37.486, "eval_steps_per_second": 4.69, "step": 208 }, { "epoch": 0.1, "learning_rate": 4.504761904761905e-05, "loss": 0.2289, "step": 260 }, { "epoch": 0.1, "eval_accuracy": 0.9724444444444444, "eval_f1": 0.9716636197440585, "eval_loss": 0.5497010350227356, "eval_precision": 1.0, "eval_recall": 0.9448888888888889, "eval_runtime": 123.4715, "eval_samples_per_second": 36.446, "eval_steps_per_second": 4.56, "step": 260 }, { "epoch": 0.12, "learning_rate": 4.405714285714286e-05, "loss": 0.594, "step": 312 }, { "epoch": 0.12, "eval_accuracy": 0.9722222222222222, "eval_f1": 0.971441626684944, "eval_loss": 0.23442013561725616, "eval_precision": 0.999529854254819, "eval_recall": 0.9448888888888889, "eval_runtime": 119.9304, "eval_samples_per_second": 37.522, "eval_steps_per_second": 4.694, "step": 312 }, { "epoch": 0.14, "learning_rate": 4.3066666666666665e-05, "loss": 0.2676, "step": 364 }, { "epoch": 0.14, "eval_accuracy": 0.9706666666666667, "eval_f1": 0.9697940503432495, "eval_loss": 0.5204830169677734, "eval_precision": 0.9995283018867924, "eval_recall": 0.9417777777777778, "eval_runtime": 120.2235, "eval_samples_per_second": 37.43, "eval_steps_per_second": 4.683, "step": 364 }, { "epoch": 0.16, "learning_rate": 4.207619047619048e-05, "loss": 0.3736, "step": 416 }, { "epoch": 0.16, "eval_accuracy": 0.972, "eval_f1": 0.9712328767123288, "eval_loss": 0.44008418917655945, "eval_precision": 0.9985915492957746, "eval_recall": 0.9453333333333334, "eval_runtime": 120.1926, "eval_samples_per_second": 37.44, "eval_steps_per_second": 4.684, "step": 416 }, { "epoch": 0.18, "learning_rate": 4.1085714285714286e-05, "loss": 0.2915, "step": 468 }, { "epoch": 0.18, "eval_accuracy": 0.972, "eval_f1": 0.9712328767123288, "eval_loss": 0.3523007333278656, "eval_precision": 0.9985915492957746, "eval_recall": 0.9453333333333334, "eval_runtime": 119.9611, "eval_samples_per_second": 37.512, "eval_steps_per_second": 4.693, "step": 468 }, { "epoch": 0.2, "learning_rate": 4.00952380952381e-05, "loss": 0.2387, "step": 520 }, { "epoch": 0.2, "eval_accuracy": 0.972, "eval_f1": 0.9712328767123288, "eval_loss": 0.5473064184188843, "eval_precision": 0.9985915492957746, "eval_recall": 0.9453333333333334, "eval_runtime": 120.1415, "eval_samples_per_second": 37.456, "eval_steps_per_second": 4.686, "step": 520 }, { "epoch": 0.22, "learning_rate": 3.910476190476191e-05, "loss": 0.3028, "step": 572 }, { "epoch": 0.22, "eval_accuracy": 0.9691111111111111, "eval_f1": 0.9683875369570161, "eval_loss": 0.25819283723831177, "eval_precision": 0.9916162086632511, "eval_recall": 0.9462222222222222, "eval_runtime": 120.0033, "eval_samples_per_second": 37.499, "eval_steps_per_second": 4.692, "step": 572 }, { "epoch": 0.24, "learning_rate": 3.8114285714285714e-05, "loss": 0.5703, "step": 624 }, { "epoch": 0.24, "eval_accuracy": 0.9691111111111111, "eval_f1": 0.968170368674147, "eval_loss": 0.19966714084148407, "eval_precision": 0.9985829003306566, "eval_recall": 0.9395555555555556, "eval_runtime": 119.8696, "eval_samples_per_second": 37.541, "eval_steps_per_second": 4.697, "step": 624 }, { "epoch": 0.26, "learning_rate": 3.712380952380953e-05, "loss": 0.5, "step": 676 }, { "epoch": 0.26, "eval_accuracy": 0.9724444444444444, "eval_f1": 0.9716894977168951, "eval_loss": 0.37493452429771423, "eval_precision": 0.9990610328638497, "eval_recall": 0.9457777777777778, "eval_runtime": 119.9194, "eval_samples_per_second": 37.525, "eval_steps_per_second": 4.695, "step": 676 }, { "epoch": 0.28, "learning_rate": 3.6133333333333335e-05, "loss": 0.3681, "step": 728 }, { "epoch": 0.28, "eval_accuracy": 0.972, "eval_f1": 0.9712197350388305, "eval_loss": 0.14736813306808472, "eval_precision": 0.9990601503759399, "eval_recall": 0.9448888888888889, "eval_runtime": 119.8944, "eval_samples_per_second": 37.533, "eval_steps_per_second": 4.696, "step": 728 }, { "epoch": 0.3, "learning_rate": 3.514285714285714e-05, "loss": 0.3687, "step": 780 }, { "epoch": 0.3, "eval_accuracy": 0.9722222222222222, "eval_f1": 0.9714546700159854, "eval_loss": 0.2616070508956909, "eval_precision": 0.9990605918271489, "eval_recall": 0.9453333333333334, "eval_runtime": 120.1748, "eval_samples_per_second": 37.445, "eval_steps_per_second": 4.685, "step": 780 }, { "epoch": 0.32, "learning_rate": 3.415238095238095e-05, "loss": 0.5027, "step": 832 }, { "epoch": 0.32, "eval_accuracy": 0.9693333333333334, "eval_f1": 0.968377635197067, "eval_loss": 0.5182681083679199, "eval_precision": 0.9995269631031221, "eval_recall": 0.9391111111111111, "eval_runtime": 120.1313, "eval_samples_per_second": 37.459, "eval_steps_per_second": 4.687, "step": 832 }, { "epoch": 0.34, "learning_rate": 3.316190476190476e-05, "loss": 0.3527, "step": 884 }, { "epoch": 0.34, "eval_accuracy": 0.9717777777777777, "eval_f1": 0.9709846927119031, "eval_loss": 0.3203032314777374, "eval_precision": 0.999059708509638, "eval_recall": 0.9444444444444444, "eval_runtime": 120.2845, "eval_samples_per_second": 37.411, "eval_steps_per_second": 4.681, "step": 884 }, { "epoch": 0.36, "learning_rate": 3.217142857142858e-05, "loss": 0.2254, "step": 936 }, { "epoch": 0.36, "eval_accuracy": 0.9706666666666667, "eval_f1": 0.969807868252516, "eval_loss": 0.3301153779029846, "eval_precision": 0.9990574929311969, "eval_recall": 0.9422222222222222, "eval_runtime": 120.409, "eval_samples_per_second": 37.373, "eval_steps_per_second": 4.676, "step": 936 }, { "epoch": 0.38, "learning_rate": 3.1180952380952384e-05, "loss": 0.2214, "step": 988 }, { "epoch": 0.38, "eval_accuracy": 0.9713333333333334, "eval_f1": 0.9705142857142858, "eval_loss": 0.2255212664604187, "eval_precision": 0.9990588235294118, "eval_recall": 0.9435555555555556, "eval_runtime": 120.0554, "eval_samples_per_second": 37.483, "eval_steps_per_second": 4.69, "step": 988 }, { "epoch": 0.4, "learning_rate": 3.019047619047619e-05, "loss": 0.2486, "step": 1040 }, { "epoch": 0.4, "eval_accuracy": 0.9724444444444444, "eval_f1": 0.9716894977168951, "eval_loss": 0.22060254216194153, "eval_precision": 0.9990610328638497, "eval_recall": 0.9457777777777778, "eval_runtime": 120.1829, "eval_samples_per_second": 37.443, "eval_steps_per_second": 4.685, "step": 1040 }, { "epoch": 0.42, "learning_rate": 2.9199999999999998e-05, "loss": 0.2049, "step": 1092 }, { "epoch": 0.42, "eval_accuracy": 0.9724444444444444, "eval_f1": 0.9716894977168951, "eval_loss": 0.35958772897720337, "eval_precision": 0.9990610328638497, "eval_recall": 0.9457777777777778, "eval_runtime": 120.0615, "eval_samples_per_second": 37.481, "eval_steps_per_second": 4.689, "step": 1092 }, { "epoch": 0.44, "learning_rate": 2.8209523809523812e-05, "loss": 0.4056, "step": 1144 }, { "epoch": 0.44, "eval_accuracy": 0.9715555555555555, "eval_f1": 0.9707495429616089, "eval_loss": 0.30065128207206726, "eval_precision": 0.9990592662276576, "eval_recall": 0.944, "eval_runtime": 120.3856, "eval_samples_per_second": 37.38, "eval_steps_per_second": 4.677, "step": 1144 }, { "epoch": 0.46, "learning_rate": 2.7219047619047623e-05, "loss": 0.2204, "step": 1196 }, { "epoch": 0.46, "eval_accuracy": 0.9702222222222222, "eval_f1": 0.9693223443223443, "eval_loss": 0.22362905740737915, "eval_precision": 0.9995278564683664, "eval_recall": 0.9408888888888889, "eval_runtime": 120.2284, "eval_samples_per_second": 37.429, "eval_steps_per_second": 4.683, "step": 1196 }, { "epoch": 0.48, "learning_rate": 2.622857142857143e-05, "loss": 0.4425, "step": 1248 }, { "epoch": 0.48, "eval_accuracy": 0.9713333333333334, "eval_f1": 0.9705008003658816, "eval_loss": 0.561194121837616, "eval_precision": 0.9995289684408856, "eval_recall": 0.9431111111111111, "eval_runtime": 120.3958, "eval_samples_per_second": 37.377, "eval_steps_per_second": 4.676, "step": 1248 }, { "epoch": 0.5, "learning_rate": 2.523809523809524e-05, "loss": 0.2287, "step": 1300 }, { "epoch": 0.5, "eval_accuracy": 0.9724444444444444, "eval_f1": 0.9716894977168951, "eval_loss": 0.2646217942237854, "eval_precision": 0.9990610328638497, "eval_recall": 0.9457777777777778, "eval_runtime": 120.7965, "eval_samples_per_second": 37.253, "eval_steps_per_second": 4.661, "step": 1300 }, { "epoch": 0.52, "learning_rate": 2.424761904761905e-05, "loss": 0.248, "step": 1352 }, { "epoch": 0.52, "eval_accuracy": 0.9724444444444444, "eval_f1": 0.9716894977168951, "eval_loss": 0.3093545734882355, "eval_precision": 0.9990610328638497, "eval_recall": 0.9457777777777778, "eval_runtime": 125.2609, "eval_samples_per_second": 35.925, "eval_steps_per_second": 4.495, "step": 1352 }, { "epoch": 0.53, "learning_rate": 2.3257142857142858e-05, "loss": 0.3587, "step": 1404 }, { "epoch": 0.53, "eval_accuracy": 0.9724444444444444, "eval_f1": 0.9716894977168951, "eval_loss": 0.2874605655670166, "eval_precision": 0.9990610328638497, "eval_recall": 0.9457777777777778, "eval_runtime": 125.5022, "eval_samples_per_second": 35.856, "eval_steps_per_second": 4.486, "step": 1404 }, { "epoch": 0.55, "learning_rate": 2.2266666666666668e-05, "loss": 0.2032, "step": 1456 }, { "epoch": 0.55, "eval_accuracy": 0.9724444444444444, "eval_f1": 0.9716894977168951, "eval_loss": 0.2600957155227661, "eval_precision": 0.9990610328638497, "eval_recall": 0.9457777777777778, "eval_runtime": 119.7973, "eval_samples_per_second": 37.563, "eval_steps_per_second": 4.7, "step": 1456 }, { "epoch": 0.57, "learning_rate": 2.127619047619048e-05, "loss": 0.1874, "step": 1508 }, { "epoch": 0.57, "eval_accuracy": 0.9722222222222222, "eval_f1": 0.9714546700159854, "eval_loss": 0.34168195724487305, "eval_precision": 0.9990605918271489, "eval_recall": 0.9453333333333334, "eval_runtime": 120.1128, "eval_samples_per_second": 37.465, "eval_steps_per_second": 4.687, "step": 1508 }, { "epoch": 0.59, "learning_rate": 2.0285714285714286e-05, "loss": 0.1527, "step": 1560 }, { "epoch": 0.59, "eval_accuracy": 0.9611111111111111, "eval_f1": 0.9605233476201219, "eval_loss": 0.2756815552711487, "eval_precision": 0.9752633989922126, "eval_recall": 0.9462222222222222, "eval_runtime": 120.3166, "eval_samples_per_second": 37.401, "eval_steps_per_second": 4.679, "step": 1560 }, { "epoch": 0.61, "learning_rate": 1.9295238095238096e-05, "loss": 0.1734, "step": 1612 }, { "epoch": 0.61, "eval_accuracy": 0.9724444444444444, "eval_f1": 0.9716894977168951, "eval_loss": 0.3820748031139374, "eval_precision": 0.9990610328638497, "eval_recall": 0.9457777777777778, "eval_runtime": 119.8424, "eval_samples_per_second": 37.549, "eval_steps_per_second": 4.698, "step": 1612 }, { "epoch": 0.63, "learning_rate": 1.8304761904761906e-05, "loss": 0.4296, "step": 1664 }, { "epoch": 0.63, "eval_accuracy": 0.9724444444444444, "eval_f1": 0.9716894977168951, "eval_loss": 0.28360387682914734, "eval_precision": 0.9990610328638497, "eval_recall": 0.9457777777777778, "eval_runtime": 119.6839, "eval_samples_per_second": 37.599, "eval_steps_per_second": 4.704, "step": 1664 }, { "epoch": 0.65, "learning_rate": 1.7314285714285717e-05, "loss": 0.2293, "step": 1716 }, { "epoch": 0.65, "eval_accuracy": 0.9724444444444444, "eval_f1": 0.9716894977168951, "eval_loss": 0.1679239422082901, "eval_precision": 0.9990610328638497, "eval_recall": 0.9457777777777778, "eval_runtime": 119.9354, "eval_samples_per_second": 37.52, "eval_steps_per_second": 4.694, "step": 1716 }, { "epoch": 0.67, "learning_rate": 1.6323809523809524e-05, "loss": 0.1589, "step": 1768 }, { "epoch": 0.67, "eval_accuracy": 0.9724444444444444, "eval_f1": 0.9716894977168951, "eval_loss": 0.23643441498279572, "eval_precision": 0.9990610328638497, "eval_recall": 0.9457777777777778, "eval_runtime": 120.016, "eval_samples_per_second": 37.495, "eval_steps_per_second": 4.691, "step": 1768 }, { "epoch": 0.69, "learning_rate": 1.5333333333333334e-05, "loss": 0.2459, "step": 1820 }, { "epoch": 0.69, "eval_accuracy": 0.9724444444444444, "eval_f1": 0.9716894977168951, "eval_loss": 0.34067416191101074, "eval_precision": 0.9990610328638497, "eval_recall": 0.9457777777777778, "eval_runtime": 119.7271, "eval_samples_per_second": 37.585, "eval_steps_per_second": 4.702, "step": 1820 }, { "epoch": 0.71, "learning_rate": 1.4342857142857143e-05, "loss": 0.2254, "step": 1872 }, { "epoch": 0.71, "eval_accuracy": 0.9724444444444444, "eval_f1": 0.9716894977168951, "eval_loss": 0.261232852935791, "eval_precision": 0.9990610328638497, "eval_recall": 0.9457777777777778, "eval_runtime": 119.8503, "eval_samples_per_second": 37.547, "eval_steps_per_second": 4.698, "step": 1872 }, { "epoch": 0.73, "learning_rate": 1.3352380952380952e-05, "loss": 0.1613, "step": 1924 }, { "epoch": 0.73, "eval_accuracy": 0.9724444444444444, "eval_f1": 0.9716894977168951, "eval_loss": 0.2873767912387848, "eval_precision": 0.9990610328638497, "eval_recall": 0.9457777777777778, "eval_runtime": 119.8281, "eval_samples_per_second": 37.554, "eval_steps_per_second": 4.698, "step": 1924 }, { "epoch": 0.75, "learning_rate": 1.2361904761904762e-05, "loss": 0.1651, "step": 1976 }, { "epoch": 0.75, "eval_accuracy": 0.9713333333333334, "eval_f1": 0.9705008003658816, "eval_loss": 0.2269158959388733, "eval_precision": 0.9995289684408856, "eval_recall": 0.9431111111111111, "eval_runtime": 119.7297, "eval_samples_per_second": 37.585, "eval_steps_per_second": 4.702, "step": 1976 }, { "epoch": 0.77, "learning_rate": 1.1371428571428571e-05, "loss": 0.2584, "step": 2028 }, { "epoch": 0.77, "eval_accuracy": 0.9713333333333334, "eval_f1": 0.9705008003658816, "eval_loss": 0.27417832612991333, "eval_precision": 0.9995289684408856, "eval_recall": 0.9431111111111111, "eval_runtime": 119.7799, "eval_samples_per_second": 37.569, "eval_steps_per_second": 4.7, "step": 2028 }, { "epoch": 0.79, "learning_rate": 1.0380952380952382e-05, "loss": 0.1901, "step": 2080 }, { "epoch": 0.79, "eval_accuracy": 0.9713333333333334, "eval_f1": 0.9705008003658816, "eval_loss": 0.19001567363739014, "eval_precision": 0.9995289684408856, "eval_recall": 0.9431111111111111, "eval_runtime": 119.8785, "eval_samples_per_second": 37.538, "eval_steps_per_second": 4.696, "step": 2080 }, { "epoch": 0.81, "learning_rate": 9.39047619047619e-06, "loss": 0.2527, "step": 2132 }, { "epoch": 0.81, "eval_accuracy": 0.972, "eval_f1": 0.9712197350388305, "eval_loss": 0.16979800164699554, "eval_precision": 0.9990601503759399, "eval_recall": 0.9448888888888889, "eval_runtime": 119.8785, "eval_samples_per_second": 37.538, "eval_steps_per_second": 4.696, "step": 2132 }, { "epoch": 0.83, "learning_rate": 8.400000000000001e-06, "loss": 0.1432, "step": 2184 }, { "epoch": 0.83, "eval_accuracy": 0.9724444444444444, "eval_f1": 0.9716894977168951, "eval_loss": 0.1947138011455536, "eval_precision": 0.9990610328638497, "eval_recall": 0.9457777777777778, "eval_runtime": 119.9797, "eval_samples_per_second": 37.506, "eval_steps_per_second": 4.692, "step": 2184 }, { "epoch": 0.85, "learning_rate": 7.40952380952381e-06, "loss": 0.2133, "step": 2236 }, { "epoch": 0.85, "eval_accuracy": 0.9724444444444444, "eval_f1": 0.9716894977168951, "eval_loss": 0.2675629258155823, "eval_precision": 0.9990610328638497, "eval_recall": 0.9457777777777778, "eval_runtime": 120.0094, "eval_samples_per_second": 37.497, "eval_steps_per_second": 4.691, "step": 2236 }, { "epoch": 0.87, "learning_rate": 6.419047619047619e-06, "loss": 0.2746, "step": 2288 }, { "epoch": 0.87, "eval_accuracy": 0.9724444444444444, "eval_f1": 0.9716894977168951, "eval_loss": 0.18613262474536896, "eval_precision": 0.9990610328638497, "eval_recall": 0.9457777777777778, "eval_runtime": 119.9576, "eval_samples_per_second": 37.513, "eval_steps_per_second": 4.693, "step": 2288 }, { "epoch": 0.89, "learning_rate": 5.428571428571429e-06, "loss": 0.1116, "step": 2340 }, { "epoch": 0.89, "eval_accuracy": 0.9724444444444444, "eval_f1": 0.9716894977168951, "eval_loss": 0.28170281648635864, "eval_precision": 0.9990610328638497, "eval_recall": 0.9457777777777778, "eval_runtime": 120.1643, "eval_samples_per_second": 37.449, "eval_steps_per_second": 4.685, "step": 2340 }, { "epoch": 0.91, "learning_rate": 4.4380952380952385e-06, "loss": 0.2212, "step": 2392 }, { "epoch": 0.91, "eval_accuracy": 0.9724444444444444, "eval_f1": 0.9716894977168951, "eval_loss": 0.2225799709558487, "eval_precision": 0.9990610328638497, "eval_recall": 0.9457777777777778, "eval_runtime": 120.0901, "eval_samples_per_second": 37.472, "eval_steps_per_second": 4.688, "step": 2392 }, { "epoch": 0.93, "learning_rate": 3.4476190476190472e-06, "loss": 0.3059, "step": 2444 }, { "epoch": 0.93, "eval_accuracy": 0.9724444444444444, "eval_f1": 0.9716894977168951, "eval_loss": 0.21728740632534027, "eval_precision": 0.9990610328638497, "eval_recall": 0.9457777777777778, "eval_runtime": 119.9486, "eval_samples_per_second": 37.516, "eval_steps_per_second": 4.694, "step": 2444 }, { "epoch": 0.95, "learning_rate": 2.4571428571428573e-06, "loss": 0.12, "step": 2496 }, { "epoch": 0.95, "eval_accuracy": 0.9724444444444444, "eval_f1": 0.9716894977168951, "eval_loss": 0.23962543904781342, "eval_precision": 0.9990610328638497, "eval_recall": 0.9457777777777778, "eval_runtime": 122.131, "eval_samples_per_second": 36.846, "eval_steps_per_second": 4.61, "step": 2496 }, { "epoch": 0.97, "learning_rate": 1.4666666666666667e-06, "loss": 0.264, "step": 2548 }, { "epoch": 0.97, "eval_accuracy": 0.9724444444444444, "eval_f1": 0.9716894977168951, "eval_loss": 0.23284508287906647, "eval_precision": 0.9990610328638497, "eval_recall": 0.9457777777777778, "eval_runtime": 121.4423, "eval_samples_per_second": 37.055, "eval_steps_per_second": 4.636, "step": 2548 }, { "epoch": 0.99, "learning_rate": 4.761904761904763e-07, "loss": 0.0859, "step": 2600 }, { "epoch": 0.99, "eval_accuracy": 0.9724444444444444, "eval_f1": 0.9716894977168951, "eval_loss": 0.2551863491535187, "eval_precision": 0.9990610328638497, "eval_recall": 0.9457777777777778, "eval_runtime": 119.8763, "eval_samples_per_second": 37.539, "eval_steps_per_second": 4.697, "step": 2600 } ], "max_steps": 2625, "num_train_epochs": 1, "total_flos": 1.950297882624e+16, "trial_name": null, "trial_params": null }