{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 1750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.9971428571428576e-05, "loss": 0.6862, "step": 1 }, { "epoch": 0.02, "learning_rate": 4.9e-05, "loss": 0.4775, "step": 35 }, { "epoch": 0.02, "eval_accuracy": 0.857, "eval_f1": 0.8737864077669902, "eval_loss": 0.40033745765686035, "eval_precision": 0.7819905213270142, "eval_recall": 0.99, "eval_runtime": 39.2752, "eval_samples_per_second": 76.384, "eval_steps_per_second": 9.548, "step": 35 }, { "epoch": 0.04, "learning_rate": 4.8e-05, "loss": 0.3051, "step": 70 }, { "epoch": 0.04, "eval_accuracy": 0.8823333333333333, "eval_f1": 0.8942163620017981, "eval_loss": 0.6440998911857605, "eval_precision": 0.8121937942297224, "eval_recall": 0.9946666666666667, "eval_runtime": 37.422, "eval_samples_per_second": 80.167, "eval_steps_per_second": 10.021, "step": 70 }, { "epoch": 0.06, "learning_rate": 4.7e-05, "loss": 0.492, "step": 105 }, { "epoch": 0.06, "eval_accuracy": 0.8696666666666667, "eval_f1": 0.8846947803007963, "eval_loss": 0.6633870601654053, "eval_precision": 0.7932310946589106, "eval_recall": 1.0, "eval_runtime": 36.0985, "eval_samples_per_second": 83.106, "eval_steps_per_second": 10.388, "step": 105 }, { "epoch": 0.08, "learning_rate": 4.600000000000001e-05, "loss": 0.2019, "step": 140 }, { "epoch": 0.08, "eval_accuracy": 0.9256666666666666, "eval_f1": 0.9306376360808709, "eval_loss": 0.46288976073265076, "eval_precision": 0.8723032069970845, "eval_recall": 0.9973333333333333, "eval_runtime": 36.0593, "eval_samples_per_second": 83.196, "eval_steps_per_second": 10.4, "step": 140 }, { "epoch": 0.1, "learning_rate": 4.5e-05, "loss": 0.2224, "step": 175 }, { "epoch": 0.1, "eval_accuracy": 0.9183333333333333, "eval_f1": 0.9243126351560087, "eval_loss": 0.34658756852149963, "eval_precision": 0.8612550374208405, "eval_recall": 0.9973333333333333, "eval_runtime": 36.7878, "eval_samples_per_second": 81.549, "eval_steps_per_second": 10.194, "step": 175 }, { "epoch": 0.12, "learning_rate": 4.4000000000000006e-05, "loss": 0.1034, "step": 210 }, { "epoch": 0.12, "eval_accuracy": 0.9716666666666667, "eval_f1": 0.9722312969617772, "eval_loss": 0.12845683097839355, "eval_precision": 0.9532351057014734, "eval_recall": 0.992, "eval_runtime": 37.1714, "eval_samples_per_second": 80.707, "eval_steps_per_second": 10.088, "step": 210 }, { "epoch": 0.14, "learning_rate": 4.3e-05, "loss": 0.0783, "step": 245 }, { "epoch": 0.14, "eval_accuracy": 0.98, "eval_f1": 0.9803149606299214, "eval_loss": 0.09441035240888596, "eval_precision": 0.9651162790697675, "eval_recall": 0.996, "eval_runtime": 37.1535, "eval_samples_per_second": 80.746, "eval_steps_per_second": 10.093, "step": 245 }, { "epoch": 0.16, "learning_rate": 4.2e-05, "loss": 0.2329, "step": 280 }, { "epoch": 0.16, "eval_accuracy": 0.91, "eval_f1": 0.9173806609547123, "eval_loss": 0.33303627371788025, "eval_precision": 0.8478506787330317, "eval_recall": 0.9993333333333333, "eval_runtime": 37.5239, "eval_samples_per_second": 79.949, "eval_steps_per_second": 9.994, "step": 280 }, { "epoch": 0.18, "learning_rate": 4.1e-05, "loss": 0.0883, "step": 315 }, { "epoch": 0.18, "eval_accuracy": 0.9436666666666667, "eval_f1": 0.946603475513428, "eval_loss": 0.32765254378318787, "eval_precision": 0.8996996996996997, "eval_recall": 0.9986666666666667, "eval_runtime": 36.0524, "eval_samples_per_second": 83.212, "eval_steps_per_second": 10.402, "step": 315 }, { "epoch": 0.2, "learning_rate": 4e-05, "loss": 0.1818, "step": 350 }, { "epoch": 0.2, "eval_accuracy": 0.9783333333333334, "eval_f1": 0.9786535303776683, "eval_loss": 0.06494130194187164, "eval_precision": 0.9644012944983819, "eval_recall": 0.9933333333333333, "eval_runtime": 37.1476, "eval_samples_per_second": 80.759, "eval_steps_per_second": 10.095, "step": 350 }, { "epoch": 0.22, "learning_rate": 3.9000000000000006e-05, "loss": 0.1854, "step": 385 }, { "epoch": 0.22, "eval_accuracy": 0.9913333333333333, "eval_f1": 0.991315965263861, "eval_loss": 0.04114186391234398, "eval_precision": 0.9933065595716198, "eval_recall": 0.9893333333333333, "eval_runtime": 37.1144, "eval_samples_per_second": 80.831, "eval_steps_per_second": 10.104, "step": 385 }, { "epoch": 0.24, "learning_rate": 3.8e-05, "loss": 0.085, "step": 420 }, { "epoch": 0.24, "eval_accuracy": 0.988, "eval_f1": 0.9879437374413931, "eval_loss": 0.05784890055656433, "eval_precision": 0.9925975773889637, "eval_recall": 0.9833333333333333, "eval_runtime": 35.9925, "eval_samples_per_second": 83.351, "eval_steps_per_second": 10.419, "step": 420 }, { "epoch": 0.26, "learning_rate": 3.7e-05, "loss": 0.1281, "step": 455 }, { "epoch": 0.26, "eval_accuracy": 0.963, "eval_f1": 0.9642281662906864, "eval_loss": 0.23219378292560577, "eval_precision": 0.9332501559575795, "eval_recall": 0.9973333333333333, "eval_runtime": 36.1167, "eval_samples_per_second": 83.064, "eval_steps_per_second": 10.383, "step": 455 }, { "epoch": 0.28, "learning_rate": 3.6e-05, "loss": 0.155, "step": 490 }, { "epoch": 0.28, "eval_accuracy": 0.9793333333333333, "eval_f1": 0.9795783926218709, "eval_loss": 0.0876053124666214, "eval_precision": 0.9680989583333334, "eval_recall": 0.9913333333333333, "eval_runtime": 37.4935, "eval_samples_per_second": 80.014, "eval_steps_per_second": 10.002, "step": 490 }, { "epoch": 0.3, "learning_rate": 3.5e-05, "loss": 0.1536, "step": 525 }, { "epoch": 0.3, "eval_accuracy": 0.98, "eval_f1": 0.98022412656559, "eval_loss": 0.08141922205686569, "eval_precision": 0.969361147327249, "eval_recall": 0.9913333333333333, "eval_runtime": 36.207, "eval_samples_per_second": 82.857, "eval_steps_per_second": 10.357, "step": 525 }, { "epoch": 0.32, "learning_rate": 3.4000000000000007e-05, "loss": 0.0686, "step": 560 }, { "epoch": 0.32, "eval_accuracy": 0.8446666666666667, "eval_f1": 0.8654734411085451, "eval_loss": 0.7843948602676392, "eval_precision": 0.7632382892057027, "eval_recall": 0.9993333333333333, "eval_runtime": 37.456, "eval_samples_per_second": 80.094, "eval_steps_per_second": 10.012, "step": 560 }, { "epoch": 0.34, "learning_rate": 3.3e-05, "loss": 0.0569, "step": 595 }, { "epoch": 0.34, "eval_accuracy": 0.99, "eval_f1": 0.99, "eval_loss": 0.04974000155925751, "eval_precision": 0.99, "eval_recall": 0.99, "eval_runtime": 36.1808, "eval_samples_per_second": 82.917, "eval_steps_per_second": 10.365, "step": 595 }, { "epoch": 0.36, "learning_rate": 3.2000000000000005e-05, "loss": 0.0951, "step": 630 }, { "epoch": 0.36, "eval_accuracy": 0.9733333333333334, "eval_f1": 0.9739243807040417, "eval_loss": 0.14421893656253815, "eval_precision": 0.9528061224489796, "eval_recall": 0.996, "eval_runtime": 36.1151, "eval_samples_per_second": 83.068, "eval_steps_per_second": 10.383, "step": 630 }, { "epoch": 0.38, "learning_rate": 3.1e-05, "loss": 0.0434, "step": 665 }, { "epoch": 0.38, "eval_accuracy": 0.9026666666666666, "eval_f1": 0.9111922141119222, "eval_loss": 0.6636046767234802, "eval_precision": 0.8378076062639821, "eval_recall": 0.9986666666666667, "eval_runtime": 37.4412, "eval_samples_per_second": 80.126, "eval_steps_per_second": 10.016, "step": 665 }, { "epoch": 0.4, "learning_rate": 3e-05, "loss": 0.1023, "step": 700 }, { "epoch": 0.4, "eval_accuracy": 0.9653333333333334, "eval_f1": 0.9664082687338501, "eval_loss": 0.17706581950187683, "eval_precision": 0.9373433583959899, "eval_recall": 0.9973333333333333, "eval_runtime": 36.7932, "eval_samples_per_second": 81.537, "eval_steps_per_second": 10.192, "step": 700 }, { "epoch": 0.42, "learning_rate": 2.9e-05, "loss": 0.0619, "step": 735 }, { "epoch": 0.42, "eval_accuracy": 0.9893333333333333, "eval_f1": 0.9893617021276595, "eval_loss": 0.051007628440856934, "eval_precision": 0.986737400530504, "eval_recall": 0.992, "eval_runtime": 36.0289, "eval_samples_per_second": 83.267, "eval_steps_per_second": 10.408, "step": 735 }, { "epoch": 0.44, "learning_rate": 2.8000000000000003e-05, "loss": 0.0367, "step": 770 }, { "epoch": 0.44, "eval_accuracy": 0.9913333333333333, "eval_f1": 0.9913275517011341, "eval_loss": 0.04358353838324547, "eval_precision": 0.9919893190921228, "eval_recall": 0.9906666666666667, "eval_runtime": 36.0918, "eval_samples_per_second": 83.121, "eval_steps_per_second": 10.39, "step": 770 }, { "epoch": 0.46, "learning_rate": 2.7000000000000002e-05, "loss": 0.0011, "step": 805 }, { "epoch": 0.46, "eval_accuracy": 0.9896666666666667, "eval_f1": 0.9896355733868272, "eval_loss": 0.06585267186164856, "eval_precision": 0.9926224010731053, "eval_recall": 0.9866666666666667, "eval_runtime": 36.0308, "eval_samples_per_second": 83.262, "eval_steps_per_second": 10.408, "step": 805 }, { "epoch": 0.48, "learning_rate": 2.6000000000000002e-05, "loss": 0.0241, "step": 840 }, { "epoch": 0.48, "eval_accuracy": 0.976, "eval_f1": 0.9764705882352941, "eval_loss": 0.1684291660785675, "eval_precision": 0.9576923076923077, "eval_recall": 0.996, "eval_runtime": 37.039, "eval_samples_per_second": 80.996, "eval_steps_per_second": 10.124, "step": 840 }, { "epoch": 0.5, "learning_rate": 2.5e-05, "loss": 0.1195, "step": 875 }, { "epoch": 0.5, "eval_accuracy": 0.9793333333333333, "eval_f1": 0.9797120418848168, "eval_loss": 0.12230364978313446, "eval_precision": 0.962082262210797, "eval_recall": 0.998, "eval_runtime": 36.0431, "eval_samples_per_second": 83.234, "eval_steps_per_second": 10.404, "step": 875 }, { "epoch": 0.52, "learning_rate": 2.4e-05, "loss": 0.0557, "step": 910 }, { "epoch": 0.52, "eval_accuracy": 0.9916666666666667, "eval_f1": 0.9916805324459235, "eval_loss": 0.04335600137710571, "eval_precision": 0.9900332225913622, "eval_recall": 0.9933333333333333, "eval_runtime": 36.1241, "eval_samples_per_second": 83.047, "eval_steps_per_second": 10.381, "step": 910 }, { "epoch": 0.54, "learning_rate": 2.3000000000000003e-05, "loss": 0.054, "step": 945 }, { "epoch": 0.54, "eval_accuracy": 0.9913333333333333, "eval_f1": 0.9913563829787235, "eval_loss": 0.03961500525474548, "eval_precision": 0.9887267904509284, "eval_recall": 0.994, "eval_runtime": 36.0623, "eval_samples_per_second": 83.189, "eval_steps_per_second": 10.399, "step": 945 }, { "epoch": 0.56, "learning_rate": 2.2000000000000003e-05, "loss": 0.0345, "step": 980 }, { "epoch": 0.56, "eval_accuracy": 0.991, "eval_f1": 0.9909426366990942, "eval_loss": 0.05297405645251274, "eval_precision": 0.9972991222147198, "eval_recall": 0.9846666666666667, "eval_runtime": 36.0625, "eval_samples_per_second": 83.189, "eval_steps_per_second": 10.399, "step": 980 }, { "epoch": 0.58, "learning_rate": 2.1e-05, "loss": 0.0582, "step": 1015 }, { "epoch": 0.58, "eval_accuracy": 0.9636666666666667, "eval_f1": 0.9648953301127215, "eval_loss": 0.23049569129943848, "eval_precision": 0.9333333333333333, "eval_recall": 0.9986666666666667, "eval_runtime": 36.0114, "eval_samples_per_second": 83.307, "eval_steps_per_second": 10.413, "step": 1015 }, { "epoch": 0.6, "learning_rate": 2e-05, "loss": 0.0451, "step": 1050 }, { "epoch": 0.6, "eval_accuracy": 0.954, "eval_f1": 0.9559948979591837, "eval_loss": 0.2961997985839844, "eval_precision": 0.9162591687041565, "eval_recall": 0.9993333333333333, "eval_runtime": 36.0251, "eval_samples_per_second": 83.275, "eval_steps_per_second": 10.409, "step": 1050 }, { "epoch": 0.62, "learning_rate": 1.9e-05, "loss": 0.134, "step": 1085 }, { "epoch": 0.62, "eval_accuracy": 0.9836666666666667, "eval_f1": 0.9839080459770116, "eval_loss": 0.08320324122905731, "eval_precision": 0.96957928802589, "eval_recall": 0.9986666666666667, "eval_runtime": 36.0675, "eval_samples_per_second": 83.177, "eval_steps_per_second": 10.397, "step": 1085 }, { "epoch": 0.64, "learning_rate": 1.8e-05, "loss": 0.0852, "step": 1120 }, { "epoch": 0.64, "eval_accuracy": 0.987, "eval_f1": 0.9871329594193335, "eval_loss": 0.06261658668518066, "eval_precision": 0.977139124755062, "eval_recall": 0.9973333333333333, "eval_runtime": 36.0739, "eval_samples_per_second": 83.163, "eval_steps_per_second": 10.395, "step": 1120 }, { "epoch": 0.66, "learning_rate": 1.7000000000000003e-05, "loss": 0.1262, "step": 1155 }, { "epoch": 0.66, "eval_accuracy": 0.9516666666666667, "eval_f1": 0.9538363578478192, "eval_loss": 0.21791885793209076, "eval_precision": 0.9128580134064594, "eval_recall": 0.9986666666666667, "eval_runtime": 36.0083, "eval_samples_per_second": 83.314, "eval_steps_per_second": 10.414, "step": 1155 }, { "epoch": 0.68, "learning_rate": 1.6000000000000003e-05, "loss": 0.0167, "step": 1190 }, { "epoch": 0.68, "eval_accuracy": 0.9873333333333333, "eval_f1": 0.9874587458745875, "eval_loss": 0.060804687440395355, "eval_precision": 0.9777777777777777, "eval_recall": 0.9973333333333333, "eval_runtime": 37.0856, "eval_samples_per_second": 80.894, "eval_steps_per_second": 10.112, "step": 1190 }, { "epoch": 0.7, "learning_rate": 1.5e-05, "loss": 0.1207, "step": 1225 }, { "epoch": 0.7, "eval_accuracy": 0.993, "eval_f1": 0.993025572899369, "eval_loss": 0.025707580149173737, "eval_precision": 0.9894109861019192, "eval_recall": 0.9966666666666667, "eval_runtime": 36.8077, "eval_samples_per_second": 81.505, "eval_steps_per_second": 10.188, "step": 1225 }, { "epoch": 0.72, "learning_rate": 1.4000000000000001e-05, "loss": 0.033, "step": 1260 }, { "epoch": 0.72, "eval_accuracy": 0.994, "eval_f1": 0.9940159574468085, "eval_loss": 0.029781479388475418, "eval_precision": 0.9913793103448276, "eval_recall": 0.9966666666666667, "eval_runtime": 36.0985, "eval_samples_per_second": 83.106, "eval_steps_per_second": 10.388, "step": 1260 }, { "epoch": 0.74, "learning_rate": 1.3000000000000001e-05, "loss": 0.0403, "step": 1295 }, { "epoch": 0.74, "eval_accuracy": 0.97, "eval_f1": 0.9708360337005832, "eval_loss": 0.17279422283172607, "eval_precision": 0.9445145018915511, "eval_recall": 0.9986666666666667, "eval_runtime": 36.0515, "eval_samples_per_second": 83.214, "eval_steps_per_second": 10.402, "step": 1295 }, { "epoch": 0.76, "learning_rate": 1.2e-05, "loss": 0.0554, "step": 1330 }, { "epoch": 0.76, "eval_accuracy": 0.9866666666666667, "eval_f1": 0.98681608437706, "eval_loss": 0.0723133310675621, "eval_precision": 0.9758800521512386, "eval_recall": 0.998, "eval_runtime": 36.0327, "eval_samples_per_second": 83.258, "eval_steps_per_second": 10.407, "step": 1330 }, { "epoch": 0.78, "learning_rate": 1.1000000000000001e-05, "loss": 0.0443, "step": 1365 }, { "epoch": 0.78, "eval_accuracy": 0.9916666666666667, "eval_f1": 0.99171362280411, "eval_loss": 0.04444814473390579, "eval_precision": 0.986156888595913, "eval_recall": 0.9973333333333333, "eval_runtime": 36.8515, "eval_samples_per_second": 81.408, "eval_steps_per_second": 10.176, "step": 1365 }, { "epoch": 0.8, "learning_rate": 1e-05, "loss": 0.0308, "step": 1400 }, { "epoch": 0.8, "eval_accuracy": 0.9773333333333334, "eval_f1": 0.9778067885117493, "eval_loss": 0.1104651615023613, "eval_precision": 0.9578005115089514, "eval_recall": 0.9986666666666667, "eval_runtime": 37.2021, "eval_samples_per_second": 80.641, "eval_steps_per_second": 10.08, "step": 1400 }, { "epoch": 0.82, "learning_rate": 9e-06, "loss": 0.0519, "step": 1435 }, { "epoch": 0.82, "eval_accuracy": 0.9946666666666667, "eval_f1": 0.9946559786239145, "eval_loss": 0.023080775514245033, "eval_precision": 0.9966532797858099, "eval_recall": 0.9926666666666667, "eval_runtime": 36.06, "eval_samples_per_second": 83.195, "eval_steps_per_second": 10.399, "step": 1435 }, { "epoch": 0.84, "learning_rate": 8.000000000000001e-06, "loss": 0.0007, "step": 1470 }, { "epoch": 0.84, "eval_accuracy": 0.9856666666666667, "eval_f1": 0.9858506087528792, "eval_loss": 0.08097357302904129, "eval_precision": 0.9733593242365172, "eval_recall": 0.9986666666666667, "eval_runtime": 35.9829, "eval_samples_per_second": 83.373, "eval_steps_per_second": 10.422, "step": 1470 }, { "epoch": 0.86, "learning_rate": 7.000000000000001e-06, "loss": 0.0167, "step": 1505 }, { "epoch": 0.86, "eval_accuracy": 0.9883333333333333, "eval_f1": 0.9884526558891455, "eval_loss": 0.05811823159456253, "eval_precision": 0.9784454604833442, "eval_recall": 0.9986666666666667, "eval_runtime": 37.0549, "eval_samples_per_second": 80.961, "eval_steps_per_second": 10.12, "step": 1505 }, { "epoch": 0.88, "learning_rate": 6e-06, "loss": 0.0514, "step": 1540 }, { "epoch": 0.88, "eval_accuracy": 0.995, "eval_f1": 0.995008319467554, "eval_loss": 0.024803927168250084, "eval_precision": 0.9933554817275747, "eval_recall": 0.9966666666666667, "eval_runtime": 36.1623, "eval_samples_per_second": 82.959, "eval_steps_per_second": 10.37, "step": 1540 }, { "epoch": 0.9, "learning_rate": 5e-06, "loss": 0.0005, "step": 1575 }, { "epoch": 0.9, "eval_accuracy": 0.9943333333333333, "eval_f1": 0.994354035204251, "eval_loss": 0.03251485526561737, "eval_precision": 0.9907346128391793, "eval_recall": 0.998, "eval_runtime": 36.0076, "eval_samples_per_second": 83.316, "eval_steps_per_second": 10.414, "step": 1575 }, { "epoch": 0.92, "learning_rate": 4.000000000000001e-06, "loss": 0.0003, "step": 1610 }, { "epoch": 0.92, "eval_accuracy": 0.992, "eval_f1": 0.9920477137176937, "eval_loss": 0.04252306744456291, "eval_precision": 0.9861660079051383, "eval_recall": 0.998, "eval_runtime": 36.1851, "eval_samples_per_second": 82.907, "eval_steps_per_second": 10.363, "step": 1610 }, { "epoch": 0.94, "learning_rate": 3e-06, "loss": 0.0688, "step": 1645 }, { "epoch": 0.94, "eval_accuracy": 0.9903333333333333, "eval_f1": 0.9904132231404958, "eval_loss": 0.054892849177122116, "eval_precision": 0.9822950819672132, "eval_recall": 0.9986666666666667, "eval_runtime": 36.0592, "eval_samples_per_second": 83.197, "eval_steps_per_second": 10.4, "step": 1645 }, { "epoch": 0.96, "learning_rate": 2.0000000000000003e-06, "loss": 0.0289, "step": 1680 }, { "epoch": 0.96, "eval_accuracy": 0.987, "eval_f1": 0.9871499176276771, "eval_loss": 0.07362984865903854, "eval_precision": 0.9758957654723127, "eval_recall": 0.9986666666666667, "eval_runtime": 36.8485, "eval_samples_per_second": 81.415, "eval_steps_per_second": 10.177, "step": 1680 }, { "epoch": 0.98, "learning_rate": 1.0000000000000002e-06, "loss": 0.0291, "step": 1715 }, { "epoch": 0.98, "eval_accuracy": 0.9856666666666667, "eval_f1": 0.9858506087528792, "eval_loss": 0.07627929002046585, "eval_precision": 0.9733593242365172, "eval_recall": 0.9986666666666667, "eval_runtime": 37.3831, "eval_samples_per_second": 80.25, "eval_steps_per_second": 10.031, "step": 1715 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 0.0004, "step": 1750 }, { "epoch": 1.0, "eval_accuracy": 0.987, "eval_f1": 0.9871499176276771, "eval_loss": 0.07068450003862381, "eval_precision": 0.9758957654723127, "eval_recall": 0.9986666666666667, "eval_runtime": 36.1142, "eval_samples_per_second": 83.07, "eval_steps_per_second": 10.384, "step": 1750 } ], "max_steps": 1750, "num_train_epochs": 1, "total_flos": 3683554775040000.0, "trial_name": null, "trial_params": null }