{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.0, "global_step": 61693, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15, "learning_rate": 2.9769017554665847e-05, "loss": 0.0666, "step": 500 }, { "epoch": 0.31, "learning_rate": 2.953803510933169e-05, "loss": 0.0447, "step": 1000 }, { "epoch": 0.46, "learning_rate": 2.9307052663997538e-05, "loss": 0.0411, "step": 1500 }, { "epoch": 0.62, "learning_rate": 2.9076070218663384e-05, "loss": 0.038, "step": 2000 }, { "epoch": 0.77, "learning_rate": 2.8845087773329226e-05, "loss": 0.0346, "step": 2500 }, { "epoch": 0.92, "learning_rate": 2.8614105327995072e-05, "loss": 0.0332, "step": 3000 }, { "epoch": 1.0, "eval_accuracy": 0.9877246731440231, "eval_f1": 0.889670288672294, "eval_loss": 0.03370480611920357, "eval_precision": 0.8513566583363085, "eval_recall": 0.9315948823127258, "eval_runtime": 50.7022, "eval_samples_per_second": 438.443, "eval_steps_per_second": 13.708, "step": 3247 }, { "epoch": 1.08, "learning_rate": 2.8383122882660918e-05, "loss": 0.0291, "step": 3500 }, { "epoch": 1.23, "learning_rate": 2.8152140437326767e-05, "loss": 0.0226, "step": 4000 }, { "epoch": 1.39, "learning_rate": 2.792115799199261e-05, "loss": 0.023, "step": 4500 }, { "epoch": 1.54, "learning_rate": 2.7690175546658455e-05, "loss": 0.0235, "step": 5000 }, { "epoch": 1.69, "learning_rate": 2.74591931013243e-05, "loss": 0.023, "step": 5500 }, { "epoch": 1.85, "learning_rate": 2.7228210655990143e-05, "loss": 0.0218, "step": 6000 }, { "epoch": 2.0, "eval_accuracy": 0.9908514749994501, "eval_f1": 0.9142371280113368, "eval_loss": 0.0262825395911932, "eval_precision": 0.8852986917939805, "eval_recall": 0.9451313604844223, "eval_runtime": 50.2363, "eval_samples_per_second": 442.508, "eval_steps_per_second": 13.835, "step": 6494 }, { "epoch": 2.0, "learning_rate": 2.6997228210655992e-05, "loss": 0.0228, "step": 6500 }, { "epoch": 2.16, "learning_rate": 2.6766245765321838e-05, "loss": 0.014, "step": 7000 }, { "epoch": 2.31, "learning_rate": 2.653526331998768e-05, "loss": 0.0143, "step": 7500 }, { "epoch": 2.46, "learning_rate": 2.6304280874653526e-05, "loss": 0.0158, "step": 8000 }, { "epoch": 2.62, "learning_rate": 2.6073298429319372e-05, "loss": 0.015, "step": 8500 }, { "epoch": 2.77, "learning_rate": 2.5842315983985218e-05, "loss": 0.0142, "step": 9000 }, { "epoch": 2.93, "learning_rate": 2.5611333538651064e-05, "loss": 0.0147, "step": 9500 }, { "epoch": 3.0, "eval_accuracy": 0.9930254150662127, "eval_f1": 0.9280089446645269, "eval_loss": 0.024331996217370033, "eval_precision": 0.9160101990333752, "eval_recall": 0.9403262037308331, "eval_runtime": 50.2727, "eval_samples_per_second": 442.188, "eval_steps_per_second": 13.825, "step": 9741 }, { "epoch": 3.08, "learning_rate": 2.538035109331691e-05, "loss": 0.0122, "step": 10000 }, { "epoch": 3.23, "learning_rate": 2.5149368647982755e-05, "loss": 0.0103, "step": 10500 }, { "epoch": 3.39, "learning_rate": 2.4918386202648598e-05, "loss": 0.0095, "step": 11000 }, { "epoch": 3.54, "learning_rate": 2.4687403757314447e-05, "loss": 0.0111, "step": 11500 }, { "epoch": 3.7, "learning_rate": 2.4456421311980292e-05, "loss": 0.0099, "step": 12000 }, { "epoch": 3.85, "learning_rate": 2.4225438866646135e-05, "loss": 0.0097, "step": 12500 }, { "epoch": 4.0, "eval_accuracy": 0.9936326074754469, "eval_f1": 0.9362323294406885, "eval_loss": 0.027174057438969612, "eval_precision": 0.920862222054295, "eval_recall": 0.9521242308819221, "eval_runtime": 50.3309, "eval_samples_per_second": 441.677, "eval_steps_per_second": 13.809, "step": 12988 }, { "epoch": 4.0, "learning_rate": 2.399445642131198e-05, "loss": 0.0102, "step": 13000 }, { "epoch": 4.16, "learning_rate": 2.3763473975977826e-05, "loss": 0.0076, "step": 13500 }, { "epoch": 4.31, "learning_rate": 2.3532491530643672e-05, "loss": 0.007, "step": 14000 }, { "epoch": 4.47, "learning_rate": 2.3301509085309518e-05, "loss": 0.007, "step": 14500 }, { "epoch": 4.62, "learning_rate": 2.3070526639975364e-05, "loss": 0.0082, "step": 15000 }, { "epoch": 4.77, "learning_rate": 2.2839544194641206e-05, "loss": 0.0079, "step": 15500 }, { "epoch": 4.93, "learning_rate": 2.2608561749307052e-05, "loss": 0.0077, "step": 16000 }, { "epoch": 5.0, "eval_accuracy": 0.9943201214002335, "eval_f1": 0.9403489135049596, "eval_loss": 0.023451806977391243, "eval_precision": 0.929177551915486, "eval_recall": 0.9517921672038285, "eval_runtime": 50.2007, "eval_samples_per_second": 442.822, "eval_steps_per_second": 13.844, "step": 16235 }, { "epoch": 5.08, "learning_rate": 2.2377579303972898e-05, "loss": 0.0064, "step": 16500 }, { "epoch": 5.24, "learning_rate": 2.2146596858638743e-05, "loss": 0.0052, "step": 17000 }, { "epoch": 5.39, "learning_rate": 2.191561441330459e-05, "loss": 0.0055, "step": 17500 }, { "epoch": 5.54, "learning_rate": 2.1684631967970435e-05, "loss": 0.0061, "step": 18000 }, { "epoch": 5.7, "learning_rate": 2.145364952263628e-05, "loss": 0.0056, "step": 18500 }, { "epoch": 5.85, "learning_rate": 2.1222667077302123e-05, "loss": 0.0054, "step": 19000 }, { "epoch": 6.0, "eval_accuracy": 0.994450643863006, "eval_f1": 0.9450084195333173, "eval_loss": 0.0263934638351202, "eval_precision": 0.9312535558505595, "eval_recall": 0.9591757007520265, "eval_runtime": 50.2902, "eval_samples_per_second": 442.035, "eval_steps_per_second": 13.82, "step": 19482 }, { "epoch": 6.01, "learning_rate": 2.0991684631967972e-05, "loss": 0.0062, "step": 19500 }, { "epoch": 6.16, "learning_rate": 2.0760702186633818e-05, "loss": 0.0033, "step": 20000 }, { "epoch": 6.31, "learning_rate": 2.052971974129966e-05, "loss": 0.0042, "step": 20500 }, { "epoch": 6.47, "learning_rate": 2.0298737295965506e-05, "loss": 0.0042, "step": 21000 }, { "epoch": 6.62, "learning_rate": 2.0067754850631352e-05, "loss": 0.0045, "step": 21500 }, { "epoch": 6.78, "learning_rate": 1.9836772405297198e-05, "loss": 0.0039, "step": 22000 }, { "epoch": 6.93, "learning_rate": 1.9605789959963044e-05, "loss": 0.0043, "step": 22500 }, { "epoch": 7.0, "eval_accuracy": 0.9948756785494699, "eval_f1": 0.9503865101154663, "eval_loss": 0.026261983439326286, "eval_precision": 0.9473980473223471, "eval_recall": 0.9533938861216916, "eval_runtime": 50.3703, "eval_samples_per_second": 441.332, "eval_steps_per_second": 13.798, "step": 22729 }, { "epoch": 7.08, "learning_rate": 1.937480751462889e-05, "loss": 0.004, "step": 23000 }, { "epoch": 7.24, "learning_rate": 1.914382506929473e-05, "loss": 0.0029, "step": 23500 }, { "epoch": 7.39, "learning_rate": 1.8912842623960577e-05, "loss": 0.0031, "step": 24000 }, { "epoch": 7.55, "learning_rate": 1.8681860178626427e-05, "loss": 0.0034, "step": 24500 }, { "epoch": 7.7, "learning_rate": 1.8450877733292272e-05, "loss": 0.0037, "step": 25000 }, { "epoch": 7.85, "learning_rate": 1.8219895287958115e-05, "loss": 0.0036, "step": 25500 }, { "epoch": 8.0, "eval_accuracy": 0.994658141111516, "eval_f1": 0.9494068997650896, "eval_loss": 0.02981843240559101, "eval_precision": 0.943654361962835, "eval_recall": 0.9552300029299736, "eval_runtime": 50.1982, "eval_samples_per_second": 442.845, "eval_steps_per_second": 13.845, "step": 25976 }, { "epoch": 8.01, "learning_rate": 1.798891284262396e-05, "loss": 0.0036, "step": 26000 }, { "epoch": 8.16, "learning_rate": 1.7757930397289806e-05, "loss": 0.0027, "step": 26500 }, { "epoch": 8.32, "learning_rate": 1.7526947951955652e-05, "loss": 0.0026, "step": 27000 }, { "epoch": 8.47, "learning_rate": 1.7295965506621498e-05, "loss": 0.0029, "step": 27500 }, { "epoch": 8.62, "learning_rate": 1.7064983061287344e-05, "loss": 0.0028, "step": 28000 }, { "epoch": 8.78, "learning_rate": 1.6834000615953186e-05, "loss": 0.0029, "step": 28500 }, { "epoch": 8.93, "learning_rate": 1.6603018170619032e-05, "loss": 0.0027, "step": 29000 }, { "epoch": 9.0, "eval_accuracy": 0.9942837854765313, "eval_f1": 0.9483075157773954, "eval_loss": 0.03117133490741253, "eval_precision": 0.9288564203427929, "eval_recall": 0.9685906826838558, "eval_runtime": 50.2531, "eval_samples_per_second": 442.361, "eval_steps_per_second": 13.83, "step": 29223 }, { "epoch": 9.09, "learning_rate": 1.637203572528488e-05, "loss": 0.0028, "step": 29500 }, { "epoch": 9.24, "learning_rate": 1.6141053279950723e-05, "loss": 0.0019, "step": 30000 }, { "epoch": 9.39, "learning_rate": 1.591007083461657e-05, "loss": 0.0022, "step": 30500 }, { "epoch": 9.55, "learning_rate": 1.5679088389282415e-05, "loss": 0.0024, "step": 31000 }, { "epoch": 9.7, "learning_rate": 1.544810594394826e-05, "loss": 0.0024, "step": 31500 }, { "epoch": 9.86, "learning_rate": 1.5217123498614105e-05, "loss": 0.0023, "step": 32000 }, { "epoch": 10.0, "eval_accuracy": 0.9948642040472482, "eval_f1": 0.9512794425354655, "eval_loss": 0.036810312420129776, "eval_precision": 0.935158800551037, "eval_recall": 0.9679656216427386, "eval_runtime": 50.4106, "eval_samples_per_second": 440.978, "eval_steps_per_second": 13.787, "step": 32470 }, { "epoch": 10.01, "learning_rate": 1.4986141053279952e-05, "loss": 0.0024, "step": 32500 }, { "epoch": 10.16, "learning_rate": 1.4755158607945796e-05, "loss": 0.0018, "step": 33000 }, { "epoch": 10.32, "learning_rate": 1.4524176162611642e-05, "loss": 0.0018, "step": 33500 }, { "epoch": 10.47, "learning_rate": 1.4293193717277488e-05, "loss": 0.002, "step": 34000 }, { "epoch": 10.63, "learning_rate": 1.4062211271943332e-05, "loss": 0.0024, "step": 34500 }, { "epoch": 10.78, "learning_rate": 1.383122882660918e-05, "loss": 0.002, "step": 35000 }, { "epoch": 10.93, "learning_rate": 1.3600246381275023e-05, "loss": 0.0019, "step": 35500 }, { "epoch": 11.0, "eval_accuracy": 0.9952223041374186, "eval_f1": 0.9571868114802629, "eval_loss": 0.034278471022844315, "eval_precision": 0.9512868001697727, "eval_recall": 0.9631604648891493, "eval_runtime": 50.1571, "eval_samples_per_second": 443.208, "eval_steps_per_second": 13.856, "step": 35717 }, { "epoch": 11.09, "learning_rate": 1.3369263935940867e-05, "loss": 0.0018, "step": 36000 }, { "epoch": 11.24, "learning_rate": 1.3138281490606715e-05, "loss": 0.0014, "step": 36500 }, { "epoch": 11.4, "learning_rate": 1.2907299045272559e-05, "loss": 0.0016, "step": 37000 }, { "epoch": 11.55, "learning_rate": 1.2676316599938406e-05, "loss": 0.0016, "step": 37500 }, { "epoch": 11.7, "learning_rate": 1.244533415460425e-05, "loss": 0.0014, "step": 38000 }, { "epoch": 11.86, "learning_rate": 1.2214351709270095e-05, "loss": 0.0018, "step": 38500 }, { "epoch": 12.0, "eval_accuracy": 0.9950344091635375, "eval_f1": 0.9542591914950985, "eval_loss": 0.03573078662157059, "eval_precision": 0.941064747108317, "eval_recall": 0.9678288895399941, "eval_runtime": 50.2611, "eval_samples_per_second": 442.291, "eval_steps_per_second": 13.828, "step": 38964 }, { "epoch": 12.01, "learning_rate": 1.1983369263935942e-05, "loss": 0.0015, "step": 39000 }, { "epoch": 12.17, "learning_rate": 1.1752386818601786e-05, "loss": 0.0014, "step": 39500 }, { "epoch": 12.32, "learning_rate": 1.1521404373267632e-05, "loss": 0.0016, "step": 40000 }, { "epoch": 12.47, "learning_rate": 1.1290421927933478e-05, "loss": 0.0017, "step": 40500 }, { "epoch": 12.63, "learning_rate": 1.1059439482599322e-05, "loss": 0.0011, "step": 41000 }, { "epoch": 12.78, "learning_rate": 1.082845703726517e-05, "loss": 0.0014, "step": 41500 }, { "epoch": 12.94, "learning_rate": 1.0597474591931013e-05, "loss": 0.0014, "step": 42000 }, { "epoch": 13.0, "eval_accuracy": 0.9954953016694444, "eval_f1": 0.9582205552548141, "eval_loss": 0.034824222326278687, "eval_precision": 0.9485197022180545, "eval_recall": 0.9681218869030179, "eval_runtime": 50.5254, "eval_samples_per_second": 439.977, "eval_steps_per_second": 13.755, "step": 42211 }, { "epoch": 13.09, "learning_rate": 1.0366492146596857e-05, "loss": 0.0013, "step": 42500 }, { "epoch": 13.24, "learning_rate": 1.0135509701262705e-05, "loss": 0.001, "step": 43000 }, { "epoch": 13.4, "learning_rate": 9.904527255928549e-06, "loss": 0.0011, "step": 43500 }, { "epoch": 13.55, "learning_rate": 9.673544810594396e-06, "loss": 0.0012, "step": 44000 }, { "epoch": 13.7, "learning_rate": 9.44256236526024e-06, "loss": 0.0013, "step": 44500 }, { "epoch": 13.86, "learning_rate": 9.211579919926085e-06, "loss": 0.0012, "step": 45000 }, { "epoch": 14.0, "eval_accuracy": 0.99525290281001, "eval_f1": 0.9559630182389084, "eval_loss": 0.03898163139820099, "eval_precision": 0.9486449577811544, "eval_recall": 0.9633948627795683, "eval_runtime": 50.2226, "eval_samples_per_second": 442.629, "eval_steps_per_second": 13.838, "step": 45458 }, { "epoch": 14.01, "learning_rate": 8.980597474591932e-06, "loss": 0.0012, "step": 45500 }, { "epoch": 14.17, "learning_rate": 8.749615029257776e-06, "loss": 0.0007, "step": 46000 }, { "epoch": 14.32, "learning_rate": 8.518632583923622e-06, "loss": 0.0009, "step": 46500 }, { "epoch": 14.47, "learning_rate": 8.287650138589468e-06, "loss": 0.0013, "step": 47000 }, { "epoch": 14.63, "learning_rate": 8.056667693255312e-06, "loss": 0.0012, "step": 47500 }, { "epoch": 14.78, "learning_rate": 7.82568524792116e-06, "loss": 0.001, "step": 48000 }, { "epoch": 14.94, "learning_rate": 7.594702802587003e-06, "loss": 0.001, "step": 48500 }, { "epoch": 15.0, "eval_accuracy": 0.9952519466014915, "eval_f1": 0.9577630980820753, "eval_loss": 0.04225644841790199, "eval_precision": 0.9485612475573777, "eval_recall": 0.9671452290262721, "eval_runtime": 56.1286, "eval_samples_per_second": 396.055, "eval_steps_per_second": 12.382, "step": 48705 }, { "epoch": 15.09, "learning_rate": 7.363720357252849e-06, "loss": 0.0009, "step": 49000 }, { "epoch": 15.24, "learning_rate": 7.132737911918695e-06, "loss": 0.0008, "step": 49500 }, { "epoch": 15.4, "learning_rate": 6.90175546658454e-06, "loss": 0.0009, "step": 50000 }, { "epoch": 15.55, "learning_rate": 6.6707730212503855e-06, "loss": 0.0008, "step": 50500 }, { "epoch": 15.71, "learning_rate": 6.43979057591623e-06, "loss": 0.0007, "step": 51000 }, { "epoch": 15.86, "learning_rate": 6.208808130582076e-06, "loss": 0.0008, "step": 51500 }, { "epoch": 16.0, "eval_accuracy": 0.9950430150402038, "eval_f1": 0.9558787995940656, "eval_loss": 0.04262382909655571, "eval_precision": 0.9460493590969964, "eval_recall": 0.9659146401015725, "eval_runtime": 50.4693, "eval_samples_per_second": 440.466, "eval_steps_per_second": 13.771, "step": 51952 }, { "epoch": 16.01, "learning_rate": 5.977825685247921e-06, "loss": 0.0008, "step": 52000 }, { "epoch": 16.17, "learning_rate": 5.746843239913767e-06, "loss": 0.0008, "step": 52500 }, { "epoch": 16.32, "learning_rate": 5.515860794579612e-06, "loss": 0.0008, "step": 53000 }, { "epoch": 16.48, "learning_rate": 5.2848783492454576e-06, "loss": 0.0009, "step": 53500 }, { "epoch": 16.63, "learning_rate": 5.0538959039113025e-06, "loss": 0.0008, "step": 54000 }, { "epoch": 16.78, "learning_rate": 4.822913458577148e-06, "loss": 0.0006, "step": 54500 }, { "epoch": 16.94, "learning_rate": 4.591931013242994e-06, "loss": 0.001, "step": 55000 }, { "epoch": 17.0, "eval_accuracy": 0.9952113077394561, "eval_f1": 0.9582838092477171, "eval_loss": 0.04263555258512497, "eval_precision": 0.9482501434308663, "eval_recall": 0.9685320832112511, "eval_runtime": 50.1705, "eval_samples_per_second": 443.089, "eval_steps_per_second": 13.853, "step": 55199 }, { "epoch": 17.09, "learning_rate": 4.360948567908839e-06, "loss": 0.0008, "step": 55500 }, { "epoch": 17.25, "learning_rate": 4.129966122574684e-06, "loss": 0.0008, "step": 56000 }, { "epoch": 17.4, "learning_rate": 3.89898367724053e-06, "loss": 0.0008, "step": 56500 }, { "epoch": 17.55, "learning_rate": 3.668001231906375e-06, "loss": 0.0007, "step": 57000 }, { "epoch": 17.71, "learning_rate": 3.4370187865722208e-06, "loss": 0.0007, "step": 57500 }, { "epoch": 17.86, "learning_rate": 3.206036341238066e-06, "loss": 0.0006, "step": 58000 }, { "epoch": 18.0, "eval_accuracy": 0.9952199136161224, "eval_f1": 0.9574789135649616, "eval_loss": 0.045220062136650085, "eval_precision": 0.9483607656785913, "eval_recall": 0.9667740990331087, "eval_runtime": 50.3784, "eval_samples_per_second": 441.26, "eval_steps_per_second": 13.796, "step": 58446 }, { "epoch": 18.02, "learning_rate": 2.9750538959039115e-06, "loss": 0.0007, "step": 58500 }, { "epoch": 18.17, "learning_rate": 2.744071450569757e-06, "loss": 0.0007, "step": 59000 }, { "epoch": 18.32, "learning_rate": 2.513089005235602e-06, "loss": 0.0006, "step": 59500 }, { "epoch": 18.48, "learning_rate": 2.2821065599014475e-06, "loss": 0.0006, "step": 60000 }, { "epoch": 18.63, "learning_rate": 2.051124114567293e-06, "loss": 0.0007, "step": 60500 }, { "epoch": 18.79, "learning_rate": 1.8201416692331382e-06, "loss": 0.0007, "step": 61000 }, { "epoch": 18.94, "learning_rate": 1.5891592238989836e-06, "loss": 0.0006, "step": 61500 }, { "epoch": 19.0, "eval_accuracy": 0.9954895644183336, "eval_f1": 0.959768669851647, "eval_loss": 0.046882398426532745, "eval_precision": 0.9504664125500412, "eval_recall": 0.969254810040043, "eval_runtime": 50.5066, "eval_samples_per_second": 440.141, "eval_steps_per_second": 13.761, "step": 61693 } ], "max_steps": 64940, "num_train_epochs": 20, "total_flos": 5.157312926507336e+17, "trial_name": null, "trial_params": null }