batterydata's picture
update
b9a11b7
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 19.0,
"global_step": 61693,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.15,
"learning_rate": 2.9769017554665847e-05,
"loss": 0.0666,
"step": 500
},
{
"epoch": 0.31,
"learning_rate": 2.953803510933169e-05,
"loss": 0.0447,
"step": 1000
},
{
"epoch": 0.46,
"learning_rate": 2.9307052663997538e-05,
"loss": 0.0411,
"step": 1500
},
{
"epoch": 0.62,
"learning_rate": 2.9076070218663384e-05,
"loss": 0.038,
"step": 2000
},
{
"epoch": 0.77,
"learning_rate": 2.8845087773329226e-05,
"loss": 0.0346,
"step": 2500
},
{
"epoch": 0.92,
"learning_rate": 2.8614105327995072e-05,
"loss": 0.0332,
"step": 3000
},
{
"epoch": 1.0,
"eval_accuracy": 0.9877246731440231,
"eval_f1": 0.889670288672294,
"eval_loss": 0.03370480611920357,
"eval_precision": 0.8513566583363085,
"eval_recall": 0.9315948823127258,
"eval_runtime": 50.7022,
"eval_samples_per_second": 438.443,
"eval_steps_per_second": 13.708,
"step": 3247
},
{
"epoch": 1.08,
"learning_rate": 2.8383122882660918e-05,
"loss": 0.0291,
"step": 3500
},
{
"epoch": 1.23,
"learning_rate": 2.8152140437326767e-05,
"loss": 0.0226,
"step": 4000
},
{
"epoch": 1.39,
"learning_rate": 2.792115799199261e-05,
"loss": 0.023,
"step": 4500
},
{
"epoch": 1.54,
"learning_rate": 2.7690175546658455e-05,
"loss": 0.0235,
"step": 5000
},
{
"epoch": 1.69,
"learning_rate": 2.74591931013243e-05,
"loss": 0.023,
"step": 5500
},
{
"epoch": 1.85,
"learning_rate": 2.7228210655990143e-05,
"loss": 0.0218,
"step": 6000
},
{
"epoch": 2.0,
"eval_accuracy": 0.9908514749994501,
"eval_f1": 0.9142371280113368,
"eval_loss": 0.0262825395911932,
"eval_precision": 0.8852986917939805,
"eval_recall": 0.9451313604844223,
"eval_runtime": 50.2363,
"eval_samples_per_second": 442.508,
"eval_steps_per_second": 13.835,
"step": 6494
},
{
"epoch": 2.0,
"learning_rate": 2.6997228210655992e-05,
"loss": 0.0228,
"step": 6500
},
{
"epoch": 2.16,
"learning_rate": 2.6766245765321838e-05,
"loss": 0.014,
"step": 7000
},
{
"epoch": 2.31,
"learning_rate": 2.653526331998768e-05,
"loss": 0.0143,
"step": 7500
},
{
"epoch": 2.46,
"learning_rate": 2.6304280874653526e-05,
"loss": 0.0158,
"step": 8000
},
{
"epoch": 2.62,
"learning_rate": 2.6073298429319372e-05,
"loss": 0.015,
"step": 8500
},
{
"epoch": 2.77,
"learning_rate": 2.5842315983985218e-05,
"loss": 0.0142,
"step": 9000
},
{
"epoch": 2.93,
"learning_rate": 2.5611333538651064e-05,
"loss": 0.0147,
"step": 9500
},
{
"epoch": 3.0,
"eval_accuracy": 0.9930254150662127,
"eval_f1": 0.9280089446645269,
"eval_loss": 0.024331996217370033,
"eval_precision": 0.9160101990333752,
"eval_recall": 0.9403262037308331,
"eval_runtime": 50.2727,
"eval_samples_per_second": 442.188,
"eval_steps_per_second": 13.825,
"step": 9741
},
{
"epoch": 3.08,
"learning_rate": 2.538035109331691e-05,
"loss": 0.0122,
"step": 10000
},
{
"epoch": 3.23,
"learning_rate": 2.5149368647982755e-05,
"loss": 0.0103,
"step": 10500
},
{
"epoch": 3.39,
"learning_rate": 2.4918386202648598e-05,
"loss": 0.0095,
"step": 11000
},
{
"epoch": 3.54,
"learning_rate": 2.4687403757314447e-05,
"loss": 0.0111,
"step": 11500
},
{
"epoch": 3.7,
"learning_rate": 2.4456421311980292e-05,
"loss": 0.0099,
"step": 12000
},
{
"epoch": 3.85,
"learning_rate": 2.4225438866646135e-05,
"loss": 0.0097,
"step": 12500
},
{
"epoch": 4.0,
"eval_accuracy": 0.9936326074754469,
"eval_f1": 0.9362323294406885,
"eval_loss": 0.027174057438969612,
"eval_precision": 0.920862222054295,
"eval_recall": 0.9521242308819221,
"eval_runtime": 50.3309,
"eval_samples_per_second": 441.677,
"eval_steps_per_second": 13.809,
"step": 12988
},
{
"epoch": 4.0,
"learning_rate": 2.399445642131198e-05,
"loss": 0.0102,
"step": 13000
},
{
"epoch": 4.16,
"learning_rate": 2.3763473975977826e-05,
"loss": 0.0076,
"step": 13500
},
{
"epoch": 4.31,
"learning_rate": 2.3532491530643672e-05,
"loss": 0.007,
"step": 14000
},
{
"epoch": 4.47,
"learning_rate": 2.3301509085309518e-05,
"loss": 0.007,
"step": 14500
},
{
"epoch": 4.62,
"learning_rate": 2.3070526639975364e-05,
"loss": 0.0082,
"step": 15000
},
{
"epoch": 4.77,
"learning_rate": 2.2839544194641206e-05,
"loss": 0.0079,
"step": 15500
},
{
"epoch": 4.93,
"learning_rate": 2.2608561749307052e-05,
"loss": 0.0077,
"step": 16000
},
{
"epoch": 5.0,
"eval_accuracy": 0.9943201214002335,
"eval_f1": 0.9403489135049596,
"eval_loss": 0.023451806977391243,
"eval_precision": 0.929177551915486,
"eval_recall": 0.9517921672038285,
"eval_runtime": 50.2007,
"eval_samples_per_second": 442.822,
"eval_steps_per_second": 13.844,
"step": 16235
},
{
"epoch": 5.08,
"learning_rate": 2.2377579303972898e-05,
"loss": 0.0064,
"step": 16500
},
{
"epoch": 5.24,
"learning_rate": 2.2146596858638743e-05,
"loss": 0.0052,
"step": 17000
},
{
"epoch": 5.39,
"learning_rate": 2.191561441330459e-05,
"loss": 0.0055,
"step": 17500
},
{
"epoch": 5.54,
"learning_rate": 2.1684631967970435e-05,
"loss": 0.0061,
"step": 18000
},
{
"epoch": 5.7,
"learning_rate": 2.145364952263628e-05,
"loss": 0.0056,
"step": 18500
},
{
"epoch": 5.85,
"learning_rate": 2.1222667077302123e-05,
"loss": 0.0054,
"step": 19000
},
{
"epoch": 6.0,
"eval_accuracy": 0.994450643863006,
"eval_f1": 0.9450084195333173,
"eval_loss": 0.0263934638351202,
"eval_precision": 0.9312535558505595,
"eval_recall": 0.9591757007520265,
"eval_runtime": 50.2902,
"eval_samples_per_second": 442.035,
"eval_steps_per_second": 13.82,
"step": 19482
},
{
"epoch": 6.01,
"learning_rate": 2.0991684631967972e-05,
"loss": 0.0062,
"step": 19500
},
{
"epoch": 6.16,
"learning_rate": 2.0760702186633818e-05,
"loss": 0.0033,
"step": 20000
},
{
"epoch": 6.31,
"learning_rate": 2.052971974129966e-05,
"loss": 0.0042,
"step": 20500
},
{
"epoch": 6.47,
"learning_rate": 2.0298737295965506e-05,
"loss": 0.0042,
"step": 21000
},
{
"epoch": 6.62,
"learning_rate": 2.0067754850631352e-05,
"loss": 0.0045,
"step": 21500
},
{
"epoch": 6.78,
"learning_rate": 1.9836772405297198e-05,
"loss": 0.0039,
"step": 22000
},
{
"epoch": 6.93,
"learning_rate": 1.9605789959963044e-05,
"loss": 0.0043,
"step": 22500
},
{
"epoch": 7.0,
"eval_accuracy": 0.9948756785494699,
"eval_f1": 0.9503865101154663,
"eval_loss": 0.026261983439326286,
"eval_precision": 0.9473980473223471,
"eval_recall": 0.9533938861216916,
"eval_runtime": 50.3703,
"eval_samples_per_second": 441.332,
"eval_steps_per_second": 13.798,
"step": 22729
},
{
"epoch": 7.08,
"learning_rate": 1.937480751462889e-05,
"loss": 0.004,
"step": 23000
},
{
"epoch": 7.24,
"learning_rate": 1.914382506929473e-05,
"loss": 0.0029,
"step": 23500
},
{
"epoch": 7.39,
"learning_rate": 1.8912842623960577e-05,
"loss": 0.0031,
"step": 24000
},
{
"epoch": 7.55,
"learning_rate": 1.8681860178626427e-05,
"loss": 0.0034,
"step": 24500
},
{
"epoch": 7.7,
"learning_rate": 1.8450877733292272e-05,
"loss": 0.0037,
"step": 25000
},
{
"epoch": 7.85,
"learning_rate": 1.8219895287958115e-05,
"loss": 0.0036,
"step": 25500
},
{
"epoch": 8.0,
"eval_accuracy": 0.994658141111516,
"eval_f1": 0.9494068997650896,
"eval_loss": 0.02981843240559101,
"eval_precision": 0.943654361962835,
"eval_recall": 0.9552300029299736,
"eval_runtime": 50.1982,
"eval_samples_per_second": 442.845,
"eval_steps_per_second": 13.845,
"step": 25976
},
{
"epoch": 8.01,
"learning_rate": 1.798891284262396e-05,
"loss": 0.0036,
"step": 26000
},
{
"epoch": 8.16,
"learning_rate": 1.7757930397289806e-05,
"loss": 0.0027,
"step": 26500
},
{
"epoch": 8.32,
"learning_rate": 1.7526947951955652e-05,
"loss": 0.0026,
"step": 27000
},
{
"epoch": 8.47,
"learning_rate": 1.7295965506621498e-05,
"loss": 0.0029,
"step": 27500
},
{
"epoch": 8.62,
"learning_rate": 1.7064983061287344e-05,
"loss": 0.0028,
"step": 28000
},
{
"epoch": 8.78,
"learning_rate": 1.6834000615953186e-05,
"loss": 0.0029,
"step": 28500
},
{
"epoch": 8.93,
"learning_rate": 1.6603018170619032e-05,
"loss": 0.0027,
"step": 29000
},
{
"epoch": 9.0,
"eval_accuracy": 0.9942837854765313,
"eval_f1": 0.9483075157773954,
"eval_loss": 0.03117133490741253,
"eval_precision": 0.9288564203427929,
"eval_recall": 0.9685906826838558,
"eval_runtime": 50.2531,
"eval_samples_per_second": 442.361,
"eval_steps_per_second": 13.83,
"step": 29223
},
{
"epoch": 9.09,
"learning_rate": 1.637203572528488e-05,
"loss": 0.0028,
"step": 29500
},
{
"epoch": 9.24,
"learning_rate": 1.6141053279950723e-05,
"loss": 0.0019,
"step": 30000
},
{
"epoch": 9.39,
"learning_rate": 1.591007083461657e-05,
"loss": 0.0022,
"step": 30500
},
{
"epoch": 9.55,
"learning_rate": 1.5679088389282415e-05,
"loss": 0.0024,
"step": 31000
},
{
"epoch": 9.7,
"learning_rate": 1.544810594394826e-05,
"loss": 0.0024,
"step": 31500
},
{
"epoch": 9.86,
"learning_rate": 1.5217123498614105e-05,
"loss": 0.0023,
"step": 32000
},
{
"epoch": 10.0,
"eval_accuracy": 0.9948642040472482,
"eval_f1": 0.9512794425354655,
"eval_loss": 0.036810312420129776,
"eval_precision": 0.935158800551037,
"eval_recall": 0.9679656216427386,
"eval_runtime": 50.4106,
"eval_samples_per_second": 440.978,
"eval_steps_per_second": 13.787,
"step": 32470
},
{
"epoch": 10.01,
"learning_rate": 1.4986141053279952e-05,
"loss": 0.0024,
"step": 32500
},
{
"epoch": 10.16,
"learning_rate": 1.4755158607945796e-05,
"loss": 0.0018,
"step": 33000
},
{
"epoch": 10.32,
"learning_rate": 1.4524176162611642e-05,
"loss": 0.0018,
"step": 33500
},
{
"epoch": 10.47,
"learning_rate": 1.4293193717277488e-05,
"loss": 0.002,
"step": 34000
},
{
"epoch": 10.63,
"learning_rate": 1.4062211271943332e-05,
"loss": 0.0024,
"step": 34500
},
{
"epoch": 10.78,
"learning_rate": 1.383122882660918e-05,
"loss": 0.002,
"step": 35000
},
{
"epoch": 10.93,
"learning_rate": 1.3600246381275023e-05,
"loss": 0.0019,
"step": 35500
},
{
"epoch": 11.0,
"eval_accuracy": 0.9952223041374186,
"eval_f1": 0.9571868114802629,
"eval_loss": 0.034278471022844315,
"eval_precision": 0.9512868001697727,
"eval_recall": 0.9631604648891493,
"eval_runtime": 50.1571,
"eval_samples_per_second": 443.208,
"eval_steps_per_second": 13.856,
"step": 35717
},
{
"epoch": 11.09,
"learning_rate": 1.3369263935940867e-05,
"loss": 0.0018,
"step": 36000
},
{
"epoch": 11.24,
"learning_rate": 1.3138281490606715e-05,
"loss": 0.0014,
"step": 36500
},
{
"epoch": 11.4,
"learning_rate": 1.2907299045272559e-05,
"loss": 0.0016,
"step": 37000
},
{
"epoch": 11.55,
"learning_rate": 1.2676316599938406e-05,
"loss": 0.0016,
"step": 37500
},
{
"epoch": 11.7,
"learning_rate": 1.244533415460425e-05,
"loss": 0.0014,
"step": 38000
},
{
"epoch": 11.86,
"learning_rate": 1.2214351709270095e-05,
"loss": 0.0018,
"step": 38500
},
{
"epoch": 12.0,
"eval_accuracy": 0.9950344091635375,
"eval_f1": 0.9542591914950985,
"eval_loss": 0.03573078662157059,
"eval_precision": 0.941064747108317,
"eval_recall": 0.9678288895399941,
"eval_runtime": 50.2611,
"eval_samples_per_second": 442.291,
"eval_steps_per_second": 13.828,
"step": 38964
},
{
"epoch": 12.01,
"learning_rate": 1.1983369263935942e-05,
"loss": 0.0015,
"step": 39000
},
{
"epoch": 12.17,
"learning_rate": 1.1752386818601786e-05,
"loss": 0.0014,
"step": 39500
},
{
"epoch": 12.32,
"learning_rate": 1.1521404373267632e-05,
"loss": 0.0016,
"step": 40000
},
{
"epoch": 12.47,
"learning_rate": 1.1290421927933478e-05,
"loss": 0.0017,
"step": 40500
},
{
"epoch": 12.63,
"learning_rate": 1.1059439482599322e-05,
"loss": 0.0011,
"step": 41000
},
{
"epoch": 12.78,
"learning_rate": 1.082845703726517e-05,
"loss": 0.0014,
"step": 41500
},
{
"epoch": 12.94,
"learning_rate": 1.0597474591931013e-05,
"loss": 0.0014,
"step": 42000
},
{
"epoch": 13.0,
"eval_accuracy": 0.9954953016694444,
"eval_f1": 0.9582205552548141,
"eval_loss": 0.034824222326278687,
"eval_precision": 0.9485197022180545,
"eval_recall": 0.9681218869030179,
"eval_runtime": 50.5254,
"eval_samples_per_second": 439.977,
"eval_steps_per_second": 13.755,
"step": 42211
},
{
"epoch": 13.09,
"learning_rate": 1.0366492146596857e-05,
"loss": 0.0013,
"step": 42500
},
{
"epoch": 13.24,
"learning_rate": 1.0135509701262705e-05,
"loss": 0.001,
"step": 43000
},
{
"epoch": 13.4,
"learning_rate": 9.904527255928549e-06,
"loss": 0.0011,
"step": 43500
},
{
"epoch": 13.55,
"learning_rate": 9.673544810594396e-06,
"loss": 0.0012,
"step": 44000
},
{
"epoch": 13.7,
"learning_rate": 9.44256236526024e-06,
"loss": 0.0013,
"step": 44500
},
{
"epoch": 13.86,
"learning_rate": 9.211579919926085e-06,
"loss": 0.0012,
"step": 45000
},
{
"epoch": 14.0,
"eval_accuracy": 0.99525290281001,
"eval_f1": 0.9559630182389084,
"eval_loss": 0.03898163139820099,
"eval_precision": 0.9486449577811544,
"eval_recall": 0.9633948627795683,
"eval_runtime": 50.2226,
"eval_samples_per_second": 442.629,
"eval_steps_per_second": 13.838,
"step": 45458
},
{
"epoch": 14.01,
"learning_rate": 8.980597474591932e-06,
"loss": 0.0012,
"step": 45500
},
{
"epoch": 14.17,
"learning_rate": 8.749615029257776e-06,
"loss": 0.0007,
"step": 46000
},
{
"epoch": 14.32,
"learning_rate": 8.518632583923622e-06,
"loss": 0.0009,
"step": 46500
},
{
"epoch": 14.47,
"learning_rate": 8.287650138589468e-06,
"loss": 0.0013,
"step": 47000
},
{
"epoch": 14.63,
"learning_rate": 8.056667693255312e-06,
"loss": 0.0012,
"step": 47500
},
{
"epoch": 14.78,
"learning_rate": 7.82568524792116e-06,
"loss": 0.001,
"step": 48000
},
{
"epoch": 14.94,
"learning_rate": 7.594702802587003e-06,
"loss": 0.001,
"step": 48500
},
{
"epoch": 15.0,
"eval_accuracy": 0.9952519466014915,
"eval_f1": 0.9577630980820753,
"eval_loss": 0.04225644841790199,
"eval_precision": 0.9485612475573777,
"eval_recall": 0.9671452290262721,
"eval_runtime": 56.1286,
"eval_samples_per_second": 396.055,
"eval_steps_per_second": 12.382,
"step": 48705
},
{
"epoch": 15.09,
"learning_rate": 7.363720357252849e-06,
"loss": 0.0009,
"step": 49000
},
{
"epoch": 15.24,
"learning_rate": 7.132737911918695e-06,
"loss": 0.0008,
"step": 49500
},
{
"epoch": 15.4,
"learning_rate": 6.90175546658454e-06,
"loss": 0.0009,
"step": 50000
},
{
"epoch": 15.55,
"learning_rate": 6.6707730212503855e-06,
"loss": 0.0008,
"step": 50500
},
{
"epoch": 15.71,
"learning_rate": 6.43979057591623e-06,
"loss": 0.0007,
"step": 51000
},
{
"epoch": 15.86,
"learning_rate": 6.208808130582076e-06,
"loss": 0.0008,
"step": 51500
},
{
"epoch": 16.0,
"eval_accuracy": 0.9950430150402038,
"eval_f1": 0.9558787995940656,
"eval_loss": 0.04262382909655571,
"eval_precision": 0.9460493590969964,
"eval_recall": 0.9659146401015725,
"eval_runtime": 50.4693,
"eval_samples_per_second": 440.466,
"eval_steps_per_second": 13.771,
"step": 51952
},
{
"epoch": 16.01,
"learning_rate": 5.977825685247921e-06,
"loss": 0.0008,
"step": 52000
},
{
"epoch": 16.17,
"learning_rate": 5.746843239913767e-06,
"loss": 0.0008,
"step": 52500
},
{
"epoch": 16.32,
"learning_rate": 5.515860794579612e-06,
"loss": 0.0008,
"step": 53000
},
{
"epoch": 16.48,
"learning_rate": 5.2848783492454576e-06,
"loss": 0.0009,
"step": 53500
},
{
"epoch": 16.63,
"learning_rate": 5.0538959039113025e-06,
"loss": 0.0008,
"step": 54000
},
{
"epoch": 16.78,
"learning_rate": 4.822913458577148e-06,
"loss": 0.0006,
"step": 54500
},
{
"epoch": 16.94,
"learning_rate": 4.591931013242994e-06,
"loss": 0.001,
"step": 55000
},
{
"epoch": 17.0,
"eval_accuracy": 0.9952113077394561,
"eval_f1": 0.9582838092477171,
"eval_loss": 0.04263555258512497,
"eval_precision": 0.9482501434308663,
"eval_recall": 0.9685320832112511,
"eval_runtime": 50.1705,
"eval_samples_per_second": 443.089,
"eval_steps_per_second": 13.853,
"step": 55199
},
{
"epoch": 17.09,
"learning_rate": 4.360948567908839e-06,
"loss": 0.0008,
"step": 55500
},
{
"epoch": 17.25,
"learning_rate": 4.129966122574684e-06,
"loss": 0.0008,
"step": 56000
},
{
"epoch": 17.4,
"learning_rate": 3.89898367724053e-06,
"loss": 0.0008,
"step": 56500
},
{
"epoch": 17.55,
"learning_rate": 3.668001231906375e-06,
"loss": 0.0007,
"step": 57000
},
{
"epoch": 17.71,
"learning_rate": 3.4370187865722208e-06,
"loss": 0.0007,
"step": 57500
},
{
"epoch": 17.86,
"learning_rate": 3.206036341238066e-06,
"loss": 0.0006,
"step": 58000
},
{
"epoch": 18.0,
"eval_accuracy": 0.9952199136161224,
"eval_f1": 0.9574789135649616,
"eval_loss": 0.045220062136650085,
"eval_precision": 0.9483607656785913,
"eval_recall": 0.9667740990331087,
"eval_runtime": 50.3784,
"eval_samples_per_second": 441.26,
"eval_steps_per_second": 13.796,
"step": 58446
},
{
"epoch": 18.02,
"learning_rate": 2.9750538959039115e-06,
"loss": 0.0007,
"step": 58500
},
{
"epoch": 18.17,
"learning_rate": 2.744071450569757e-06,
"loss": 0.0007,
"step": 59000
},
{
"epoch": 18.32,
"learning_rate": 2.513089005235602e-06,
"loss": 0.0006,
"step": 59500
},
{
"epoch": 18.48,
"learning_rate": 2.2821065599014475e-06,
"loss": 0.0006,
"step": 60000
},
{
"epoch": 18.63,
"learning_rate": 2.051124114567293e-06,
"loss": 0.0007,
"step": 60500
},
{
"epoch": 18.79,
"learning_rate": 1.8201416692331382e-06,
"loss": 0.0007,
"step": 61000
},
{
"epoch": 18.94,
"learning_rate": 1.5891592238989836e-06,
"loss": 0.0006,
"step": 61500
},
{
"epoch": 19.0,
"eval_accuracy": 0.9954895644183336,
"eval_f1": 0.959768669851647,
"eval_loss": 0.046882398426532745,
"eval_precision": 0.9504664125500412,
"eval_recall": 0.969254810040043,
"eval_runtime": 50.5066,
"eval_samples_per_second": 440.141,
"eval_steps_per_second": 13.761,
"step": 61693
}
],
"max_steps": 64940,
"num_train_epochs": 20,
"total_flos": 5.157312926507336e+17,
"trial_name": null,
"trial_params": null
}