{ "best_metric": null, "best_model_checkpoint": null, "epoch": 15.0, "global_step": 995085, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 2.9984925910851835e-05, "loss": 0.1438, "step": 500 }, { "epoch": 0.02, "learning_rate": 2.9969851821703676e-05, "loss": 0.1079, "step": 1000 }, { "epoch": 0.02, "learning_rate": 2.995477773255551e-05, "loss": 0.101, "step": 1500 }, { "epoch": 0.03, "learning_rate": 2.9939703643407347e-05, "loss": 0.0991, "step": 2000 }, { "epoch": 0.04, "learning_rate": 2.9924629554259184e-05, "loss": 0.0906, "step": 2500 }, { "epoch": 0.05, "learning_rate": 2.9909555465111022e-05, "loss": 0.0929, "step": 3000 }, { "epoch": 0.05, "learning_rate": 2.9894481375962856e-05, "loss": 0.09, "step": 3500 }, { "epoch": 0.06, "learning_rate": 2.9879407286814693e-05, "loss": 0.0875, "step": 4000 }, { "epoch": 0.07, "learning_rate": 2.986433319766653e-05, "loss": 0.0922, "step": 4500 }, { "epoch": 0.08, "learning_rate": 2.9849259108518368e-05, "loss": 0.0882, "step": 5000 }, { "epoch": 0.08, "learning_rate": 2.9834185019370202e-05, "loss": 0.0895, "step": 5500 }, { "epoch": 0.09, "learning_rate": 2.9819110930222043e-05, "loss": 0.0889, "step": 6000 }, { "epoch": 0.1, "learning_rate": 2.9804036841073877e-05, "loss": 0.0842, "step": 6500 }, { "epoch": 0.11, "learning_rate": 2.9788962751925715e-05, "loss": 0.0846, "step": 7000 }, { "epoch": 0.11, "learning_rate": 2.9773888662777552e-05, "loss": 0.0841, "step": 7500 }, { "epoch": 0.12, "learning_rate": 2.975881457362939e-05, "loss": 0.0842, "step": 8000 }, { "epoch": 0.13, "learning_rate": 2.9743740484481223e-05, "loss": 0.0827, "step": 8500 }, { "epoch": 0.14, "learning_rate": 2.9728666395333064e-05, "loss": 0.0826, "step": 9000 }, { "epoch": 0.14, "learning_rate": 2.9713592306184898e-05, "loss": 0.0796, "step": 9500 }, { "epoch": 0.15, "learning_rate": 2.9698518217036736e-05, "loss": 0.0818, "step": 10000 }, { "epoch": 0.16, "learning_rate": 2.9683444127888573e-05, "loss": 0.0806, "step": 10500 }, { "epoch": 0.17, "learning_rate": 2.966837003874041e-05, "loss": 0.0829, "step": 11000 }, { "epoch": 0.17, "learning_rate": 2.9653295949592245e-05, "loss": 0.0824, "step": 11500 }, { "epoch": 0.18, "learning_rate": 2.9638221860444082e-05, "loss": 0.0798, "step": 12000 }, { "epoch": 0.19, "learning_rate": 2.962314777129592e-05, "loss": 0.0808, "step": 12500 }, { "epoch": 0.2, "learning_rate": 2.9608073682147757e-05, "loss": 0.0802, "step": 13000 }, { "epoch": 0.2, "learning_rate": 2.959299959299959e-05, "loss": 0.0782, "step": 13500 }, { "epoch": 0.21, "learning_rate": 2.957792550385143e-05, "loss": 0.0783, "step": 14000 }, { "epoch": 0.22, "learning_rate": 2.9562851414703266e-05, "loss": 0.0766, "step": 14500 }, { "epoch": 0.23, "learning_rate": 2.9547777325555103e-05, "loss": 0.0779, "step": 15000 }, { "epoch": 0.23, "learning_rate": 2.953270323640694e-05, "loss": 0.0748, "step": 15500 }, { "epoch": 0.24, "learning_rate": 2.9517629147258778e-05, "loss": 0.0784, "step": 16000 }, { "epoch": 0.25, "learning_rate": 2.9502555058110612e-05, "loss": 0.0771, "step": 16500 }, { "epoch": 0.26, "learning_rate": 2.9487480968962453e-05, "loss": 0.0752, "step": 17000 }, { "epoch": 0.26, "learning_rate": 2.9472406879814287e-05, "loss": 0.0764, "step": 17500 }, { "epoch": 0.27, "learning_rate": 2.9457332790666124e-05, "loss": 0.0795, "step": 18000 }, { "epoch": 0.28, "learning_rate": 2.944225870151796e-05, "loss": 0.0764, "step": 18500 }, { "epoch": 0.29, "learning_rate": 2.94271846123698e-05, "loss": 0.0735, "step": 19000 }, { "epoch": 0.29, "learning_rate": 2.9412110523221633e-05, "loss": 0.0734, "step": 19500 }, { "epoch": 0.3, "learning_rate": 2.939703643407347e-05, "loss": 0.0741, "step": 20000 }, { "epoch": 0.31, "learning_rate": 2.9381962344925308e-05, "loss": 0.0749, "step": 20500 }, { "epoch": 0.32, "learning_rate": 2.9366888255777145e-05, "loss": 0.0731, "step": 21000 }, { "epoch": 0.32, "learning_rate": 2.935181416662898e-05, "loss": 0.0738, "step": 21500 }, { "epoch": 0.33, "learning_rate": 2.933674007748082e-05, "loss": 0.0733, "step": 22000 }, { "epoch": 0.34, "learning_rate": 2.9321665988332654e-05, "loss": 0.0724, "step": 22500 }, { "epoch": 0.35, "learning_rate": 2.930659189918449e-05, "loss": 0.0746, "step": 23000 }, { "epoch": 0.35, "learning_rate": 2.929151781003633e-05, "loss": 0.0727, "step": 23500 }, { "epoch": 0.36, "learning_rate": 2.9276443720888167e-05, "loss": 0.0731, "step": 24000 }, { "epoch": 0.37, "learning_rate": 2.926136963174e-05, "loss": 0.0709, "step": 24500 }, { "epoch": 0.38, "learning_rate": 2.924629554259184e-05, "loss": 0.0697, "step": 25000 }, { "epoch": 0.38, "learning_rate": 2.9231221453443675e-05, "loss": 0.0711, "step": 25500 }, { "epoch": 0.39, "learning_rate": 2.9216147364295513e-05, "loss": 0.0728, "step": 26000 }, { "epoch": 0.4, "learning_rate": 2.920107327514735e-05, "loss": 0.0719, "step": 26500 }, { "epoch": 0.41, "learning_rate": 2.9185999185999188e-05, "loss": 0.0719, "step": 27000 }, { "epoch": 0.41, "learning_rate": 2.9170925096851022e-05, "loss": 0.0726, "step": 27500 }, { "epoch": 0.42, "learning_rate": 2.915585100770286e-05, "loss": 0.0689, "step": 28000 }, { "epoch": 0.43, "learning_rate": 2.9140776918554697e-05, "loss": 0.0712, "step": 28500 }, { "epoch": 0.44, "learning_rate": 2.9125702829406534e-05, "loss": 0.0698, "step": 29000 }, { "epoch": 0.44, "learning_rate": 2.9110628740258368e-05, "loss": 0.0714, "step": 29500 }, { "epoch": 0.45, "learning_rate": 2.909555465111021e-05, "loss": 0.0689, "step": 30000 }, { "epoch": 0.46, "learning_rate": 2.9080480561962043e-05, "loss": 0.0671, "step": 30500 }, { "epoch": 0.47, "learning_rate": 2.906540647281388e-05, "loss": 0.0706, "step": 31000 }, { "epoch": 0.47, "learning_rate": 2.9050332383665718e-05, "loss": 0.0706, "step": 31500 }, { "epoch": 0.48, "learning_rate": 2.9035258294517555e-05, "loss": 0.0688, "step": 32000 }, { "epoch": 0.49, "learning_rate": 2.902018420536939e-05, "loss": 0.0689, "step": 32500 }, { "epoch": 0.5, "learning_rate": 2.900511011622123e-05, "loss": 0.0677, "step": 33000 }, { "epoch": 0.5, "learning_rate": 2.8990036027073064e-05, "loss": 0.0681, "step": 33500 }, { "epoch": 0.51, "learning_rate": 2.89749619379249e-05, "loss": 0.0682, "step": 34000 }, { "epoch": 0.52, "learning_rate": 2.8959887848776735e-05, "loss": 0.0685, "step": 34500 }, { "epoch": 0.53, "learning_rate": 2.8944813759628576e-05, "loss": 0.069, "step": 35000 }, { "epoch": 0.54, "learning_rate": 2.892973967048041e-05, "loss": 0.0671, "step": 35500 }, { "epoch": 0.54, "learning_rate": 2.8914665581332248e-05, "loss": 0.0692, "step": 36000 }, { "epoch": 0.55, "learning_rate": 2.8899591492184085e-05, "loss": 0.068, "step": 36500 }, { "epoch": 0.56, "learning_rate": 2.8884517403035923e-05, "loss": 0.0671, "step": 37000 }, { "epoch": 0.57, "learning_rate": 2.8869443313887757e-05, "loss": 0.0651, "step": 37500 }, { "epoch": 0.57, "learning_rate": 2.8854369224739597e-05, "loss": 0.0651, "step": 38000 }, { "epoch": 0.58, "learning_rate": 2.883929513559143e-05, "loss": 0.0674, "step": 38500 }, { "epoch": 0.59, "learning_rate": 2.882422104644327e-05, "loss": 0.0674, "step": 39000 }, { "epoch": 0.6, "learning_rate": 2.8809146957295106e-05, "loss": 0.067, "step": 39500 }, { "epoch": 0.6, "learning_rate": 2.8794072868146944e-05, "loss": 0.0667, "step": 40000 }, { "epoch": 0.61, "learning_rate": 2.8778998778998778e-05, "loss": 0.0664, "step": 40500 }, { "epoch": 0.62, "learning_rate": 2.876392468985062e-05, "loss": 0.0674, "step": 41000 }, { "epoch": 0.63, "learning_rate": 2.8748850600702453e-05, "loss": 0.0647, "step": 41500 }, { "epoch": 0.63, "learning_rate": 2.873377651155429e-05, "loss": 0.0674, "step": 42000 }, { "epoch": 0.64, "learning_rate": 2.8718702422406124e-05, "loss": 0.0669, "step": 42500 }, { "epoch": 0.65, "learning_rate": 2.8703628333257965e-05, "loss": 0.0643, "step": 43000 }, { "epoch": 0.66, "learning_rate": 2.86885542441098e-05, "loss": 0.0673, "step": 43500 }, { "epoch": 0.66, "learning_rate": 2.8673480154961636e-05, "loss": 0.0664, "step": 44000 }, { "epoch": 0.67, "learning_rate": 2.8658406065813474e-05, "loss": 0.0671, "step": 44500 }, { "epoch": 0.68, "learning_rate": 2.864333197666531e-05, "loss": 0.0648, "step": 45000 }, { "epoch": 0.69, "learning_rate": 2.8628257887517145e-05, "loss": 0.0673, "step": 45500 }, { "epoch": 0.69, "learning_rate": 2.8613183798368986e-05, "loss": 0.0657, "step": 46000 }, { "epoch": 0.7, "learning_rate": 2.859810970922082e-05, "loss": 0.0654, "step": 46500 }, { "epoch": 0.71, "learning_rate": 2.8583035620072657e-05, "loss": 0.0643, "step": 47000 }, { "epoch": 0.72, "learning_rate": 2.8567961530924495e-05, "loss": 0.0626, "step": 47500 }, { "epoch": 0.72, "learning_rate": 2.8552887441776332e-05, "loss": 0.0659, "step": 48000 }, { "epoch": 0.73, "learning_rate": 2.8537813352628166e-05, "loss": 0.0626, "step": 48500 }, { "epoch": 0.74, "learning_rate": 2.8522739263480007e-05, "loss": 0.0644, "step": 49000 }, { "epoch": 0.75, "learning_rate": 2.850766517433184e-05, "loss": 0.0654, "step": 49500 }, { "epoch": 0.75, "learning_rate": 2.849259108518368e-05, "loss": 0.062, "step": 50000 }, { "epoch": 0.76, "learning_rate": 2.8477516996035513e-05, "loss": 0.0631, "step": 50500 }, { "epoch": 0.77, "learning_rate": 2.8462442906887353e-05, "loss": 0.0642, "step": 51000 }, { "epoch": 0.78, "learning_rate": 2.8447368817739187e-05, "loss": 0.0635, "step": 51500 }, { "epoch": 0.78, "learning_rate": 2.8432294728591025e-05, "loss": 0.0624, "step": 52000 }, { "epoch": 0.79, "learning_rate": 2.8417220639442862e-05, "loss": 0.063, "step": 52500 }, { "epoch": 0.8, "learning_rate": 2.84021465502947e-05, "loss": 0.0631, "step": 53000 }, { "epoch": 0.81, "learning_rate": 2.8387072461146534e-05, "loss": 0.0613, "step": 53500 }, { "epoch": 0.81, "learning_rate": 2.8371998371998375e-05, "loss": 0.0607, "step": 54000 }, { "epoch": 0.82, "learning_rate": 2.835692428285021e-05, "loss": 0.0645, "step": 54500 }, { "epoch": 0.83, "learning_rate": 2.8341850193702046e-05, "loss": 0.0644, "step": 55000 }, { "epoch": 0.84, "learning_rate": 2.8326776104553883e-05, "loss": 0.0638, "step": 55500 }, { "epoch": 0.84, "learning_rate": 2.831170201540572e-05, "loss": 0.0633, "step": 56000 }, { "epoch": 0.85, "learning_rate": 2.8296627926257555e-05, "loss": 0.0621, "step": 56500 }, { "epoch": 0.86, "learning_rate": 2.8281553837109396e-05, "loss": 0.0645, "step": 57000 }, { "epoch": 0.87, "learning_rate": 2.826647974796123e-05, "loss": 0.0622, "step": 57500 }, { "epoch": 0.87, "learning_rate": 2.8251405658813067e-05, "loss": 0.0618, "step": 58000 }, { "epoch": 0.88, "learning_rate": 2.82363315696649e-05, "loss": 0.06, "step": 58500 }, { "epoch": 0.89, "learning_rate": 2.8221257480516742e-05, "loss": 0.0622, "step": 59000 }, { "epoch": 0.9, "learning_rate": 2.8206183391368576e-05, "loss": 0.0603, "step": 59500 }, { "epoch": 0.9, "learning_rate": 2.8191109302220413e-05, "loss": 0.063, "step": 60000 }, { "epoch": 0.91, "learning_rate": 2.817603521307225e-05, "loss": 0.064, "step": 60500 }, { "epoch": 0.92, "learning_rate": 2.816096112392409e-05, "loss": 0.0652, "step": 61000 }, { "epoch": 0.93, "learning_rate": 2.8145887034775922e-05, "loss": 0.0642, "step": 61500 }, { "epoch": 0.93, "learning_rate": 2.8130812945627763e-05, "loss": 0.0629, "step": 62000 }, { "epoch": 0.94, "learning_rate": 2.8115738856479597e-05, "loss": 0.0642, "step": 62500 }, { "epoch": 0.95, "learning_rate": 2.8100664767331435e-05, "loss": 0.0632, "step": 63000 }, { "epoch": 0.96, "learning_rate": 2.8085590678183272e-05, "loss": 0.0602, "step": 63500 }, { "epoch": 0.96, "learning_rate": 2.807051658903511e-05, "loss": 0.0615, "step": 64000 }, { "epoch": 0.97, "learning_rate": 2.8055442499886943e-05, "loss": 0.0639, "step": 64500 }, { "epoch": 0.98, "learning_rate": 2.804036841073878e-05, "loss": 0.0603, "step": 65000 }, { "epoch": 0.99, "learning_rate": 2.802529432159062e-05, "loss": 0.0626, "step": 65500 }, { "epoch": 0.99, "learning_rate": 2.8010220232442456e-05, "loss": 0.0631, "step": 66000 }, { "epoch": 1.0, "eval_accuracy": 0.9753165380931752, "eval_f1": 0.8856332889806651, "eval_loss": 0.05840853601694107, "eval_precision": 0.8600431733567476, "eval_recall": 0.9127929453033121, "eval_runtime": 275.276, "eval_samples_per_second": 428.385, "eval_steps_per_second": 26.777, "step": 66339 }, { "epoch": 1.0, "learning_rate": 2.799514614329429e-05, "loss": 0.0583, "step": 66500 }, { "epoch": 1.01, "learning_rate": 2.798007205414613e-05, "loss": 0.0548, "step": 67000 }, { "epoch": 1.02, "learning_rate": 2.7964997964997965e-05, "loss": 0.054, "step": 67500 }, { "epoch": 1.03, "learning_rate": 2.7949923875849802e-05, "loss": 0.0535, "step": 68000 }, { "epoch": 1.03, "learning_rate": 2.793484978670164e-05, "loss": 0.0555, "step": 68500 }, { "epoch": 1.04, "learning_rate": 2.7919775697553477e-05, "loss": 0.0537, "step": 69000 }, { "epoch": 1.05, "learning_rate": 2.790470160840531e-05, "loss": 0.0562, "step": 69500 }, { "epoch": 1.06, "learning_rate": 2.7889627519257152e-05, "loss": 0.0548, "step": 70000 }, { "epoch": 1.06, "learning_rate": 2.7874553430108986e-05, "loss": 0.0536, "step": 70500 }, { "epoch": 1.07, "learning_rate": 2.7859479340960823e-05, "loss": 0.0539, "step": 71000 }, { "epoch": 1.08, "learning_rate": 2.784440525181266e-05, "loss": 0.052, "step": 71500 }, { "epoch": 1.09, "learning_rate": 2.7829331162664498e-05, "loss": 0.0559, "step": 72000 }, { "epoch": 1.09, "learning_rate": 2.7814257073516332e-05, "loss": 0.0532, "step": 72500 }, { "epoch": 1.1, "learning_rate": 2.779918298436817e-05, "loss": 0.0548, "step": 73000 }, { "epoch": 1.11, "learning_rate": 2.7784108895220007e-05, "loss": 0.0564, "step": 73500 }, { "epoch": 1.12, "learning_rate": 2.7769034806071844e-05, "loss": 0.0545, "step": 74000 }, { "epoch": 1.12, "learning_rate": 2.775396071692368e-05, "loss": 0.0553, "step": 74500 }, { "epoch": 1.13, "learning_rate": 2.773888662777552e-05, "loss": 0.0537, "step": 75000 }, { "epoch": 1.14, "learning_rate": 2.7723812538627353e-05, "loss": 0.0541, "step": 75500 }, { "epoch": 1.15, "learning_rate": 2.770873844947919e-05, "loss": 0.0556, "step": 76000 }, { "epoch": 1.15, "learning_rate": 2.7693664360331028e-05, "loss": 0.052, "step": 76500 }, { "epoch": 1.16, "learning_rate": 2.7678590271182865e-05, "loss": 0.0532, "step": 77000 }, { "epoch": 1.17, "learning_rate": 2.76635161820347e-05, "loss": 0.0543, "step": 77500 }, { "epoch": 1.18, "learning_rate": 2.764844209288654e-05, "loss": 0.0547, "step": 78000 }, { "epoch": 1.18, "learning_rate": 2.7633368003738374e-05, "loss": 0.0552, "step": 78500 }, { "epoch": 1.19, "learning_rate": 2.7618293914590212e-05, "loss": 0.0548, "step": 79000 }, { "epoch": 1.2, "learning_rate": 2.760321982544205e-05, "loss": 0.0531, "step": 79500 }, { "epoch": 1.21, "learning_rate": 2.7588145736293887e-05, "loss": 0.0548, "step": 80000 }, { "epoch": 1.21, "learning_rate": 2.757307164714572e-05, "loss": 0.0567, "step": 80500 }, { "epoch": 1.22, "learning_rate": 2.7557997557997558e-05, "loss": 0.0538, "step": 81000 }, { "epoch": 1.23, "learning_rate": 2.7542923468849396e-05, "loss": 0.0536, "step": 81500 }, { "epoch": 1.24, "learning_rate": 2.7527849379701233e-05, "loss": 0.0559, "step": 82000 }, { "epoch": 1.24, "learning_rate": 2.7512775290553067e-05, "loss": 0.0525, "step": 82500 }, { "epoch": 1.25, "learning_rate": 2.7497701201404908e-05, "loss": 0.0541, "step": 83000 }, { "epoch": 1.26, "learning_rate": 2.7482627112256742e-05, "loss": 0.0507, "step": 83500 }, { "epoch": 1.27, "learning_rate": 2.746755302310858e-05, "loss": 0.0556, "step": 84000 }, { "epoch": 1.27, "learning_rate": 2.7452478933960417e-05, "loss": 0.0527, "step": 84500 }, { "epoch": 1.28, "learning_rate": 2.7437404844812254e-05, "loss": 0.0529, "step": 85000 }, { "epoch": 1.29, "learning_rate": 2.7422330755664088e-05, "loss": 0.0533, "step": 85500 }, { "epoch": 1.3, "learning_rate": 2.740725666651593e-05, "loss": 0.0537, "step": 86000 }, { "epoch": 1.3, "learning_rate": 2.7392182577367763e-05, "loss": 0.0532, "step": 86500 }, { "epoch": 1.31, "learning_rate": 2.73771084882196e-05, "loss": 0.0516, "step": 87000 }, { "epoch": 1.32, "learning_rate": 2.7362034399071438e-05, "loss": 0.053, "step": 87500 }, { "epoch": 1.33, "learning_rate": 2.7346960309923275e-05, "loss": 0.0526, "step": 88000 }, { "epoch": 1.33, "learning_rate": 2.733188622077511e-05, "loss": 0.053, "step": 88500 }, { "epoch": 1.34, "learning_rate": 2.7316812131626947e-05, "loss": 0.0538, "step": 89000 }, { "epoch": 1.35, "learning_rate": 2.7301738042478784e-05, "loss": 0.0554, "step": 89500 }, { "epoch": 1.36, "learning_rate": 2.728666395333062e-05, "loss": 0.0514, "step": 90000 }, { "epoch": 1.36, "learning_rate": 2.7271589864182456e-05, "loss": 0.0539, "step": 90500 }, { "epoch": 1.37, "learning_rate": 2.7256515775034296e-05, "loss": 0.0532, "step": 91000 }, { "epoch": 1.38, "learning_rate": 2.724144168588613e-05, "loss": 0.053, "step": 91500 }, { "epoch": 1.39, "learning_rate": 2.7226367596737968e-05, "loss": 0.052, "step": 92000 }, { "epoch": 1.39, "learning_rate": 2.7211293507589805e-05, "loss": 0.0528, "step": 92500 }, { "epoch": 1.4, "learning_rate": 2.7196219418441643e-05, "loss": 0.0546, "step": 93000 }, { "epoch": 1.41, "learning_rate": 2.7181145329293477e-05, "loss": 0.0495, "step": 93500 }, { "epoch": 1.42, "learning_rate": 2.7166071240145317e-05, "loss": 0.0501, "step": 94000 }, { "epoch": 1.42, "learning_rate": 2.715099715099715e-05, "loss": 0.0504, "step": 94500 }, { "epoch": 1.43, "learning_rate": 2.713592306184899e-05, "loss": 0.052, "step": 95000 }, { "epoch": 1.44, "learning_rate": 2.7120848972700823e-05, "loss": 0.0513, "step": 95500 }, { "epoch": 1.45, "learning_rate": 2.7105774883552664e-05, "loss": 0.0527, "step": 96000 }, { "epoch": 1.45, "learning_rate": 2.7090700794404498e-05, "loss": 0.0515, "step": 96500 }, { "epoch": 1.46, "learning_rate": 2.7075626705256335e-05, "loss": 0.0519, "step": 97000 }, { "epoch": 1.47, "learning_rate": 2.7060552616108173e-05, "loss": 0.0514, "step": 97500 }, { "epoch": 1.48, "learning_rate": 2.704547852696001e-05, "loss": 0.0535, "step": 98000 }, { "epoch": 1.48, "learning_rate": 2.7030404437811844e-05, "loss": 0.0525, "step": 98500 }, { "epoch": 1.49, "learning_rate": 2.7015330348663685e-05, "loss": 0.0513, "step": 99000 }, { "epoch": 1.5, "learning_rate": 2.700025625951552e-05, "loss": 0.0526, "step": 99500 }, { "epoch": 1.51, "learning_rate": 2.6985182170367356e-05, "loss": 0.0524, "step": 100000 }, { "epoch": 1.51, "learning_rate": 2.6970108081219194e-05, "loss": 0.0533, "step": 100500 }, { "epoch": 1.52, "learning_rate": 2.695503399207103e-05, "loss": 0.0539, "step": 101000 }, { "epoch": 1.53, "learning_rate": 2.6939959902922865e-05, "loss": 0.0533, "step": 101500 }, { "epoch": 1.54, "learning_rate": 2.6924885813774706e-05, "loss": 0.0523, "step": 102000 }, { "epoch": 1.55, "learning_rate": 2.690981172462654e-05, "loss": 0.0516, "step": 102500 }, { "epoch": 1.55, "learning_rate": 2.6894737635478378e-05, "loss": 0.0519, "step": 103000 }, { "epoch": 1.56, "learning_rate": 2.687966354633021e-05, "loss": 0.0517, "step": 103500 }, { "epoch": 1.57, "learning_rate": 2.6864589457182052e-05, "loss": 0.0519, "step": 104000 }, { "epoch": 1.58, "learning_rate": 2.6849515368033886e-05, "loss": 0.0507, "step": 104500 }, { "epoch": 1.58, "learning_rate": 2.6834441278885724e-05, "loss": 0.0499, "step": 105000 }, { "epoch": 1.59, "learning_rate": 2.681936718973756e-05, "loss": 0.0513, "step": 105500 }, { "epoch": 1.6, "learning_rate": 2.68042931005894e-05, "loss": 0.051, "step": 106000 }, { "epoch": 1.61, "learning_rate": 2.6789219011441233e-05, "loss": 0.0545, "step": 106500 }, { "epoch": 1.61, "learning_rate": 2.6774144922293074e-05, "loss": 0.0516, "step": 107000 }, { "epoch": 1.62, "learning_rate": 2.6759070833144908e-05, "loss": 0.0523, "step": 107500 }, { "epoch": 1.63, "learning_rate": 2.6743996743996745e-05, "loss": 0.0536, "step": 108000 }, { "epoch": 1.64, "learning_rate": 2.6728922654848582e-05, "loss": 0.0527, "step": 108500 }, { "epoch": 1.64, "learning_rate": 2.671384856570042e-05, "loss": 0.0524, "step": 109000 }, { "epoch": 1.65, "learning_rate": 2.6698774476552254e-05, "loss": 0.0503, "step": 109500 }, { "epoch": 1.66, "learning_rate": 2.6683700387404095e-05, "loss": 0.0516, "step": 110000 }, { "epoch": 1.67, "learning_rate": 2.666862629825593e-05, "loss": 0.0527, "step": 110500 }, { "epoch": 1.67, "learning_rate": 2.6653552209107766e-05, "loss": 0.0505, "step": 111000 }, { "epoch": 1.68, "learning_rate": 2.66384781199596e-05, "loss": 0.0518, "step": 111500 }, { "epoch": 1.69, "learning_rate": 2.662340403081144e-05, "loss": 0.0522, "step": 112000 }, { "epoch": 1.7, "learning_rate": 2.6608329941663275e-05, "loss": 0.0527, "step": 112500 }, { "epoch": 1.7, "learning_rate": 2.6593255852515112e-05, "loss": 0.0505, "step": 113000 }, { "epoch": 1.71, "learning_rate": 2.657818176336695e-05, "loss": 0.0517, "step": 113500 }, { "epoch": 1.72, "learning_rate": 2.6563107674218787e-05, "loss": 0.0528, "step": 114000 }, { "epoch": 1.73, "learning_rate": 2.654803358507062e-05, "loss": 0.0535, "step": 114500 }, { "epoch": 1.73, "learning_rate": 2.6532959495922462e-05, "loss": 0.0519, "step": 115000 }, { "epoch": 1.74, "learning_rate": 2.6517885406774296e-05, "loss": 0.0503, "step": 115500 }, { "epoch": 1.75, "learning_rate": 2.6502811317626134e-05, "loss": 0.0482, "step": 116000 }, { "epoch": 1.76, "learning_rate": 2.648773722847797e-05, "loss": 0.0511, "step": 116500 }, { "epoch": 1.76, "learning_rate": 2.647266313932981e-05, "loss": 0.0523, "step": 117000 }, { "epoch": 1.77, "learning_rate": 2.6457589050181642e-05, "loss": 0.0516, "step": 117500 }, { "epoch": 1.78, "learning_rate": 2.644251496103348e-05, "loss": 0.0516, "step": 118000 }, { "epoch": 1.79, "learning_rate": 2.6427440871885317e-05, "loss": 0.051, "step": 118500 }, { "epoch": 1.79, "learning_rate": 2.6412366782737155e-05, "loss": 0.05, "step": 119000 }, { "epoch": 1.8, "learning_rate": 2.639729269358899e-05, "loss": 0.0527, "step": 119500 }, { "epoch": 1.81, "learning_rate": 2.638221860444083e-05, "loss": 0.0516, "step": 120000 }, { "epoch": 1.82, "learning_rate": 2.6367144515292664e-05, "loss": 0.0524, "step": 120500 }, { "epoch": 1.82, "learning_rate": 2.63520704261445e-05, "loss": 0.0511, "step": 121000 }, { "epoch": 1.83, "learning_rate": 2.633699633699634e-05, "loss": 0.0512, "step": 121500 }, { "epoch": 1.84, "learning_rate": 2.6321922247848176e-05, "loss": 0.0505, "step": 122000 }, { "epoch": 1.85, "learning_rate": 2.630684815870001e-05, "loss": 0.052, "step": 122500 }, { "epoch": 1.85, "learning_rate": 2.629177406955185e-05, "loss": 0.0495, "step": 123000 }, { "epoch": 1.86, "learning_rate": 2.6276699980403685e-05, "loss": 0.0515, "step": 123500 }, { "epoch": 1.87, "learning_rate": 2.6261625891255522e-05, "loss": 0.0488, "step": 124000 }, { "epoch": 1.88, "learning_rate": 2.624655180210736e-05, "loss": 0.0502, "step": 124500 }, { "epoch": 1.88, "learning_rate": 2.6231477712959197e-05, "loss": 0.0518, "step": 125000 }, { "epoch": 1.89, "learning_rate": 2.621640362381103e-05, "loss": 0.0503, "step": 125500 }, { "epoch": 1.9, "learning_rate": 2.620132953466287e-05, "loss": 0.05, "step": 126000 }, { "epoch": 1.91, "learning_rate": 2.6186255445514706e-05, "loss": 0.0492, "step": 126500 }, { "epoch": 1.91, "learning_rate": 2.6171181356366543e-05, "loss": 0.0501, "step": 127000 }, { "epoch": 1.92, "learning_rate": 2.6156107267218377e-05, "loss": 0.0494, "step": 127500 }, { "epoch": 1.93, "learning_rate": 2.6141033178070218e-05, "loss": 0.0496, "step": 128000 }, { "epoch": 1.94, "learning_rate": 2.6125959088922052e-05, "loss": 0.049, "step": 128500 }, { "epoch": 1.94, "learning_rate": 2.611088499977389e-05, "loss": 0.0492, "step": 129000 }, { "epoch": 1.95, "learning_rate": 2.6095810910625727e-05, "loss": 0.0495, "step": 129500 }, { "epoch": 1.96, "learning_rate": 2.6080736821477564e-05, "loss": 0.0509, "step": 130000 }, { "epoch": 1.97, "learning_rate": 2.60656627323294e-05, "loss": 0.0463, "step": 130500 }, { "epoch": 1.97, "learning_rate": 2.605058864318124e-05, "loss": 0.0535, "step": 131000 }, { "epoch": 1.98, "learning_rate": 2.6035514554033073e-05, "loss": 0.0502, "step": 131500 }, { "epoch": 1.99, "learning_rate": 2.602044046488491e-05, "loss": 0.0488, "step": 132000 }, { "epoch": 2.0, "learning_rate": 2.6005366375736748e-05, "loss": 0.0503, "step": 132500 }, { "epoch": 2.0, "eval_accuracy": 0.9789404549123145, "eval_f1": 0.9029215903640982, "eval_loss": 0.05017755180597305, "eval_precision": 0.8732449520391443, "eval_recall": 0.9346862702808287, "eval_runtime": 248.7301, "eval_samples_per_second": 474.104, "eval_steps_per_second": 29.635, "step": 132678 }, { "epoch": 2.0, "learning_rate": 2.5990292286588586e-05, "loss": 0.0441, "step": 133000 }, { "epoch": 2.01, "learning_rate": 2.597521819744042e-05, "loss": 0.0419, "step": 133500 }, { "epoch": 2.02, "learning_rate": 2.5960144108292257e-05, "loss": 0.0429, "step": 134000 }, { "epoch": 2.03, "learning_rate": 2.5945070019144094e-05, "loss": 0.04, "step": 134500 }, { "epoch": 2.04, "learning_rate": 2.5929995929995932e-05, "loss": 0.0438, "step": 135000 }, { "epoch": 2.04, "learning_rate": 2.5914921840847766e-05, "loss": 0.0386, "step": 135500 }, { "epoch": 2.05, "learning_rate": 2.5899847751699607e-05, "loss": 0.0418, "step": 136000 }, { "epoch": 2.06, "learning_rate": 2.588477366255144e-05, "loss": 0.0413, "step": 136500 }, { "epoch": 2.07, "learning_rate": 2.5869699573403278e-05, "loss": 0.0434, "step": 137000 }, { "epoch": 2.07, "learning_rate": 2.5854625484255116e-05, "loss": 0.0416, "step": 137500 }, { "epoch": 2.08, "learning_rate": 2.5839551395106953e-05, "loss": 0.0417, "step": 138000 }, { "epoch": 2.09, "learning_rate": 2.5824477305958787e-05, "loss": 0.0414, "step": 138500 }, { "epoch": 2.1, "learning_rate": 2.5809403216810628e-05, "loss": 0.041, "step": 139000 }, { "epoch": 2.1, "learning_rate": 2.5794329127662462e-05, "loss": 0.043, "step": 139500 }, { "epoch": 2.11, "learning_rate": 2.57792550385143e-05, "loss": 0.042, "step": 140000 }, { "epoch": 2.12, "learning_rate": 2.5764180949366137e-05, "loss": 0.0416, "step": 140500 }, { "epoch": 2.13, "learning_rate": 2.5749106860217974e-05, "loss": 0.0442, "step": 141000 }, { "epoch": 2.13, "learning_rate": 2.5734032771069808e-05, "loss": 0.0415, "step": 141500 }, { "epoch": 2.14, "learning_rate": 2.5718958681921646e-05, "loss": 0.0438, "step": 142000 }, { "epoch": 2.15, "learning_rate": 2.5703884592773483e-05, "loss": 0.0436, "step": 142500 }, { "epoch": 2.16, "learning_rate": 2.568881050362532e-05, "loss": 0.0425, "step": 143000 }, { "epoch": 2.16, "learning_rate": 2.5673736414477155e-05, "loss": 0.0418, "step": 143500 }, { "epoch": 2.17, "learning_rate": 2.5658662325328995e-05, "loss": 0.0414, "step": 144000 }, { "epoch": 2.18, "learning_rate": 2.564358823618083e-05, "loss": 0.0418, "step": 144500 }, { "epoch": 2.19, "learning_rate": 2.5628514147032667e-05, "loss": 0.0427, "step": 145000 }, { "epoch": 2.19, "learning_rate": 2.5613440057884504e-05, "loss": 0.0409, "step": 145500 }, { "epoch": 2.2, "learning_rate": 2.559836596873634e-05, "loss": 0.0416, "step": 146000 }, { "epoch": 2.21, "learning_rate": 2.5583291879588176e-05, "loss": 0.0426, "step": 146500 }, { "epoch": 2.22, "learning_rate": 2.5568217790440016e-05, "loss": 0.0399, "step": 147000 }, { "epoch": 2.22, "learning_rate": 2.555314370129185e-05, "loss": 0.0411, "step": 147500 }, { "epoch": 2.23, "learning_rate": 2.5538069612143688e-05, "loss": 0.0426, "step": 148000 }, { "epoch": 2.24, "learning_rate": 2.5522995522995522e-05, "loss": 0.0435, "step": 148500 }, { "epoch": 2.25, "learning_rate": 2.5507921433847363e-05, "loss": 0.0411, "step": 149000 }, { "epoch": 2.25, "learning_rate": 2.5492847344699197e-05, "loss": 0.0416, "step": 149500 }, { "epoch": 2.26, "learning_rate": 2.547777325555103e-05, "loss": 0.0426, "step": 150000 }, { "epoch": 2.27, "learning_rate": 2.546269916640287e-05, "loss": 0.0439, "step": 150500 }, { "epoch": 2.28, "learning_rate": 2.5447625077254706e-05, "loss": 0.0417, "step": 151000 }, { "epoch": 2.28, "learning_rate": 2.5432550988106543e-05, "loss": 0.0425, "step": 151500 }, { "epoch": 2.29, "learning_rate": 2.541747689895838e-05, "loss": 0.0431, "step": 152000 }, { "epoch": 2.3, "learning_rate": 2.5402402809810218e-05, "loss": 0.0422, "step": 152500 }, { "epoch": 2.31, "learning_rate": 2.5387328720662052e-05, "loss": 0.0417, "step": 153000 }, { "epoch": 2.31, "learning_rate": 2.5372254631513893e-05, "loss": 0.0423, "step": 153500 }, { "epoch": 2.32, "learning_rate": 2.5357180542365727e-05, "loss": 0.0442, "step": 154000 }, { "epoch": 2.33, "learning_rate": 2.5342106453217564e-05, "loss": 0.0426, "step": 154500 }, { "epoch": 2.34, "learning_rate": 2.53270323640694e-05, "loss": 0.0417, "step": 155000 }, { "epoch": 2.34, "learning_rate": 2.531195827492124e-05, "loss": 0.0417, "step": 155500 }, { "epoch": 2.35, "learning_rate": 2.5296884185773073e-05, "loss": 0.0423, "step": 156000 }, { "epoch": 2.36, "learning_rate": 2.528181009662491e-05, "loss": 0.0432, "step": 156500 }, { "epoch": 2.37, "learning_rate": 2.5266736007476748e-05, "loss": 0.043, "step": 157000 }, { "epoch": 2.37, "learning_rate": 2.5251661918328585e-05, "loss": 0.0434, "step": 157500 }, { "epoch": 2.38, "learning_rate": 2.523658782918042e-05, "loss": 0.041, "step": 158000 }, { "epoch": 2.39, "learning_rate": 2.522151374003226e-05, "loss": 0.0415, "step": 158500 }, { "epoch": 2.4, "learning_rate": 2.5206439650884094e-05, "loss": 0.0441, "step": 159000 }, { "epoch": 2.4, "learning_rate": 2.519136556173593e-05, "loss": 0.0425, "step": 159500 }, { "epoch": 2.41, "learning_rate": 2.517629147258777e-05, "loss": 0.0433, "step": 160000 }, { "epoch": 2.42, "learning_rate": 2.5161217383439607e-05, "loss": 0.0438, "step": 160500 }, { "epoch": 2.43, "learning_rate": 2.514614329429144e-05, "loss": 0.0414, "step": 161000 }, { "epoch": 2.43, "learning_rate": 2.513106920514328e-05, "loss": 0.0413, "step": 161500 }, { "epoch": 2.44, "learning_rate": 2.5115995115995115e-05, "loss": 0.0422, "step": 162000 }, { "epoch": 2.45, "learning_rate": 2.5100921026846953e-05, "loss": 0.0418, "step": 162500 }, { "epoch": 2.46, "learning_rate": 2.508584693769879e-05, "loss": 0.0421, "step": 163000 }, { "epoch": 2.46, "learning_rate": 2.5070772848550628e-05, "loss": 0.0436, "step": 163500 }, { "epoch": 2.47, "learning_rate": 2.5055698759402462e-05, "loss": 0.0427, "step": 164000 }, { "epoch": 2.48, "learning_rate": 2.50406246702543e-05, "loss": 0.0415, "step": 164500 }, { "epoch": 2.49, "learning_rate": 2.5025550581106137e-05, "loss": 0.0426, "step": 165000 }, { "epoch": 2.49, "learning_rate": 2.5010476491957974e-05, "loss": 0.042, "step": 165500 }, { "epoch": 2.5, "learning_rate": 2.4995402402809808e-05, "loss": 0.0431, "step": 166000 }, { "epoch": 2.51, "learning_rate": 2.498032831366165e-05, "loss": 0.0425, "step": 166500 }, { "epoch": 2.52, "learning_rate": 2.4965254224513483e-05, "loss": 0.0429, "step": 167000 }, { "epoch": 2.52, "learning_rate": 2.495018013536532e-05, "loss": 0.0438, "step": 167500 }, { "epoch": 2.53, "learning_rate": 2.4935106046217158e-05, "loss": 0.044, "step": 168000 }, { "epoch": 2.54, "learning_rate": 2.4920031957068995e-05, "loss": 0.0429, "step": 168500 }, { "epoch": 2.55, "learning_rate": 2.490495786792083e-05, "loss": 0.0419, "step": 169000 }, { "epoch": 2.56, "learning_rate": 2.488988377877267e-05, "loss": 0.0429, "step": 169500 }, { "epoch": 2.56, "learning_rate": 2.4874809689624504e-05, "loss": 0.0429, "step": 170000 }, { "epoch": 2.57, "learning_rate": 2.485973560047634e-05, "loss": 0.0427, "step": 170500 }, { "epoch": 2.58, "learning_rate": 2.484466151132818e-05, "loss": 0.0428, "step": 171000 }, { "epoch": 2.59, "learning_rate": 2.4829587422180016e-05, "loss": 0.0431, "step": 171500 }, { "epoch": 2.59, "learning_rate": 2.481451333303185e-05, "loss": 0.0409, "step": 172000 }, { "epoch": 2.6, "learning_rate": 2.4799439243883688e-05, "loss": 0.0428, "step": 172500 }, { "epoch": 2.61, "learning_rate": 2.4784365154735525e-05, "loss": 0.0424, "step": 173000 }, { "epoch": 2.62, "learning_rate": 2.4769291065587363e-05, "loss": 0.0413, "step": 173500 }, { "epoch": 2.62, "learning_rate": 2.4754216976439197e-05, "loss": 0.0411, "step": 174000 }, { "epoch": 2.63, "learning_rate": 2.4739142887291037e-05, "loss": 0.0425, "step": 174500 }, { "epoch": 2.64, "learning_rate": 2.472406879814287e-05, "loss": 0.0429, "step": 175000 }, { "epoch": 2.65, "learning_rate": 2.470899470899471e-05, "loss": 0.043, "step": 175500 }, { "epoch": 2.65, "learning_rate": 2.4693920619846546e-05, "loss": 0.0455, "step": 176000 }, { "epoch": 2.66, "learning_rate": 2.4678846530698384e-05, "loss": 0.041, "step": 176500 }, { "epoch": 2.67, "learning_rate": 2.4663772441550218e-05, "loss": 0.0427, "step": 177000 }, { "epoch": 2.68, "learning_rate": 2.464869835240206e-05, "loss": 0.0419, "step": 177500 }, { "epoch": 2.68, "learning_rate": 2.4633624263253893e-05, "loss": 0.0424, "step": 178000 }, { "epoch": 2.69, "learning_rate": 2.461855017410573e-05, "loss": 0.0429, "step": 178500 }, { "epoch": 2.7, "learning_rate": 2.4603476084957567e-05, "loss": 0.0403, "step": 179000 }, { "epoch": 2.71, "learning_rate": 2.4588401995809405e-05, "loss": 0.0432, "step": 179500 }, { "epoch": 2.71, "learning_rate": 2.457332790666124e-05, "loss": 0.042, "step": 180000 }, { "epoch": 2.72, "learning_rate": 2.4558253817513076e-05, "loss": 0.0416, "step": 180500 }, { "epoch": 2.73, "learning_rate": 2.4543179728364914e-05, "loss": 0.0409, "step": 181000 }, { "epoch": 2.74, "learning_rate": 2.452810563921675e-05, "loss": 0.041, "step": 181500 }, { "epoch": 2.74, "learning_rate": 2.4513031550068585e-05, "loss": 0.0416, "step": 182000 }, { "epoch": 2.75, "learning_rate": 2.4497957460920426e-05, "loss": 0.0441, "step": 182500 }, { "epoch": 2.76, "learning_rate": 2.448288337177226e-05, "loss": 0.0432, "step": 183000 }, { "epoch": 2.77, "learning_rate": 2.4467809282624097e-05, "loss": 0.042, "step": 183500 }, { "epoch": 2.77, "learning_rate": 2.4452735193475935e-05, "loss": 0.0418, "step": 184000 }, { "epoch": 2.78, "learning_rate": 2.4437661104327772e-05, "loss": 0.0401, "step": 184500 }, { "epoch": 2.79, "learning_rate": 2.4422587015179606e-05, "loss": 0.041, "step": 185000 }, { "epoch": 2.8, "learning_rate": 2.4407512926031447e-05, "loss": 0.0426, "step": 185500 }, { "epoch": 2.8, "learning_rate": 2.439243883688328e-05, "loss": 0.0422, "step": 186000 }, { "epoch": 2.81, "learning_rate": 2.437736474773512e-05, "loss": 0.0411, "step": 186500 }, { "epoch": 2.82, "learning_rate": 2.4362290658586953e-05, "loss": 0.0423, "step": 187000 }, { "epoch": 2.83, "learning_rate": 2.4347216569438793e-05, "loss": 0.0403, "step": 187500 }, { "epoch": 2.83, "learning_rate": 2.4332142480290627e-05, "loss": 0.0418, "step": 188000 }, { "epoch": 2.84, "learning_rate": 2.4317068391142465e-05, "loss": 0.0424, "step": 188500 }, { "epoch": 2.85, "learning_rate": 2.4301994301994302e-05, "loss": 0.0414, "step": 189000 }, { "epoch": 2.86, "learning_rate": 2.428692021284614e-05, "loss": 0.0431, "step": 189500 }, { "epoch": 2.86, "learning_rate": 2.4271846123697974e-05, "loss": 0.0419, "step": 190000 }, { "epoch": 2.87, "learning_rate": 2.4256772034549815e-05, "loss": 0.043, "step": 190500 }, { "epoch": 2.88, "learning_rate": 2.424169794540165e-05, "loss": 0.0437, "step": 191000 }, { "epoch": 2.89, "learning_rate": 2.4226623856253486e-05, "loss": 0.0424, "step": 191500 }, { "epoch": 2.89, "learning_rate": 2.4211549767105323e-05, "loss": 0.0414, "step": 192000 }, { "epoch": 2.9, "learning_rate": 2.419647567795716e-05, "loss": 0.039, "step": 192500 }, { "epoch": 2.91, "learning_rate": 2.4181401588808995e-05, "loss": 0.0435, "step": 193000 }, { "epoch": 2.92, "learning_rate": 2.4166327499660836e-05, "loss": 0.0415, "step": 193500 }, { "epoch": 2.92, "learning_rate": 2.415125341051267e-05, "loss": 0.0431, "step": 194000 }, { "epoch": 2.93, "learning_rate": 2.4136179321364507e-05, "loss": 0.0418, "step": 194500 }, { "epoch": 2.94, "learning_rate": 2.412110523221634e-05, "loss": 0.0416, "step": 195000 }, { "epoch": 2.95, "learning_rate": 2.4106031143068182e-05, "loss": 0.0419, "step": 195500 }, { "epoch": 2.95, "learning_rate": 2.4090957053920016e-05, "loss": 0.039, "step": 196000 }, { "epoch": 2.96, "learning_rate": 2.4075882964771853e-05, "loss": 0.0409, "step": 196500 }, { "epoch": 2.97, "learning_rate": 2.406080887562369e-05, "loss": 0.0426, "step": 197000 }, { "epoch": 2.98, "learning_rate": 2.404573478647553e-05, "loss": 0.0403, "step": 197500 }, { "epoch": 2.98, "learning_rate": 2.4030660697327362e-05, "loss": 0.0413, "step": 198000 }, { "epoch": 2.99, "learning_rate": 2.4015586608179203e-05, "loss": 0.0418, "step": 198500 }, { "epoch": 3.0, "learning_rate": 2.4000512519031037e-05, "loss": 0.0427, "step": 199000 }, { "epoch": 3.0, "eval_accuracy": 0.981078655264078, "eval_f1": 0.9117072535486391, "eval_loss": 0.04831404611468315, "eval_precision": 0.8849228388559697, "eval_recall": 0.9401636709164676, "eval_runtime": 265.5234, "eval_samples_per_second": 444.119, "eval_steps_per_second": 27.76, "step": 199017 }, { "epoch": 3.01, "learning_rate": 2.3985438429882875e-05, "loss": 0.0338, "step": 199500 }, { "epoch": 3.01, "learning_rate": 2.3970364340734712e-05, "loss": 0.035, "step": 200000 }, { "epoch": 3.02, "learning_rate": 2.395529025158655e-05, "loss": 0.034, "step": 200500 }, { "epoch": 3.03, "learning_rate": 2.3940216162438383e-05, "loss": 0.0338, "step": 201000 }, { "epoch": 3.04, "learning_rate": 2.3925142073290224e-05, "loss": 0.0343, "step": 201500 }, { "epoch": 3.04, "learning_rate": 2.391006798414206e-05, "loss": 0.0344, "step": 202000 }, { "epoch": 3.05, "learning_rate": 2.3894993894993896e-05, "loss": 0.0341, "step": 202500 }, { "epoch": 3.06, "learning_rate": 2.387991980584573e-05, "loss": 0.035, "step": 203000 }, { "epoch": 3.07, "learning_rate": 2.386484571669757e-05, "loss": 0.0346, "step": 203500 }, { "epoch": 3.08, "learning_rate": 2.3849771627549405e-05, "loss": 0.0343, "step": 204000 }, { "epoch": 3.08, "learning_rate": 2.3834697538401242e-05, "loss": 0.0329, "step": 204500 }, { "epoch": 3.09, "learning_rate": 2.381962344925308e-05, "loss": 0.0348, "step": 205000 }, { "epoch": 3.1, "learning_rate": 2.3804549360104917e-05, "loss": 0.0334, "step": 205500 }, { "epoch": 3.11, "learning_rate": 2.378947527095675e-05, "loss": 0.0329, "step": 206000 }, { "epoch": 3.11, "learning_rate": 2.3774401181808592e-05, "loss": 0.0339, "step": 206500 }, { "epoch": 3.12, "learning_rate": 2.3759327092660426e-05, "loss": 0.0335, "step": 207000 }, { "epoch": 3.13, "learning_rate": 2.3744253003512263e-05, "loss": 0.0339, "step": 207500 }, { "epoch": 3.14, "learning_rate": 2.37291789143641e-05, "loss": 0.034, "step": 208000 }, { "epoch": 3.14, "learning_rate": 2.3714104825215938e-05, "loss": 0.0341, "step": 208500 }, { "epoch": 3.15, "learning_rate": 2.3699030736067772e-05, "loss": 0.0347, "step": 209000 }, { "epoch": 3.16, "learning_rate": 2.368395664691961e-05, "loss": 0.0356, "step": 209500 }, { "epoch": 3.17, "learning_rate": 2.3668882557771447e-05, "loss": 0.0349, "step": 210000 }, { "epoch": 3.17, "learning_rate": 2.3653808468623284e-05, "loss": 0.0337, "step": 210500 }, { "epoch": 3.18, "learning_rate": 2.363873437947512e-05, "loss": 0.0342, "step": 211000 }, { "epoch": 3.19, "learning_rate": 2.362366029032696e-05, "loss": 0.0349, "step": 211500 }, { "epoch": 3.2, "learning_rate": 2.3608586201178793e-05, "loss": 0.0344, "step": 212000 }, { "epoch": 3.2, "learning_rate": 2.359351211203063e-05, "loss": 0.0333, "step": 212500 }, { "epoch": 3.21, "learning_rate": 2.3578438022882468e-05, "loss": 0.0334, "step": 213000 }, { "epoch": 3.22, "learning_rate": 2.3563363933734305e-05, "loss": 0.0352, "step": 213500 }, { "epoch": 3.23, "learning_rate": 2.354828984458614e-05, "loss": 0.0343, "step": 214000 }, { "epoch": 3.23, "learning_rate": 2.353321575543798e-05, "loss": 0.0354, "step": 214500 }, { "epoch": 3.24, "learning_rate": 2.3518141666289814e-05, "loss": 0.0343, "step": 215000 }, { "epoch": 3.25, "learning_rate": 2.3503067577141652e-05, "loss": 0.0335, "step": 215500 }, { "epoch": 3.26, "learning_rate": 2.348799348799349e-05, "loss": 0.0339, "step": 216000 }, { "epoch": 3.26, "learning_rate": 2.3472919398845327e-05, "loss": 0.0347, "step": 216500 }, { "epoch": 3.27, "learning_rate": 2.345784530969716e-05, "loss": 0.0345, "step": 217000 }, { "epoch": 3.28, "learning_rate": 2.3442771220548998e-05, "loss": 0.0344, "step": 217500 }, { "epoch": 3.29, "learning_rate": 2.3427697131400836e-05, "loss": 0.0334, "step": 218000 }, { "epoch": 3.29, "learning_rate": 2.3412623042252673e-05, "loss": 0.0335, "step": 218500 }, { "epoch": 3.3, "learning_rate": 2.3397548953104507e-05, "loss": 0.0345, "step": 219000 }, { "epoch": 3.31, "learning_rate": 2.3382474863956348e-05, "loss": 0.0349, "step": 219500 }, { "epoch": 3.32, "learning_rate": 2.3367400774808182e-05, "loss": 0.0369, "step": 220000 }, { "epoch": 3.32, "learning_rate": 2.335232668566002e-05, "loss": 0.0362, "step": 220500 }, { "epoch": 3.33, "learning_rate": 2.3337252596511857e-05, "loss": 0.0344, "step": 221000 }, { "epoch": 3.34, "learning_rate": 2.3322178507363694e-05, "loss": 0.0363, "step": 221500 }, { "epoch": 3.35, "learning_rate": 2.3307104418215528e-05, "loss": 0.0351, "step": 222000 }, { "epoch": 3.35, "learning_rate": 2.329203032906737e-05, "loss": 0.036, "step": 222500 }, { "epoch": 3.36, "learning_rate": 2.3276956239919203e-05, "loss": 0.0348, "step": 223000 }, { "epoch": 3.37, "learning_rate": 2.326188215077104e-05, "loss": 0.0331, "step": 223500 }, { "epoch": 3.38, "learning_rate": 2.3246808061622878e-05, "loss": 0.0331, "step": 224000 }, { "epoch": 3.38, "learning_rate": 2.3231733972474715e-05, "loss": 0.0365, "step": 224500 }, { "epoch": 3.39, "learning_rate": 2.321665988332655e-05, "loss": 0.0365, "step": 225000 }, { "epoch": 3.4, "learning_rate": 2.3201585794178387e-05, "loss": 0.0347, "step": 225500 }, { "epoch": 3.41, "learning_rate": 2.3186511705030224e-05, "loss": 0.034, "step": 226000 }, { "epoch": 3.41, "learning_rate": 2.317143761588206e-05, "loss": 0.0352, "step": 226500 }, { "epoch": 3.42, "learning_rate": 2.3156363526733896e-05, "loss": 0.0338, "step": 227000 }, { "epoch": 3.43, "learning_rate": 2.3141289437585736e-05, "loss": 0.0349, "step": 227500 }, { "epoch": 3.44, "learning_rate": 2.312621534843757e-05, "loss": 0.0348, "step": 228000 }, { "epoch": 3.44, "learning_rate": 2.3111141259289408e-05, "loss": 0.0366, "step": 228500 }, { "epoch": 3.45, "learning_rate": 2.3096067170141245e-05, "loss": 0.0351, "step": 229000 }, { "epoch": 3.46, "learning_rate": 2.3080993080993083e-05, "loss": 0.036, "step": 229500 }, { "epoch": 3.47, "learning_rate": 2.3065918991844917e-05, "loss": 0.0353, "step": 230000 }, { "epoch": 3.47, "learning_rate": 2.3050844902696758e-05, "loss": 0.0335, "step": 230500 }, { "epoch": 3.48, "learning_rate": 2.303577081354859e-05, "loss": 0.0363, "step": 231000 }, { "epoch": 3.49, "learning_rate": 2.302069672440043e-05, "loss": 0.0348, "step": 231500 }, { "epoch": 3.5, "learning_rate": 2.3005622635252266e-05, "loss": 0.0356, "step": 232000 }, { "epoch": 3.5, "learning_rate": 2.2990548546104104e-05, "loss": 0.0361, "step": 232500 }, { "epoch": 3.51, "learning_rate": 2.2975474456955938e-05, "loss": 0.0352, "step": 233000 }, { "epoch": 3.52, "learning_rate": 2.2960400367807775e-05, "loss": 0.0371, "step": 233500 }, { "epoch": 3.53, "learning_rate": 2.2945326278659613e-05, "loss": 0.0362, "step": 234000 }, { "epoch": 3.53, "learning_rate": 2.293025218951145e-05, "loss": 0.0353, "step": 234500 }, { "epoch": 3.54, "learning_rate": 2.2915178100363284e-05, "loss": 0.0352, "step": 235000 }, { "epoch": 3.55, "learning_rate": 2.2900104011215125e-05, "loss": 0.0332, "step": 235500 }, { "epoch": 3.56, "learning_rate": 2.288502992206696e-05, "loss": 0.0351, "step": 236000 }, { "epoch": 3.57, "learning_rate": 2.2869955832918796e-05, "loss": 0.0355, "step": 236500 }, { "epoch": 3.57, "learning_rate": 2.2854881743770634e-05, "loss": 0.0351, "step": 237000 }, { "epoch": 3.58, "learning_rate": 2.283980765462247e-05, "loss": 0.0367, "step": 237500 }, { "epoch": 3.59, "learning_rate": 2.2824733565474305e-05, "loss": 0.0331, "step": 238000 }, { "epoch": 3.6, "learning_rate": 2.2809659476326146e-05, "loss": 0.0354, "step": 238500 }, { "epoch": 3.6, "learning_rate": 2.279458538717798e-05, "loss": 0.0343, "step": 239000 }, { "epoch": 3.61, "learning_rate": 2.2779511298029818e-05, "loss": 0.0369, "step": 239500 }, { "epoch": 3.62, "learning_rate": 2.276443720888165e-05, "loss": 0.0345, "step": 240000 }, { "epoch": 3.63, "learning_rate": 2.2749363119733492e-05, "loss": 0.0351, "step": 240500 }, { "epoch": 3.63, "learning_rate": 2.2734289030585326e-05, "loss": 0.0346, "step": 241000 }, { "epoch": 3.64, "learning_rate": 2.2719214941437164e-05, "loss": 0.034, "step": 241500 }, { "epoch": 3.65, "learning_rate": 2.2704140852289e-05, "loss": 0.035, "step": 242000 }, { "epoch": 3.66, "learning_rate": 2.268906676314084e-05, "loss": 0.035, "step": 242500 }, { "epoch": 3.66, "learning_rate": 2.2673992673992673e-05, "loss": 0.0359, "step": 243000 }, { "epoch": 3.67, "learning_rate": 2.2658918584844514e-05, "loss": 0.0355, "step": 243500 }, { "epoch": 3.68, "learning_rate": 2.2643844495696348e-05, "loss": 0.0353, "step": 244000 }, { "epoch": 3.69, "learning_rate": 2.2628770406548185e-05, "loss": 0.0354, "step": 244500 }, { "epoch": 3.69, "learning_rate": 2.2613696317400022e-05, "loss": 0.0337, "step": 245000 }, { "epoch": 3.7, "learning_rate": 2.259862222825186e-05, "loss": 0.0343, "step": 245500 }, { "epoch": 3.71, "learning_rate": 2.2583548139103694e-05, "loss": 0.0346, "step": 246000 }, { "epoch": 3.72, "learning_rate": 2.2568474049955535e-05, "loss": 0.0352, "step": 246500 }, { "epoch": 3.72, "learning_rate": 2.255339996080737e-05, "loss": 0.0373, "step": 247000 }, { "epoch": 3.73, "learning_rate": 2.2538325871659206e-05, "loss": 0.0337, "step": 247500 }, { "epoch": 3.74, "learning_rate": 2.252325178251104e-05, "loss": 0.0361, "step": 248000 }, { "epoch": 3.75, "learning_rate": 2.250817769336288e-05, "loss": 0.0356, "step": 248500 }, { "epoch": 3.75, "learning_rate": 2.2493103604214715e-05, "loss": 0.0344, "step": 249000 }, { "epoch": 3.76, "learning_rate": 2.2478029515066552e-05, "loss": 0.0362, "step": 249500 }, { "epoch": 3.77, "learning_rate": 2.246295542591839e-05, "loss": 0.0335, "step": 250000 }, { "epoch": 3.78, "learning_rate": 2.2447881336770227e-05, "loss": 0.0354, "step": 250500 }, { "epoch": 3.78, "learning_rate": 2.243280724762206e-05, "loss": 0.0336, "step": 251000 }, { "epoch": 3.79, "learning_rate": 2.2417733158473902e-05, "loss": 0.0357, "step": 251500 }, { "epoch": 3.8, "learning_rate": 2.2402659069325736e-05, "loss": 0.0352, "step": 252000 }, { "epoch": 3.81, "learning_rate": 2.2387584980177574e-05, "loss": 0.036, "step": 252500 }, { "epoch": 3.81, "learning_rate": 2.237251089102941e-05, "loss": 0.0352, "step": 253000 }, { "epoch": 3.82, "learning_rate": 2.235743680188125e-05, "loss": 0.0358, "step": 253500 }, { "epoch": 3.83, "learning_rate": 2.2342362712733082e-05, "loss": 0.0343, "step": 254000 }, { "epoch": 3.84, "learning_rate": 2.2327288623584923e-05, "loss": 0.0358, "step": 254500 }, { "epoch": 3.84, "learning_rate": 2.2312214534436757e-05, "loss": 0.0349, "step": 255000 }, { "epoch": 3.85, "learning_rate": 2.2297140445288595e-05, "loss": 0.0364, "step": 255500 }, { "epoch": 3.86, "learning_rate": 2.228206635614043e-05, "loss": 0.0348, "step": 256000 }, { "epoch": 3.87, "learning_rate": 2.226699226699227e-05, "loss": 0.0341, "step": 256500 }, { "epoch": 3.87, "learning_rate": 2.2251918177844104e-05, "loss": 0.0342, "step": 257000 }, { "epoch": 3.88, "learning_rate": 2.223684408869594e-05, "loss": 0.0365, "step": 257500 }, { "epoch": 3.89, "learning_rate": 2.222176999954778e-05, "loss": 0.0369, "step": 258000 }, { "epoch": 3.9, "learning_rate": 2.2206695910399616e-05, "loss": 0.0359, "step": 258500 }, { "epoch": 3.9, "learning_rate": 2.219162182125145e-05, "loss": 0.0354, "step": 259000 }, { "epoch": 3.91, "learning_rate": 2.217654773210329e-05, "loss": 0.0347, "step": 259500 }, { "epoch": 3.92, "learning_rate": 2.2161473642955125e-05, "loss": 0.0343, "step": 260000 }, { "epoch": 3.93, "learning_rate": 2.2146399553806962e-05, "loss": 0.0336, "step": 260500 }, { "epoch": 3.93, "learning_rate": 2.21313254646588e-05, "loss": 0.0351, "step": 261000 }, { "epoch": 3.94, "learning_rate": 2.2116251375510637e-05, "loss": 0.0374, "step": 261500 }, { "epoch": 3.95, "learning_rate": 2.210117728636247e-05, "loss": 0.0341, "step": 262000 }, { "epoch": 3.96, "learning_rate": 2.208610319721431e-05, "loss": 0.0338, "step": 262500 }, { "epoch": 3.96, "learning_rate": 2.2071029108066146e-05, "loss": 0.0352, "step": 263000 }, { "epoch": 3.97, "learning_rate": 2.2055955018917983e-05, "loss": 0.0358, "step": 263500 }, { "epoch": 3.98, "learning_rate": 2.2040880929769817e-05, "loss": 0.0349, "step": 264000 }, { "epoch": 3.99, "learning_rate": 2.2025806840621658e-05, "loss": 0.0357, "step": 264500 }, { "epoch": 3.99, "learning_rate": 2.2010732751473492e-05, "loss": 0.0343, "step": 265000 }, { "epoch": 4.0, "eval_accuracy": 0.9821812051277597, "eval_f1": 0.9179256257864373, "eval_loss": 0.048279065638780594, "eval_precision": 0.8932373790463869, "eval_recall": 0.9440173844394617, "eval_runtime": 248.8624, "eval_samples_per_second": 473.852, "eval_steps_per_second": 29.619, "step": 265356 }, { "epoch": 4.0, "learning_rate": 2.199565866232533e-05, "loss": 0.0264, "step": 265500 }, { "epoch": 4.01, "learning_rate": 2.1980584573177167e-05, "loss": 0.0275, "step": 266000 }, { "epoch": 4.02, "learning_rate": 2.1965510484029004e-05, "loss": 0.0286, "step": 266500 }, { "epoch": 4.02, "learning_rate": 2.195043639488084e-05, "loss": 0.0294, "step": 267000 }, { "epoch": 4.03, "learning_rate": 2.193536230573268e-05, "loss": 0.028, "step": 267500 }, { "epoch": 4.04, "learning_rate": 2.1920288216584513e-05, "loss": 0.027, "step": 268000 }, { "epoch": 4.05, "learning_rate": 2.190521412743635e-05, "loss": 0.0279, "step": 268500 }, { "epoch": 4.05, "learning_rate": 2.1890140038288188e-05, "loss": 0.0275, "step": 269000 }, { "epoch": 4.06, "learning_rate": 2.1875065949140026e-05, "loss": 0.0281, "step": 269500 }, { "epoch": 4.07, "learning_rate": 2.185999185999186e-05, "loss": 0.0289, "step": 270000 }, { "epoch": 4.08, "learning_rate": 2.1844917770843697e-05, "loss": 0.0268, "step": 270500 }, { "epoch": 4.09, "learning_rate": 2.1829843681695534e-05, "loss": 0.0282, "step": 271000 }, { "epoch": 4.09, "learning_rate": 2.1814769592547372e-05, "loss": 0.028, "step": 271500 }, { "epoch": 4.1, "learning_rate": 2.1799695503399206e-05, "loss": 0.0286, "step": 272000 }, { "epoch": 4.11, "learning_rate": 2.1784621414251047e-05, "loss": 0.0276, "step": 272500 }, { "epoch": 4.12, "learning_rate": 2.176954732510288e-05, "loss": 0.0286, "step": 273000 }, { "epoch": 4.12, "learning_rate": 2.1754473235954718e-05, "loss": 0.0283, "step": 273500 }, { "epoch": 4.13, "learning_rate": 2.1739399146806556e-05, "loss": 0.0286, "step": 274000 }, { "epoch": 4.14, "learning_rate": 2.1724325057658393e-05, "loss": 0.0276, "step": 274500 }, { "epoch": 4.15, "learning_rate": 2.1709250968510227e-05, "loss": 0.0295, "step": 275000 }, { "epoch": 4.15, "learning_rate": 2.1694176879362068e-05, "loss": 0.0288, "step": 275500 }, { "epoch": 4.16, "learning_rate": 2.1679102790213902e-05, "loss": 0.0287, "step": 276000 }, { "epoch": 4.17, "learning_rate": 2.166402870106574e-05, "loss": 0.0288, "step": 276500 }, { "epoch": 4.18, "learning_rate": 2.1648954611917577e-05, "loss": 0.0284, "step": 277000 }, { "epoch": 4.18, "learning_rate": 2.1633880522769414e-05, "loss": 0.0265, "step": 277500 }, { "epoch": 4.19, "learning_rate": 2.1618806433621248e-05, "loss": 0.0277, "step": 278000 }, { "epoch": 4.2, "learning_rate": 2.1603732344473086e-05, "loss": 0.0281, "step": 278500 }, { "epoch": 4.21, "learning_rate": 2.1588658255324923e-05, "loss": 0.0292, "step": 279000 }, { "epoch": 4.21, "learning_rate": 2.157358416617676e-05, "loss": 0.0281, "step": 279500 }, { "epoch": 4.22, "learning_rate": 2.1558510077028595e-05, "loss": 0.0273, "step": 280000 }, { "epoch": 4.23, "learning_rate": 2.1543435987880435e-05, "loss": 0.0284, "step": 280500 }, { "epoch": 4.24, "learning_rate": 2.152836189873227e-05, "loss": 0.0273, "step": 281000 }, { "epoch": 4.24, "learning_rate": 2.1513287809584107e-05, "loss": 0.0282, "step": 281500 }, { "epoch": 4.25, "learning_rate": 2.1498213720435944e-05, "loss": 0.029, "step": 282000 }, { "epoch": 4.26, "learning_rate": 2.148313963128778e-05, "loss": 0.0289, "step": 282500 }, { "epoch": 4.27, "learning_rate": 2.1468065542139616e-05, "loss": 0.0299, "step": 283000 }, { "epoch": 4.27, "learning_rate": 2.1452991452991456e-05, "loss": 0.0268, "step": 283500 }, { "epoch": 4.28, "learning_rate": 2.143791736384329e-05, "loss": 0.0288, "step": 284000 }, { "epoch": 4.29, "learning_rate": 2.1422843274695128e-05, "loss": 0.0294, "step": 284500 }, { "epoch": 4.3, "learning_rate": 2.1407769185546965e-05, "loss": 0.0287, "step": 285000 }, { "epoch": 4.3, "learning_rate": 2.1392695096398803e-05, "loss": 0.0274, "step": 285500 }, { "epoch": 4.31, "learning_rate": 2.1377621007250637e-05, "loss": 0.0289, "step": 286000 }, { "epoch": 4.32, "learning_rate": 2.1362546918102474e-05, "loss": 0.0317, "step": 286500 }, { "epoch": 4.33, "learning_rate": 2.134747282895431e-05, "loss": 0.0298, "step": 287000 }, { "epoch": 4.33, "learning_rate": 2.133239873980615e-05, "loss": 0.0288, "step": 287500 }, { "epoch": 4.34, "learning_rate": 2.1317324650657983e-05, "loss": 0.0312, "step": 288000 }, { "epoch": 4.35, "learning_rate": 2.1302250561509824e-05, "loss": 0.0287, "step": 288500 }, { "epoch": 4.36, "learning_rate": 2.1287176472361658e-05, "loss": 0.0283, "step": 289000 }, { "epoch": 4.36, "learning_rate": 2.1272102383213495e-05, "loss": 0.0284, "step": 289500 }, { "epoch": 4.37, "learning_rate": 2.1257028294065333e-05, "loss": 0.0281, "step": 290000 }, { "epoch": 4.38, "learning_rate": 2.124195420491717e-05, "loss": 0.0298, "step": 290500 }, { "epoch": 4.39, "learning_rate": 2.1226880115769004e-05, "loss": 0.0296, "step": 291000 }, { "epoch": 4.39, "learning_rate": 2.1211806026620845e-05, "loss": 0.03, "step": 291500 }, { "epoch": 4.4, "learning_rate": 2.119673193747268e-05, "loss": 0.0279, "step": 292000 }, { "epoch": 4.41, "learning_rate": 2.1181657848324517e-05, "loss": 0.0299, "step": 292500 }, { "epoch": 4.42, "learning_rate": 2.1166583759176354e-05, "loss": 0.0285, "step": 293000 }, { "epoch": 4.42, "learning_rate": 2.115150967002819e-05, "loss": 0.0301, "step": 293500 }, { "epoch": 4.43, "learning_rate": 2.1136435580880025e-05, "loss": 0.0273, "step": 294000 }, { "epoch": 4.44, "learning_rate": 2.1121361491731863e-05, "loss": 0.028, "step": 294500 }, { "epoch": 4.45, "learning_rate": 2.11062874025837e-05, "loss": 0.0279, "step": 295000 }, { "epoch": 4.45, "learning_rate": 2.1091213313435538e-05, "loss": 0.0291, "step": 295500 }, { "epoch": 4.46, "learning_rate": 2.107613922428737e-05, "loss": 0.0284, "step": 296000 }, { "epoch": 4.47, "learning_rate": 2.1061065135139212e-05, "loss": 0.0285, "step": 296500 }, { "epoch": 4.48, "learning_rate": 2.1045991045991047e-05, "loss": 0.0284, "step": 297000 }, { "epoch": 4.48, "learning_rate": 2.1030916956842884e-05, "loss": 0.0299, "step": 297500 }, { "epoch": 4.49, "learning_rate": 2.101584286769472e-05, "loss": 0.0293, "step": 298000 }, { "epoch": 4.5, "learning_rate": 2.1000768778546555e-05, "loss": 0.0304, "step": 298500 }, { "epoch": 4.51, "learning_rate": 2.0985694689398393e-05, "loss": 0.03, "step": 299000 }, { "epoch": 4.51, "learning_rate": 2.097062060025023e-05, "loss": 0.0302, "step": 299500 }, { "epoch": 4.52, "learning_rate": 2.0955546511102068e-05, "loss": 0.0297, "step": 300000 }, { "epoch": 4.53, "learning_rate": 2.0940472421953902e-05, "loss": 0.0299, "step": 300500 }, { "epoch": 4.54, "learning_rate": 2.092539833280574e-05, "loss": 0.03, "step": 301000 }, { "epoch": 4.54, "learning_rate": 2.0910324243657577e-05, "loss": 0.0299, "step": 301500 }, { "epoch": 4.55, "learning_rate": 2.0895250154509414e-05, "loss": 0.0283, "step": 302000 }, { "epoch": 4.56, "learning_rate": 2.0880176065361248e-05, "loss": 0.0302, "step": 302500 }, { "epoch": 4.57, "learning_rate": 2.086510197621309e-05, "loss": 0.0297, "step": 303000 }, { "epoch": 4.57, "learning_rate": 2.0850027887064923e-05, "loss": 0.03, "step": 303500 }, { "epoch": 4.58, "learning_rate": 2.083495379791676e-05, "loss": 0.0298, "step": 304000 }, { "epoch": 4.59, "learning_rate": 2.0819879708768598e-05, "loss": 0.0292, "step": 304500 }, { "epoch": 4.6, "learning_rate": 2.0804805619620435e-05, "loss": 0.0307, "step": 305000 }, { "epoch": 4.61, "learning_rate": 2.078973153047227e-05, "loss": 0.0309, "step": 305500 }, { "epoch": 4.61, "learning_rate": 2.077465744132411e-05, "loss": 0.0307, "step": 306000 }, { "epoch": 4.62, "learning_rate": 2.0759583352175944e-05, "loss": 0.0305, "step": 306500 }, { "epoch": 4.63, "learning_rate": 2.074450926302778e-05, "loss": 0.0299, "step": 307000 }, { "epoch": 4.64, "learning_rate": 2.072943517387962e-05, "loss": 0.0279, "step": 307500 }, { "epoch": 4.64, "learning_rate": 2.0714361084731456e-05, "loss": 0.0287, "step": 308000 }, { "epoch": 4.65, "learning_rate": 2.069928699558329e-05, "loss": 0.029, "step": 308500 }, { "epoch": 4.66, "learning_rate": 2.0684212906435128e-05, "loss": 0.0288, "step": 309000 }, { "epoch": 4.67, "learning_rate": 2.0669138817286965e-05, "loss": 0.0295, "step": 309500 }, { "epoch": 4.67, "learning_rate": 2.0654064728138803e-05, "loss": 0.03, "step": 310000 }, { "epoch": 4.68, "learning_rate": 2.0638990638990637e-05, "loss": 0.0281, "step": 310500 }, { "epoch": 4.69, "learning_rate": 2.0623916549842477e-05, "loss": 0.0292, "step": 311000 }, { "epoch": 4.7, "learning_rate": 2.060884246069431e-05, "loss": 0.0291, "step": 311500 }, { "epoch": 4.7, "learning_rate": 2.059376837154615e-05, "loss": 0.0305, "step": 312000 }, { "epoch": 4.71, "learning_rate": 2.0578694282397986e-05, "loss": 0.0305, "step": 312500 }, { "epoch": 4.72, "learning_rate": 2.0563620193249824e-05, "loss": 0.0276, "step": 313000 }, { "epoch": 4.73, "learning_rate": 2.0548546104101658e-05, "loss": 0.0286, "step": 313500 }, { "epoch": 4.73, "learning_rate": 2.05334720149535e-05, "loss": 0.0312, "step": 314000 }, { "epoch": 4.74, "learning_rate": 2.0518397925805333e-05, "loss": 0.0296, "step": 314500 }, { "epoch": 4.75, "learning_rate": 2.050332383665717e-05, "loss": 0.0297, "step": 315000 }, { "epoch": 4.76, "learning_rate": 2.0488249747509007e-05, "loss": 0.0283, "step": 315500 }, { "epoch": 4.76, "learning_rate": 2.0473175658360845e-05, "loss": 0.0298, "step": 316000 }, { "epoch": 4.77, "learning_rate": 2.045810156921268e-05, "loss": 0.0301, "step": 316500 }, { "epoch": 4.78, "learning_rate": 2.0443027480064516e-05, "loss": 0.0291, "step": 317000 }, { "epoch": 4.79, "learning_rate": 2.0427953390916354e-05, "loss": 0.0298, "step": 317500 }, { "epoch": 4.79, "learning_rate": 2.041287930176819e-05, "loss": 0.0304, "step": 318000 }, { "epoch": 4.8, "learning_rate": 2.0397805212620025e-05, "loss": 0.0287, "step": 318500 }, { "epoch": 4.81, "learning_rate": 2.0382731123471866e-05, "loss": 0.0317, "step": 319000 }, { "epoch": 4.82, "learning_rate": 2.03676570343237e-05, "loss": 0.0285, "step": 319500 }, { "epoch": 4.82, "learning_rate": 2.0352582945175537e-05, "loss": 0.0298, "step": 320000 }, { "epoch": 4.83, "learning_rate": 2.0337508856027375e-05, "loss": 0.0316, "step": 320500 }, { "epoch": 4.84, "learning_rate": 2.0322434766879212e-05, "loss": 0.029, "step": 321000 }, { "epoch": 4.85, "learning_rate": 2.0307360677731046e-05, "loss": 0.0298, "step": 321500 }, { "epoch": 4.85, "learning_rate": 2.0292286588582887e-05, "loss": 0.0307, "step": 322000 }, { "epoch": 4.86, "learning_rate": 2.027721249943472e-05, "loss": 0.0315, "step": 322500 }, { "epoch": 4.87, "learning_rate": 2.026213841028656e-05, "loss": 0.03, "step": 323000 }, { "epoch": 4.88, "learning_rate": 2.0247064321138396e-05, "loss": 0.0299, "step": 323500 }, { "epoch": 4.88, "learning_rate": 2.0231990231990233e-05, "loss": 0.028, "step": 324000 }, { "epoch": 4.89, "learning_rate": 2.0216916142842067e-05, "loss": 0.0293, "step": 324500 }, { "epoch": 4.9, "learning_rate": 2.0201842053693905e-05, "loss": 0.0307, "step": 325000 }, { "epoch": 4.91, "learning_rate": 2.0186767964545742e-05, "loss": 0.0291, "step": 325500 }, { "epoch": 4.91, "learning_rate": 2.017169387539758e-05, "loss": 0.0283, "step": 326000 }, { "epoch": 4.92, "learning_rate": 2.0156619786249414e-05, "loss": 0.0295, "step": 326500 }, { "epoch": 4.93, "learning_rate": 2.0141545697101255e-05, "loss": 0.0301, "step": 327000 }, { "epoch": 4.94, "learning_rate": 2.012647160795309e-05, "loss": 0.0297, "step": 327500 }, { "epoch": 4.94, "learning_rate": 2.0111397518804926e-05, "loss": 0.0312, "step": 328000 }, { "epoch": 4.95, "learning_rate": 2.0096323429656763e-05, "loss": 0.0298, "step": 328500 }, { "epoch": 4.96, "learning_rate": 2.00812493405086e-05, "loss": 0.0281, "step": 329000 }, { "epoch": 4.97, "learning_rate": 2.0066175251360435e-05, "loss": 0.0318, "step": 329500 }, { "epoch": 4.97, "learning_rate": 2.0051101162212276e-05, "loss": 0.03, "step": 330000 }, { "epoch": 4.98, "learning_rate": 2.003602707306411e-05, "loss": 0.0303, "step": 330500 }, { "epoch": 4.99, "learning_rate": 2.0020952983915947e-05, "loss": 0.0286, "step": 331000 }, { "epoch": 5.0, "learning_rate": 2.000587889476778e-05, "loss": 0.0294, "step": 331500 }, { "epoch": 5.0, "eval_accuracy": 0.9836750045531131, "eval_f1": 0.9254364451293497, "eval_loss": 0.04797117039561272, "eval_precision": 0.9037506787681328, "eval_recall": 0.9481885104807172, "eval_runtime": 272.7989, "eval_samples_per_second": 432.275, "eval_steps_per_second": 27.02, "step": 331695 }, { "epoch": 5.0, "learning_rate": 1.9990804805619622e-05, "loss": 0.0249, "step": 332000 }, { "epoch": 5.01, "learning_rate": 1.9975730716471456e-05, "loss": 0.0222, "step": 332500 }, { "epoch": 5.02, "learning_rate": 1.9960656627323293e-05, "loss": 0.0229, "step": 333000 }, { "epoch": 5.03, "learning_rate": 1.994558253817513e-05, "loss": 0.0243, "step": 333500 }, { "epoch": 5.03, "learning_rate": 1.993050844902697e-05, "loss": 0.0221, "step": 334000 }, { "epoch": 5.04, "learning_rate": 1.9915434359878802e-05, "loss": 0.0236, "step": 334500 }, { "epoch": 5.05, "learning_rate": 1.9900360270730643e-05, "loss": 0.0232, "step": 335000 }, { "epoch": 5.06, "learning_rate": 1.9885286181582477e-05, "loss": 0.0227, "step": 335500 }, { "epoch": 5.06, "learning_rate": 1.9870212092434315e-05, "loss": 0.0228, "step": 336000 }, { "epoch": 5.07, "learning_rate": 1.9855138003286152e-05, "loss": 0.0232, "step": 336500 }, { "epoch": 5.08, "learning_rate": 1.984006391413799e-05, "loss": 0.0234, "step": 337000 }, { "epoch": 5.09, "learning_rate": 1.9824989824989823e-05, "loss": 0.0222, "step": 337500 }, { "epoch": 5.1, "learning_rate": 1.9809915735841664e-05, "loss": 0.0225, "step": 338000 }, { "epoch": 5.1, "learning_rate": 1.97948416466935e-05, "loss": 0.0235, "step": 338500 }, { "epoch": 5.11, "learning_rate": 1.9779767557545336e-05, "loss": 0.0235, "step": 339000 }, { "epoch": 5.12, "learning_rate": 1.976469346839717e-05, "loss": 0.0231, "step": 339500 }, { "epoch": 5.13, "learning_rate": 1.974961937924901e-05, "loss": 0.0232, "step": 340000 }, { "epoch": 5.13, "learning_rate": 1.9734545290100845e-05, "loss": 0.0232, "step": 340500 }, { "epoch": 5.14, "learning_rate": 1.9719471200952682e-05, "loss": 0.0236, "step": 341000 }, { "epoch": 5.15, "learning_rate": 1.970439711180452e-05, "loss": 0.0221, "step": 341500 }, { "epoch": 5.16, "learning_rate": 1.9689323022656357e-05, "loss": 0.0234, "step": 342000 }, { "epoch": 5.16, "learning_rate": 1.967424893350819e-05, "loss": 0.0247, "step": 342500 }, { "epoch": 5.17, "learning_rate": 1.9659174844360032e-05, "loss": 0.0245, "step": 343000 }, { "epoch": 5.18, "learning_rate": 1.9644100755211866e-05, "loss": 0.0244, "step": 343500 }, { "epoch": 5.19, "learning_rate": 1.9629026666063703e-05, "loss": 0.0232, "step": 344000 }, { "epoch": 5.19, "learning_rate": 1.961395257691554e-05, "loss": 0.0232, "step": 344500 }, { "epoch": 5.2, "learning_rate": 1.9598878487767378e-05, "loss": 0.0241, "step": 345000 }, { "epoch": 5.21, "learning_rate": 1.9583804398619212e-05, "loss": 0.0248, "step": 345500 }, { "epoch": 5.22, "learning_rate": 1.9568730309471053e-05, "loss": 0.0251, "step": 346000 }, { "epoch": 5.22, "learning_rate": 1.9553656220322887e-05, "loss": 0.0247, "step": 346500 }, { "epoch": 5.23, "learning_rate": 1.9538582131174724e-05, "loss": 0.0241, "step": 347000 }, { "epoch": 5.24, "learning_rate": 1.952350804202656e-05, "loss": 0.0255, "step": 347500 }, { "epoch": 5.25, "learning_rate": 1.95084339528784e-05, "loss": 0.0242, "step": 348000 }, { "epoch": 5.25, "learning_rate": 1.9493359863730233e-05, "loss": 0.0241, "step": 348500 }, { "epoch": 5.26, "learning_rate": 1.947828577458207e-05, "loss": 0.0252, "step": 349000 }, { "epoch": 5.27, "learning_rate": 1.9463211685433908e-05, "loss": 0.0242, "step": 349500 }, { "epoch": 5.28, "learning_rate": 1.9448137596285745e-05, "loss": 0.0229, "step": 350000 }, { "epoch": 5.28, "learning_rate": 1.943306350713758e-05, "loss": 0.0235, "step": 350500 }, { "epoch": 5.29, "learning_rate": 1.941798941798942e-05, "loss": 0.0234, "step": 351000 }, { "epoch": 5.3, "learning_rate": 1.9402915328841254e-05, "loss": 0.0237, "step": 351500 }, { "epoch": 5.31, "learning_rate": 1.9387841239693092e-05, "loss": 0.0256, "step": 352000 }, { "epoch": 5.31, "learning_rate": 1.937276715054493e-05, "loss": 0.0229, "step": 352500 }, { "epoch": 5.32, "learning_rate": 1.9357693061396767e-05, "loss": 0.0249, "step": 353000 }, { "epoch": 5.33, "learning_rate": 1.93426189722486e-05, "loss": 0.0266, "step": 353500 }, { "epoch": 5.34, "learning_rate": 1.9327544883100438e-05, "loss": 0.0228, "step": 354000 }, { "epoch": 5.34, "learning_rate": 1.9312470793952276e-05, "loss": 0.0249, "step": 354500 }, { "epoch": 5.35, "learning_rate": 1.9297396704804113e-05, "loss": 0.0256, "step": 355000 }, { "epoch": 5.36, "learning_rate": 1.9282322615655947e-05, "loss": 0.0249, "step": 355500 }, { "epoch": 5.37, "learning_rate": 1.9267248526507788e-05, "loss": 0.0239, "step": 356000 }, { "epoch": 5.37, "learning_rate": 1.9252174437359622e-05, "loss": 0.0244, "step": 356500 }, { "epoch": 5.38, "learning_rate": 1.923710034821146e-05, "loss": 0.0234, "step": 357000 }, { "epoch": 5.39, "learning_rate": 1.9222026259063297e-05, "loss": 0.0242, "step": 357500 }, { "epoch": 5.4, "learning_rate": 1.9206952169915134e-05, "loss": 0.0252, "step": 358000 }, { "epoch": 5.4, "learning_rate": 1.9191878080766968e-05, "loss": 0.0248, "step": 358500 }, { "epoch": 5.41, "learning_rate": 1.917680399161881e-05, "loss": 0.0245, "step": 359000 }, { "epoch": 5.42, "learning_rate": 1.9161729902470643e-05, "loss": 0.0246, "step": 359500 }, { "epoch": 5.43, "learning_rate": 1.914665581332248e-05, "loss": 0.0242, "step": 360000 }, { "epoch": 5.43, "learning_rate": 1.9131581724174318e-05, "loss": 0.0242, "step": 360500 }, { "epoch": 5.44, "learning_rate": 1.9116507635026155e-05, "loss": 0.0243, "step": 361000 }, { "epoch": 5.45, "learning_rate": 1.910143354587799e-05, "loss": 0.0247, "step": 361500 }, { "epoch": 5.46, "learning_rate": 1.9086359456729827e-05, "loss": 0.0237, "step": 362000 }, { "epoch": 5.46, "learning_rate": 1.9071285367581664e-05, "loss": 0.0242, "step": 362500 }, { "epoch": 5.47, "learning_rate": 1.90562112784335e-05, "loss": 0.0237, "step": 363000 }, { "epoch": 5.48, "learning_rate": 1.9041137189285336e-05, "loss": 0.0241, "step": 363500 }, { "epoch": 5.49, "learning_rate": 1.9026063100137176e-05, "loss": 0.025, "step": 364000 }, { "epoch": 5.49, "learning_rate": 1.901098901098901e-05, "loss": 0.0248, "step": 364500 }, { "epoch": 5.5, "learning_rate": 1.8995914921840848e-05, "loss": 0.0249, "step": 365000 }, { "epoch": 5.51, "learning_rate": 1.8980840832692685e-05, "loss": 0.0246, "step": 365500 }, { "epoch": 5.52, "learning_rate": 1.8965766743544523e-05, "loss": 0.0249, "step": 366000 }, { "epoch": 5.52, "learning_rate": 1.8950692654396357e-05, "loss": 0.0252, "step": 366500 }, { "epoch": 5.53, "learning_rate": 1.8935618565248198e-05, "loss": 0.0233, "step": 367000 }, { "epoch": 5.54, "learning_rate": 1.892054447610003e-05, "loss": 0.0262, "step": 367500 }, { "epoch": 5.55, "learning_rate": 1.890547038695187e-05, "loss": 0.0248, "step": 368000 }, { "epoch": 5.55, "learning_rate": 1.8890396297803706e-05, "loss": 0.0245, "step": 368500 }, { "epoch": 5.56, "learning_rate": 1.8875322208655544e-05, "loss": 0.0259, "step": 369000 }, { "epoch": 5.57, "learning_rate": 1.8860248119507378e-05, "loss": 0.0239, "step": 369500 }, { "epoch": 5.58, "learning_rate": 1.8845174030359215e-05, "loss": 0.0252, "step": 370000 }, { "epoch": 5.58, "learning_rate": 1.8830099941211053e-05, "loss": 0.0241, "step": 370500 }, { "epoch": 5.59, "learning_rate": 1.881502585206289e-05, "loss": 0.0259, "step": 371000 }, { "epoch": 5.6, "learning_rate": 1.8799951762914724e-05, "loss": 0.0248, "step": 371500 }, { "epoch": 5.61, "learning_rate": 1.8784877673766565e-05, "loss": 0.0258, "step": 372000 }, { "epoch": 5.62, "learning_rate": 1.87698035846184e-05, "loss": 0.0248, "step": 372500 }, { "epoch": 5.62, "learning_rate": 1.8754729495470236e-05, "loss": 0.0251, "step": 373000 }, { "epoch": 5.63, "learning_rate": 1.8739655406322074e-05, "loss": 0.0249, "step": 373500 }, { "epoch": 5.64, "learning_rate": 1.872458131717391e-05, "loss": 0.0249, "step": 374000 }, { "epoch": 5.65, "learning_rate": 1.8709507228025745e-05, "loss": 0.0259, "step": 374500 }, { "epoch": 5.65, "learning_rate": 1.8694433138877586e-05, "loss": 0.0262, "step": 375000 }, { "epoch": 5.66, "learning_rate": 1.867935904972942e-05, "loss": 0.0261, "step": 375500 }, { "epoch": 5.67, "learning_rate": 1.8664284960581258e-05, "loss": 0.0252, "step": 376000 }, { "epoch": 5.68, "learning_rate": 1.8649210871433095e-05, "loss": 0.0244, "step": 376500 }, { "epoch": 5.68, "learning_rate": 1.8634136782284932e-05, "loss": 0.0262, "step": 377000 }, { "epoch": 5.69, "learning_rate": 1.8619062693136766e-05, "loss": 0.0258, "step": 377500 }, { "epoch": 5.7, "learning_rate": 1.8603988603988604e-05, "loss": 0.0241, "step": 378000 }, { "epoch": 5.71, "learning_rate": 1.858891451484044e-05, "loss": 0.0245, "step": 378500 }, { "epoch": 5.71, "learning_rate": 1.857384042569228e-05, "loss": 0.0257, "step": 379000 }, { "epoch": 5.72, "learning_rate": 1.8558766336544113e-05, "loss": 0.0258, "step": 379500 }, { "epoch": 5.73, "learning_rate": 1.8543692247395954e-05, "loss": 0.0252, "step": 380000 }, { "epoch": 5.74, "learning_rate": 1.8528618158247788e-05, "loss": 0.0259, "step": 380500 }, { "epoch": 5.74, "learning_rate": 1.8513544069099625e-05, "loss": 0.026, "step": 381000 }, { "epoch": 5.75, "learning_rate": 1.8498469979951462e-05, "loss": 0.0246, "step": 381500 }, { "epoch": 5.76, "learning_rate": 1.84833958908033e-05, "loss": 0.0258, "step": 382000 }, { "epoch": 5.77, "learning_rate": 1.8468321801655134e-05, "loss": 0.0251, "step": 382500 }, { "epoch": 5.77, "learning_rate": 1.8453247712506975e-05, "loss": 0.023, "step": 383000 }, { "epoch": 5.78, "learning_rate": 1.843817362335881e-05, "loss": 0.0257, "step": 383500 }, { "epoch": 5.79, "learning_rate": 1.8423099534210646e-05, "loss": 0.0254, "step": 384000 }, { "epoch": 5.8, "learning_rate": 1.840802544506248e-05, "loss": 0.0266, "step": 384500 }, { "epoch": 5.8, "learning_rate": 1.839295135591432e-05, "loss": 0.0264, "step": 385000 }, { "epoch": 5.81, "learning_rate": 1.8377877266766155e-05, "loss": 0.0248, "step": 385500 }, { "epoch": 5.82, "learning_rate": 1.8362803177617992e-05, "loss": 0.0254, "step": 386000 }, { "epoch": 5.83, "learning_rate": 1.834772908846983e-05, "loss": 0.0254, "step": 386500 }, { "epoch": 5.83, "learning_rate": 1.8332654999321667e-05, "loss": 0.0246, "step": 387000 }, { "epoch": 5.84, "learning_rate": 1.83175809101735e-05, "loss": 0.0258, "step": 387500 }, { "epoch": 5.85, "learning_rate": 1.8302506821025342e-05, "loss": 0.0259, "step": 388000 }, { "epoch": 5.86, "learning_rate": 1.8287432731877176e-05, "loss": 0.0239, "step": 388500 }, { "epoch": 5.86, "learning_rate": 1.8272358642729014e-05, "loss": 0.0253, "step": 389000 }, { "epoch": 5.87, "learning_rate": 1.825728455358085e-05, "loss": 0.0263, "step": 389500 }, { "epoch": 5.88, "learning_rate": 1.824221046443269e-05, "loss": 0.0257, "step": 390000 }, { "epoch": 5.89, "learning_rate": 1.8227136375284522e-05, "loss": 0.0244, "step": 390500 }, { "epoch": 5.89, "learning_rate": 1.8212062286136363e-05, "loss": 0.0269, "step": 391000 }, { "epoch": 5.9, "learning_rate": 1.8196988196988197e-05, "loss": 0.0245, "step": 391500 }, { "epoch": 5.91, "learning_rate": 1.8181914107840035e-05, "loss": 0.0237, "step": 392000 }, { "epoch": 5.92, "learning_rate": 1.816684001869187e-05, "loss": 0.0242, "step": 392500 }, { "epoch": 5.92, "learning_rate": 1.815176592954371e-05, "loss": 0.0244, "step": 393000 }, { "epoch": 5.93, "learning_rate": 1.8136691840395544e-05, "loss": 0.0235, "step": 393500 }, { "epoch": 5.94, "learning_rate": 1.812161775124738e-05, "loss": 0.0249, "step": 394000 }, { "epoch": 5.95, "learning_rate": 1.810654366209922e-05, "loss": 0.0234, "step": 394500 }, { "epoch": 5.95, "learning_rate": 1.8091469572951056e-05, "loss": 0.0265, "step": 395000 }, { "epoch": 5.96, "learning_rate": 1.807639548380289e-05, "loss": 0.0254, "step": 395500 }, { "epoch": 5.97, "learning_rate": 1.806132139465473e-05, "loss": 0.0246, "step": 396000 }, { "epoch": 5.98, "learning_rate": 1.8046247305506565e-05, "loss": 0.0242, "step": 396500 }, { "epoch": 5.98, "learning_rate": 1.8031173216358402e-05, "loss": 0.0258, "step": 397000 }, { "epoch": 5.99, "learning_rate": 1.801609912721024e-05, "loss": 0.0244, "step": 397500 }, { "epoch": 6.0, "learning_rate": 1.8001025038062077e-05, "loss": 0.0248, "step": 398000 }, { "epoch": 6.0, "eval_accuracy": 0.9847667198135485, "eval_f1": 0.9301843687374748, "eval_loss": 0.04803721234202385, "eval_precision": 0.9163636220055831, "eval_recall": 0.9444283929566976, "eval_runtime": 246.304, "eval_samples_per_second": 478.774, "eval_steps_per_second": 29.926, "step": 398034 }, { "epoch": 6.01, "learning_rate": 1.798595094891391e-05, "loss": 0.0185, "step": 398500 }, { "epoch": 6.01, "learning_rate": 1.7970876859765752e-05, "loss": 0.0206, "step": 399000 }, { "epoch": 6.02, "learning_rate": 1.7955802770617586e-05, "loss": 0.0183, "step": 399500 }, { "epoch": 6.03, "learning_rate": 1.7940728681469423e-05, "loss": 0.0184, "step": 400000 }, { "epoch": 6.04, "learning_rate": 1.7925654592321257e-05, "loss": 0.0186, "step": 400500 }, { "epoch": 6.04, "learning_rate": 1.7910580503173098e-05, "loss": 0.0187, "step": 401000 }, { "epoch": 6.05, "learning_rate": 1.7895506414024932e-05, "loss": 0.0194, "step": 401500 }, { "epoch": 6.06, "learning_rate": 1.788043232487677e-05, "loss": 0.0207, "step": 402000 }, { "epoch": 6.07, "learning_rate": 1.7865358235728607e-05, "loss": 0.0192, "step": 402500 }, { "epoch": 6.07, "learning_rate": 1.7850284146580444e-05, "loss": 0.0195, "step": 403000 }, { "epoch": 6.08, "learning_rate": 1.783521005743228e-05, "loss": 0.0198, "step": 403500 }, { "epoch": 6.09, "learning_rate": 1.782013596828412e-05, "loss": 0.0199, "step": 404000 }, { "epoch": 6.1, "learning_rate": 1.7805061879135953e-05, "loss": 0.0212, "step": 404500 }, { "epoch": 6.11, "learning_rate": 1.778998778998779e-05, "loss": 0.019, "step": 405000 }, { "epoch": 6.11, "learning_rate": 1.7774913700839628e-05, "loss": 0.0202, "step": 405500 }, { "epoch": 6.12, "learning_rate": 1.7759839611691466e-05, "loss": 0.0189, "step": 406000 }, { "epoch": 6.13, "learning_rate": 1.77447655225433e-05, "loss": 0.0205, "step": 406500 }, { "epoch": 6.14, "learning_rate": 1.772969143339514e-05, "loss": 0.0189, "step": 407000 }, { "epoch": 6.14, "learning_rate": 1.7714617344246974e-05, "loss": 0.0201, "step": 407500 }, { "epoch": 6.15, "learning_rate": 1.7699543255098812e-05, "loss": 0.0205, "step": 408000 }, { "epoch": 6.16, "learning_rate": 1.7684469165950646e-05, "loss": 0.0197, "step": 408500 }, { "epoch": 6.17, "learning_rate": 1.7669395076802487e-05, "loss": 0.02, "step": 409000 }, { "epoch": 6.17, "learning_rate": 1.765432098765432e-05, "loss": 0.0213, "step": 409500 }, { "epoch": 6.18, "learning_rate": 1.7639246898506158e-05, "loss": 0.0215, "step": 410000 }, { "epoch": 6.19, "learning_rate": 1.7624172809357996e-05, "loss": 0.0197, "step": 410500 }, { "epoch": 6.2, "learning_rate": 1.7609098720209833e-05, "loss": 0.0184, "step": 411000 }, { "epoch": 6.2, "learning_rate": 1.7594024631061667e-05, "loss": 0.0191, "step": 411500 }, { "epoch": 6.21, "learning_rate": 1.7578950541913508e-05, "loss": 0.0194, "step": 412000 }, { "epoch": 6.22, "learning_rate": 1.7563876452765342e-05, "loss": 0.0209, "step": 412500 }, { "epoch": 6.23, "learning_rate": 1.754880236361718e-05, "loss": 0.0207, "step": 413000 }, { "epoch": 6.23, "learning_rate": 1.7533728274469017e-05, "loss": 0.02, "step": 413500 }, { "epoch": 6.24, "learning_rate": 1.7518654185320854e-05, "loss": 0.0198, "step": 414000 }, { "epoch": 6.25, "learning_rate": 1.7503580096172688e-05, "loss": 0.021, "step": 414500 }, { "epoch": 6.26, "learning_rate": 1.7488506007024526e-05, "loss": 0.0191, "step": 415000 }, { "epoch": 6.26, "learning_rate": 1.7473431917876363e-05, "loss": 0.0204, "step": 415500 }, { "epoch": 6.27, "learning_rate": 1.74583578287282e-05, "loss": 0.0214, "step": 416000 }, { "epoch": 6.28, "learning_rate": 1.7443283739580035e-05, "loss": 0.0203, "step": 416500 }, { "epoch": 6.29, "learning_rate": 1.7428209650431875e-05, "loss": 0.0208, "step": 417000 }, { "epoch": 6.29, "learning_rate": 1.741313556128371e-05, "loss": 0.0199, "step": 417500 }, { "epoch": 6.3, "learning_rate": 1.7398061472135547e-05, "loss": 0.021, "step": 418000 }, { "epoch": 6.31, "learning_rate": 1.7382987382987384e-05, "loss": 0.0211, "step": 418500 }, { "epoch": 6.32, "learning_rate": 1.736791329383922e-05, "loss": 0.0217, "step": 419000 }, { "epoch": 6.32, "learning_rate": 1.7352839204691056e-05, "loss": 0.0209, "step": 419500 }, { "epoch": 6.33, "learning_rate": 1.7337765115542896e-05, "loss": 0.0207, "step": 420000 }, { "epoch": 6.34, "learning_rate": 1.732269102639473e-05, "loss": 0.0205, "step": 420500 }, { "epoch": 6.35, "learning_rate": 1.7307616937246568e-05, "loss": 0.0203, "step": 421000 }, { "epoch": 6.35, "learning_rate": 1.7292542848098405e-05, "loss": 0.0204, "step": 421500 }, { "epoch": 6.36, "learning_rate": 1.7277468758950243e-05, "loss": 0.0202, "step": 422000 }, { "epoch": 6.37, "learning_rate": 1.7262394669802077e-05, "loss": 0.0202, "step": 422500 }, { "epoch": 6.38, "learning_rate": 1.7247320580653914e-05, "loss": 0.0224, "step": 423000 }, { "epoch": 6.38, "learning_rate": 1.723224649150575e-05, "loss": 0.0212, "step": 423500 }, { "epoch": 6.39, "learning_rate": 1.721717240235759e-05, "loss": 0.0216, "step": 424000 }, { "epoch": 6.4, "learning_rate": 1.7202098313209423e-05, "loss": 0.0206, "step": 424500 }, { "epoch": 6.41, "learning_rate": 1.7187024224061264e-05, "loss": 0.0199, "step": 425000 }, { "epoch": 6.41, "learning_rate": 1.7171950134913098e-05, "loss": 0.0215, "step": 425500 }, { "epoch": 6.42, "learning_rate": 1.7156876045764935e-05, "loss": 0.0213, "step": 426000 }, { "epoch": 6.43, "learning_rate": 1.7141801956616773e-05, "loss": 0.0208, "step": 426500 }, { "epoch": 6.44, "learning_rate": 1.712672786746861e-05, "loss": 0.0216, "step": 427000 }, { "epoch": 6.44, "learning_rate": 1.7111653778320444e-05, "loss": 0.0204, "step": 427500 }, { "epoch": 6.45, "learning_rate": 1.7096579689172285e-05, "loss": 0.0217, "step": 428000 }, { "epoch": 6.46, "learning_rate": 1.708150560002412e-05, "loss": 0.0209, "step": 428500 }, { "epoch": 6.47, "learning_rate": 1.7066431510875957e-05, "loss": 0.0212, "step": 429000 }, { "epoch": 6.47, "learning_rate": 1.7051357421727794e-05, "loss": 0.0206, "step": 429500 }, { "epoch": 6.48, "learning_rate": 1.703628333257963e-05, "loss": 0.0227, "step": 430000 }, { "epoch": 6.49, "learning_rate": 1.7021209243431465e-05, "loss": 0.0204, "step": 430500 }, { "epoch": 6.5, "learning_rate": 1.7006135154283303e-05, "loss": 0.0204, "step": 431000 }, { "epoch": 6.5, "learning_rate": 1.699106106513514e-05, "loss": 0.0207, "step": 431500 }, { "epoch": 6.51, "learning_rate": 1.6975986975986978e-05, "loss": 0.02, "step": 432000 }, { "epoch": 6.52, "learning_rate": 1.6960912886838812e-05, "loss": 0.0206, "step": 432500 }, { "epoch": 6.53, "learning_rate": 1.6945838797690653e-05, "loss": 0.0206, "step": 433000 }, { "epoch": 6.53, "learning_rate": 1.6930764708542487e-05, "loss": 0.0216, "step": 433500 }, { "epoch": 6.54, "learning_rate": 1.6915690619394324e-05, "loss": 0.0219, "step": 434000 }, { "epoch": 6.55, "learning_rate": 1.690061653024616e-05, "loss": 0.0196, "step": 434500 }, { "epoch": 6.56, "learning_rate": 1.6885542441098e-05, "loss": 0.0212, "step": 435000 }, { "epoch": 6.56, "learning_rate": 1.6870468351949833e-05, "loss": 0.0213, "step": 435500 }, { "epoch": 6.57, "learning_rate": 1.6855394262801674e-05, "loss": 0.0197, "step": 436000 }, { "epoch": 6.58, "learning_rate": 1.6840320173653508e-05, "loss": 0.0208, "step": 436500 }, { "epoch": 6.59, "learning_rate": 1.6825246084505345e-05, "loss": 0.0221, "step": 437000 }, { "epoch": 6.59, "learning_rate": 1.6810171995357183e-05, "loss": 0.0213, "step": 437500 }, { "epoch": 6.6, "learning_rate": 1.679509790620902e-05, "loss": 0.0214, "step": 438000 }, { "epoch": 6.61, "learning_rate": 1.6780023817060854e-05, "loss": 0.0202, "step": 438500 }, { "epoch": 6.62, "learning_rate": 1.676494972791269e-05, "loss": 0.0206, "step": 439000 }, { "epoch": 6.63, "learning_rate": 1.674987563876453e-05, "loss": 0.0205, "step": 439500 }, { "epoch": 6.63, "learning_rate": 1.6734801549616366e-05, "loss": 0.0197, "step": 440000 }, { "epoch": 6.64, "learning_rate": 1.67197274604682e-05, "loss": 0.023, "step": 440500 }, { "epoch": 6.65, "learning_rate": 1.670465337132004e-05, "loss": 0.0218, "step": 441000 }, { "epoch": 6.66, "learning_rate": 1.6689579282171875e-05, "loss": 0.0215, "step": 441500 }, { "epoch": 6.66, "learning_rate": 1.6674505193023713e-05, "loss": 0.0224, "step": 442000 }, { "epoch": 6.67, "learning_rate": 1.665943110387555e-05, "loss": 0.0213, "step": 442500 }, { "epoch": 6.68, "learning_rate": 1.6644357014727387e-05, "loss": 0.0205, "step": 443000 }, { "epoch": 6.69, "learning_rate": 1.662928292557922e-05, "loss": 0.02, "step": 443500 }, { "epoch": 6.69, "learning_rate": 1.6614208836431062e-05, "loss": 0.0225, "step": 444000 }, { "epoch": 6.7, "learning_rate": 1.6599134747282896e-05, "loss": 0.0215, "step": 444500 }, { "epoch": 6.71, "learning_rate": 1.6584060658134734e-05, "loss": 0.0207, "step": 445000 }, { "epoch": 6.72, "learning_rate": 1.6568986568986568e-05, "loss": 0.0218, "step": 445500 }, { "epoch": 6.72, "learning_rate": 1.655391247983841e-05, "loss": 0.021, "step": 446000 }, { "epoch": 6.73, "learning_rate": 1.6538838390690243e-05, "loss": 0.0207, "step": 446500 }, { "epoch": 6.74, "learning_rate": 1.652376430154208e-05, "loss": 0.0207, "step": 447000 }, { "epoch": 6.75, "learning_rate": 1.6508690212393917e-05, "loss": 0.0212, "step": 447500 }, { "epoch": 6.75, "learning_rate": 1.649361612324575e-05, "loss": 0.022, "step": 448000 }, { "epoch": 6.76, "learning_rate": 1.647854203409759e-05, "loss": 0.0214, "step": 448500 }, { "epoch": 6.77, "learning_rate": 1.6463467944949426e-05, "loss": 0.0212, "step": 449000 }, { "epoch": 6.78, "learning_rate": 1.6448393855801264e-05, "loss": 0.0228, "step": 449500 }, { "epoch": 6.78, "learning_rate": 1.6433319766653098e-05, "loss": 0.0221, "step": 450000 }, { "epoch": 6.79, "learning_rate": 1.641824567750494e-05, "loss": 0.02, "step": 450500 }, { "epoch": 6.8, "learning_rate": 1.6403171588356773e-05, "loss": 0.0222, "step": 451000 }, { "epoch": 6.81, "learning_rate": 1.638809749920861e-05, "loss": 0.0218, "step": 451500 }, { "epoch": 6.81, "learning_rate": 1.6373023410060447e-05, "loss": 0.0214, "step": 452000 }, { "epoch": 6.82, "learning_rate": 1.6357949320912285e-05, "loss": 0.0209, "step": 452500 }, { "epoch": 6.83, "learning_rate": 1.634287523176412e-05, "loss": 0.0207, "step": 453000 }, { "epoch": 6.84, "learning_rate": 1.6327801142615956e-05, "loss": 0.0203, "step": 453500 }, { "epoch": 6.84, "learning_rate": 1.6312727053467794e-05, "loss": 0.0212, "step": 454000 }, { "epoch": 6.85, "learning_rate": 1.629765296431963e-05, "loss": 0.0215, "step": 454500 }, { "epoch": 6.86, "learning_rate": 1.6282578875171465e-05, "loss": 0.0216, "step": 455000 }, { "epoch": 6.87, "learning_rate": 1.6267504786023306e-05, "loss": 0.021, "step": 455500 }, { "epoch": 6.87, "learning_rate": 1.625243069687514e-05, "loss": 0.0209, "step": 456000 }, { "epoch": 6.88, "learning_rate": 1.6237356607726977e-05, "loss": 0.0223, "step": 456500 }, { "epoch": 6.89, "learning_rate": 1.6222282518578815e-05, "loss": 0.0228, "step": 457000 }, { "epoch": 6.9, "learning_rate": 1.6207208429430652e-05, "loss": 0.0206, "step": 457500 }, { "epoch": 6.9, "learning_rate": 1.6192134340282486e-05, "loss": 0.0215, "step": 458000 }, { "epoch": 6.91, "learning_rate": 1.6177060251134327e-05, "loss": 0.0226, "step": 458500 }, { "epoch": 6.92, "learning_rate": 1.616198616198616e-05, "loss": 0.0213, "step": 459000 }, { "epoch": 6.93, "learning_rate": 1.6146912072838e-05, "loss": 0.0216, "step": 459500 }, { "epoch": 6.93, "learning_rate": 1.6131837983689836e-05, "loss": 0.0207, "step": 460000 }, { "epoch": 6.94, "learning_rate": 1.6116763894541673e-05, "loss": 0.0207, "step": 460500 }, { "epoch": 6.95, "learning_rate": 1.6101689805393507e-05, "loss": 0.0212, "step": 461000 }, { "epoch": 6.96, "learning_rate": 1.6086615716245345e-05, "loss": 0.0209, "step": 461500 }, { "epoch": 6.96, "learning_rate": 1.6071541627097182e-05, "loss": 0.0222, "step": 462000 }, { "epoch": 6.97, "learning_rate": 1.605646753794902e-05, "loss": 0.0209, "step": 462500 }, { "epoch": 6.98, "learning_rate": 1.6041393448800854e-05, "loss": 0.0209, "step": 463000 }, { "epoch": 6.99, "learning_rate": 1.6026319359652695e-05, "loss": 0.021, "step": 463500 }, { "epoch": 6.99, "learning_rate": 1.601124527050453e-05, "loss": 0.0207, "step": 464000 }, { "epoch": 7.0, "eval_accuracy": 0.9850018135062101, "eval_f1": 0.932737915732166, "eval_loss": 0.050809137523174286, "eval_precision": 0.9158391378012049, "eval_recall": 0.9502720388057151, "eval_runtime": 218.6826, "eval_samples_per_second": 539.247, "eval_steps_per_second": 33.706, "step": 464373 }, { "epoch": 7.0, "learning_rate": 1.5996171181356366e-05, "loss": 0.0192, "step": 464500 }, { "epoch": 7.01, "learning_rate": 1.5981097092208203e-05, "loss": 0.0163, "step": 465000 }, { "epoch": 7.02, "learning_rate": 1.596602300306004e-05, "loss": 0.0169, "step": 465500 }, { "epoch": 7.02, "learning_rate": 1.5950948913911875e-05, "loss": 0.0159, "step": 466000 }, { "epoch": 7.03, "learning_rate": 1.5935874824763716e-05, "loss": 0.0157, "step": 466500 }, { "epoch": 7.04, "learning_rate": 1.592080073561555e-05, "loss": 0.0162, "step": 467000 }, { "epoch": 7.05, "learning_rate": 1.5905726646467387e-05, "loss": 0.0159, "step": 467500 }, { "epoch": 7.05, "learning_rate": 1.5890652557319225e-05, "loss": 0.0153, "step": 468000 }, { "epoch": 7.06, "learning_rate": 1.5875578468171062e-05, "loss": 0.0174, "step": 468500 }, { "epoch": 7.07, "learning_rate": 1.5860504379022896e-05, "loss": 0.0167, "step": 469000 }, { "epoch": 7.08, "learning_rate": 1.5845430289874733e-05, "loss": 0.0163, "step": 469500 }, { "epoch": 7.08, "learning_rate": 1.583035620072657e-05, "loss": 0.017, "step": 470000 }, { "epoch": 7.09, "learning_rate": 1.581528211157841e-05, "loss": 0.0162, "step": 470500 }, { "epoch": 7.1, "learning_rate": 1.5800208022430242e-05, "loss": 0.0152, "step": 471000 }, { "epoch": 7.11, "learning_rate": 1.5785133933282083e-05, "loss": 0.0175, "step": 471500 }, { "epoch": 7.11, "learning_rate": 1.5770059844133917e-05, "loss": 0.0163, "step": 472000 }, { "epoch": 7.12, "learning_rate": 1.5754985754985755e-05, "loss": 0.0166, "step": 472500 }, { "epoch": 7.13, "learning_rate": 1.5739911665837592e-05, "loss": 0.0177, "step": 473000 }, { "epoch": 7.14, "learning_rate": 1.572483757668943e-05, "loss": 0.0165, "step": 473500 }, { "epoch": 7.15, "learning_rate": 1.5709763487541264e-05, "loss": 0.0164, "step": 474000 }, { "epoch": 7.15, "learning_rate": 1.5694689398393104e-05, "loss": 0.0172, "step": 474500 }, { "epoch": 7.16, "learning_rate": 1.567961530924494e-05, "loss": 0.016, "step": 475000 }, { "epoch": 7.17, "learning_rate": 1.5664541220096776e-05, "loss": 0.0181, "step": 475500 }, { "epoch": 7.18, "learning_rate": 1.564946713094861e-05, "loss": 0.0172, "step": 476000 }, { "epoch": 7.18, "learning_rate": 1.563439304180045e-05, "loss": 0.0167, "step": 476500 }, { "epoch": 7.19, "learning_rate": 1.5619318952652285e-05, "loss": 0.0186, "step": 477000 }, { "epoch": 7.2, "learning_rate": 1.5604244863504122e-05, "loss": 0.0163, "step": 477500 }, { "epoch": 7.21, "learning_rate": 1.558917077435596e-05, "loss": 0.0173, "step": 478000 }, { "epoch": 7.21, "learning_rate": 1.5574096685207797e-05, "loss": 0.0164, "step": 478500 }, { "epoch": 7.22, "learning_rate": 1.555902259605963e-05, "loss": 0.0167, "step": 479000 }, { "epoch": 7.23, "learning_rate": 1.5543948506911472e-05, "loss": 0.0176, "step": 479500 }, { "epoch": 7.24, "learning_rate": 1.5528874417763306e-05, "loss": 0.0176, "step": 480000 }, { "epoch": 7.24, "learning_rate": 1.5513800328615143e-05, "loss": 0.0163, "step": 480500 }, { "epoch": 7.25, "learning_rate": 1.549872623946698e-05, "loss": 0.0164, "step": 481000 }, { "epoch": 7.26, "learning_rate": 1.5483652150318818e-05, "loss": 0.0173, "step": 481500 }, { "epoch": 7.27, "learning_rate": 1.5468578061170652e-05, "loss": 0.0167, "step": 482000 }, { "epoch": 7.27, "learning_rate": 1.5453503972022493e-05, "loss": 0.018, "step": 482500 }, { "epoch": 7.28, "learning_rate": 1.5438429882874327e-05, "loss": 0.017, "step": 483000 }, { "epoch": 7.29, "learning_rate": 1.5423355793726164e-05, "loss": 0.0189, "step": 483500 }, { "epoch": 7.3, "learning_rate": 1.5408281704578e-05, "loss": 0.0166, "step": 484000 }, { "epoch": 7.3, "learning_rate": 1.539320761542984e-05, "loss": 0.0179, "step": 484500 }, { "epoch": 7.31, "learning_rate": 1.5378133526281673e-05, "loss": 0.0172, "step": 485000 }, { "epoch": 7.32, "learning_rate": 1.536305943713351e-05, "loss": 0.018, "step": 485500 }, { "epoch": 7.33, "learning_rate": 1.5347985347985348e-05, "loss": 0.0163, "step": 486000 }, { "epoch": 7.33, "learning_rate": 1.5332911258837186e-05, "loss": 0.0183, "step": 486500 }, { "epoch": 7.34, "learning_rate": 1.531783716968902e-05, "loss": 0.0185, "step": 487000 }, { "epoch": 7.35, "learning_rate": 1.530276308054086e-05, "loss": 0.0171, "step": 487500 }, { "epoch": 7.36, "learning_rate": 1.5287688991392694e-05, "loss": 0.018, "step": 488000 }, { "epoch": 7.36, "learning_rate": 1.5272614902244532e-05, "loss": 0.0172, "step": 488500 }, { "epoch": 7.37, "learning_rate": 1.5257540813096368e-05, "loss": 0.0176, "step": 489000 }, { "epoch": 7.38, "learning_rate": 1.5242466723948207e-05, "loss": 0.0182, "step": 489500 }, { "epoch": 7.39, "learning_rate": 1.522739263480004e-05, "loss": 0.0172, "step": 490000 }, { "epoch": 7.39, "learning_rate": 1.521231854565188e-05, "loss": 0.0169, "step": 490500 }, { "epoch": 7.4, "learning_rate": 1.5197244456503716e-05, "loss": 0.0185, "step": 491000 }, { "epoch": 7.41, "learning_rate": 1.5182170367355553e-05, "loss": 0.0161, "step": 491500 }, { "epoch": 7.42, "learning_rate": 1.5167096278207389e-05, "loss": 0.0179, "step": 492000 }, { "epoch": 7.42, "learning_rate": 1.5152022189059228e-05, "loss": 0.0172, "step": 492500 }, { "epoch": 7.43, "learning_rate": 1.5136948099911062e-05, "loss": 0.0185, "step": 493000 }, { "epoch": 7.44, "learning_rate": 1.5121874010762901e-05, "loss": 0.0183, "step": 493500 }, { "epoch": 7.45, "learning_rate": 1.5106799921614735e-05, "loss": 0.0175, "step": 494000 }, { "epoch": 7.45, "learning_rate": 1.5091725832466574e-05, "loss": 0.0172, "step": 494500 }, { "epoch": 7.46, "learning_rate": 1.507665174331841e-05, "loss": 0.018, "step": 495000 }, { "epoch": 7.47, "learning_rate": 1.5061577654170247e-05, "loss": 0.0169, "step": 495500 }, { "epoch": 7.48, "learning_rate": 1.5046503565022083e-05, "loss": 0.0178, "step": 496000 }, { "epoch": 7.48, "learning_rate": 1.5031429475873922e-05, "loss": 0.0172, "step": 496500 }, { "epoch": 7.49, "learning_rate": 1.5016355386725756e-05, "loss": 0.0178, "step": 497000 }, { "epoch": 7.5, "learning_rate": 1.5001281297577595e-05, "loss": 0.0178, "step": 497500 }, { "epoch": 7.51, "learning_rate": 1.4986207208429431e-05, "loss": 0.0171, "step": 498000 }, { "epoch": 7.51, "learning_rate": 1.4971133119281268e-05, "loss": 0.0173, "step": 498500 }, { "epoch": 7.52, "learning_rate": 1.4956059030133106e-05, "loss": 0.018, "step": 499000 }, { "epoch": 7.53, "learning_rate": 1.4940984940984942e-05, "loss": 0.0175, "step": 499500 }, { "epoch": 7.54, "learning_rate": 1.4925910851836779e-05, "loss": 0.0179, "step": 500000 }, { "epoch": 7.54, "learning_rate": 1.4910836762688615e-05, "loss": 0.0181, "step": 500500 }, { "epoch": 7.55, "learning_rate": 1.4895762673540452e-05, "loss": 0.0165, "step": 501000 }, { "epoch": 7.56, "learning_rate": 1.488068858439229e-05, "loss": 0.0171, "step": 501500 }, { "epoch": 7.57, "learning_rate": 1.4865614495244125e-05, "loss": 0.0187, "step": 502000 }, { "epoch": 7.57, "learning_rate": 1.4850540406095963e-05, "loss": 0.0177, "step": 502500 }, { "epoch": 7.58, "learning_rate": 1.48354663169478e-05, "loss": 0.0183, "step": 503000 }, { "epoch": 7.59, "learning_rate": 1.4820392227799636e-05, "loss": 0.0173, "step": 503500 }, { "epoch": 7.6, "learning_rate": 1.4805318138651473e-05, "loss": 0.0174, "step": 504000 }, { "epoch": 7.6, "learning_rate": 1.4790244049503309e-05, "loss": 0.0177, "step": 504500 }, { "epoch": 7.61, "learning_rate": 1.4775169960355146e-05, "loss": 0.0172, "step": 505000 }, { "epoch": 7.62, "learning_rate": 1.4760095871206984e-05, "loss": 0.0168, "step": 505500 }, { "epoch": 7.63, "learning_rate": 1.474502178205882e-05, "loss": 0.0175, "step": 506000 }, { "epoch": 7.64, "learning_rate": 1.4729947692910657e-05, "loss": 0.0178, "step": 506500 }, { "epoch": 7.64, "learning_rate": 1.4714873603762494e-05, "loss": 0.0185, "step": 507000 }, { "epoch": 7.65, "learning_rate": 1.469979951461433e-05, "loss": 0.0174, "step": 507500 }, { "epoch": 7.66, "learning_rate": 1.4684725425466168e-05, "loss": 0.0172, "step": 508000 }, { "epoch": 7.67, "learning_rate": 1.4669651336318003e-05, "loss": 0.0182, "step": 508500 }, { "epoch": 7.67, "learning_rate": 1.465457724716984e-05, "loss": 0.0169, "step": 509000 }, { "epoch": 7.68, "learning_rate": 1.4639503158021678e-05, "loss": 0.0192, "step": 509500 }, { "epoch": 7.69, "learning_rate": 1.4624429068873514e-05, "loss": 0.0168, "step": 510000 }, { "epoch": 7.7, "learning_rate": 1.4609354979725351e-05, "loss": 0.0182, "step": 510500 }, { "epoch": 7.7, "learning_rate": 1.4594280890577189e-05, "loss": 0.0172, "step": 511000 }, { "epoch": 7.71, "learning_rate": 1.4579206801429024e-05, "loss": 0.0182, "step": 511500 }, { "epoch": 7.72, "learning_rate": 1.4564132712280862e-05, "loss": 0.0187, "step": 512000 }, { "epoch": 7.73, "learning_rate": 1.4549058623132698e-05, "loss": 0.0179, "step": 512500 }, { "epoch": 7.73, "learning_rate": 1.4533984533984535e-05, "loss": 0.0174, "step": 513000 }, { "epoch": 7.74, "learning_rate": 1.4518910444836372e-05, "loss": 0.0183, "step": 513500 }, { "epoch": 7.75, "learning_rate": 1.4503836355688208e-05, "loss": 0.0176, "step": 514000 }, { "epoch": 7.76, "learning_rate": 1.4488762266540046e-05, "loss": 0.0183, "step": 514500 }, { "epoch": 7.76, "learning_rate": 1.4473688177391883e-05, "loss": 0.0183, "step": 515000 }, { "epoch": 7.77, "learning_rate": 1.4458614088243719e-05, "loss": 0.0174, "step": 515500 }, { "epoch": 7.78, "learning_rate": 1.4443539999095556e-05, "loss": 0.0173, "step": 516000 }, { "epoch": 7.79, "learning_rate": 1.4428465909947392e-05, "loss": 0.0185, "step": 516500 }, { "epoch": 7.79, "learning_rate": 1.441339182079923e-05, "loss": 0.0174, "step": 517000 }, { "epoch": 7.8, "learning_rate": 1.4398317731651067e-05, "loss": 0.0166, "step": 517500 }, { "epoch": 7.81, "learning_rate": 1.4383243642502902e-05, "loss": 0.0188, "step": 518000 }, { "epoch": 7.82, "learning_rate": 1.436816955335474e-05, "loss": 0.0184, "step": 518500 }, { "epoch": 7.82, "learning_rate": 1.4353095464206577e-05, "loss": 0.0168, "step": 519000 }, { "epoch": 7.83, "learning_rate": 1.4338021375058413e-05, "loss": 0.0182, "step": 519500 }, { "epoch": 7.84, "learning_rate": 1.432294728591025e-05, "loss": 0.0176, "step": 520000 }, { "epoch": 7.85, "learning_rate": 1.4307873196762086e-05, "loss": 0.0183, "step": 520500 }, { "epoch": 7.85, "learning_rate": 1.4292799107613924e-05, "loss": 0.0176, "step": 521000 }, { "epoch": 7.86, "learning_rate": 1.4277725018465761e-05, "loss": 0.0181, "step": 521500 }, { "epoch": 7.87, "learning_rate": 1.4262650929317597e-05, "loss": 0.0184, "step": 522000 }, { "epoch": 7.88, "learning_rate": 1.4247576840169432e-05, "loss": 0.0177, "step": 522500 }, { "epoch": 7.88, "learning_rate": 1.423250275102127e-05, "loss": 0.0169, "step": 523000 }, { "epoch": 7.89, "learning_rate": 1.4217428661873106e-05, "loss": 0.0183, "step": 523500 }, { "epoch": 7.9, "learning_rate": 1.4202354572724943e-05, "loss": 0.0173, "step": 524000 }, { "epoch": 7.91, "learning_rate": 1.4187280483576779e-05, "loss": 0.0177, "step": 524500 }, { "epoch": 7.91, "learning_rate": 1.4172206394428616e-05, "loss": 0.0182, "step": 525000 }, { "epoch": 7.92, "learning_rate": 1.4157132305280454e-05, "loss": 0.0172, "step": 525500 }, { "epoch": 7.93, "learning_rate": 1.414205821613229e-05, "loss": 0.0179, "step": 526000 }, { "epoch": 7.94, "learning_rate": 1.4126984126984127e-05, "loss": 0.0182, "step": 526500 }, { "epoch": 7.94, "learning_rate": 1.4111910037835964e-05, "loss": 0.0191, "step": 527000 }, { "epoch": 7.95, "learning_rate": 1.40968359486878e-05, "loss": 0.0171, "step": 527500 }, { "epoch": 7.96, "learning_rate": 1.4081761859539637e-05, "loss": 0.0181, "step": 528000 }, { "epoch": 7.97, "learning_rate": 1.4066687770391473e-05, "loss": 0.0176, "step": 528500 }, { "epoch": 7.97, "learning_rate": 1.405161368124331e-05, "loss": 0.0186, "step": 529000 }, { "epoch": 7.98, "learning_rate": 1.4036539592095148e-05, "loss": 0.0178, "step": 529500 }, { "epoch": 7.99, "learning_rate": 1.4021465502946984e-05, "loss": 0.0185, "step": 530000 }, { "epoch": 8.0, "learning_rate": 1.4006391413798821e-05, "loss": 0.0184, "step": 530500 }, { "epoch": 8.0, "eval_accuracy": 0.9852597802501692, "eval_f1": 0.9353083770465744, "eval_loss": 0.060605715960264206, "eval_precision": 0.9138706934376007, "eval_recall": 0.9577759962887151, "eval_runtime": 249.5037, "eval_samples_per_second": 472.634, "eval_steps_per_second": 29.543, "step": 530712 }, { "epoch": 8.0, "learning_rate": 1.3991317324650657e-05, "loss": 0.0149, "step": 531000 }, { "epoch": 8.01, "learning_rate": 1.3976243235502494e-05, "loss": 0.0131, "step": 531500 }, { "epoch": 8.02, "learning_rate": 1.3961169146354332e-05, "loss": 0.0145, "step": 532000 }, { "epoch": 8.03, "learning_rate": 1.3946095057206167e-05, "loss": 0.0136, "step": 532500 }, { "epoch": 8.03, "learning_rate": 1.3931020968058005e-05, "loss": 0.013, "step": 533000 }, { "epoch": 8.04, "learning_rate": 1.3915946878909842e-05, "loss": 0.0144, "step": 533500 }, { "epoch": 8.05, "learning_rate": 1.3900872789761678e-05, "loss": 0.0139, "step": 534000 }, { "epoch": 8.06, "learning_rate": 1.3885798700613515e-05, "loss": 0.0139, "step": 534500 }, { "epoch": 8.06, "learning_rate": 1.3870724611465351e-05, "loss": 0.0135, "step": 535000 }, { "epoch": 8.07, "learning_rate": 1.3855650522317188e-05, "loss": 0.013, "step": 535500 }, { "epoch": 8.08, "learning_rate": 1.3840576433169026e-05, "loss": 0.0137, "step": 536000 }, { "epoch": 8.09, "learning_rate": 1.3825502344020862e-05, "loss": 0.014, "step": 536500 }, { "epoch": 8.09, "learning_rate": 1.3810428254872699e-05, "loss": 0.0157, "step": 537000 }, { "epoch": 8.1, "learning_rate": 1.3795354165724536e-05, "loss": 0.0146, "step": 537500 }, { "epoch": 8.11, "learning_rate": 1.3780280076576372e-05, "loss": 0.0133, "step": 538000 }, { "epoch": 8.12, "learning_rate": 1.376520598742821e-05, "loss": 0.0131, "step": 538500 }, { "epoch": 8.12, "learning_rate": 1.3750131898280045e-05, "loss": 0.0134, "step": 539000 }, { "epoch": 8.13, "learning_rate": 1.3735057809131883e-05, "loss": 0.0144, "step": 539500 }, { "epoch": 8.14, "learning_rate": 1.371998371998372e-05, "loss": 0.0144, "step": 540000 }, { "epoch": 8.15, "learning_rate": 1.3704909630835556e-05, "loss": 0.0137, "step": 540500 }, { "epoch": 8.16, "learning_rate": 1.3689835541687393e-05, "loss": 0.0145, "step": 541000 }, { "epoch": 8.16, "learning_rate": 1.367476145253923e-05, "loss": 0.0154, "step": 541500 }, { "epoch": 8.17, "learning_rate": 1.3659687363391066e-05, "loss": 0.015, "step": 542000 }, { "epoch": 8.18, "learning_rate": 1.3644613274242904e-05, "loss": 0.0143, "step": 542500 }, { "epoch": 8.19, "learning_rate": 1.362953918509474e-05, "loss": 0.0143, "step": 543000 }, { "epoch": 8.19, "learning_rate": 1.3614465095946577e-05, "loss": 0.0149, "step": 543500 }, { "epoch": 8.2, "learning_rate": 1.3599391006798414e-05, "loss": 0.0145, "step": 544000 }, { "epoch": 8.21, "learning_rate": 1.358431691765025e-05, "loss": 0.0147, "step": 544500 }, { "epoch": 8.22, "learning_rate": 1.3569242828502088e-05, "loss": 0.0153, "step": 545000 }, { "epoch": 8.22, "learning_rate": 1.3554168739353925e-05, "loss": 0.0141, "step": 545500 }, { "epoch": 8.23, "learning_rate": 1.353909465020576e-05, "loss": 0.0145, "step": 546000 }, { "epoch": 8.24, "learning_rate": 1.3524020561057598e-05, "loss": 0.0137, "step": 546500 }, { "epoch": 8.25, "learning_rate": 1.3508946471909434e-05, "loss": 0.0151, "step": 547000 }, { "epoch": 8.25, "learning_rate": 1.3493872382761271e-05, "loss": 0.0146, "step": 547500 }, { "epoch": 8.26, "learning_rate": 1.3478798293613109e-05, "loss": 0.0145, "step": 548000 }, { "epoch": 8.27, "learning_rate": 1.3463724204464945e-05, "loss": 0.0153, "step": 548500 }, { "epoch": 8.28, "learning_rate": 1.3448650115316782e-05, "loss": 0.0145, "step": 549000 }, { "epoch": 8.28, "learning_rate": 1.343357602616862e-05, "loss": 0.0159, "step": 549500 }, { "epoch": 8.29, "learning_rate": 1.3418501937020455e-05, "loss": 0.0144, "step": 550000 }, { "epoch": 8.3, "learning_rate": 1.3403427847872293e-05, "loss": 0.0148, "step": 550500 }, { "epoch": 8.31, "learning_rate": 1.3388353758724128e-05, "loss": 0.0145, "step": 551000 }, { "epoch": 8.31, "learning_rate": 1.3373279669575966e-05, "loss": 0.0149, "step": 551500 }, { "epoch": 8.32, "learning_rate": 1.3358205580427803e-05, "loss": 0.0148, "step": 552000 }, { "epoch": 8.33, "learning_rate": 1.3343131491279639e-05, "loss": 0.0148, "step": 552500 }, { "epoch": 8.34, "learning_rate": 1.3328057402131476e-05, "loss": 0.0154, "step": 553000 }, { "epoch": 8.34, "learning_rate": 1.3312983312983314e-05, "loss": 0.0142, "step": 553500 }, { "epoch": 8.35, "learning_rate": 1.329790922383515e-05, "loss": 0.014, "step": 554000 }, { "epoch": 8.36, "learning_rate": 1.3282835134686987e-05, "loss": 0.0152, "step": 554500 }, { "epoch": 8.37, "learning_rate": 1.3267761045538823e-05, "loss": 0.0156, "step": 555000 }, { "epoch": 8.37, "learning_rate": 1.325268695639066e-05, "loss": 0.0155, "step": 555500 }, { "epoch": 8.38, "learning_rate": 1.3237612867242497e-05, "loss": 0.0136, "step": 556000 }, { "epoch": 8.39, "learning_rate": 1.3222538778094333e-05, "loss": 0.0143, "step": 556500 }, { "epoch": 8.4, "learning_rate": 1.320746468894617e-05, "loss": 0.0141, "step": 557000 }, { "epoch": 8.4, "learning_rate": 1.3192390599798008e-05, "loss": 0.0145, "step": 557500 }, { "epoch": 8.41, "learning_rate": 1.3177316510649844e-05, "loss": 0.0165, "step": 558000 }, { "epoch": 8.42, "learning_rate": 1.3162242421501681e-05, "loss": 0.0162, "step": 558500 }, { "epoch": 8.43, "learning_rate": 1.3147168332353517e-05, "loss": 0.0139, "step": 559000 }, { "epoch": 8.43, "learning_rate": 1.3132094243205354e-05, "loss": 0.0141, "step": 559500 }, { "epoch": 8.44, "learning_rate": 1.3117020154057192e-05, "loss": 0.0147, "step": 560000 }, { "epoch": 8.45, "learning_rate": 1.3101946064909027e-05, "loss": 0.0144, "step": 560500 }, { "epoch": 8.46, "learning_rate": 1.3086871975760865e-05, "loss": 0.0156, "step": 561000 }, { "epoch": 8.46, "learning_rate": 1.30717978866127e-05, "loss": 0.0155, "step": 561500 }, { "epoch": 8.47, "learning_rate": 1.3056723797464538e-05, "loss": 0.0152, "step": 562000 }, { "epoch": 8.48, "learning_rate": 1.3041649708316375e-05, "loss": 0.0153, "step": 562500 }, { "epoch": 8.49, "learning_rate": 1.3026575619168211e-05, "loss": 0.0146, "step": 563000 }, { "epoch": 8.49, "learning_rate": 1.3011501530020049e-05, "loss": 0.0158, "step": 563500 }, { "epoch": 8.5, "learning_rate": 1.2996427440871886e-05, "loss": 0.0143, "step": 564000 }, { "epoch": 8.51, "learning_rate": 1.2981353351723722e-05, "loss": 0.0138, "step": 564500 }, { "epoch": 8.52, "learning_rate": 1.2966279262575559e-05, "loss": 0.0158, "step": 565000 }, { "epoch": 8.52, "learning_rate": 1.2951205173427395e-05, "loss": 0.0156, "step": 565500 }, { "epoch": 8.53, "learning_rate": 1.2936131084279232e-05, "loss": 0.0161, "step": 566000 }, { "epoch": 8.54, "learning_rate": 1.292105699513107e-05, "loss": 0.0151, "step": 566500 }, { "epoch": 8.55, "learning_rate": 1.2905982905982905e-05, "loss": 0.0156, "step": 567000 }, { "epoch": 8.55, "learning_rate": 1.2890908816834743e-05, "loss": 0.0156, "step": 567500 }, { "epoch": 8.56, "learning_rate": 1.287583472768658e-05, "loss": 0.0158, "step": 568000 }, { "epoch": 8.57, "learning_rate": 1.2860760638538416e-05, "loss": 0.015, "step": 568500 }, { "epoch": 8.58, "learning_rate": 1.2845686549390253e-05, "loss": 0.0146, "step": 569000 }, { "epoch": 8.58, "learning_rate": 1.2830612460242089e-05, "loss": 0.016, "step": 569500 }, { "epoch": 8.59, "learning_rate": 1.2815538371093927e-05, "loss": 0.0142, "step": 570000 }, { "epoch": 8.6, "learning_rate": 1.2800464281945764e-05, "loss": 0.0159, "step": 570500 }, { "epoch": 8.61, "learning_rate": 1.27853901927976e-05, "loss": 0.015, "step": 571000 }, { "epoch": 8.61, "learning_rate": 1.2770316103649437e-05, "loss": 0.0163, "step": 571500 }, { "epoch": 8.62, "learning_rate": 1.2755242014501275e-05, "loss": 0.0148, "step": 572000 }, { "epoch": 8.63, "learning_rate": 1.274016792535311e-05, "loss": 0.0148, "step": 572500 }, { "epoch": 8.64, "learning_rate": 1.2725093836204948e-05, "loss": 0.0143, "step": 573000 }, { "epoch": 8.64, "learning_rate": 1.2710019747056783e-05, "loss": 0.016, "step": 573500 }, { "epoch": 8.65, "learning_rate": 1.269494565790862e-05, "loss": 0.0158, "step": 574000 }, { "epoch": 8.66, "learning_rate": 1.2679871568760458e-05, "loss": 0.0146, "step": 574500 }, { "epoch": 8.67, "learning_rate": 1.2664797479612294e-05, "loss": 0.0147, "step": 575000 }, { "epoch": 8.68, "learning_rate": 1.2649723390464131e-05, "loss": 0.0151, "step": 575500 }, { "epoch": 8.68, "learning_rate": 1.2634649301315969e-05, "loss": 0.0159, "step": 576000 }, { "epoch": 8.69, "learning_rate": 1.2619575212167805e-05, "loss": 0.0162, "step": 576500 }, { "epoch": 8.7, "learning_rate": 1.2604501123019642e-05, "loss": 0.0166, "step": 577000 }, { "epoch": 8.71, "learning_rate": 1.2589427033871478e-05, "loss": 0.0153, "step": 577500 }, { "epoch": 8.71, "learning_rate": 1.2574352944723315e-05, "loss": 0.0148, "step": 578000 }, { "epoch": 8.72, "learning_rate": 1.2559278855575153e-05, "loss": 0.0146, "step": 578500 }, { "epoch": 8.73, "learning_rate": 1.2544204766426988e-05, "loss": 0.0143, "step": 579000 }, { "epoch": 8.74, "learning_rate": 1.2529130677278826e-05, "loss": 0.013, "step": 579500 }, { "epoch": 8.74, "learning_rate": 1.2514056588130663e-05, "loss": 0.0142, "step": 580000 }, { "epoch": 8.75, "learning_rate": 1.2498982498982499e-05, "loss": 0.0151, "step": 580500 }, { "epoch": 8.76, "learning_rate": 1.2483908409834336e-05, "loss": 0.0161, "step": 581000 }, { "epoch": 8.77, "learning_rate": 1.2468834320686172e-05, "loss": 0.0156, "step": 581500 }, { "epoch": 8.77, "learning_rate": 1.245376023153801e-05, "loss": 0.0158, "step": 582000 }, { "epoch": 8.78, "learning_rate": 1.2438686142389847e-05, "loss": 0.0156, "step": 582500 }, { "epoch": 8.79, "learning_rate": 1.2423612053241683e-05, "loss": 0.0137, "step": 583000 }, { "epoch": 8.8, "learning_rate": 1.240853796409352e-05, "loss": 0.0152, "step": 583500 }, { "epoch": 8.8, "learning_rate": 1.2393463874945357e-05, "loss": 0.0149, "step": 584000 }, { "epoch": 8.81, "learning_rate": 1.2378389785797193e-05, "loss": 0.0151, "step": 584500 }, { "epoch": 8.82, "learning_rate": 1.236331569664903e-05, "loss": 0.0147, "step": 585000 }, { "epoch": 8.83, "learning_rate": 1.2348241607500866e-05, "loss": 0.0151, "step": 585500 }, { "epoch": 8.83, "learning_rate": 1.2333167518352704e-05, "loss": 0.016, "step": 586000 }, { "epoch": 8.84, "learning_rate": 1.2318093429204541e-05, "loss": 0.0145, "step": 586500 }, { "epoch": 8.85, "learning_rate": 1.2303019340056377e-05, "loss": 0.0155, "step": 587000 }, { "epoch": 8.86, "learning_rate": 1.2287945250908214e-05, "loss": 0.0164, "step": 587500 }, { "epoch": 8.86, "learning_rate": 1.227287116176005e-05, "loss": 0.0139, "step": 588000 }, { "epoch": 8.87, "learning_rate": 1.2257797072611887e-05, "loss": 0.0161, "step": 588500 }, { "epoch": 8.88, "learning_rate": 1.2242722983463725e-05, "loss": 0.015, "step": 589000 }, { "epoch": 8.89, "learning_rate": 1.222764889431556e-05, "loss": 0.0153, "step": 589500 }, { "epoch": 8.89, "learning_rate": 1.2212574805167398e-05, "loss": 0.0148, "step": 590000 }, { "epoch": 8.9, "learning_rate": 1.2197500716019235e-05, "loss": 0.0146, "step": 590500 }, { "epoch": 8.91, "learning_rate": 1.2182426626871071e-05, "loss": 0.0154, "step": 591000 }, { "epoch": 8.92, "learning_rate": 1.2167352537722909e-05, "loss": 0.0155, "step": 591500 }, { "epoch": 8.92, "learning_rate": 1.2152278448574744e-05, "loss": 0.0152, "step": 592000 }, { "epoch": 8.93, "learning_rate": 1.2137204359426582e-05, "loss": 0.0162, "step": 592500 }, { "epoch": 8.94, "learning_rate": 1.212213027027842e-05, "loss": 0.0153, "step": 593000 }, { "epoch": 8.95, "learning_rate": 1.2107056181130255e-05, "loss": 0.016, "step": 593500 }, { "epoch": 8.95, "learning_rate": 1.2091982091982092e-05, "loss": 0.0151, "step": 594000 }, { "epoch": 8.96, "learning_rate": 1.207690800283393e-05, "loss": 0.0147, "step": 594500 }, { "epoch": 8.97, "learning_rate": 1.2061833913685765e-05, "loss": 0.0147, "step": 595000 }, { "epoch": 8.98, "learning_rate": 1.2046759824537603e-05, "loss": 0.0144, "step": 595500 }, { "epoch": 8.98, "learning_rate": 1.2031685735389439e-05, "loss": 0.0146, "step": 596000 }, { "epoch": 8.99, "learning_rate": 1.2016611646241276e-05, "loss": 0.0155, "step": 596500 }, { "epoch": 9.0, "learning_rate": 1.2001537557093113e-05, "loss": 0.0144, "step": 597000 }, { "epoch": 9.0, "eval_accuracy": 0.9860240497236058, "eval_f1": 0.9384198326957164, "eval_loss": 0.06151273846626282, "eval_precision": 0.9249589912844127, "eval_recall": 0.9522782486967775, "eval_runtime": 246.0213, "eval_samples_per_second": 479.324, "eval_steps_per_second": 29.961, "step": 597051 }, { "epoch": 9.01, "learning_rate": 1.198646346794495e-05, "loss": 0.0118, "step": 597500 }, { "epoch": 9.01, "learning_rate": 1.1971389378796787e-05, "loss": 0.0108, "step": 598000 }, { "epoch": 9.02, "learning_rate": 1.1956315289648624e-05, "loss": 0.0123, "step": 598500 }, { "epoch": 9.03, "learning_rate": 1.194124120050046e-05, "loss": 0.0119, "step": 599000 }, { "epoch": 9.04, "learning_rate": 1.1926167111352297e-05, "loss": 0.0121, "step": 599500 }, { "epoch": 9.04, "learning_rate": 1.1911093022204133e-05, "loss": 0.0112, "step": 600000 }, { "epoch": 9.05, "learning_rate": 1.189601893305597e-05, "loss": 0.0126, "step": 600500 }, { "epoch": 9.06, "learning_rate": 1.1880944843907808e-05, "loss": 0.0127, "step": 601000 }, { "epoch": 9.07, "learning_rate": 1.1865870754759643e-05, "loss": 0.0128, "step": 601500 }, { "epoch": 9.07, "learning_rate": 1.1850796665611481e-05, "loss": 0.0122, "step": 602000 }, { "epoch": 9.08, "learning_rate": 1.1835722576463318e-05, "loss": 0.0124, "step": 602500 }, { "epoch": 9.09, "learning_rate": 1.1820648487315154e-05, "loss": 0.0119, "step": 603000 }, { "epoch": 9.1, "learning_rate": 1.1805574398166991e-05, "loss": 0.0117, "step": 603500 }, { "epoch": 9.1, "learning_rate": 1.1790500309018827e-05, "loss": 0.0117, "step": 604000 }, { "epoch": 9.11, "learning_rate": 1.1775426219870665e-05, "loss": 0.013, "step": 604500 }, { "epoch": 9.12, "learning_rate": 1.1760352130722502e-05, "loss": 0.0122, "step": 605000 }, { "epoch": 9.13, "learning_rate": 1.1745278041574338e-05, "loss": 0.0131, "step": 605500 }, { "epoch": 9.13, "learning_rate": 1.1730203952426175e-05, "loss": 0.012, "step": 606000 }, { "epoch": 9.14, "learning_rate": 1.1715129863278013e-05, "loss": 0.0118, "step": 606500 }, { "epoch": 9.15, "learning_rate": 1.1700055774129848e-05, "loss": 0.012, "step": 607000 }, { "epoch": 9.16, "learning_rate": 1.1684981684981686e-05, "loss": 0.0125, "step": 607500 }, { "epoch": 9.17, "learning_rate": 1.1669907595833521e-05, "loss": 0.0113, "step": 608000 }, { "epoch": 9.17, "learning_rate": 1.1654833506685359e-05, "loss": 0.0119, "step": 608500 }, { "epoch": 9.18, "learning_rate": 1.1639759417537196e-05, "loss": 0.0115, "step": 609000 }, { "epoch": 9.19, "learning_rate": 1.1624685328389032e-05, "loss": 0.0129, "step": 609500 }, { "epoch": 9.2, "learning_rate": 1.160961123924087e-05, "loss": 0.0127, "step": 610000 }, { "epoch": 9.2, "learning_rate": 1.1594537150092707e-05, "loss": 0.0124, "step": 610500 }, { "epoch": 9.21, "learning_rate": 1.1579463060944543e-05, "loss": 0.0131, "step": 611000 }, { "epoch": 9.22, "learning_rate": 1.156438897179638e-05, "loss": 0.0108, "step": 611500 }, { "epoch": 9.23, "learning_rate": 1.1549314882648216e-05, "loss": 0.0116, "step": 612000 }, { "epoch": 9.23, "learning_rate": 1.1534240793500053e-05, "loss": 0.0117, "step": 612500 }, { "epoch": 9.24, "learning_rate": 1.151916670435189e-05, "loss": 0.0129, "step": 613000 }, { "epoch": 9.25, "learning_rate": 1.1504092615203726e-05, "loss": 0.0126, "step": 613500 }, { "epoch": 9.26, "learning_rate": 1.1489018526055564e-05, "loss": 0.0129, "step": 614000 }, { "epoch": 9.26, "learning_rate": 1.1473944436907401e-05, "loss": 0.013, "step": 614500 }, { "epoch": 9.27, "learning_rate": 1.1458870347759237e-05, "loss": 0.0126, "step": 615000 }, { "epoch": 9.28, "learning_rate": 1.1443796258611074e-05, "loss": 0.0132, "step": 615500 }, { "epoch": 9.29, "learning_rate": 1.142872216946291e-05, "loss": 0.0123, "step": 616000 }, { "epoch": 9.29, "learning_rate": 1.1413648080314747e-05, "loss": 0.0127, "step": 616500 }, { "epoch": 9.3, "learning_rate": 1.1398573991166585e-05, "loss": 0.0127, "step": 617000 }, { "epoch": 9.31, "learning_rate": 1.138349990201842e-05, "loss": 0.0112, "step": 617500 }, { "epoch": 9.32, "learning_rate": 1.1368425812870258e-05, "loss": 0.0126, "step": 618000 }, { "epoch": 9.32, "learning_rate": 1.1353351723722094e-05, "loss": 0.0123, "step": 618500 }, { "epoch": 9.33, "learning_rate": 1.1338277634573931e-05, "loss": 0.0128, "step": 619000 }, { "epoch": 9.34, "learning_rate": 1.1323203545425769e-05, "loss": 0.0109, "step": 619500 }, { "epoch": 9.35, "learning_rate": 1.1308129456277604e-05, "loss": 0.0127, "step": 620000 }, { "epoch": 9.35, "learning_rate": 1.1293055367129442e-05, "loss": 0.0137, "step": 620500 }, { "epoch": 9.36, "learning_rate": 1.127798127798128e-05, "loss": 0.0122, "step": 621000 }, { "epoch": 9.37, "learning_rate": 1.1262907188833115e-05, "loss": 0.0118, "step": 621500 }, { "epoch": 9.38, "learning_rate": 1.1247833099684952e-05, "loss": 0.0124, "step": 622000 }, { "epoch": 9.38, "learning_rate": 1.1232759010536788e-05, "loss": 0.0134, "step": 622500 }, { "epoch": 9.39, "learning_rate": 1.1217684921388626e-05, "loss": 0.0122, "step": 623000 }, { "epoch": 9.4, "learning_rate": 1.1202610832240463e-05, "loss": 0.013, "step": 623500 }, { "epoch": 9.41, "learning_rate": 1.1187536743092299e-05, "loss": 0.0123, "step": 624000 }, { "epoch": 9.41, "learning_rate": 1.1172462653944136e-05, "loss": 0.012, "step": 624500 }, { "epoch": 9.42, "learning_rate": 1.1157388564795974e-05, "loss": 0.0124, "step": 625000 }, { "epoch": 9.43, "learning_rate": 1.114231447564781e-05, "loss": 0.0134, "step": 625500 }, { "epoch": 9.44, "learning_rate": 1.1127240386499647e-05, "loss": 0.0127, "step": 626000 }, { "epoch": 9.44, "learning_rate": 1.1112166297351482e-05, "loss": 0.0124, "step": 626500 }, { "epoch": 9.45, "learning_rate": 1.109709220820332e-05, "loss": 0.012, "step": 627000 }, { "epoch": 9.46, "learning_rate": 1.1082018119055157e-05, "loss": 0.0116, "step": 627500 }, { "epoch": 9.47, "learning_rate": 1.1066944029906993e-05, "loss": 0.0135, "step": 628000 }, { "epoch": 9.47, "learning_rate": 1.105186994075883e-05, "loss": 0.0121, "step": 628500 }, { "epoch": 9.48, "learning_rate": 1.1036795851610668e-05, "loss": 0.012, "step": 629000 }, { "epoch": 9.49, "learning_rate": 1.1021721762462504e-05, "loss": 0.013, "step": 629500 }, { "epoch": 9.5, "learning_rate": 1.1006647673314341e-05, "loss": 0.0133, "step": 630000 }, { "epoch": 9.5, "learning_rate": 1.0991573584166177e-05, "loss": 0.0127, "step": 630500 }, { "epoch": 9.51, "learning_rate": 1.0976499495018014e-05, "loss": 0.0121, "step": 631000 }, { "epoch": 9.52, "learning_rate": 1.0961425405869852e-05, "loss": 0.0132, "step": 631500 }, { "epoch": 9.53, "learning_rate": 1.0946351316721687e-05, "loss": 0.0131, "step": 632000 }, { "epoch": 9.53, "learning_rate": 1.0931277227573525e-05, "loss": 0.0133, "step": 632500 }, { "epoch": 9.54, "learning_rate": 1.0916203138425362e-05, "loss": 0.0138, "step": 633000 }, { "epoch": 9.55, "learning_rate": 1.0901129049277198e-05, "loss": 0.0119, "step": 633500 }, { "epoch": 9.56, "learning_rate": 1.0886054960129035e-05, "loss": 0.0133, "step": 634000 }, { "epoch": 9.56, "learning_rate": 1.0870980870980871e-05, "loss": 0.0125, "step": 634500 }, { "epoch": 9.57, "learning_rate": 1.0855906781832708e-05, "loss": 0.0133, "step": 635000 }, { "epoch": 9.58, "learning_rate": 1.0840832692684546e-05, "loss": 0.0115, "step": 635500 }, { "epoch": 9.59, "learning_rate": 1.0825758603536382e-05, "loss": 0.0129, "step": 636000 }, { "epoch": 9.59, "learning_rate": 1.0810684514388219e-05, "loss": 0.0123, "step": 636500 }, { "epoch": 9.6, "learning_rate": 1.0795610425240056e-05, "loss": 0.013, "step": 637000 }, { "epoch": 9.61, "learning_rate": 1.0780536336091892e-05, "loss": 0.0128, "step": 637500 }, { "epoch": 9.62, "learning_rate": 1.076546224694373e-05, "loss": 0.0128, "step": 638000 }, { "epoch": 9.62, "learning_rate": 1.0750388157795565e-05, "loss": 0.0126, "step": 638500 }, { "epoch": 9.63, "learning_rate": 1.0735314068647403e-05, "loss": 0.0133, "step": 639000 }, { "epoch": 9.64, "learning_rate": 1.072023997949924e-05, "loss": 0.0126, "step": 639500 }, { "epoch": 9.65, "learning_rate": 1.0705165890351076e-05, "loss": 0.0131, "step": 640000 }, { "epoch": 9.65, "learning_rate": 1.0690091801202913e-05, "loss": 0.0122, "step": 640500 }, { "epoch": 9.66, "learning_rate": 1.067501771205475e-05, "loss": 0.0141, "step": 641000 }, { "epoch": 9.67, "learning_rate": 1.0659943622906586e-05, "loss": 0.0123, "step": 641500 }, { "epoch": 9.68, "learning_rate": 1.0644869533758424e-05, "loss": 0.013, "step": 642000 }, { "epoch": 9.69, "learning_rate": 1.062979544461026e-05, "loss": 0.0129, "step": 642500 }, { "epoch": 9.69, "learning_rate": 1.0614721355462097e-05, "loss": 0.0124, "step": 643000 }, { "epoch": 9.7, "learning_rate": 1.0599647266313934e-05, "loss": 0.0133, "step": 643500 }, { "epoch": 9.71, "learning_rate": 1.058457317716577e-05, "loss": 0.0133, "step": 644000 }, { "epoch": 9.72, "learning_rate": 1.0569499088017608e-05, "loss": 0.0132, "step": 644500 }, { "epoch": 9.72, "learning_rate": 1.0554424998869443e-05, "loss": 0.0127, "step": 645000 }, { "epoch": 9.73, "learning_rate": 1.053935090972128e-05, "loss": 0.0137, "step": 645500 }, { "epoch": 9.74, "learning_rate": 1.0524276820573118e-05, "loss": 0.0123, "step": 646000 }, { "epoch": 9.75, "learning_rate": 1.0509202731424954e-05, "loss": 0.0129, "step": 646500 }, { "epoch": 9.75, "learning_rate": 1.0494128642276791e-05, "loss": 0.0133, "step": 647000 }, { "epoch": 9.76, "learning_rate": 1.0479054553128629e-05, "loss": 0.0121, "step": 647500 }, { "epoch": 9.77, "learning_rate": 1.0463980463980464e-05, "loss": 0.0129, "step": 648000 }, { "epoch": 9.78, "learning_rate": 1.0448906374832302e-05, "loss": 0.013, "step": 648500 }, { "epoch": 9.78, "learning_rate": 1.0433832285684138e-05, "loss": 0.0125, "step": 649000 }, { "epoch": 9.79, "learning_rate": 1.0418758196535975e-05, "loss": 0.0129, "step": 649500 }, { "epoch": 9.8, "learning_rate": 1.0403684107387812e-05, "loss": 0.0132, "step": 650000 }, { "epoch": 9.81, "learning_rate": 1.0388610018239648e-05, "loss": 0.0122, "step": 650500 }, { "epoch": 9.81, "learning_rate": 1.0373535929091486e-05, "loss": 0.0124, "step": 651000 }, { "epoch": 9.82, "learning_rate": 1.0358461839943323e-05, "loss": 0.0133, "step": 651500 }, { "epoch": 9.83, "learning_rate": 1.0343387750795159e-05, "loss": 0.0124, "step": 652000 }, { "epoch": 9.84, "learning_rate": 1.0328313661646996e-05, "loss": 0.0131, "step": 652500 }, { "epoch": 9.84, "learning_rate": 1.0313239572498832e-05, "loss": 0.0142, "step": 653000 }, { "epoch": 9.85, "learning_rate": 1.029816548335067e-05, "loss": 0.0125, "step": 653500 }, { "epoch": 9.86, "learning_rate": 1.0283091394202507e-05, "loss": 0.0131, "step": 654000 }, { "epoch": 9.87, "learning_rate": 1.0268017305054342e-05, "loss": 0.0126, "step": 654500 }, { "epoch": 9.87, "learning_rate": 1.025294321590618e-05, "loss": 0.0132, "step": 655000 }, { "epoch": 9.88, "learning_rate": 1.0237869126758017e-05, "loss": 0.0129, "step": 655500 }, { "epoch": 9.89, "learning_rate": 1.0222795037609853e-05, "loss": 0.0127, "step": 656000 }, { "epoch": 9.9, "learning_rate": 1.020772094846169e-05, "loss": 0.0129, "step": 656500 }, { "epoch": 9.9, "learning_rate": 1.0192646859313526e-05, "loss": 0.0125, "step": 657000 }, { "epoch": 9.91, "learning_rate": 1.0177572770165364e-05, "loss": 0.0133, "step": 657500 }, { "epoch": 9.92, "learning_rate": 1.0162498681017201e-05, "loss": 0.0136, "step": 658000 }, { "epoch": 9.93, "learning_rate": 1.0147424591869037e-05, "loss": 0.012, "step": 658500 }, { "epoch": 9.93, "learning_rate": 1.0132350502720874e-05, "loss": 0.0135, "step": 659000 }, { "epoch": 9.94, "learning_rate": 1.0117276413572712e-05, "loss": 0.0128, "step": 659500 }, { "epoch": 9.95, "learning_rate": 1.0102202324424547e-05, "loss": 0.0132, "step": 660000 }, { "epoch": 9.96, "learning_rate": 1.0087128235276385e-05, "loss": 0.0134, "step": 660500 }, { "epoch": 9.96, "learning_rate": 1.007205414612822e-05, "loss": 0.0123, "step": 661000 }, { "epoch": 9.97, "learning_rate": 1.0056980056980058e-05, "loss": 0.0136, "step": 661500 }, { "epoch": 9.98, "learning_rate": 1.0041905967831895e-05, "loss": 0.0134, "step": 662000 }, { "epoch": 9.99, "learning_rate": 1.0026831878683731e-05, "loss": 0.0121, "step": 662500 }, { "epoch": 9.99, "learning_rate": 1.0011757789535568e-05, "loss": 0.0137, "step": 663000 }, { "epoch": 10.0, "eval_accuracy": 0.9862538121035589, "eval_f1": 0.9400368616848146, "eval_loss": 0.06027080863714218, "eval_precision": 0.9228673143726427, "eval_recall": 0.9578573841139104, "eval_runtime": 252.8093, "eval_samples_per_second": 466.454, "eval_steps_per_second": 29.156, "step": 663390 }, { "epoch": 10.0, "learning_rate": 9.996683700387406e-06, "loss": 0.0121, "step": 663500 }, { "epoch": 10.01, "learning_rate": 9.981609611239242e-06, "loss": 0.0098, "step": 664000 }, { "epoch": 10.02, "learning_rate": 9.966535522091079e-06, "loss": 0.0107, "step": 664500 }, { "epoch": 10.02, "learning_rate": 9.951461432942915e-06, "loss": 0.0105, "step": 665000 }, { "epoch": 10.03, "learning_rate": 9.936387343794752e-06, "loss": 0.0103, "step": 665500 }, { "epoch": 10.04, "learning_rate": 9.92131325464659e-06, "loss": 0.0095, "step": 666000 }, { "epoch": 10.05, "learning_rate": 9.906239165498425e-06, "loss": 0.0104, "step": 666500 }, { "epoch": 10.05, "learning_rate": 9.891165076350263e-06, "loss": 0.0109, "step": 667000 }, { "epoch": 10.06, "learning_rate": 9.8760909872021e-06, "loss": 0.0098, "step": 667500 }, { "epoch": 10.07, "learning_rate": 9.861016898053936e-06, "loss": 0.0105, "step": 668000 }, { "epoch": 10.08, "learning_rate": 9.845942808905773e-06, "loss": 0.0106, "step": 668500 }, { "epoch": 10.08, "learning_rate": 9.830868719757609e-06, "loss": 0.0108, "step": 669000 }, { "epoch": 10.09, "learning_rate": 9.815794630609446e-06, "loss": 0.0107, "step": 669500 }, { "epoch": 10.1, "learning_rate": 9.800720541461284e-06, "loss": 0.0111, "step": 670000 }, { "epoch": 10.11, "learning_rate": 9.78564645231312e-06, "loss": 0.01, "step": 670500 }, { "epoch": 10.11, "learning_rate": 9.770572363164957e-06, "loss": 0.0104, "step": 671000 }, { "epoch": 10.12, "learning_rate": 9.755498274016793e-06, "loss": 0.0105, "step": 671500 }, { "epoch": 10.13, "learning_rate": 9.740424184868628e-06, "loss": 0.0106, "step": 672000 }, { "epoch": 10.14, "learning_rate": 9.725350095720466e-06, "loss": 0.0106, "step": 672500 }, { "epoch": 10.14, "learning_rate": 9.710276006572302e-06, "loss": 0.01, "step": 673000 }, { "epoch": 10.15, "learning_rate": 9.695201917424139e-06, "loss": 0.0116, "step": 673500 }, { "epoch": 10.16, "learning_rate": 9.680127828275976e-06, "loss": 0.0102, "step": 674000 }, { "epoch": 10.17, "learning_rate": 9.665053739127812e-06, "loss": 0.0103, "step": 674500 }, { "epoch": 10.18, "learning_rate": 9.64997964997965e-06, "loss": 0.0108, "step": 675000 }, { "epoch": 10.18, "learning_rate": 9.634905560831487e-06, "loss": 0.0106, "step": 675500 }, { "epoch": 10.19, "learning_rate": 9.619831471683323e-06, "loss": 0.0099, "step": 676000 }, { "epoch": 10.2, "learning_rate": 9.60475738253516e-06, "loss": 0.0113, "step": 676500 }, { "epoch": 10.21, "learning_rate": 9.589683293386996e-06, "loss": 0.0106, "step": 677000 }, { "epoch": 10.21, "learning_rate": 9.574609204238833e-06, "loss": 0.0105, "step": 677500 }, { "epoch": 10.22, "learning_rate": 9.55953511509067e-06, "loss": 0.0104, "step": 678000 }, { "epoch": 10.23, "learning_rate": 9.544461025942506e-06, "loss": 0.0109, "step": 678500 }, { "epoch": 10.24, "learning_rate": 9.529386936794344e-06, "loss": 0.0098, "step": 679000 }, { "epoch": 10.24, "learning_rate": 9.51431284764618e-06, "loss": 0.0103, "step": 679500 }, { "epoch": 10.25, "learning_rate": 9.499238758498017e-06, "loss": 0.0103, "step": 680000 }, { "epoch": 10.26, "learning_rate": 9.484164669349854e-06, "loss": 0.0109, "step": 680500 }, { "epoch": 10.27, "learning_rate": 9.46909058020169e-06, "loss": 0.0106, "step": 681000 }, { "epoch": 10.27, "learning_rate": 9.454016491053528e-06, "loss": 0.0107, "step": 681500 }, { "epoch": 10.28, "learning_rate": 9.438942401905365e-06, "loss": 0.0097, "step": 682000 }, { "epoch": 10.29, "learning_rate": 9.4238683127572e-06, "loss": 0.0106, "step": 682500 }, { "epoch": 10.3, "learning_rate": 9.408794223609038e-06, "loss": 0.0097, "step": 683000 }, { "epoch": 10.3, "learning_rate": 9.393720134460874e-06, "loss": 0.0115, "step": 683500 }, { "epoch": 10.31, "learning_rate": 9.378646045312711e-06, "loss": 0.0108, "step": 684000 }, { "epoch": 10.32, "learning_rate": 9.363571956164549e-06, "loss": 0.0096, "step": 684500 }, { "epoch": 10.33, "learning_rate": 9.348497867016385e-06, "loss": 0.0101, "step": 685000 }, { "epoch": 10.33, "learning_rate": 9.333423777868222e-06, "loss": 0.0104, "step": 685500 }, { "epoch": 10.34, "learning_rate": 9.31834968872006e-06, "loss": 0.0109, "step": 686000 }, { "epoch": 10.35, "learning_rate": 9.303275599571895e-06, "loss": 0.0108, "step": 686500 }, { "epoch": 10.36, "learning_rate": 9.288201510423733e-06, "loss": 0.011, "step": 687000 }, { "epoch": 10.36, "learning_rate": 9.273127421275568e-06, "loss": 0.0101, "step": 687500 }, { "epoch": 10.37, "learning_rate": 9.258053332127406e-06, "loss": 0.0102, "step": 688000 }, { "epoch": 10.38, "learning_rate": 9.242979242979243e-06, "loss": 0.0118, "step": 688500 }, { "epoch": 10.39, "learning_rate": 9.227905153831079e-06, "loss": 0.0111, "step": 689000 }, { "epoch": 10.39, "learning_rate": 9.212831064682916e-06, "loss": 0.0099, "step": 689500 }, { "epoch": 10.4, "learning_rate": 9.197756975534754e-06, "loss": 0.0116, "step": 690000 }, { "epoch": 10.41, "learning_rate": 9.18268288638659e-06, "loss": 0.0104, "step": 690500 }, { "epoch": 10.42, "learning_rate": 9.167608797238427e-06, "loss": 0.0114, "step": 691000 }, { "epoch": 10.42, "learning_rate": 9.152534708090263e-06, "loss": 0.0106, "step": 691500 }, { "epoch": 10.43, "learning_rate": 9.1374606189421e-06, "loss": 0.0105, "step": 692000 }, { "epoch": 10.44, "learning_rate": 9.122386529793937e-06, "loss": 0.0106, "step": 692500 }, { "epoch": 10.45, "learning_rate": 9.107312440645773e-06, "loss": 0.0124, "step": 693000 }, { "epoch": 10.45, "learning_rate": 9.09223835149761e-06, "loss": 0.0107, "step": 693500 }, { "epoch": 10.46, "learning_rate": 9.077164262349448e-06, "loss": 0.0109, "step": 694000 }, { "epoch": 10.47, "learning_rate": 9.062090173201284e-06, "loss": 0.0105, "step": 694500 }, { "epoch": 10.48, "learning_rate": 9.047016084053121e-06, "loss": 0.0111, "step": 695000 }, { "epoch": 10.48, "learning_rate": 9.031941994904957e-06, "loss": 0.0103, "step": 695500 }, { "epoch": 10.49, "learning_rate": 9.016867905756794e-06, "loss": 0.0108, "step": 696000 }, { "epoch": 10.5, "learning_rate": 9.001793816608632e-06, "loss": 0.0104, "step": 696500 }, { "epoch": 10.51, "learning_rate": 8.986719727460467e-06, "loss": 0.0119, "step": 697000 }, { "epoch": 10.51, "learning_rate": 8.971645638312305e-06, "loss": 0.0103, "step": 697500 }, { "epoch": 10.52, "learning_rate": 8.956571549164142e-06, "loss": 0.0118, "step": 698000 }, { "epoch": 10.53, "learning_rate": 8.941497460015978e-06, "loss": 0.0118, "step": 698500 }, { "epoch": 10.54, "learning_rate": 8.926423370867815e-06, "loss": 0.0113, "step": 699000 }, { "epoch": 10.54, "learning_rate": 8.911349281719651e-06, "loss": 0.0106, "step": 699500 }, { "epoch": 10.55, "learning_rate": 8.896275192571489e-06, "loss": 0.0108, "step": 700000 }, { "epoch": 10.56, "learning_rate": 8.881201103423326e-06, "loss": 0.01, "step": 700500 }, { "epoch": 10.57, "learning_rate": 8.866127014275162e-06, "loss": 0.0112, "step": 701000 }, { "epoch": 10.57, "learning_rate": 8.851052925126999e-06, "loss": 0.0102, "step": 701500 }, { "epoch": 10.58, "learning_rate": 8.835978835978837e-06, "loss": 0.011, "step": 702000 }, { "epoch": 10.59, "learning_rate": 8.820904746830672e-06, "loss": 0.0122, "step": 702500 }, { "epoch": 10.6, "learning_rate": 8.80583065768251e-06, "loss": 0.0115, "step": 703000 }, { "epoch": 10.6, "learning_rate": 8.790756568534345e-06, "loss": 0.0104, "step": 703500 }, { "epoch": 10.61, "learning_rate": 8.775682479386183e-06, "loss": 0.0107, "step": 704000 }, { "epoch": 10.62, "learning_rate": 8.76060839023802e-06, "loss": 0.0115, "step": 704500 }, { "epoch": 10.63, "learning_rate": 8.745534301089856e-06, "loss": 0.0107, "step": 705000 }, { "epoch": 10.63, "learning_rate": 8.730460211941693e-06, "loss": 0.0111, "step": 705500 }, { "epoch": 10.64, "learning_rate": 8.715386122793529e-06, "loss": 0.0118, "step": 706000 }, { "epoch": 10.65, "learning_rate": 8.700312033645367e-06, "loss": 0.011, "step": 706500 }, { "epoch": 10.66, "learning_rate": 8.685237944497204e-06, "loss": 0.0104, "step": 707000 }, { "epoch": 10.66, "learning_rate": 8.67016385534904e-06, "loss": 0.0112, "step": 707500 }, { "epoch": 10.67, "learning_rate": 8.655089766200877e-06, "loss": 0.0108, "step": 708000 }, { "epoch": 10.68, "learning_rate": 8.640015677052715e-06, "loss": 0.0104, "step": 708500 }, { "epoch": 10.69, "learning_rate": 8.62494158790455e-06, "loss": 0.0116, "step": 709000 }, { "epoch": 10.7, "learning_rate": 8.609867498756388e-06, "loss": 0.0107, "step": 709500 }, { "epoch": 10.7, "learning_rate": 8.594793409608223e-06, "loss": 0.0108, "step": 710000 }, { "epoch": 10.71, "learning_rate": 8.57971932046006e-06, "loss": 0.0112, "step": 710500 }, { "epoch": 10.72, "learning_rate": 8.564645231311898e-06, "loss": 0.0109, "step": 711000 }, { "epoch": 10.73, "learning_rate": 8.549571142163734e-06, "loss": 0.011, "step": 711500 }, { "epoch": 10.73, "learning_rate": 8.534497053015571e-06, "loss": 0.0104, "step": 712000 }, { "epoch": 10.74, "learning_rate": 8.519422963867409e-06, "loss": 0.0108, "step": 712500 }, { "epoch": 10.75, "learning_rate": 8.504348874719245e-06, "loss": 0.011, "step": 713000 }, { "epoch": 10.76, "learning_rate": 8.489274785571082e-06, "loss": 0.0114, "step": 713500 }, { "epoch": 10.76, "learning_rate": 8.474200696422918e-06, "loss": 0.0104, "step": 714000 }, { "epoch": 10.77, "learning_rate": 8.459126607274755e-06, "loss": 0.0103, "step": 714500 }, { "epoch": 10.78, "learning_rate": 8.444052518126593e-06, "loss": 0.0114, "step": 715000 }, { "epoch": 10.79, "learning_rate": 8.428978428978428e-06, "loss": 0.0106, "step": 715500 }, { "epoch": 10.79, "learning_rate": 8.413904339830266e-06, "loss": 0.0106, "step": 716000 }, { "epoch": 10.8, "learning_rate": 8.398830250682103e-06, "loss": 0.011, "step": 716500 }, { "epoch": 10.81, "learning_rate": 8.383756161533939e-06, "loss": 0.0121, "step": 717000 }, { "epoch": 10.82, "learning_rate": 8.368682072385776e-06, "loss": 0.0117, "step": 717500 }, { "epoch": 10.82, "learning_rate": 8.353607983237612e-06, "loss": 0.0104, "step": 718000 }, { "epoch": 10.83, "learning_rate": 8.33853389408945e-06, "loss": 0.0111, "step": 718500 }, { "epoch": 10.84, "learning_rate": 8.323459804941287e-06, "loss": 0.0102, "step": 719000 }, { "epoch": 10.85, "learning_rate": 8.308385715793123e-06, "loss": 0.0102, "step": 719500 }, { "epoch": 10.85, "learning_rate": 8.29331162664496e-06, "loss": 0.0109, "step": 720000 }, { "epoch": 10.86, "learning_rate": 8.278237537496797e-06, "loss": 0.0115, "step": 720500 }, { "epoch": 10.87, "learning_rate": 8.263163448348633e-06, "loss": 0.0115, "step": 721000 }, { "epoch": 10.88, "learning_rate": 8.24808935920047e-06, "loss": 0.0109, "step": 721500 }, { "epoch": 10.88, "learning_rate": 8.233015270052306e-06, "loss": 0.0098, "step": 722000 }, { "epoch": 10.89, "learning_rate": 8.217941180904144e-06, "loss": 0.0118, "step": 722500 }, { "epoch": 10.9, "learning_rate": 8.202867091755981e-06, "loss": 0.0107, "step": 723000 }, { "epoch": 10.91, "learning_rate": 8.187793002607817e-06, "loss": 0.0111, "step": 723500 }, { "epoch": 10.91, "learning_rate": 8.172718913459654e-06, "loss": 0.0106, "step": 724000 }, { "epoch": 10.92, "learning_rate": 8.157644824311492e-06, "loss": 0.0113, "step": 724500 }, { "epoch": 10.93, "learning_rate": 8.142570735163327e-06, "loss": 0.0099, "step": 725000 }, { "epoch": 10.94, "learning_rate": 8.127496646015165e-06, "loss": 0.0107, "step": 725500 }, { "epoch": 10.94, "learning_rate": 8.112422556867e-06, "loss": 0.0115, "step": 726000 }, { "epoch": 10.95, "learning_rate": 8.097348467718838e-06, "loss": 0.0116, "step": 726500 }, { "epoch": 10.96, "learning_rate": 8.082274378570675e-06, "loss": 0.0104, "step": 727000 }, { "epoch": 10.97, "learning_rate": 8.067200289422511e-06, "loss": 0.0111, "step": 727500 }, { "epoch": 10.97, "learning_rate": 8.052126200274349e-06, "loss": 0.0103, "step": 728000 }, { "epoch": 10.98, "learning_rate": 8.037052111126186e-06, "loss": 0.0103, "step": 728500 }, { "epoch": 10.99, "learning_rate": 8.021978021978022e-06, "loss": 0.0113, "step": 729000 }, { "epoch": 11.0, "learning_rate": 8.00690393282986e-06, "loss": 0.0111, "step": 729500 }, { "epoch": 11.0, "eval_accuracy": 0.9867219357549302, "eval_f1": 0.9427907322472874, "eval_loss": 0.07028166949748993, "eval_precision": 0.9310166962925343, "eval_recall": 0.9548663815379858, "eval_runtime": 246.7237, "eval_samples_per_second": 477.96, "eval_steps_per_second": 29.876, "step": 729729 }, { "epoch": 11.0, "learning_rate": 7.991829843681695e-06, "loss": 0.0095, "step": 730000 }, { "epoch": 11.01, "learning_rate": 7.976755754533532e-06, "loss": 0.0093, "step": 730500 }, { "epoch": 11.02, "learning_rate": 7.96168166538537e-06, "loss": 0.0084, "step": 731000 }, { "epoch": 11.03, "learning_rate": 7.946607576237205e-06, "loss": 0.0089, "step": 731500 }, { "epoch": 11.03, "learning_rate": 7.931533487089043e-06, "loss": 0.0087, "step": 732000 }, { "epoch": 11.04, "learning_rate": 7.91645939794088e-06, "loss": 0.008, "step": 732500 }, { "epoch": 11.05, "learning_rate": 7.901385308792716e-06, "loss": 0.0089, "step": 733000 }, { "epoch": 11.06, "learning_rate": 7.886311219644553e-06, "loss": 0.0084, "step": 733500 }, { "epoch": 11.06, "learning_rate": 7.87123713049639e-06, "loss": 0.0095, "step": 734000 }, { "epoch": 11.07, "learning_rate": 7.856163041348227e-06, "loss": 0.0084, "step": 734500 }, { "epoch": 11.08, "learning_rate": 7.841088952200064e-06, "loss": 0.009, "step": 735000 }, { "epoch": 11.09, "learning_rate": 7.8260148630519e-06, "loss": 0.009, "step": 735500 }, { "epoch": 11.09, "learning_rate": 7.810940773903737e-06, "loss": 0.0085, "step": 736000 }, { "epoch": 11.1, "learning_rate": 7.795866684755573e-06, "loss": 0.0078, "step": 736500 }, { "epoch": 11.11, "learning_rate": 7.78079259560741e-06, "loss": 0.0084, "step": 737000 }, { "epoch": 11.12, "learning_rate": 7.765718506459248e-06, "loss": 0.009, "step": 737500 }, { "epoch": 11.12, "learning_rate": 7.750644417311083e-06, "loss": 0.0089, "step": 738000 }, { "epoch": 11.13, "learning_rate": 7.735570328162921e-06, "loss": 0.0091, "step": 738500 }, { "epoch": 11.14, "learning_rate": 7.720496239014758e-06, "loss": 0.0087, "step": 739000 }, { "epoch": 11.15, "learning_rate": 7.705422149866594e-06, "loss": 0.0098, "step": 739500 }, { "epoch": 11.15, "learning_rate": 7.690348060718431e-06, "loss": 0.009, "step": 740000 }, { "epoch": 11.16, "learning_rate": 7.675273971570267e-06, "loss": 0.0085, "step": 740500 }, { "epoch": 11.17, "learning_rate": 7.660199882422105e-06, "loss": 0.0093, "step": 741000 }, { "epoch": 11.18, "learning_rate": 7.645125793273942e-06, "loss": 0.0091, "step": 741500 }, { "epoch": 11.18, "learning_rate": 7.630051704125778e-06, "loss": 0.0082, "step": 742000 }, { "epoch": 11.19, "learning_rate": 7.614977614977615e-06, "loss": 0.0092, "step": 742500 }, { "epoch": 11.2, "learning_rate": 7.599903525829452e-06, "loss": 0.0095, "step": 743000 }, { "epoch": 11.21, "learning_rate": 7.584829436681288e-06, "loss": 0.0085, "step": 743500 }, { "epoch": 11.22, "learning_rate": 7.569755347533126e-06, "loss": 0.0086, "step": 744000 }, { "epoch": 11.22, "learning_rate": 7.554681258384962e-06, "loss": 0.0096, "step": 744500 }, { "epoch": 11.23, "learning_rate": 7.539607169236799e-06, "loss": 0.0089, "step": 745000 }, { "epoch": 11.24, "learning_rate": 7.5245330800886355e-06, "loss": 0.0103, "step": 745500 }, { "epoch": 11.25, "learning_rate": 7.509458990940473e-06, "loss": 0.0094, "step": 746000 }, { "epoch": 11.25, "learning_rate": 7.4943849017923095e-06, "loss": 0.0091, "step": 746500 }, { "epoch": 11.26, "learning_rate": 7.479310812644146e-06, "loss": 0.0088, "step": 747000 }, { "epoch": 11.27, "learning_rate": 7.464236723495983e-06, "loss": 0.0085, "step": 747500 }, { "epoch": 11.28, "learning_rate": 7.44916263434782e-06, "loss": 0.0088, "step": 748000 }, { "epoch": 11.28, "learning_rate": 7.434088545199657e-06, "loss": 0.0088, "step": 748500 }, { "epoch": 11.29, "learning_rate": 7.419014456051493e-06, "loss": 0.0099, "step": 749000 }, { "epoch": 11.3, "learning_rate": 7.40394036690333e-06, "loss": 0.0091, "step": 749500 }, { "epoch": 11.31, "learning_rate": 7.388866277755167e-06, "loss": 0.0088, "step": 750000 }, { "epoch": 11.31, "learning_rate": 7.373792188607004e-06, "loss": 0.0083, "step": 750500 }, { "epoch": 11.32, "learning_rate": 7.35871809945884e-06, "loss": 0.0086, "step": 751000 }, { "epoch": 11.33, "learning_rate": 7.343644010310677e-06, "loss": 0.0085, "step": 751500 }, { "epoch": 11.34, "learning_rate": 7.328569921162514e-06, "loss": 0.0098, "step": 752000 }, { "epoch": 11.34, "learning_rate": 7.313495832014351e-06, "loss": 0.0092, "step": 752500 }, { "epoch": 11.35, "learning_rate": 7.2984217428661875e-06, "loss": 0.0087, "step": 753000 }, { "epoch": 11.36, "learning_rate": 7.283347653718024e-06, "loss": 0.0091, "step": 753500 }, { "epoch": 11.37, "learning_rate": 7.2682735645698615e-06, "loss": 0.0097, "step": 754000 }, { "epoch": 11.37, "learning_rate": 7.253199475421698e-06, "loss": 0.009, "step": 754500 }, { "epoch": 11.38, "learning_rate": 7.238125386273535e-06, "loss": 0.009, "step": 755000 }, { "epoch": 11.39, "learning_rate": 7.223051297125371e-06, "loss": 0.0097, "step": 755500 }, { "epoch": 11.4, "learning_rate": 7.207977207977209e-06, "loss": 0.0085, "step": 756000 }, { "epoch": 11.4, "learning_rate": 7.192903118829045e-06, "loss": 0.0084, "step": 756500 }, { "epoch": 11.41, "learning_rate": 7.177829029680882e-06, "loss": 0.0102, "step": 757000 }, { "epoch": 11.42, "learning_rate": 7.162754940532718e-06, "loss": 0.0092, "step": 757500 }, { "epoch": 11.43, "learning_rate": 7.147680851384556e-06, "loss": 0.0091, "step": 758000 }, { "epoch": 11.43, "learning_rate": 7.132606762236392e-06, "loss": 0.009, "step": 758500 }, { "epoch": 11.44, "learning_rate": 7.117532673088229e-06, "loss": 0.0095, "step": 759000 }, { "epoch": 11.45, "learning_rate": 7.1024585839400655e-06, "loss": 0.01, "step": 759500 }, { "epoch": 11.46, "learning_rate": 7.087384494791903e-06, "loss": 0.0104, "step": 760000 }, { "epoch": 11.46, "learning_rate": 7.0723104056437395e-06, "loss": 0.0099, "step": 760500 }, { "epoch": 11.47, "learning_rate": 7.057236316495576e-06, "loss": 0.0089, "step": 761000 }, { "epoch": 11.48, "learning_rate": 7.042162227347413e-06, "loss": 0.0105, "step": 761500 }, { "epoch": 11.49, "learning_rate": 7.02708813819925e-06, "loss": 0.0088, "step": 762000 }, { "epoch": 11.49, "learning_rate": 7.012014049051087e-06, "loss": 0.0093, "step": 762500 }, { "epoch": 11.5, "learning_rate": 6.996939959902923e-06, "loss": 0.0099, "step": 763000 }, { "epoch": 11.51, "learning_rate": 6.98186587075476e-06, "loss": 0.0092, "step": 763500 }, { "epoch": 11.52, "learning_rate": 6.966791781606596e-06, "loss": 0.0091, "step": 764000 }, { "epoch": 11.52, "learning_rate": 6.951717692458434e-06, "loss": 0.0091, "step": 764500 }, { "epoch": 11.53, "learning_rate": 6.93664360331027e-06, "loss": 0.0083, "step": 765000 }, { "epoch": 11.54, "learning_rate": 6.921569514162107e-06, "loss": 0.0101, "step": 765500 }, { "epoch": 11.55, "learning_rate": 6.9064954250139435e-06, "loss": 0.0083, "step": 766000 }, { "epoch": 11.55, "learning_rate": 6.891421335865781e-06, "loss": 0.0101, "step": 766500 }, { "epoch": 11.56, "learning_rate": 6.8763472467176175e-06, "loss": 0.0098, "step": 767000 }, { "epoch": 11.57, "learning_rate": 6.861273157569454e-06, "loss": 0.0084, "step": 767500 }, { "epoch": 11.58, "learning_rate": 6.846199068421291e-06, "loss": 0.0093, "step": 768000 }, { "epoch": 11.58, "learning_rate": 6.831124979273128e-06, "loss": 0.0094, "step": 768500 }, { "epoch": 11.59, "learning_rate": 6.816050890124965e-06, "loss": 0.0094, "step": 769000 }, { "epoch": 11.6, "learning_rate": 6.800976800976801e-06, "loss": 0.0098, "step": 769500 }, { "epoch": 11.61, "learning_rate": 6.785902711828638e-06, "loss": 0.0096, "step": 770000 }, { "epoch": 11.61, "learning_rate": 6.770828622680475e-06, "loss": 0.0099, "step": 770500 }, { "epoch": 11.62, "learning_rate": 6.755754533532312e-06, "loss": 0.0091, "step": 771000 }, { "epoch": 11.63, "learning_rate": 6.740680444384148e-06, "loss": 0.0092, "step": 771500 }, { "epoch": 11.64, "learning_rate": 6.725606355235985e-06, "loss": 0.0102, "step": 772000 }, { "epoch": 11.64, "learning_rate": 6.710532266087822e-06, "loss": 0.0092, "step": 772500 }, { "epoch": 11.65, "learning_rate": 6.695458176939659e-06, "loss": 0.0099, "step": 773000 }, { "epoch": 11.66, "learning_rate": 6.6803840877914955e-06, "loss": 0.0092, "step": 773500 }, { "epoch": 11.67, "learning_rate": 6.665309998643332e-06, "loss": 0.009, "step": 774000 }, { "epoch": 11.67, "learning_rate": 6.6502359094951695e-06, "loss": 0.01, "step": 774500 }, { "epoch": 11.68, "learning_rate": 6.635161820347006e-06, "loss": 0.0083, "step": 775000 }, { "epoch": 11.69, "learning_rate": 6.620087731198843e-06, "loss": 0.0087, "step": 775500 }, { "epoch": 11.7, "learning_rate": 6.605013642050679e-06, "loss": 0.0087, "step": 776000 }, { "epoch": 11.71, "learning_rate": 6.589939552902517e-06, "loss": 0.0098, "step": 776500 }, { "epoch": 11.71, "learning_rate": 6.574865463754353e-06, "loss": 0.0096, "step": 777000 }, { "epoch": 11.72, "learning_rate": 6.55979137460619e-06, "loss": 0.0098, "step": 777500 }, { "epoch": 11.73, "learning_rate": 6.544717285458026e-06, "loss": 0.009, "step": 778000 }, { "epoch": 11.74, "learning_rate": 6.529643196309864e-06, "loss": 0.0096, "step": 778500 }, { "epoch": 11.74, "learning_rate": 6.5145691071617e-06, "loss": 0.0099, "step": 779000 }, { "epoch": 11.75, "learning_rate": 6.499495018013537e-06, "loss": 0.0092, "step": 779500 }, { "epoch": 11.76, "learning_rate": 6.4844209288653736e-06, "loss": 0.0092, "step": 780000 }, { "epoch": 11.77, "learning_rate": 6.469346839717211e-06, "loss": 0.0107, "step": 780500 }, { "epoch": 11.77, "learning_rate": 6.4542727505690476e-06, "loss": 0.0088, "step": 781000 }, { "epoch": 11.78, "learning_rate": 6.439198661420884e-06, "loss": 0.0104, "step": 781500 }, { "epoch": 11.79, "learning_rate": 6.424124572272721e-06, "loss": 0.0086, "step": 782000 }, { "epoch": 11.8, "learning_rate": 6.409050483124558e-06, "loss": 0.0085, "step": 782500 }, { "epoch": 11.8, "learning_rate": 6.393976393976395e-06, "loss": 0.0084, "step": 783000 }, { "epoch": 11.81, "learning_rate": 6.3789023048282304e-06, "loss": 0.0103, "step": 783500 }, { "epoch": 11.82, "learning_rate": 6.363828215680067e-06, "loss": 0.009, "step": 784000 }, { "epoch": 11.83, "learning_rate": 6.3487541265319044e-06, "loss": 0.0103, "step": 784500 }, { "epoch": 11.83, "learning_rate": 6.333680037383741e-06, "loss": 0.0094, "step": 785000 }, { "epoch": 11.84, "learning_rate": 6.3186059482355776e-06, "loss": 0.0084, "step": 785500 }, { "epoch": 11.85, "learning_rate": 6.303531859087414e-06, "loss": 0.0092, "step": 786000 }, { "epoch": 11.86, "learning_rate": 6.2884577699392516e-06, "loss": 0.0087, "step": 786500 }, { "epoch": 11.86, "learning_rate": 6.273383680791088e-06, "loss": 0.0089, "step": 787000 }, { "epoch": 11.87, "learning_rate": 6.258309591642925e-06, "loss": 0.009, "step": 787500 }, { "epoch": 11.88, "learning_rate": 6.243235502494761e-06, "loss": 0.0085, "step": 788000 }, { "epoch": 11.89, "learning_rate": 6.228161413346599e-06, "loss": 0.0093, "step": 788500 }, { "epoch": 11.89, "learning_rate": 6.213087324198435e-06, "loss": 0.009, "step": 789000 }, { "epoch": 11.9, "learning_rate": 6.198013235050272e-06, "loss": 0.0093, "step": 789500 }, { "epoch": 11.91, "learning_rate": 6.1829391459021084e-06, "loss": 0.0102, "step": 790000 }, { "epoch": 11.92, "learning_rate": 6.167865056753946e-06, "loss": 0.0092, "step": 790500 }, { "epoch": 11.92, "learning_rate": 6.1527909676057824e-06, "loss": 0.0097, "step": 791000 }, { "epoch": 11.93, "learning_rate": 6.137716878457619e-06, "loss": 0.0089, "step": 791500 }, { "epoch": 11.94, "learning_rate": 6.122642789309456e-06, "loss": 0.0088, "step": 792000 }, { "epoch": 11.95, "learning_rate": 6.107568700161293e-06, "loss": 0.0095, "step": 792500 }, { "epoch": 11.95, "learning_rate": 6.09249461101313e-06, "loss": 0.0093, "step": 793000 }, { "epoch": 11.96, "learning_rate": 6.077420521864966e-06, "loss": 0.0088, "step": 793500 }, { "epoch": 11.97, "learning_rate": 6.062346432716803e-06, "loss": 0.0095, "step": 794000 }, { "epoch": 11.98, "learning_rate": 6.047272343568639e-06, "loss": 0.0093, "step": 794500 }, { "epoch": 11.98, "learning_rate": 6.032198254420477e-06, "loss": 0.01, "step": 795000 }, { "epoch": 11.99, "learning_rate": 6.017124165272313e-06, "loss": 0.0095, "step": 795500 }, { "epoch": 12.0, "learning_rate": 6.00205007612415e-06, "loss": 0.0097, "step": 796000 }, { "epoch": 12.0, "eval_accuracy": 0.9868254664081725, "eval_f1": 0.9437939908653451, "eval_loss": 0.07172608375549316, "eval_precision": 0.9306023638866474, "eval_recall": 0.9573649877714793, "eval_runtime": 246.5574, "eval_samples_per_second": 478.282, "eval_steps_per_second": 29.896, "step": 796068 }, { "epoch": 12.01, "learning_rate": 5.9869759869759865e-06, "loss": 0.0072, "step": 796500 }, { "epoch": 12.01, "learning_rate": 5.971901897827824e-06, "loss": 0.0075, "step": 797000 }, { "epoch": 12.02, "learning_rate": 5.9568278086796605e-06, "loss": 0.007, "step": 797500 }, { "epoch": 12.03, "learning_rate": 5.941753719531497e-06, "loss": 0.0078, "step": 798000 }, { "epoch": 12.04, "learning_rate": 5.926679630383334e-06, "loss": 0.0083, "step": 798500 }, { "epoch": 12.04, "learning_rate": 5.911605541235171e-06, "loss": 0.0079, "step": 799000 }, { "epoch": 12.05, "learning_rate": 5.896531452087008e-06, "loss": 0.0081, "step": 799500 }, { "epoch": 12.06, "learning_rate": 5.881457362938844e-06, "loss": 0.0075, "step": 800000 }, { "epoch": 12.07, "learning_rate": 5.866383273790681e-06, "loss": 0.0074, "step": 800500 }, { "epoch": 12.07, "learning_rate": 5.851309184642518e-06, "loss": 0.0074, "step": 801000 }, { "epoch": 12.08, "learning_rate": 5.836235095494355e-06, "loss": 0.0076, "step": 801500 }, { "epoch": 12.09, "learning_rate": 5.821161006346191e-06, "loss": 0.0077, "step": 802000 }, { "epoch": 12.1, "learning_rate": 5.806086917198028e-06, "loss": 0.0076, "step": 802500 }, { "epoch": 12.1, "learning_rate": 5.791012828049865e-06, "loss": 0.0066, "step": 803000 }, { "epoch": 12.11, "learning_rate": 5.775938738901702e-06, "loss": 0.007, "step": 803500 }, { "epoch": 12.12, "learning_rate": 5.7608646497535385e-06, "loss": 0.0084, "step": 804000 }, { "epoch": 12.13, "learning_rate": 5.745790560605375e-06, "loss": 0.0074, "step": 804500 }, { "epoch": 12.13, "learning_rate": 5.7307164714572125e-06, "loss": 0.0077, "step": 805000 }, { "epoch": 12.14, "learning_rate": 5.715642382309049e-06, "loss": 0.0085, "step": 805500 }, { "epoch": 12.15, "learning_rate": 5.700568293160886e-06, "loss": 0.0077, "step": 806000 }, { "epoch": 12.16, "learning_rate": 5.685494204012722e-06, "loss": 0.0074, "step": 806500 }, { "epoch": 12.16, "learning_rate": 5.67042011486456e-06, "loss": 0.0085, "step": 807000 }, { "epoch": 12.17, "learning_rate": 5.655346025716396e-06, "loss": 0.0074, "step": 807500 }, { "epoch": 12.18, "learning_rate": 5.640271936568233e-06, "loss": 0.0082, "step": 808000 }, { "epoch": 12.19, "learning_rate": 5.625197847420069e-06, "loss": 0.0076, "step": 808500 }, { "epoch": 12.19, "learning_rate": 5.610123758271907e-06, "loss": 0.0084, "step": 809000 }, { "epoch": 12.2, "learning_rate": 5.595049669123743e-06, "loss": 0.008, "step": 809500 }, { "epoch": 12.21, "learning_rate": 5.57997557997558e-06, "loss": 0.008, "step": 810000 }, { "epoch": 12.22, "learning_rate": 5.5649014908274165e-06, "loss": 0.008, "step": 810500 }, { "epoch": 12.23, "learning_rate": 5.549827401679254e-06, "loss": 0.0086, "step": 811000 }, { "epoch": 12.23, "learning_rate": 5.5347533125310905e-06, "loss": 0.0081, "step": 811500 }, { "epoch": 12.24, "learning_rate": 5.519679223382927e-06, "loss": 0.0082, "step": 812000 }, { "epoch": 12.25, "learning_rate": 5.504605134234764e-06, "loss": 0.0076, "step": 812500 }, { "epoch": 12.26, "learning_rate": 5.489531045086601e-06, "loss": 0.0078, "step": 813000 }, { "epoch": 12.26, "learning_rate": 5.474456955938438e-06, "loss": 0.0079, "step": 813500 }, { "epoch": 12.27, "learning_rate": 5.459382866790274e-06, "loss": 0.0071, "step": 814000 }, { "epoch": 12.28, "learning_rate": 5.444308777642111e-06, "loss": 0.0073, "step": 814500 }, { "epoch": 12.29, "learning_rate": 5.429234688493948e-06, "loss": 0.0078, "step": 815000 }, { "epoch": 12.29, "learning_rate": 5.414160599345785e-06, "loss": 0.0096, "step": 815500 }, { "epoch": 12.3, "learning_rate": 5.399086510197621e-06, "loss": 0.0084, "step": 816000 }, { "epoch": 12.31, "learning_rate": 5.384012421049458e-06, "loss": 0.0082, "step": 816500 }, { "epoch": 12.32, "learning_rate": 5.368938331901295e-06, "loss": 0.008, "step": 817000 }, { "epoch": 12.32, "learning_rate": 5.353864242753132e-06, "loss": 0.0078, "step": 817500 }, { "epoch": 12.33, "learning_rate": 5.3387901536049685e-06, "loss": 0.0086, "step": 818000 }, { "epoch": 12.34, "learning_rate": 5.323716064456805e-06, "loss": 0.0081, "step": 818500 }, { "epoch": 12.35, "learning_rate": 5.3086419753086425e-06, "loss": 0.0077, "step": 819000 }, { "epoch": 12.35, "learning_rate": 5.293567886160479e-06, "loss": 0.0075, "step": 819500 }, { "epoch": 12.36, "learning_rate": 5.278493797012316e-06, "loss": 0.0079, "step": 820000 }, { "epoch": 12.37, "learning_rate": 5.263419707864152e-06, "loss": 0.007, "step": 820500 }, { "epoch": 12.38, "learning_rate": 5.24834561871599e-06, "loss": 0.0078, "step": 821000 }, { "epoch": 12.38, "learning_rate": 5.233271529567826e-06, "loss": 0.0077, "step": 821500 }, { "epoch": 12.39, "learning_rate": 5.218197440419663e-06, "loss": 0.008, "step": 822000 }, { "epoch": 12.4, "learning_rate": 5.203123351271499e-06, "loss": 0.0076, "step": 822500 }, { "epoch": 12.41, "learning_rate": 5.188049262123336e-06, "loss": 0.0075, "step": 823000 }, { "epoch": 12.41, "learning_rate": 5.172975172975173e-06, "loss": 0.0079, "step": 823500 }, { "epoch": 12.42, "learning_rate": 5.15790108382701e-06, "loss": 0.0078, "step": 824000 }, { "epoch": 12.43, "learning_rate": 5.1428269946788465e-06, "loss": 0.0087, "step": 824500 }, { "epoch": 12.44, "learning_rate": 5.127752905530683e-06, "loss": 0.0071, "step": 825000 }, { "epoch": 12.44, "learning_rate": 5.1126788163825205e-06, "loss": 0.0079, "step": 825500 }, { "epoch": 12.45, "learning_rate": 5.097604727234357e-06, "loss": 0.008, "step": 826000 }, { "epoch": 12.46, "learning_rate": 5.082530638086194e-06, "loss": 0.008, "step": 826500 }, { "epoch": 12.47, "learning_rate": 5.06745654893803e-06, "loss": 0.0083, "step": 827000 }, { "epoch": 12.47, "learning_rate": 5.052382459789868e-06, "loss": 0.0066, "step": 827500 }, { "epoch": 12.48, "learning_rate": 5.037308370641704e-06, "loss": 0.0085, "step": 828000 }, { "epoch": 12.49, "learning_rate": 5.022234281493541e-06, "loss": 0.0081, "step": 828500 }, { "epoch": 12.5, "learning_rate": 5.007160192345377e-06, "loss": 0.0084, "step": 829000 }, { "epoch": 12.5, "learning_rate": 4.992086103197215e-06, "loss": 0.0075, "step": 829500 }, { "epoch": 12.51, "learning_rate": 4.977012014049051e-06, "loss": 0.0079, "step": 830000 }, { "epoch": 12.52, "learning_rate": 4.961937924900888e-06, "loss": 0.0079, "step": 830500 }, { "epoch": 12.53, "learning_rate": 4.9468638357527245e-06, "loss": 0.0081, "step": 831000 }, { "epoch": 12.53, "learning_rate": 4.931789746604562e-06, "loss": 0.0082, "step": 831500 }, { "epoch": 12.54, "learning_rate": 4.9167156574563985e-06, "loss": 0.0072, "step": 832000 }, { "epoch": 12.55, "learning_rate": 4.901641568308235e-06, "loss": 0.008, "step": 832500 }, { "epoch": 12.56, "learning_rate": 4.886567479160072e-06, "loss": 0.0082, "step": 833000 }, { "epoch": 12.56, "learning_rate": 4.871493390011909e-06, "loss": 0.0074, "step": 833500 }, { "epoch": 12.57, "learning_rate": 4.856419300863746e-06, "loss": 0.0074, "step": 834000 }, { "epoch": 12.58, "learning_rate": 4.841345211715582e-06, "loss": 0.0087, "step": 834500 }, { "epoch": 12.59, "learning_rate": 4.826271122567419e-06, "loss": 0.008, "step": 835000 }, { "epoch": 12.59, "learning_rate": 4.811197033419256e-06, "loss": 0.0071, "step": 835500 }, { "epoch": 12.6, "learning_rate": 4.796122944271093e-06, "loss": 0.0084, "step": 836000 }, { "epoch": 12.61, "learning_rate": 4.781048855122929e-06, "loss": 0.0082, "step": 836500 }, { "epoch": 12.62, "learning_rate": 4.765974765974766e-06, "loss": 0.0085, "step": 837000 }, { "epoch": 12.62, "learning_rate": 4.750900676826603e-06, "loss": 0.0075, "step": 837500 }, { "epoch": 12.63, "learning_rate": 4.73582658767844e-06, "loss": 0.0086, "step": 838000 }, { "epoch": 12.64, "learning_rate": 4.7207524985302765e-06, "loss": 0.0077, "step": 838500 }, { "epoch": 12.65, "learning_rate": 4.705678409382113e-06, "loss": 0.0075, "step": 839000 }, { "epoch": 12.65, "learning_rate": 4.6906043202339505e-06, "loss": 0.0088, "step": 839500 }, { "epoch": 12.66, "learning_rate": 4.675530231085787e-06, "loss": 0.0079, "step": 840000 }, { "epoch": 12.67, "learning_rate": 4.660456141937624e-06, "loss": 0.0077, "step": 840500 }, { "epoch": 12.68, "learning_rate": 4.64538205278946e-06, "loss": 0.0074, "step": 841000 }, { "epoch": 12.68, "learning_rate": 4.630307963641298e-06, "loss": 0.0089, "step": 841500 }, { "epoch": 12.69, "learning_rate": 4.615233874493134e-06, "loss": 0.0075, "step": 842000 }, { "epoch": 12.7, "learning_rate": 4.600159785344971e-06, "loss": 0.0072, "step": 842500 }, { "epoch": 12.71, "learning_rate": 4.585085696196807e-06, "loss": 0.0081, "step": 843000 }, { "epoch": 12.71, "learning_rate": 4.570011607048645e-06, "loss": 0.0076, "step": 843500 }, { "epoch": 12.72, "learning_rate": 4.554937517900481e-06, "loss": 0.0084, "step": 844000 }, { "epoch": 12.73, "learning_rate": 4.539863428752318e-06, "loss": 0.0082, "step": 844500 }, { "epoch": 12.74, "learning_rate": 4.5247893396041546e-06, "loss": 0.0071, "step": 845000 }, { "epoch": 12.75, "learning_rate": 4.509715250455992e-06, "loss": 0.0076, "step": 845500 }, { "epoch": 12.75, "learning_rate": 4.4946411613078285e-06, "loss": 0.0088, "step": 846000 }, { "epoch": 12.76, "learning_rate": 4.479567072159665e-06, "loss": 0.0077, "step": 846500 }, { "epoch": 12.77, "learning_rate": 4.464492983011502e-06, "loss": 0.0079, "step": 847000 }, { "epoch": 12.78, "learning_rate": 4.449418893863339e-06, "loss": 0.0086, "step": 847500 }, { "epoch": 12.78, "learning_rate": 4.434344804715176e-06, "loss": 0.0077, "step": 848000 }, { "epoch": 12.79, "learning_rate": 4.419270715567012e-06, "loss": 0.0088, "step": 848500 }, { "epoch": 12.8, "learning_rate": 4.404196626418849e-06, "loss": 0.0085, "step": 849000 }, { "epoch": 12.81, "learning_rate": 4.389122537270686e-06, "loss": 0.0079, "step": 849500 }, { "epoch": 12.81, "learning_rate": 4.374048448122523e-06, "loss": 0.0073, "step": 850000 }, { "epoch": 12.82, "learning_rate": 4.358974358974359e-06, "loss": 0.0083, "step": 850500 }, { "epoch": 12.83, "learning_rate": 4.343900269826196e-06, "loss": 0.0072, "step": 851000 }, { "epoch": 12.84, "learning_rate": 4.3288261806780326e-06, "loss": 0.0078, "step": 851500 }, { "epoch": 12.84, "learning_rate": 4.31375209152987e-06, "loss": 0.0079, "step": 852000 }, { "epoch": 12.85, "learning_rate": 4.2986780023817066e-06, "loss": 0.0087, "step": 852500 }, { "epoch": 12.86, "learning_rate": 4.283603913233543e-06, "loss": 0.008, "step": 853000 }, { "epoch": 12.87, "learning_rate": 4.26852982408538e-06, "loss": 0.0073, "step": 853500 }, { "epoch": 12.87, "learning_rate": 4.253455734937217e-06, "loss": 0.0077, "step": 854000 }, { "epoch": 12.88, "learning_rate": 4.238381645789054e-06, "loss": 0.0077, "step": 854500 }, { "epoch": 12.89, "learning_rate": 4.22330755664089e-06, "loss": 0.0077, "step": 855000 }, { "epoch": 12.9, "learning_rate": 4.208233467492727e-06, "loss": 0.0076, "step": 855500 }, { "epoch": 12.9, "learning_rate": 4.193159378344564e-06, "loss": 0.0078, "step": 856000 }, { "epoch": 12.91, "learning_rate": 4.178085289196401e-06, "loss": 0.0082, "step": 856500 }, { "epoch": 12.92, "learning_rate": 4.1630112000482374e-06, "loss": 0.0078, "step": 857000 }, { "epoch": 12.93, "learning_rate": 4.147937110900074e-06, "loss": 0.0081, "step": 857500 }, { "epoch": 12.93, "learning_rate": 4.132863021751911e-06, "loss": 0.008, "step": 858000 }, { "epoch": 12.94, "learning_rate": 4.117788932603747e-06, "loss": 0.0089, "step": 858500 }, { "epoch": 12.95, "learning_rate": 4.102714843455584e-06, "loss": 0.008, "step": 859000 }, { "epoch": 12.96, "learning_rate": 4.08764075430742e-06, "loss": 0.0085, "step": 859500 }, { "epoch": 12.96, "learning_rate": 4.072566665159258e-06, "loss": 0.0075, "step": 860000 }, { "epoch": 12.97, "learning_rate": 4.057492576011094e-06, "loss": 0.0077, "step": 860500 }, { "epoch": 12.98, "learning_rate": 4.042418486862931e-06, "loss": 0.0081, "step": 861000 }, { "epoch": 12.99, "learning_rate": 4.0273443977147675e-06, "loss": 0.0083, "step": 861500 }, { "epoch": 12.99, "learning_rate": 4.012270308566605e-06, "loss": 0.008, "step": 862000 }, { "epoch": 13.0, "eval_accuracy": 0.9868350971666137, "eval_f1": 0.9444285207463515, "eval_loss": 0.08315455913543701, "eval_precision": 0.9305644428644784, "eval_recall": 0.9587119562784603, "eval_runtime": 247.292, "eval_samples_per_second": 476.861, "eval_steps_per_second": 29.807, "step": 862407 }, { "epoch": 13.0, "learning_rate": 3.9971962194184415e-06, "loss": 0.0071, "step": 862500 }, { "epoch": 13.01, "learning_rate": 3.982122130270278e-06, "loss": 0.0065, "step": 863000 }, { "epoch": 13.02, "learning_rate": 3.967048041122115e-06, "loss": 0.007, "step": 863500 }, { "epoch": 13.02, "learning_rate": 3.951973951973952e-06, "loss": 0.0069, "step": 864000 }, { "epoch": 13.03, "learning_rate": 3.936899862825789e-06, "loss": 0.0059, "step": 864500 }, { "epoch": 13.04, "learning_rate": 3.921825773677625e-06, "loss": 0.0062, "step": 865000 }, { "epoch": 13.05, "learning_rate": 3.906751684529462e-06, "loss": 0.0072, "step": 865500 }, { "epoch": 13.05, "learning_rate": 3.891677595381299e-06, "loss": 0.0063, "step": 866000 }, { "epoch": 13.06, "learning_rate": 3.876603506233136e-06, "loss": 0.0067, "step": 866500 }, { "epoch": 13.07, "learning_rate": 3.861529417084972e-06, "loss": 0.0077, "step": 867000 }, { "epoch": 13.08, "learning_rate": 3.846455327936809e-06, "loss": 0.0068, "step": 867500 }, { "epoch": 13.08, "learning_rate": 3.831381238788646e-06, "loss": 0.0064, "step": 868000 }, { "epoch": 13.09, "learning_rate": 3.816307149640483e-06, "loss": 0.0069, "step": 868500 }, { "epoch": 13.1, "learning_rate": 3.8012330604923195e-06, "loss": 0.0057, "step": 869000 }, { "epoch": 13.11, "learning_rate": 3.7861589713441565e-06, "loss": 0.007, "step": 869500 }, { "epoch": 13.11, "learning_rate": 3.771084882195993e-06, "loss": 0.0069, "step": 870000 }, { "epoch": 13.12, "learning_rate": 3.75601079304783e-06, "loss": 0.0063, "step": 870500 }, { "epoch": 13.13, "learning_rate": 3.740936703899667e-06, "loss": 0.0067, "step": 871000 }, { "epoch": 13.14, "learning_rate": 3.725862614751504e-06, "loss": 0.0071, "step": 871500 }, { "epoch": 13.14, "learning_rate": 3.7107885256033406e-06, "loss": 0.006, "step": 872000 }, { "epoch": 13.15, "learning_rate": 3.6957144364551776e-06, "loss": 0.0058, "step": 872500 }, { "epoch": 13.16, "learning_rate": 3.680640347307014e-06, "loss": 0.0058, "step": 873000 }, { "epoch": 13.17, "learning_rate": 3.665566258158851e-06, "loss": 0.0072, "step": 873500 }, { "epoch": 13.17, "learning_rate": 3.6504921690106878e-06, "loss": 0.0061, "step": 874000 }, { "epoch": 13.18, "learning_rate": 3.6354180798625248e-06, "loss": 0.007, "step": 874500 }, { "epoch": 13.19, "learning_rate": 3.6203439907143613e-06, "loss": 0.0067, "step": 875000 }, { "epoch": 13.2, "learning_rate": 3.6052699015661983e-06, "loss": 0.0064, "step": 875500 }, { "epoch": 13.2, "learning_rate": 3.590195812418035e-06, "loss": 0.007, "step": 876000 }, { "epoch": 13.21, "learning_rate": 3.5751217232698715e-06, "loss": 0.0064, "step": 876500 }, { "epoch": 13.22, "learning_rate": 3.560047634121708e-06, "loss": 0.0063, "step": 877000 }, { "epoch": 13.23, "learning_rate": 3.544973544973545e-06, "loss": 0.0065, "step": 877500 }, { "epoch": 13.24, "learning_rate": 3.5298994558253816e-06, "loss": 0.0068, "step": 878000 }, { "epoch": 13.24, "learning_rate": 3.5148253666772186e-06, "loss": 0.0068, "step": 878500 }, { "epoch": 13.25, "learning_rate": 3.499751277529055e-06, "loss": 0.0061, "step": 879000 }, { "epoch": 13.26, "learning_rate": 3.484677188380892e-06, "loss": 0.0062, "step": 879500 }, { "epoch": 13.27, "learning_rate": 3.4696030992327288e-06, "loss": 0.0077, "step": 880000 }, { "epoch": 13.27, "learning_rate": 3.4545290100845658e-06, "loss": 0.0072, "step": 880500 }, { "epoch": 13.28, "learning_rate": 3.4394549209364023e-06, "loss": 0.0074, "step": 881000 }, { "epoch": 13.29, "learning_rate": 3.424380831788239e-06, "loss": 0.007, "step": 881500 }, { "epoch": 13.3, "learning_rate": 3.409306742640076e-06, "loss": 0.0066, "step": 882000 }, { "epoch": 13.3, "learning_rate": 3.3942326534919125e-06, "loss": 0.0065, "step": 882500 }, { "epoch": 13.31, "learning_rate": 3.3791585643437495e-06, "loss": 0.0071, "step": 883000 }, { "epoch": 13.32, "learning_rate": 3.364084475195586e-06, "loss": 0.0073, "step": 883500 }, { "epoch": 13.33, "learning_rate": 3.349010386047423e-06, "loss": 0.0065, "step": 884000 }, { "epoch": 13.33, "learning_rate": 3.3339362968992596e-06, "loss": 0.007, "step": 884500 }, { "epoch": 13.34, "learning_rate": 3.3188622077510966e-06, "loss": 0.0075, "step": 885000 }, { "epoch": 13.35, "learning_rate": 3.3037881186029332e-06, "loss": 0.0072, "step": 885500 }, { "epoch": 13.36, "learning_rate": 3.2887140294547702e-06, "loss": 0.0069, "step": 886000 }, { "epoch": 13.36, "learning_rate": 3.2736399403066068e-06, "loss": 0.0074, "step": 886500 }, { "epoch": 13.37, "learning_rate": 3.2585658511584438e-06, "loss": 0.0066, "step": 887000 }, { "epoch": 13.38, "learning_rate": 3.2434917620102804e-06, "loss": 0.0066, "step": 887500 }, { "epoch": 13.39, "learning_rate": 3.2284176728621174e-06, "loss": 0.0068, "step": 888000 }, { "epoch": 13.39, "learning_rate": 3.213343583713954e-06, "loss": 0.0073, "step": 888500 }, { "epoch": 13.4, "learning_rate": 3.198269494565791e-06, "loss": 0.0076, "step": 889000 }, { "epoch": 13.41, "learning_rate": 3.1831954054176275e-06, "loss": 0.0065, "step": 889500 }, { "epoch": 13.42, "learning_rate": 3.1681213162694645e-06, "loss": 0.0064, "step": 890000 }, { "epoch": 13.42, "learning_rate": 3.153047227121301e-06, "loss": 0.0072, "step": 890500 }, { "epoch": 13.43, "learning_rate": 3.137973137973138e-06, "loss": 0.0065, "step": 891000 }, { "epoch": 13.44, "learning_rate": 3.1228990488249747e-06, "loss": 0.0074, "step": 891500 }, { "epoch": 13.45, "learning_rate": 3.1078249596768117e-06, "loss": 0.0064, "step": 892000 }, { "epoch": 13.45, "learning_rate": 3.0927508705286482e-06, "loss": 0.0071, "step": 892500 }, { "epoch": 13.46, "learning_rate": 3.0776767813804852e-06, "loss": 0.006, "step": 893000 }, { "epoch": 13.47, "learning_rate": 3.062602692232322e-06, "loss": 0.0074, "step": 893500 }, { "epoch": 13.48, "learning_rate": 3.047528603084159e-06, "loss": 0.0071, "step": 894000 }, { "epoch": 13.48, "learning_rate": 3.0324545139359954e-06, "loss": 0.0062, "step": 894500 }, { "epoch": 13.49, "learning_rate": 3.0173804247878324e-06, "loss": 0.0077, "step": 895000 }, { "epoch": 13.5, "learning_rate": 3.002306335639669e-06, "loss": 0.0068, "step": 895500 }, { "epoch": 13.51, "learning_rate": 2.987232246491506e-06, "loss": 0.0065, "step": 896000 }, { "epoch": 13.51, "learning_rate": 2.9721581573433425e-06, "loss": 0.0073, "step": 896500 }, { "epoch": 13.52, "learning_rate": 2.9570840681951795e-06, "loss": 0.007, "step": 897000 }, { "epoch": 13.53, "learning_rate": 2.942009979047016e-06, "loss": 0.0071, "step": 897500 }, { "epoch": 13.54, "learning_rate": 2.926935889898853e-06, "loss": 0.0072, "step": 898000 }, { "epoch": 13.54, "learning_rate": 2.9118618007506897e-06, "loss": 0.0063, "step": 898500 }, { "epoch": 13.55, "learning_rate": 2.8967877116025267e-06, "loss": 0.0078, "step": 899000 }, { "epoch": 13.56, "learning_rate": 2.8817136224543632e-06, "loss": 0.0078, "step": 899500 }, { "epoch": 13.57, "learning_rate": 2.8666395333062002e-06, "loss": 0.006, "step": 900000 }, { "epoch": 13.57, "learning_rate": 2.851565444158037e-06, "loss": 0.0066, "step": 900500 }, { "epoch": 13.58, "learning_rate": 2.836491355009874e-06, "loss": 0.0064, "step": 901000 }, { "epoch": 13.59, "learning_rate": 2.8214172658617104e-06, "loss": 0.0069, "step": 901500 }, { "epoch": 13.6, "learning_rate": 2.8063431767135474e-06, "loss": 0.0064, "step": 902000 }, { "epoch": 13.6, "learning_rate": 2.791269087565384e-06, "loss": 0.0067, "step": 902500 }, { "epoch": 13.61, "learning_rate": 2.776194998417221e-06, "loss": 0.0074, "step": 903000 }, { "epoch": 13.62, "learning_rate": 2.7611209092690575e-06, "loss": 0.0075, "step": 903500 }, { "epoch": 13.63, "learning_rate": 2.7460468201208945e-06, "loss": 0.0068, "step": 904000 }, { "epoch": 13.63, "learning_rate": 2.730972730972731e-06, "loss": 0.0071, "step": 904500 }, { "epoch": 13.64, "learning_rate": 2.715898641824568e-06, "loss": 0.0069, "step": 905000 }, { "epoch": 13.65, "learning_rate": 2.7008245526764047e-06, "loss": 0.0076, "step": 905500 }, { "epoch": 13.66, "learning_rate": 2.6857504635282417e-06, "loss": 0.0075, "step": 906000 }, { "epoch": 13.66, "learning_rate": 2.6706763743800783e-06, "loss": 0.0078, "step": 906500 }, { "epoch": 13.67, "learning_rate": 2.6556022852319153e-06, "loss": 0.0068, "step": 907000 }, { "epoch": 13.68, "learning_rate": 2.640528196083752e-06, "loss": 0.0069, "step": 907500 }, { "epoch": 13.69, "learning_rate": 2.625454106935589e-06, "loss": 0.0079, "step": 908000 }, { "epoch": 13.69, "learning_rate": 2.6103800177874254e-06, "loss": 0.0074, "step": 908500 }, { "epoch": 13.7, "learning_rate": 2.5953059286392624e-06, "loss": 0.0063, "step": 909000 }, { "epoch": 13.71, "learning_rate": 2.580231839491099e-06, "loss": 0.0064, "step": 909500 }, { "epoch": 13.72, "learning_rate": 2.5651577503429355e-06, "loss": 0.0075, "step": 910000 }, { "epoch": 13.72, "learning_rate": 2.5500836611947725e-06, "loss": 0.007, "step": 910500 }, { "epoch": 13.73, "learning_rate": 2.535009572046609e-06, "loss": 0.0065, "step": 911000 }, { "epoch": 13.74, "learning_rate": 2.519935482898446e-06, "loss": 0.0068, "step": 911500 }, { "epoch": 13.75, "learning_rate": 2.5048613937502827e-06, "loss": 0.0069, "step": 912000 }, { "epoch": 13.76, "learning_rate": 2.4897873046021197e-06, "loss": 0.0067, "step": 912500 }, { "epoch": 13.76, "learning_rate": 2.4747132154539563e-06, "loss": 0.0064, "step": 913000 }, { "epoch": 13.77, "learning_rate": 2.4596391263057933e-06, "loss": 0.0071, "step": 913500 }, { "epoch": 13.78, "learning_rate": 2.4445650371576294e-06, "loss": 0.0067, "step": 914000 }, { "epoch": 13.79, "learning_rate": 2.4294909480094664e-06, "loss": 0.0075, "step": 914500 }, { "epoch": 13.79, "learning_rate": 2.414416858861303e-06, "loss": 0.0063, "step": 915000 }, { "epoch": 13.8, "learning_rate": 2.39934276971314e-06, "loss": 0.0062, "step": 915500 }, { "epoch": 13.81, "learning_rate": 2.3842686805649766e-06, "loss": 0.0071, "step": 916000 }, { "epoch": 13.82, "learning_rate": 2.3691945914168136e-06, "loss": 0.0065, "step": 916500 }, { "epoch": 13.82, "learning_rate": 2.35412050226865e-06, "loss": 0.0061, "step": 917000 }, { "epoch": 13.83, "learning_rate": 2.339046413120487e-06, "loss": 0.0064, "step": 917500 }, { "epoch": 13.84, "learning_rate": 2.3239723239723237e-06, "loss": 0.0072, "step": 918000 }, { "epoch": 13.85, "learning_rate": 2.3088982348241607e-06, "loss": 0.0061, "step": 918500 }, { "epoch": 13.85, "learning_rate": 2.2938241456759973e-06, "loss": 0.0061, "step": 919000 }, { "epoch": 13.86, "learning_rate": 2.2787500565278343e-06, "loss": 0.0064, "step": 919500 }, { "epoch": 13.87, "learning_rate": 2.263675967379671e-06, "loss": 0.0064, "step": 920000 }, { "epoch": 13.88, "learning_rate": 2.248601878231508e-06, "loss": 0.0071, "step": 920500 }, { "epoch": 13.88, "learning_rate": 2.2335277890833444e-06, "loss": 0.0066, "step": 921000 }, { "epoch": 13.89, "learning_rate": 2.2184536999351814e-06, "loss": 0.0069, "step": 921500 }, { "epoch": 13.9, "learning_rate": 2.203379610787018e-06, "loss": 0.0061, "step": 922000 }, { "epoch": 13.91, "learning_rate": 2.188305521638855e-06, "loss": 0.0063, "step": 922500 }, { "epoch": 13.91, "learning_rate": 2.1732314324906916e-06, "loss": 0.0064, "step": 923000 }, { "epoch": 13.92, "learning_rate": 2.1581573433425286e-06, "loss": 0.0072, "step": 923500 }, { "epoch": 13.93, "learning_rate": 2.143083254194365e-06, "loss": 0.0067, "step": 924000 }, { "epoch": 13.94, "learning_rate": 2.128009165046202e-06, "loss": 0.0066, "step": 924500 }, { "epoch": 13.94, "learning_rate": 2.1129350758980387e-06, "loss": 0.0067, "step": 925000 }, { "epoch": 13.95, "learning_rate": 2.0978609867498757e-06, "loss": 0.007, "step": 925500 }, { "epoch": 13.96, "learning_rate": 2.0827868976017123e-06, "loss": 0.006, "step": 926000 }, { "epoch": 13.97, "learning_rate": 2.0677128084535493e-06, "loss": 0.0068, "step": 926500 }, { "epoch": 13.97, "learning_rate": 2.052638719305386e-06, "loss": 0.0067, "step": 927000 }, { "epoch": 13.98, "learning_rate": 2.037564630157223e-06, "loss": 0.0073, "step": 927500 }, { "epoch": 13.99, "learning_rate": 2.0224905410090594e-06, "loss": 0.0065, "step": 928000 }, { "epoch": 14.0, "learning_rate": 2.0074164518608964e-06, "loss": 0.0063, "step": 928500 }, { "epoch": 14.0, "eval_accuracy": 0.9873058004854246, "eval_f1": 0.9463840873791273, "eval_loss": 0.08857506513595581, "eval_precision": 0.932367970051888, "eval_recall": 0.9608280397335363, "eval_runtime": 248.0108, "eval_samples_per_second": 475.479, "eval_steps_per_second": 29.72, "step": 928746 }, { "epoch": 14.0, "learning_rate": 1.992342362712733e-06, "loss": 0.0063, "step": 929000 }, { "epoch": 14.01, "learning_rate": 1.97726827356457e-06, "loss": 0.0058, "step": 929500 }, { "epoch": 14.02, "learning_rate": 1.9621941844164066e-06, "loss": 0.006, "step": 930000 }, { "epoch": 14.03, "learning_rate": 1.9471200952682436e-06, "loss": 0.0061, "step": 930500 }, { "epoch": 14.03, "learning_rate": 1.93204600612008e-06, "loss": 0.0065, "step": 931000 }, { "epoch": 14.04, "learning_rate": 1.916971916971917e-06, "loss": 0.0065, "step": 931500 }, { "epoch": 14.05, "learning_rate": 1.9018978278237537e-06, "loss": 0.0054, "step": 932000 }, { "epoch": 14.06, "learning_rate": 1.8868237386755905e-06, "loss": 0.0061, "step": 932500 }, { "epoch": 14.06, "learning_rate": 1.8717496495274273e-06, "loss": 0.006, "step": 933000 }, { "epoch": 14.07, "learning_rate": 1.856675560379264e-06, "loss": 0.0056, "step": 933500 }, { "epoch": 14.08, "learning_rate": 1.8416014712311009e-06, "loss": 0.0054, "step": 934000 }, { "epoch": 14.09, "learning_rate": 1.8265273820829377e-06, "loss": 0.0061, "step": 934500 }, { "epoch": 14.09, "learning_rate": 1.8114532929347745e-06, "loss": 0.0055, "step": 935000 }, { "epoch": 14.1, "learning_rate": 1.7963792037866112e-06, "loss": 0.0062, "step": 935500 }, { "epoch": 14.11, "learning_rate": 1.781305114638448e-06, "loss": 0.0061, "step": 936000 }, { "epoch": 14.12, "learning_rate": 1.7662310254902848e-06, "loss": 0.0055, "step": 936500 }, { "epoch": 14.12, "learning_rate": 1.7511569363421216e-06, "loss": 0.0064, "step": 937000 }, { "epoch": 14.13, "learning_rate": 1.7360828471939584e-06, "loss": 0.007, "step": 937500 }, { "epoch": 14.14, "learning_rate": 1.7210087580457952e-06, "loss": 0.006, "step": 938000 }, { "epoch": 14.15, "learning_rate": 1.705934668897632e-06, "loss": 0.006, "step": 938500 }, { "epoch": 14.15, "learning_rate": 1.6908605797494688e-06, "loss": 0.0063, "step": 939000 }, { "epoch": 14.16, "learning_rate": 1.6757864906013055e-06, "loss": 0.0055, "step": 939500 }, { "epoch": 14.17, "learning_rate": 1.6607124014531423e-06, "loss": 0.006, "step": 940000 }, { "epoch": 14.18, "learning_rate": 1.6456383123049791e-06, "loss": 0.0062, "step": 940500 }, { "epoch": 14.18, "learning_rate": 1.630564223156816e-06, "loss": 0.0057, "step": 941000 }, { "epoch": 14.19, "learning_rate": 1.6154901340086527e-06, "loss": 0.0059, "step": 941500 }, { "epoch": 14.2, "learning_rate": 1.6004160448604893e-06, "loss": 0.0061, "step": 942000 }, { "epoch": 14.21, "learning_rate": 1.585341955712326e-06, "loss": 0.0054, "step": 942500 }, { "epoch": 14.21, "learning_rate": 1.5702678665641628e-06, "loss": 0.006, "step": 943000 }, { "epoch": 14.22, "learning_rate": 1.5551937774159996e-06, "loss": 0.0059, "step": 943500 }, { "epoch": 14.23, "learning_rate": 1.5401196882678364e-06, "loss": 0.0064, "step": 944000 }, { "epoch": 14.24, "learning_rate": 1.5250455991196732e-06, "loss": 0.0064, "step": 944500 }, { "epoch": 14.25, "learning_rate": 1.50997150997151e-06, "loss": 0.0058, "step": 945000 }, { "epoch": 14.25, "learning_rate": 1.4948974208233468e-06, "loss": 0.0062, "step": 945500 }, { "epoch": 14.26, "learning_rate": 1.4798233316751836e-06, "loss": 0.0061, "step": 946000 }, { "epoch": 14.27, "learning_rate": 1.4647492425270203e-06, "loss": 0.0065, "step": 946500 }, { "epoch": 14.28, "learning_rate": 1.4496751533788571e-06, "loss": 0.0059, "step": 947000 }, { "epoch": 14.28, "learning_rate": 1.434601064230694e-06, "loss": 0.0058, "step": 947500 }, { "epoch": 14.29, "learning_rate": 1.4195269750825307e-06, "loss": 0.0063, "step": 948000 }, { "epoch": 14.3, "learning_rate": 1.4044528859343675e-06, "loss": 0.0063, "step": 948500 }, { "epoch": 14.31, "learning_rate": 1.3893787967862043e-06, "loss": 0.0064, "step": 949000 }, { "epoch": 14.31, "learning_rate": 1.374304707638041e-06, "loss": 0.0067, "step": 949500 }, { "epoch": 14.32, "learning_rate": 1.3592306184898778e-06, "loss": 0.0052, "step": 950000 }, { "epoch": 14.33, "learning_rate": 1.3441565293417146e-06, "loss": 0.0058, "step": 950500 }, { "epoch": 14.34, "learning_rate": 1.3290824401935514e-06, "loss": 0.0068, "step": 951000 }, { "epoch": 14.34, "learning_rate": 1.3140083510453882e-06, "loss": 0.0058, "step": 951500 }, { "epoch": 14.35, "learning_rate": 1.298934261897225e-06, "loss": 0.0064, "step": 952000 }, { "epoch": 14.36, "learning_rate": 1.2838601727490618e-06, "loss": 0.0062, "step": 952500 }, { "epoch": 14.37, "learning_rate": 1.2687860836008986e-06, "loss": 0.006, "step": 953000 }, { "epoch": 14.37, "learning_rate": 1.2537119944527354e-06, "loss": 0.006, "step": 953500 }, { "epoch": 14.38, "learning_rate": 1.2386379053045721e-06, "loss": 0.0055, "step": 954000 }, { "epoch": 14.39, "learning_rate": 1.2235638161564087e-06, "loss": 0.0062, "step": 954500 }, { "epoch": 14.4, "learning_rate": 1.2084897270082455e-06, "loss": 0.0058, "step": 955000 }, { "epoch": 14.4, "learning_rate": 1.1934156378600823e-06, "loss": 0.0056, "step": 955500 }, { "epoch": 14.41, "learning_rate": 1.178341548711919e-06, "loss": 0.0062, "step": 956000 }, { "epoch": 14.42, "learning_rate": 1.1632674595637559e-06, "loss": 0.0062, "step": 956500 }, { "epoch": 14.43, "learning_rate": 1.1481933704155926e-06, "loss": 0.0065, "step": 957000 }, { "epoch": 14.43, "learning_rate": 1.1331192812674294e-06, "loss": 0.0064, "step": 957500 }, { "epoch": 14.44, "learning_rate": 1.1180451921192662e-06, "loss": 0.0058, "step": 958000 }, { "epoch": 14.45, "learning_rate": 1.102971102971103e-06, "loss": 0.0057, "step": 958500 }, { "epoch": 14.46, "learning_rate": 1.0878970138229398e-06, "loss": 0.0058, "step": 959000 }, { "epoch": 14.46, "learning_rate": 1.0728229246747766e-06, "loss": 0.0054, "step": 959500 }, { "epoch": 14.47, "learning_rate": 1.0577488355266134e-06, "loss": 0.0058, "step": 960000 }, { "epoch": 14.48, "learning_rate": 1.04267474637845e-06, "loss": 0.0066, "step": 960500 }, { "epoch": 14.49, "learning_rate": 1.0276006572302867e-06, "loss": 0.0054, "step": 961000 }, { "epoch": 14.49, "learning_rate": 1.0125265680821235e-06, "loss": 0.0062, "step": 961500 }, { "epoch": 14.5, "learning_rate": 9.974524789339603e-07, "loss": 0.0075, "step": 962000 }, { "epoch": 14.51, "learning_rate": 9.82378389785797e-07, "loss": 0.006, "step": 962500 }, { "epoch": 14.52, "learning_rate": 9.673043006376339e-07, "loss": 0.0064, "step": 963000 }, { "epoch": 14.52, "learning_rate": 9.522302114894707e-07, "loss": 0.0057, "step": 963500 }, { "epoch": 14.53, "learning_rate": 9.371561223413076e-07, "loss": 0.006, "step": 964000 }, { "epoch": 14.54, "learning_rate": 9.220820331931443e-07, "loss": 0.0065, "step": 964500 }, { "epoch": 14.55, "learning_rate": 9.070079440449811e-07, "loss": 0.0048, "step": 965000 }, { "epoch": 14.55, "learning_rate": 8.919338548968178e-07, "loss": 0.0055, "step": 965500 }, { "epoch": 14.56, "learning_rate": 8.768597657486546e-07, "loss": 0.0059, "step": 966000 }, { "epoch": 14.57, "learning_rate": 8.617856766004914e-07, "loss": 0.0067, "step": 966500 }, { "epoch": 14.58, "learning_rate": 8.467115874523282e-07, "loss": 0.0061, "step": 967000 }, { "epoch": 14.58, "learning_rate": 8.31637498304165e-07, "loss": 0.0054, "step": 967500 }, { "epoch": 14.59, "learning_rate": 8.165634091560017e-07, "loss": 0.0058, "step": 968000 }, { "epoch": 14.6, "learning_rate": 8.014893200078385e-07, "loss": 0.006, "step": 968500 }, { "epoch": 14.61, "learning_rate": 7.864152308596753e-07, "loss": 0.0054, "step": 969000 }, { "epoch": 14.61, "learning_rate": 7.713411417115121e-07, "loss": 0.0057, "step": 969500 }, { "epoch": 14.62, "learning_rate": 7.562670525633489e-07, "loss": 0.0061, "step": 970000 }, { "epoch": 14.63, "learning_rate": 7.411929634151857e-07, "loss": 0.0062, "step": 970500 }, { "epoch": 14.64, "learning_rate": 7.261188742670225e-07, "loss": 0.0058, "step": 971000 }, { "epoch": 14.64, "learning_rate": 7.110447851188592e-07, "loss": 0.0065, "step": 971500 }, { "epoch": 14.65, "learning_rate": 6.95970695970696e-07, "loss": 0.0059, "step": 972000 }, { "epoch": 14.66, "learning_rate": 6.808966068225328e-07, "loss": 0.0062, "step": 972500 }, { "epoch": 14.67, "learning_rate": 6.658225176743696e-07, "loss": 0.0061, "step": 973000 }, { "epoch": 14.67, "learning_rate": 6.507484285262064e-07, "loss": 0.006, "step": 973500 }, { "epoch": 14.68, "learning_rate": 6.356743393780432e-07, "loss": 0.0058, "step": 974000 }, { "epoch": 14.69, "learning_rate": 6.206002502298799e-07, "loss": 0.006, "step": 974500 }, { "epoch": 14.7, "learning_rate": 6.055261610817167e-07, "loss": 0.0058, "step": 975000 }, { "epoch": 14.7, "learning_rate": 5.904520719335534e-07, "loss": 0.0066, "step": 975500 }, { "epoch": 14.71, "learning_rate": 5.753779827853902e-07, "loss": 0.0051, "step": 976000 }, { "epoch": 14.72, "learning_rate": 5.603038936372269e-07, "loss": 0.0056, "step": 976500 }, { "epoch": 14.73, "learning_rate": 5.452298044890637e-07, "loss": 0.0061, "step": 977000 }, { "epoch": 14.73, "learning_rate": 5.301557153409005e-07, "loss": 0.0053, "step": 977500 }, { "epoch": 14.74, "learning_rate": 5.150816261927373e-07, "loss": 0.006, "step": 978000 }, { "epoch": 14.75, "learning_rate": 5.00007537044574e-07, "loss": 0.0057, "step": 978500 }, { "epoch": 14.76, "learning_rate": 4.849334478964108e-07, "loss": 0.0068, "step": 979000 }, { "epoch": 14.77, "learning_rate": 4.698593587482477e-07, "loss": 0.0064, "step": 979500 }, { "epoch": 14.77, "learning_rate": 4.547852696000844e-07, "loss": 0.0058, "step": 980000 }, { "epoch": 14.78, "learning_rate": 4.397111804519212e-07, "loss": 0.0063, "step": 980500 }, { "epoch": 14.79, "learning_rate": 4.24637091303758e-07, "loss": 0.0066, "step": 981000 }, { "epoch": 14.8, "learning_rate": 4.095630021555947e-07, "loss": 0.006, "step": 981500 }, { "epoch": 14.8, "learning_rate": 3.944889130074315e-07, "loss": 0.0061, "step": 982000 }, { "epoch": 14.81, "learning_rate": 3.794148238592683e-07, "loss": 0.006, "step": 982500 }, { "epoch": 14.82, "learning_rate": 3.643407347111051e-07, "loss": 0.0052, "step": 983000 }, { "epoch": 14.83, "learning_rate": 3.4926664556294186e-07, "loss": 0.0067, "step": 983500 }, { "epoch": 14.83, "learning_rate": 3.3419255641477865e-07, "loss": 0.0054, "step": 984000 }, { "epoch": 14.84, "learning_rate": 3.1911846726661544e-07, "loss": 0.006, "step": 984500 }, { "epoch": 14.85, "learning_rate": 3.040443781184522e-07, "loss": 0.0061, "step": 985000 }, { "epoch": 14.86, "learning_rate": 2.88970288970289e-07, "loss": 0.0057, "step": 985500 }, { "epoch": 14.86, "learning_rate": 2.738961998221258e-07, "loss": 0.0064, "step": 986000 }, { "epoch": 14.87, "learning_rate": 2.5882211067396253e-07, "loss": 0.0052, "step": 986500 }, { "epoch": 14.88, "learning_rate": 2.437480215257993e-07, "loss": 0.0057, "step": 987000 }, { "epoch": 14.89, "learning_rate": 2.2867393237763608e-07, "loss": 0.0057, "step": 987500 }, { "epoch": 14.89, "learning_rate": 2.1359984322947287e-07, "loss": 0.0061, "step": 988000 }, { "epoch": 14.9, "learning_rate": 1.9852575408130965e-07, "loss": 0.0063, "step": 988500 }, { "epoch": 14.91, "learning_rate": 1.8345166493314644e-07, "loss": 0.0067, "step": 989000 }, { "epoch": 14.92, "learning_rate": 1.683775757849832e-07, "loss": 0.0061, "step": 989500 }, { "epoch": 14.92, "learning_rate": 1.5330348663681996e-07, "loss": 0.006, "step": 990000 }, { "epoch": 14.93, "learning_rate": 1.3822939748865675e-07, "loss": 0.0069, "step": 990500 }, { "epoch": 14.94, "learning_rate": 1.2315530834049353e-07, "loss": 0.0065, "step": 991000 }, { "epoch": 14.95, "learning_rate": 1.080812191923303e-07, "loss": 0.0061, "step": 991500 }, { "epoch": 14.95, "learning_rate": 9.300713004416708e-08, "loss": 0.0062, "step": 992000 }, { "epoch": 14.96, "learning_rate": 7.793304089600385e-08, "loss": 0.0065, "step": 992500 }, { "epoch": 14.97, "learning_rate": 6.285895174784064e-08, "loss": 0.0059, "step": 993000 }, { "epoch": 14.98, "learning_rate": 4.778486259967741e-08, "loss": 0.0059, "step": 993500 }, { "epoch": 14.98, "learning_rate": 3.271077345151419e-08, "loss": 0.0061, "step": 994000 }, { "epoch": 14.99, "learning_rate": 1.763668430335097e-08, "loss": 0.0056, "step": 994500 }, { "epoch": 15.0, "learning_rate": 2.562595155187748e-09, "loss": 0.0064, "step": 995000 }, { "epoch": 15.0, "eval_accuracy": 0.9870947836888659, "eval_f1": 0.9460905457195457, "eval_loss": 0.09196844696998596, "eval_precision": 0.9336466224997623, "eval_recall": 0.958870662537591, "eval_runtime": 276.4676, "eval_samples_per_second": 426.538, "eval_steps_per_second": 26.661, "step": 995085 }, { "epoch": 15.0, "step": 995085, "total_flos": 4.160281266207326e+18, "train_loss": 0.0, "train_runtime": 0.7612, "train_samples_per_second": 20915215.892, "train_steps_per_second": 1307218.235 } ], "max_steps": 995085, "num_train_epochs": 15, "total_flos": 4.160281266207326e+18, "trial_name": null, "trial_params": null }