{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9118125313435558, "eval_steps": 100, "global_step": 7500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.012157500417914077, "eval_accuracy": 0.9982292163897571, "eval_f1": 0.5714285714285715, "eval_loss": NaN, "eval_precision": 0.5992217898832685, "eval_recall": 0.5460992907801419, "eval_runtime": 4.2246, "eval_samples_per_second": 12.072, "eval_steps_per_second": 12.072, "step": 100 }, { "epoch": 0.024315000835828153, "eval_accuracy": 0.9979520502594583, "eval_f1": 0.561128526645768, "eval_loss": NaN, "eval_precision": 0.5028089887640449, "eval_recall": 0.6347517730496454, "eval_runtime": 4.0691, "eval_samples_per_second": 12.534, "eval_steps_per_second": 12.534, "step": 200 }, { "epoch": 0.03647250125374223, "eval_accuracy": 0.9982908088631569, "eval_f1": 0.6003401360544217, "eval_loss": NaN, "eval_precision": 0.576797385620915, "eval_recall": 0.625886524822695, "eval_runtime": 3.8202, "eval_samples_per_second": 13.35, "eval_steps_per_second": 13.35, "step": 300 }, { "epoch": 0.048630001671656306, "eval_accuracy": 0.9983370032182067, "eval_f1": 0.5949579831932772, "eval_loss": NaN, "eval_precision": 0.5654952076677316, "eval_recall": 0.6276595744680851, "eval_runtime": 3.892, "eval_samples_per_second": 13.104, "eval_steps_per_second": 13.104, "step": 400 }, { "epoch": 0.060787502089570386, "grad_norm": 0.56640625, "learning_rate": 9.39209726443769e-05, "loss": 0.0667, "step": 500 }, { "epoch": 0.060787502089570386, "eval_accuracy": 0.9980906333246077, "eval_f1": 0.5690789473684211, "eval_loss": NaN, "eval_precision": 0.5306748466257669, "eval_recall": 0.6134751773049646, "eval_runtime": 3.9741, "eval_samples_per_second": 12.833, "eval_steps_per_second": 12.833, "step": 500 }, { "epoch": 0.07294500250748447, "eval_accuracy": 0.9981522257980074, "eval_f1": 0.5753646677471637, "eval_loss": NaN, "eval_precision": 0.5298507462686567, "eval_recall": 0.6294326241134752, "eval_runtime": 3.9469, "eval_samples_per_second": 12.922, "eval_steps_per_second": 12.922, "step": 600 }, { "epoch": 0.08510250292539853, "eval_accuracy": 0.9983524013365567, "eval_f1": 0.608108108108108, "eval_loss": NaN, "eval_precision": 0.5806451612903226, "eval_recall": 0.6382978723404256, "eval_runtime": 4.9419, "eval_samples_per_second": 10.32, "eval_steps_per_second": 10.32, "step": 700 }, { "epoch": 0.09726000334331261, "eval_accuracy": 0.9978904577860586, "eval_f1": 0.5460317460317461, "eval_loss": NaN, "eval_precision": 0.4942528735632184, "eval_recall": 0.6099290780141844, "eval_runtime": 3.9464, "eval_samples_per_second": 12.923, "eval_steps_per_second": 12.923, "step": 800 }, { "epoch": 0.10941750376122669, "eval_accuracy": 0.9981984201530573, "eval_f1": 0.5831960461285007, "eval_loss": NaN, "eval_precision": 0.5446153846153846, "eval_recall": 0.6276595744680851, "eval_runtime": 3.9207, "eval_samples_per_second": 13.008, "eval_steps_per_second": 13.008, "step": 900 }, { "epoch": 0.12157500417914077, "grad_norm": 0.49609375, "learning_rate": 8.78419452887538e-05, "loss": 0.0273, "step": 1000 }, { "epoch": 0.12157500417914077, "eval_accuracy": 0.9981368276796575, "eval_f1": 0.5692821368948245, "eval_loss": NaN, "eval_precision": 0.5378548895899053, "eval_recall": 0.6046099290780141, "eval_runtime": 3.9151, "eval_samples_per_second": 13.026, "eval_steps_per_second": 13.026, "step": 1000 }, { "epoch": 0.13373250459705485, "eval_accuracy": 0.9985679749934558, "eval_f1": 0.6378091872791519, "eval_loss": NaN, "eval_precision": 0.6355633802816901, "eval_recall": 0.6400709219858156, "eval_runtime": 3.9167, "eval_samples_per_second": 13.021, "eval_steps_per_second": 13.021, "step": 1100 }, { "epoch": 0.14589000501496893, "eval_accuracy": 0.9982446145081071, "eval_f1": 0.5888324873096447, "eval_loss": NaN, "eval_precision": 0.5631067961165048, "eval_recall": 0.6170212765957447, "eval_runtime": 3.9347, "eval_samples_per_second": 12.962, "eval_steps_per_second": 12.962, "step": 1200 }, { "epoch": 0.158047505432883, "eval_accuracy": 0.9982908088631569, "eval_f1": 0.6126418152350082, "eval_loss": NaN, "eval_precision": 0.564179104477612, "eval_recall": 0.6702127659574468, "eval_runtime": 3.9694, "eval_samples_per_second": 12.848, "eval_steps_per_second": 12.848, "step": 1300 }, { "epoch": 0.17020500585079706, "eval_accuracy": 0.9983524013365567, "eval_f1": 0.6128500823723229, "eval_loss": NaN, "eval_precision": 0.5723076923076923, "eval_recall": 0.6595744680851063, "eval_runtime": 3.9587, "eval_samples_per_second": 12.883, "eval_steps_per_second": 12.883, "step": 1400 }, { "epoch": 0.18236250626871114, "grad_norm": 1.453125, "learning_rate": 8.17629179331307e-05, "loss": 0.0198, "step": 1500 }, { "epoch": 0.18236250626871114, "eval_accuracy": 0.9984139938099564, "eval_f1": 0.6216216216216217, "eval_loss": NaN, "eval_precision": 0.5935483870967742, "eval_recall": 0.6524822695035462, "eval_runtime": 3.9699, "eval_samples_per_second": 12.847, "eval_steps_per_second": 12.847, "step": 1500 }, { "epoch": 0.19452000668662522, "eval_accuracy": 0.9983985956916065, "eval_f1": 0.6113013698630136, "eval_loss": NaN, "eval_precision": 0.5910596026490066, "eval_recall": 0.6329787234042553, "eval_runtime": 3.9634, "eval_samples_per_second": 12.868, "eval_steps_per_second": 12.868, "step": 1600 }, { "epoch": 0.2066775071045393, "eval_accuracy": 0.998537178756756, "eval_f1": 0.6450511945392491, "eval_loss": NaN, "eval_precision": 0.6217105263157895, "eval_recall": 0.6702127659574468, "eval_runtime": 3.9348, "eval_samples_per_second": 12.961, "eval_steps_per_second": 12.961, "step": 1700 }, { "epoch": 0.21883500752245338, "eval_accuracy": 0.9985063825200561, "eval_f1": 0.61101243339254, "eval_loss": NaN, "eval_precision": 0.6120996441281139, "eval_recall": 0.6099290780141844, "eval_runtime": 3.9403, "eval_samples_per_second": 12.943, "eval_steps_per_second": 12.943, "step": 1800 }, { "epoch": 0.23099250794036746, "eval_accuracy": 0.9985987712301557, "eval_f1": 0.638888888888889, "eval_loss": NaN, "eval_precision": 0.6258503401360545, "eval_recall": 0.6524822695035462, "eval_runtime": 3.8792, "eval_samples_per_second": 13.147, "eval_steps_per_second": 13.147, "step": 1900 }, { "epoch": 0.24315000835828154, "grad_norm": 0.337890625, "learning_rate": 7.56838905775076e-05, "loss": 0.015, "step": 2000 }, { "epoch": 0.24315000835828154, "eval_accuracy": 0.9983677994549066, "eval_f1": 0.6179966044142615, "eval_loss": NaN, "eval_precision": 0.5928338762214984, "eval_recall": 0.6453900709219859, "eval_runtime": 3.8941, "eval_samples_per_second": 13.097, "eval_steps_per_second": 13.097, "step": 2000 }, { "epoch": 0.2553075087761956, "eval_accuracy": 0.998521780638406, "eval_f1": 0.6296928327645053, "eval_loss": NaN, "eval_precision": 0.6069078947368421, "eval_recall": 0.6542553191489362, "eval_runtime": 3.8421, "eval_samples_per_second": 13.274, "eval_steps_per_second": 13.274, "step": 2100 }, { "epoch": 0.2674650091941097, "eval_accuracy": 0.9984755862833562, "eval_f1": 0.6352739726027398, "eval_loss": NaN, "eval_precision": 0.6142384105960265, "eval_recall": 0.6578014184397163, "eval_runtime": 3.8929, "eval_samples_per_second": 13.101, "eval_steps_per_second": 13.101, "step": 2200 }, { "epoch": 0.2796225096120238, "eval_accuracy": 0.9985679749934558, "eval_f1": 0.6507666098807496, "eval_loss": NaN, "eval_precision": 0.6262295081967213, "eval_recall": 0.6773049645390071, "eval_runtime": 4.0354, "eval_samples_per_second": 12.638, "eval_steps_per_second": 12.638, "step": 2300 }, { "epoch": 0.29178001002993786, "eval_accuracy": 0.998275410744807, "eval_f1": 0.5949152542372882, "eval_loss": NaN, "eval_precision": 0.5698051948051948, "eval_recall": 0.6223404255319149, "eval_runtime": 3.9548, "eval_samples_per_second": 12.896, "eval_steps_per_second": 12.896, "step": 2400 }, { "epoch": 0.30393751044785194, "grad_norm": 0.5, "learning_rate": 6.96048632218845e-05, "loss": 0.0122, "step": 2500 }, { "epoch": 0.30393751044785194, "eval_accuracy": 0.9983677994549066, "eval_f1": 0.6200657894736843, "eval_loss": NaN, "eval_precision": 0.5782208588957055, "eval_recall": 0.6684397163120568, "eval_runtime": 3.9191, "eval_samples_per_second": 13.013, "eval_steps_per_second": 13.013, "step": 2500 }, { "epoch": 0.316095010865766, "eval_accuracy": 0.998537178756756, "eval_f1": 0.6379310344827586, "eval_loss": NaN, "eval_precision": 0.6208053691275168, "eval_recall": 0.6560283687943262, "eval_runtime": 3.9133, "eval_samples_per_second": 13.033, "eval_steps_per_second": 13.033, "step": 2600 }, { "epoch": 0.32825251128368005, "eval_accuracy": 0.9984293919283064, "eval_f1": 0.6284722222222222, "eval_loss": NaN, "eval_precision": 0.6156462585034014, "eval_recall": 0.6418439716312057, "eval_runtime": 3.9458, "eval_samples_per_second": 12.925, "eval_steps_per_second": 12.925, "step": 2700 }, { "epoch": 0.34041001170159413, "eval_accuracy": 0.9984293919283064, "eval_f1": 0.6254295532646049, "eval_loss": NaN, "eval_precision": 0.6066666666666667, "eval_recall": 0.6453900709219859, "eval_runtime": 5.0134, "eval_samples_per_second": 10.173, "eval_steps_per_second": 10.173, "step": 2800 }, { "epoch": 0.3525675121195082, "eval_accuracy": 0.9984601881650063, "eval_f1": 0.6185567010309279, "eval_loss": NaN, "eval_precision": 0.6, "eval_recall": 0.6382978723404256, "eval_runtime": 3.9121, "eval_samples_per_second": 13.037, "eval_steps_per_second": 13.037, "step": 2900 }, { "epoch": 0.3647250125374223, "grad_norm": 0.47265625, "learning_rate": 6.352583586626139e-05, "loss": 0.0102, "step": 3000 }, { "epoch": 0.3647250125374223, "eval_accuracy": 0.9984601881650063, "eval_f1": 0.6175438596491228, "eval_loss": NaN, "eval_precision": 0.6111111111111112, "eval_recall": 0.624113475177305, "eval_runtime": 3.9032, "eval_samples_per_second": 13.066, "eval_steps_per_second": 13.066, "step": 3000 }, { "epoch": 0.37688251295533637, "eval_accuracy": 0.9983985956916065, "eval_f1": 0.6067615658362988, "eval_loss": NaN, "eval_precision": 0.6089285714285714, "eval_recall": 0.6046099290780141, "eval_runtime": 3.9167, "eval_samples_per_second": 13.021, "eval_steps_per_second": 13.021, "step": 3100 }, { "epoch": 0.38904001337325045, "eval_accuracy": 0.9981984201530573, "eval_f1": 0.5792163543441226, "eval_loss": NaN, "eval_precision": 0.5573770491803278, "eval_recall": 0.6028368794326241, "eval_runtime": 3.9349, "eval_samples_per_second": 12.961, "eval_steps_per_second": 12.961, "step": 3200 }, { "epoch": 0.40119751379116453, "eval_accuracy": 0.998275410744807, "eval_f1": 0.6016528925619835, "eval_loss": NaN, "eval_precision": 0.5634674922600619, "eval_recall": 0.6453900709219859, "eval_runtime": 5.1154, "eval_samples_per_second": 9.97, "eval_steps_per_second": 9.97, "step": 3300 }, { "epoch": 0.4133550142090786, "eval_accuracy": 0.9983985956916065, "eval_f1": 0.6064735945485519, "eval_loss": NaN, "eval_precision": 0.5836065573770491, "eval_recall": 0.6312056737588653, "eval_runtime": 3.9386, "eval_samples_per_second": 12.949, "eval_steps_per_second": 12.949, "step": 3400 }, { "epoch": 0.4255125146269927, "grad_norm": 0.63671875, "learning_rate": 5.744680851063831e-05, "loss": 0.0087, "step": 3500 }, { "epoch": 0.4255125146269927, "eval_accuracy": 0.9984601881650063, "eval_f1": 0.6166950596252129, "eval_loss": NaN, "eval_precision": 0.5934426229508196, "eval_recall": 0.6418439716312057, "eval_runtime": 3.9507, "eval_samples_per_second": 12.909, "eval_steps_per_second": 12.909, "step": 3500 }, { "epoch": 0.43767001504490677, "eval_accuracy": 0.9985525768751059, "eval_f1": 0.6412859560067682, "eval_loss": NaN, "eval_precision": 0.6132686084142395, "eval_recall": 0.6719858156028369, "eval_runtime": 3.9696, "eval_samples_per_second": 12.848, "eval_steps_per_second": 12.848, "step": 3600 }, { "epoch": 0.44982751546282085, "eval_accuracy": 0.998521780638406, "eval_f1": 0.638655462184874, "eval_loss": NaN, "eval_precision": 0.6070287539936102, "eval_recall": 0.6737588652482269, "eval_runtime": 3.9563, "eval_samples_per_second": 12.891, "eval_steps_per_second": 12.891, "step": 3700 }, { "epoch": 0.46198501588073493, "eval_accuracy": 0.9984909844017061, "eval_f1": 0.6237288135593221, "eval_loss": NaN, "eval_precision": 0.5974025974025974, "eval_recall": 0.6524822695035462, "eval_runtime": 3.9225, "eval_samples_per_second": 13.002, "eval_steps_per_second": 13.002, "step": 3800 }, { "epoch": 0.474142516298649, "eval_accuracy": 0.9984601881650063, "eval_f1": 0.6193656093489149, "eval_loss": NaN, "eval_precision": 0.5851735015772871, "eval_recall": 0.6578014184397163, "eval_runtime": 3.8167, "eval_samples_per_second": 13.362, "eval_steps_per_second": 13.362, "step": 3900 }, { "epoch": 0.4863000167165631, "grad_norm": 0.2578125, "learning_rate": 5.13677811550152e-05, "loss": 0.0074, "step": 4000 }, { "epoch": 0.4863000167165631, "eval_accuracy": 0.9984293919283064, "eval_f1": 0.6102564102564102, "eval_loss": NaN, "eval_precision": 0.5891089108910891, "eval_recall": 0.6329787234042553, "eval_runtime": 3.9064, "eval_samples_per_second": 13.055, "eval_steps_per_second": 13.055, "step": 4000 }, { "epoch": 0.49845751713447717, "eval_accuracy": 0.9983677994549066, "eval_f1": 0.5982905982905984, "eval_loss": NaN, "eval_precision": 0.5775577557755776, "eval_recall": 0.6205673758865248, "eval_runtime": 3.8733, "eval_samples_per_second": 13.167, "eval_steps_per_second": 13.167, "step": 4100 }, { "epoch": 0.5106150175523912, "eval_accuracy": 0.9985525768751059, "eval_f1": 0.6456558773424191, "eval_loss": NaN, "eval_precision": 0.6213114754098361, "eval_recall": 0.6719858156028369, "eval_runtime": 3.9945, "eval_samples_per_second": 12.767, "eval_steps_per_second": 12.767, "step": 4200 }, { "epoch": 0.5227725179703053, "eval_accuracy": 0.9984755862833562, "eval_f1": 0.6348408710217756, "eval_loss": NaN, "eval_precision": 0.6015873015873016, "eval_recall": 0.6719858156028369, "eval_runtime": 3.9449, "eval_samples_per_second": 12.928, "eval_steps_per_second": 12.928, "step": 4300 }, { "epoch": 0.5349300183882194, "eval_accuracy": 0.9984447900466563, "eval_f1": 0.6231155778894472, "eval_loss": NaN, "eval_precision": 0.5904761904761905, "eval_recall": 0.6595744680851063, "eval_runtime": 3.9223, "eval_samples_per_second": 13.003, "eval_steps_per_second": 13.003, "step": 4400 }, { "epoch": 0.5470875188061335, "grad_norm": 0.796875, "learning_rate": 4.52887537993921e-05, "loss": 0.0069, "step": 4500 }, { "epoch": 0.5470875188061335, "eval_accuracy": 0.9983985956916065, "eval_f1": 0.6139767054908487, "eval_loss": NaN, "eval_precision": 0.5783699059561128, "eval_recall": 0.6542553191489362, "eval_runtime": 3.8802, "eval_samples_per_second": 13.144, "eval_steps_per_second": 13.144, "step": 4500 }, { "epoch": 0.5592450192240476, "eval_accuracy": 0.9983216050998568, "eval_f1": 0.5902192242833052, "eval_loss": NaN, "eval_precision": 0.5627009646302251, "eval_recall": 0.6205673758865248, "eval_runtime": 3.8007, "eval_samples_per_second": 13.418, "eval_steps_per_second": 13.418, "step": 4600 }, { "epoch": 0.5714025196419616, "eval_accuracy": 0.9984755862833562, "eval_f1": 0.6243739565943239, "eval_loss": NaN, "eval_precision": 0.5899053627760252, "eval_recall": 0.6631205673758865, "eval_runtime": 5.2589, "eval_samples_per_second": 9.698, "eval_steps_per_second": 9.698, "step": 4700 }, { "epoch": 0.5835600200598757, "eval_accuracy": 0.9984755862833562, "eval_f1": 0.6231155778894472, "eval_loss": NaN, "eval_precision": 0.5904761904761905, "eval_recall": 0.6595744680851063, "eval_runtime": 3.9226, "eval_samples_per_second": 13.001, "eval_steps_per_second": 13.001, "step": 4800 }, { "epoch": 0.5957175204777898, "eval_accuracy": 0.9984755862833562, "eval_f1": 0.6298157453936348, "eval_loss": NaN, "eval_precision": 0.5968253968253968, "eval_recall": 0.6666666666666666, "eval_runtime": 3.9435, "eval_samples_per_second": 12.933, "eval_steps_per_second": 12.933, "step": 4900 }, { "epoch": 0.6078750208957039, "grad_norm": 0.298828125, "learning_rate": 3.9209726443769e-05, "loss": 0.0065, "step": 5000 }, { "epoch": 0.6078750208957039, "eval_accuracy": 0.9984293919283064, "eval_f1": 0.6074450084602369, "eval_loss": NaN, "eval_precision": 0.580906148867314, "eval_recall": 0.6365248226950354, "eval_runtime": 3.9158, "eval_samples_per_second": 13.024, "eval_steps_per_second": 13.024, "step": 5000 }, { "epoch": 0.620032521313618, "eval_accuracy": 0.9984909844017061, "eval_f1": 0.6282271944922547, "eval_loss": NaN, "eval_precision": 0.6103678929765887, "eval_recall": 0.6471631205673759, "eval_runtime": 3.9051, "eval_samples_per_second": 13.06, "eval_steps_per_second": 13.06, "step": 5100 }, { "epoch": 0.632190021731532, "eval_accuracy": 0.9984139938099564, "eval_f1": 0.6184873949579831, "eval_loss": NaN, "eval_precision": 0.5878594249201278, "eval_recall": 0.6524822695035462, "eval_runtime": 3.9165, "eval_samples_per_second": 13.022, "eval_steps_per_second": 13.022, "step": 5200 }, { "epoch": 0.6443475221494461, "eval_accuracy": 0.9984139938099564, "eval_f1": 0.6115843270868824, "eval_loss": NaN, "eval_precision": 0.5885245901639344, "eval_recall": 0.6365248226950354, "eval_runtime": 3.9234, "eval_samples_per_second": 12.999, "eval_steps_per_second": 12.999, "step": 5300 }, { "epoch": 0.6565050225673601, "eval_accuracy": 0.9985063825200561, "eval_f1": 0.6408094435075886, "eval_loss": NaN, "eval_precision": 0.6109324758842444, "eval_recall": 0.6737588652482269, "eval_runtime": 3.9194, "eval_samples_per_second": 13.012, "eval_steps_per_second": 13.012, "step": 5400 }, { "epoch": 0.6686625229852742, "grad_norm": 0.40625, "learning_rate": 3.31306990881459e-05, "loss": 0.0059, "step": 5500 }, { "epoch": 0.6686625229852742, "eval_accuracy": 0.998537178756756, "eval_f1": 0.6480541455160745, "eval_loss": NaN, "eval_precision": 0.6197411003236246, "eval_recall": 0.6790780141843972, "eval_runtime": 3.9129, "eval_samples_per_second": 13.034, "eval_steps_per_second": 13.034, "step": 5500 }, { "epoch": 0.6808200234031883, "eval_accuracy": 0.9984601881650063, "eval_f1": 0.6254295532646049, "eval_loss": NaN, "eval_precision": 0.6066666666666667, "eval_recall": 0.6453900709219859, "eval_runtime": 5.213, "eval_samples_per_second": 9.783, "eval_steps_per_second": 9.783, "step": 5600 }, { "epoch": 0.6929775238211023, "eval_accuracy": 0.9984909844017061, "eval_f1": 0.626465661641541, "eval_loss": NaN, "eval_precision": 0.5936507936507937, "eval_recall": 0.6631205673758865, "eval_runtime": 3.9034, "eval_samples_per_second": 13.066, "eval_steps_per_second": 13.066, "step": 5700 }, { "epoch": 0.7051350242390164, "eval_accuracy": 0.9984601881650063, "eval_f1": 0.6192893401015229, "eval_loss": NaN, "eval_precision": 0.5922330097087378, "eval_recall": 0.648936170212766, "eval_runtime": 3.9098, "eval_samples_per_second": 13.044, "eval_steps_per_second": 13.044, "step": 5800 }, { "epoch": 0.7172925246569305, "eval_accuracy": 0.9984909844017061, "eval_f1": 0.6323777403035413, "eval_loss": NaN, "eval_precision": 0.6028938906752411, "eval_recall": 0.6648936170212766, "eval_runtime": 3.9284, "eval_samples_per_second": 12.982, "eval_steps_per_second": 12.982, "step": 5900 }, { "epoch": 0.7294500250748446, "grad_norm": 0.41796875, "learning_rate": 2.7051671732522798e-05, "loss": 0.0058, "step": 6000 }, { "epoch": 0.7294500250748446, "eval_accuracy": 0.9984293919283064, "eval_f1": 0.6126279863481229, "eval_loss": NaN, "eval_precision": 0.5904605263157895, "eval_recall": 0.6365248226950354, "eval_runtime": 5.2952, "eval_samples_per_second": 9.631, "eval_steps_per_second": 9.631, "step": 6000 }, { "epoch": 0.7416075254927587, "eval_accuracy": 0.9984755862833562, "eval_f1": 0.6201022146507666, "eval_loss": NaN, "eval_precision": 0.5967213114754099, "eval_recall": 0.6453900709219859, "eval_runtime": 5.2342, "eval_samples_per_second": 9.744, "eval_steps_per_second": 9.744, "step": 6100 }, { "epoch": 0.7537650259106727, "eval_accuracy": 0.9984909844017061, "eval_f1": 0.6306913996627319, "eval_loss": NaN, "eval_precision": 0.6012861736334405, "eval_recall": 0.6631205673758865, "eval_runtime": 3.8769, "eval_samples_per_second": 13.155, "eval_steps_per_second": 13.155, "step": 6200 }, { "epoch": 0.7659225263285868, "eval_accuracy": 0.9984909844017061, "eval_f1": 0.6281833616298812, "eval_loss": NaN, "eval_precision": 0.6026058631921825, "eval_recall": 0.6560283687943262, "eval_runtime": 3.8104, "eval_samples_per_second": 13.384, "eval_steps_per_second": 13.384, "step": 6300 }, { "epoch": 0.7780800267465009, "eval_accuracy": 0.9984601881650063, "eval_f1": 0.6302521008403361, "eval_loss": NaN, "eval_precision": 0.5990415335463258, "eval_recall": 0.6648936170212766, "eval_runtime": 3.9796, "eval_samples_per_second": 12.815, "eval_steps_per_second": 12.815, "step": 6400 }, { "epoch": 0.790237527164415, "grad_norm": 0.5390625, "learning_rate": 2.0972644376899697e-05, "loss": 0.0055, "step": 6500 }, { "epoch": 0.790237527164415, "eval_accuracy": 0.9984601881650063, "eval_f1": 0.6302521008403361, "eval_loss": NaN, "eval_precision": 0.5990415335463258, "eval_recall": 0.6648936170212766, "eval_runtime": 3.9236, "eval_samples_per_second": 12.998, "eval_steps_per_second": 12.998, "step": 6500 }, { "epoch": 0.8023950275823291, "eval_accuracy": 0.9985063825200561, "eval_f1": 0.6256410256410256, "eval_loss": NaN, "eval_precision": 0.6039603960396039, "eval_recall": 0.648936170212766, "eval_runtime": 3.9065, "eval_samples_per_second": 13.055, "eval_steps_per_second": 13.055, "step": 6600 }, { "epoch": 0.8145525280002431, "eval_accuracy": 0.9984755862833562, "eval_f1": 0.6209262435677531, "eval_loss": NaN, "eval_precision": 0.6013289036544851, "eval_recall": 0.6418439716312057, "eval_runtime": 3.883, "eval_samples_per_second": 13.134, "eval_steps_per_second": 13.134, "step": 6700 }, { "epoch": 0.8267100284181572, "eval_accuracy": 0.9984293919283064, "eval_f1": 0.6214405360134003, "eval_loss": NaN, "eval_precision": 0.5888888888888889, "eval_recall": 0.6578014184397163, "eval_runtime": 3.8972, "eval_samples_per_second": 13.086, "eval_steps_per_second": 13.086, "step": 6800 }, { "epoch": 0.8388675288360713, "eval_accuracy": 0.9984293919283064, "eval_f1": 0.6121416526138279, "eval_loss": NaN, "eval_precision": 0.5836012861736335, "eval_recall": 0.6436170212765957, "eval_runtime": 3.9065, "eval_samples_per_second": 13.055, "eval_steps_per_second": 13.055, "step": 6900 }, { "epoch": 0.8510250292539854, "grad_norm": 0.5234375, "learning_rate": 1.4893617021276596e-05, "loss": 0.0052, "step": 7000 }, { "epoch": 0.8510250292539854, "eval_accuracy": 0.9984601881650063, "eval_f1": 0.6235294117647059, "eval_loss": NaN, "eval_precision": 0.5926517571884984, "eval_recall": 0.6578014184397163, "eval_runtime": 4.6336, "eval_samples_per_second": 11.007, "eval_steps_per_second": 11.007, "step": 7000 }, { "epoch": 0.8631825296718995, "eval_accuracy": 0.9984601881650063, "eval_f1": 0.6302521008403361, "eval_loss": NaN, "eval_precision": 0.5990415335463258, "eval_recall": 0.6648936170212766, "eval_runtime": 3.9703, "eval_samples_per_second": 12.845, "eval_steps_per_second": 12.845, "step": 7100 }, { "epoch": 0.8753400300898135, "eval_accuracy": 0.9984293919283064, "eval_f1": 0.6214405360134003, "eval_loss": NaN, "eval_precision": 0.5888888888888889, "eval_recall": 0.6578014184397163, "eval_runtime": 3.9965, "eval_samples_per_second": 12.761, "eval_steps_per_second": 12.761, "step": 7200 }, { "epoch": 0.8874975305077276, "eval_accuracy": 0.998537178756756, "eval_f1": 0.637137989778535, "eval_loss": NaN, "eval_precision": 0.6131147540983607, "eval_recall": 0.6631205673758865, "eval_runtime": 4.0553, "eval_samples_per_second": 12.576, "eval_steps_per_second": 12.576, "step": 7300 }, { "epoch": 0.8996550309256417, "eval_accuracy": 0.9985525768751059, "eval_f1": 0.6417657045840408, "eval_loss": NaN, "eval_precision": 0.6156351791530945, "eval_recall": 0.6702127659574468, "eval_runtime": 4.0275, "eval_samples_per_second": 12.663, "eval_steps_per_second": 12.663, "step": 7400 }, { "epoch": 0.9118125313435558, "grad_norm": 0.2412109375, "learning_rate": 8.814589665653496e-06, "loss": 0.0052, "step": 7500 }, { "epoch": 0.9118125313435558, "eval_accuracy": 0.9985525768751059, "eval_f1": 0.6417657045840408, "eval_loss": NaN, "eval_precision": 0.6156351791530945, "eval_recall": 0.6702127659574468, "eval_runtime": 4.1063, "eval_samples_per_second": 12.42, "eval_steps_per_second": 12.42, "step": 7500 } ], "logging_steps": 500, "max_steps": 8225, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6.941757589813818e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }