{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 1000, "global_step": 15153, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "learning_rate": 4.835016168415495e-05, "loss": 0.8808, "step": 500 }, { "epoch": 0.2, "learning_rate": 4.670032336830991e-05, "loss": 0.6038, "step": 1000 }, { "epoch": 0.2, "eval_COMMENT": { "f1": 0.5911795137092601, "number": 6900, "precision": 0.5337459131247081, "recall": 0.662463768115942 }, "eval_NAME": { "f1": 0.7920792079207921, "number": 8840, "precision": 0.7791639308382579, "recall": 0.8054298642533937 }, "eval_QTY": { "f1": 0.9777005904828066, "number": 7169, "precision": 0.9738444505950733, "recall": 0.9815873901520435 }, "eval_RANGE_END": { "f1": 0.5829145728643217, "number": 94, "precision": 0.5523809523809524, "recall": 0.6170212765957447 }, "eval_UNIT": { "f1": 0.9443229604709841, "number": 5778, "precision": 0.918520942408377, "recall": 0.9716164762893735 }, "eval_loss": 0.5653780698776245, "eval_overall_accuracy": 0.7991183879093199, "eval_overall_f1": 0.8143376831425424, "eval_overall_precision": 0.7834323326376625, "eval_overall_recall": 0.8477815225322262, "eval_runtime": 9.1463, "eval_samples_per_second": 929.995, "eval_steps_per_second": 29.083, "step": 1000 }, { "epoch": 0.3, "learning_rate": 4.505048505246486e-05, "loss": 0.5757, "step": 1500 }, { "epoch": 0.4, "learning_rate": 4.3400646736619816e-05, "loss": 0.5583, "step": 2000 }, { "epoch": 0.4, "eval_COMMENT": { "f1": 0.6243035287874767, "number": 6900, "precision": 0.575587084148728, "recall": 0.6820289855072463 }, "eval_NAME": { "f1": 0.7976755880873889, "number": 8840, "precision": 0.7881196864303853, "recall": 0.8074660633484163 }, "eval_QTY": { "f1": 0.9807208717518859, "number": 7169, "precision": 0.982230306422275, "recall": 0.9792160691867764 }, "eval_RANGE_END": { "f1": 0.6640625, "number": 94, "precision": 0.5246913580246914, "recall": 0.9042553191489362 }, "eval_UNIT": { "f1": 0.9456703443664326, "number": 5778, "precision": 0.9144843194309732, "recall": 0.9790584977500866 }, "eval_loss": 0.5386040806770325, "eval_overall_accuracy": 0.8086405617891764, "eval_overall_f1": 0.8269673494765498, "eval_overall_precision": 0.8007680291590732, "eval_overall_recall": 0.8549390222716375, "eval_runtime": 12.5089, "eval_samples_per_second": 679.995, "eval_steps_per_second": 21.265, "step": 2000 }, { "epoch": 0.49, "learning_rate": 4.1750808420774766e-05, "loss": 0.535, "step": 2500 }, { "epoch": 0.59, "learning_rate": 4.010097010492972e-05, "loss": 0.5351, "step": 3000 }, { "epoch": 0.59, "eval_COMMENT": { "f1": 0.6408767695218214, "number": 6900, "precision": 0.5827500296595088, "recall": 0.7118840579710145 }, "eval_NAME": { "f1": 0.7941633377718598, "number": 8840, "precision": 0.7792900696864111, "recall": 0.8096153846153846 }, "eval_QTY": { "f1": 0.9805879090404881, "number": 7169, "precision": 0.974776016540317, "recall": 0.9864695215511229 }, "eval_RANGE_END": { "f1": 0.7333333333333333, "number": 94, "precision": 0.6027397260273972, "recall": 0.9361702127659575 }, "eval_UNIT": { "f1": 0.946524064171123, "number": 5778, "precision": 0.9150242326332795, "recall": 0.980269989615784 }, "eval_loss": 0.5204864740371704, "eval_overall_accuracy": 0.8152240287000992, "eval_overall_f1": 0.8299741602067184, "eval_overall_precision": 0.797750288424561, "eval_overall_recall": 0.8649108787047011, "eval_runtime": 9.7563, "eval_samples_per_second": 871.849, "eval_steps_per_second": 27.264, "step": 3000 }, { "epoch": 0.69, "learning_rate": 3.845113178908467e-05, "loss": 0.5267, "step": 3500 }, { "epoch": 0.79, "learning_rate": 3.680129347323962e-05, "loss": 0.5238, "step": 4000 }, { "epoch": 0.79, "eval_COMMENT": { "f1": 0.64383198621969, "number": 6900, "precision": 0.592994874298267, "recall": 0.7042028985507246 }, "eval_NAME": { "f1": 0.8008923591745678, "number": 8840, "precision": 0.7898789878987899, "recall": 0.8122171945701357 }, "eval_QTY": { "f1": 0.9833298458533863, "number": 7169, "precision": 0.9833984375, "recall": 0.983261263774585 }, "eval_RANGE_END": { "f1": 0.7445887445887446, "number": 94, "precision": 0.6277372262773723, "recall": 0.9148936170212766 }, "eval_UNIT": { "f1": 0.948472641826116, "number": 5778, "precision": 0.9206581948517433, "recall": 0.9780200761509172 }, "eval_loss": 0.507017195224762, "eval_overall_accuracy": 0.820261812075414, "eval_overall_f1": 0.83434160112926, "eval_overall_precision": 0.8079213720831842, "eval_overall_recall": 0.8625482088878079, "eval_runtime": 7.298, "eval_samples_per_second": 1165.532, "eval_steps_per_second": 36.449, "step": 4000 }, { "epoch": 0.89, "learning_rate": 3.515145515739457e-05, "loss": 0.5177, "step": 4500 }, { "epoch": 0.99, "learning_rate": 3.3501616841549535e-05, "loss": 0.5134, "step": 5000 }, { "epoch": 0.99, "eval_COMMENT": { "f1": 0.6618449602209219, "number": 6900, "precision": 0.6057287278854254, "recall": 0.7294202898550725 }, "eval_NAME": { "f1": 0.8053840591801545, "number": 8840, "precision": 0.7922092132618448, "recall": 0.8190045248868778 }, "eval_QTY": { "f1": 0.9847285853116757, "number": 7169, "precision": 0.980102252314495, "recall": 0.9893988003905705 }, "eval_RANGE_END": { "f1": 0.7627906976744186, "number": 94, "precision": 0.6776859504132231, "recall": 0.8723404255319149 }, "eval_UNIT": { "f1": 0.9481246345334559, "number": 5778, "precision": 0.9163571774584208, "recall": 0.9821737625475944 }, "eval_loss": 0.49935489892959595, "eval_overall_accuracy": 0.8229906114037097, "eval_overall_f1": 0.8405152224824355, "eval_overall_precision": 0.8104454982418787, "eval_overall_recall": 0.8729022619088982, "eval_runtime": 9.399, "eval_samples_per_second": 904.995, "eval_steps_per_second": 28.301, "step": 5000 }, { "epoch": 1.09, "learning_rate": 3.1851778525704485e-05, "loss": 0.5019, "step": 5500 }, { "epoch": 1.19, "learning_rate": 3.0201940209859435e-05, "loss": 0.4954, "step": 6000 }, { "epoch": 1.19, "eval_COMMENT": { "f1": 0.6517109756914216, "number": 6900, "precision": 0.6057512759865554, "recall": 0.7052173913043478 }, "eval_NAME": { "f1": 0.7966298985643812, "number": 8840, "precision": 0.7810020649929356, "recall": 0.81289592760181 }, "eval_QTY": { "f1": 0.9840166782487838, "number": 7169, "precision": 0.9804736186123806, "recall": 0.9875854372994839 }, "eval_RANGE_END": { "f1": 0.7906976744186046, "number": 94, "precision": 0.7024793388429752, "recall": 0.9042553191489362 }, "eval_UNIT": { "f1": 0.9480010015858442, "number": 5778, "precision": 0.9155247460906013, "recall": 0.9828660436137072 }, "eval_loss": 0.502167820930481, "eval_overall_accuracy": 0.8203381421265552, "eval_overall_f1": 0.8359973136333109, "eval_overall_precision": 0.8088631859384645, "eval_overall_recall": 0.8650151141377992, "eval_runtime": 6.9755, "eval_samples_per_second": 1219.408, "eval_steps_per_second": 38.133, "step": 6000 }, { "epoch": 1.29, "learning_rate": 2.855210189401439e-05, "loss": 0.5008, "step": 6500 }, { "epoch": 1.39, "learning_rate": 2.690226357816934e-05, "loss": 0.4998, "step": 7000 }, { "epoch": 1.39, "eval_COMMENT": { "f1": 0.672593378889405, "number": 6900, "precision": 0.6202128961213753, "recall": 0.7346376811594203 }, "eval_NAME": { "f1": 0.8033342595165324, "number": 8840, "precision": 0.7895139268159476, "recall": 0.8176470588235294 }, "eval_QTY": { "f1": 0.9836797321802204, "number": 7169, "precision": 0.9836797321802204, "recall": 0.9836797321802204 }, "eval_RANGE_END": { "f1": 0.7565217391304346, "number": 94, "precision": 0.6397058823529411, "recall": 0.925531914893617 }, "eval_UNIT": { "f1": 0.9496971736204576, "number": 5778, "precision": 0.9238952536824877, "recall": 0.976981654551748 }, "eval_loss": 0.4921141266822815, "eval_overall_accuracy": 0.825166017861232, "eval_overall_f1": 0.8427189032995095, "eval_overall_precision": 0.8158279933643431, "eval_overall_recall": 0.871442965845523, "eval_runtime": 8.6591, "eval_samples_per_second": 982.314, "eval_steps_per_second": 30.719, "step": 7000 }, { "epoch": 1.48, "learning_rate": 2.5252425262324292e-05, "loss": 0.4981, "step": 7500 }, { "epoch": 1.58, "learning_rate": 2.3602586946479245e-05, "loss": 0.4912, "step": 8000 }, { "epoch": 1.58, "eval_COMMENT": { "f1": 0.6753212169629186, "number": 6900, "precision": 0.6245536264006896, "recall": 0.735072463768116 }, "eval_NAME": { "f1": 0.8065398136264716, "number": 8840, "precision": 0.7958374628344896, "recall": 0.8175339366515837 }, "eval_QTY": { "f1": 0.9842043003270474, "number": 7169, "precision": 0.9819494584837545, "recall": 0.9864695215511229 }, "eval_RANGE_END": { "f1": 0.7665198237885463, "number": 94, "precision": 0.6541353383458647, "recall": 0.925531914893617 }, "eval_UNIT": { "f1": 0.9495798319327731, "number": 5778, "precision": 0.9229010127409344, "recall": 0.9778470058843891 }, "eval_loss": 0.4943971037864685, "eval_overall_accuracy": 0.8256049156552935, "eval_overall_f1": 0.8448183041722745, "eval_overall_precision": 0.818943866401383, "eval_overall_recall": 0.8723810847434071, "eval_runtime": 8.4356, "eval_samples_per_second": 1008.348, "eval_steps_per_second": 31.533, "step": 8000 }, { "epoch": 1.68, "learning_rate": 2.19527486306342e-05, "loss": 0.4874, "step": 8500 }, { "epoch": 1.78, "learning_rate": 2.0302910314789152e-05, "loss": 0.4974, "step": 9000 }, { "epoch": 1.78, "eval_COMMENT": { "f1": 0.6773682124455014, "number": 6900, "precision": 0.6223597960670065, "recall": 0.7430434782608696 }, "eval_NAME": { "f1": 0.8045016435456014, "number": 8840, "precision": 0.7926226808650785, "recall": 0.8167420814479638 }, "eval_QTY": { "f1": 0.984450177811868, "number": 7169, "precision": 0.9842442833240379, "recall": 0.9846561584600363 }, "eval_RANGE_END": { "f1": 0.7531380753138076, "number": 94, "precision": 0.6206896551724138, "recall": 0.9574468085106383 }, "eval_UNIT": { "f1": 0.949092741935484, "number": 5778, "precision": 0.9221351616062684, "recall": 0.9776739356178609 }, "eval_loss": 0.4863777756690979, "eval_overall_accuracy": 0.8257766582703611, "eval_overall_f1": 0.8442027160866864, "eval_overall_precision": 0.8166612536537837, "eval_overall_recall": 0.8736666550849519, "eval_runtime": 8.6924, "eval_samples_per_second": 978.559, "eval_steps_per_second": 30.602, "step": 9000 }, { "epoch": 1.88, "learning_rate": 1.8653071998944105e-05, "loss": 0.4939, "step": 9500 }, { "epoch": 1.98, "learning_rate": 1.700323368309906e-05, "loss": 0.4872, "step": 10000 }, { "epoch": 1.98, "eval_COMMENT": { "f1": 0.6780160857908847, "number": 6900, "precision": 0.6306733167082295, "recall": 0.7330434782608696 }, "eval_NAME": { "f1": 0.8058138887341983, "number": 8840, "precision": 0.7935724470768893, "recall": 0.8184389140271493 }, "eval_QTY": { "f1": 0.9854324945981738, "number": 7169, "precision": 0.9848147116188354, "recall": 0.9860510531454875 }, "eval_RANGE_END": { "f1": 0.7733333333333333, "number": 94, "precision": 0.6641221374045801, "recall": 0.925531914893617 }, "eval_UNIT": { "f1": 0.949941206114564, "number": 5778, "precision": 0.922813315926893, "recall": 0.9787123572170301 }, "eval_loss": 0.4847513437271118, "eval_overall_accuracy": 0.8277421570872453, "eval_overall_f1": 0.8458933535506697, "eval_overall_precision": 0.8210898148753843, "eval_overall_recall": 0.8722421041659428, "eval_runtime": 7.9849, "eval_samples_per_second": 1065.267, "eval_steps_per_second": 33.313, "step": 10000 }, { "epoch": 2.08, "learning_rate": 1.535339536725401e-05, "loss": 0.4725, "step": 10500 }, { "epoch": 2.18, "learning_rate": 1.3703557051408963e-05, "loss": 0.4777, "step": 11000 }, { "epoch": 2.18, "eval_COMMENT": { "f1": 0.6794315831609847, "number": 6900, "precision": 0.6294968475707752, "recall": 0.7379710144927536 }, "eval_NAME": { "f1": 0.8072463768115943, "number": 8840, "precision": 0.7957142857142857, "recall": 0.8191176470588235 }, "eval_QTY": { "f1": 0.9849162011173185, "number": 7169, "precision": 0.9861557824080548, "recall": 0.9836797321802204 }, "eval_RANGE_END": { "f1": 0.7837837837837838, "number": 94, "precision": 0.6796875, "recall": 0.925531914893617 }, "eval_UNIT": { "f1": 0.949970570924073, "number": 5778, "precision": 0.9237939493049877, "recall": 0.9776739356178609 }, "eval_loss": 0.4820418059825897, "eval_overall_accuracy": 0.8306617815433937, "eval_overall_f1": 0.8463378478539182, "eval_overall_precision": 0.8214040480005231, "eval_overall_recall": 0.872832771620166, "eval_runtime": 7.6368, "eval_samples_per_second": 1113.817, "eval_steps_per_second": 34.831, "step": 11000 }, { "epoch": 2.28, "learning_rate": 1.2053718735563915e-05, "loss": 0.4827, "step": 11500 }, { "epoch": 2.38, "learning_rate": 1.0403880419718868e-05, "loss": 0.4781, "step": 12000 }, { "epoch": 2.38, "eval_COMMENT": { "f1": 0.6840429765220851, "number": 6900, "precision": 0.6305942773294204, "recall": 0.7473913043478261 }, "eval_NAME": { "f1": 0.8081213743864346, "number": 8840, "precision": 0.7970950704225352, "recall": 0.8194570135746606 }, "eval_QTY": { "f1": 0.9850746268656716, "number": 7169, "precision": 0.9850746268656716, "recall": 0.9850746268656716 }, "eval_RANGE_END": { "f1": 0.7705627705627707, "number": 94, "precision": 0.6496350364963503, "recall": 0.9468085106382979 }, "eval_UNIT": { "f1": 0.9505249895002099, "number": 5778, "precision": 0.923453566182471, "recall": 0.9792315680166147 }, "eval_loss": 0.48209530115127563, "eval_overall_accuracy": 0.8303755438516144, "eval_overall_f1": 0.847679892400807, "eval_overall_precision": 0.821199387602202, "eval_overall_recall": 0.8759250894687467, "eval_runtime": 9.3635, "eval_samples_per_second": 908.425, "eval_steps_per_second": 28.408, "step": 12000 }, { "epoch": 2.47, "learning_rate": 8.75404210387382e-06, "loss": 0.472, "step": 12500 }, { "epoch": 2.57, "learning_rate": 7.104203788028774e-06, "loss": 0.4804, "step": 13000 }, { "epoch": 2.57, "eval_COMMENT": { "f1": 0.6835392320534225, "number": 6900, "precision": 0.6338080495356038, "recall": 0.7417391304347826 }, "eval_NAME": { "f1": 0.8073610023492561, "number": 8840, "precision": 0.7985173710998008, "recall": 0.8164027149321267 }, "eval_QTY": { "f1": 0.9856365918281969, "number": 7169, "precision": 0.9853617733166039, "recall": 0.9859115636769424 }, "eval_RANGE_END": { "f1": 0.7659574468085107, "number": 94, "precision": 0.6382978723404256, "recall": 0.9574468085106383 }, "eval_UNIT": { "f1": 0.9502095557418274, "number": 5778, "precision": 0.9213263979193758, "recall": 0.9809622706818969 }, "eval_loss": 0.4809066951274872, "eval_overall_accuracy": 0.8298221509808411, "eval_overall_f1": 0.8477425876010781, "eval_overall_precision": 0.8228195820661238, "eval_overall_recall": 0.8742225773948091, "eval_runtime": 7.6878, "eval_samples_per_second": 1106.424, "eval_steps_per_second": 34.6, "step": 13000 }, { "epoch": 2.67, "learning_rate": 5.4543654721837265e-06, "loss": 0.4797, "step": 13500 }, { "epoch": 2.77, "learning_rate": 3.804527156338679e-06, "loss": 0.4721, "step": 14000 }, { "epoch": 2.77, "eval_COMMENT": { "f1": 0.6857370941990419, "number": 6900, "precision": 0.6337924249877029, "recall": 0.7469565217391304 }, "eval_NAME": { "f1": 0.8065774804905238, "number": 8840, "precision": 0.795054945054945, "recall": 0.8184389140271493 }, "eval_QTY": { "f1": 0.9850059278889741, "number": 7169, "precision": 0.9849372384937238, "recall": 0.9850746268656716 }, "eval_RANGE_END": { "f1": 0.7753303964757708, "number": 94, "precision": 0.6616541353383458, "recall": 0.9361702127659575 }, "eval_UNIT": { "f1": 0.9506711409395974, "number": 5778, "precision": 0.9225008140670792, "recall": 0.9806161301488404 }, "eval_loss": 0.479864239692688, "eval_overall_accuracy": 0.8301847187237615, "eval_overall_f1": 0.8478253557132766, "eval_overall_precision": 0.8216253219024025, "eval_overall_recall": 0.8757513637469164, "eval_runtime": 9.172, "eval_samples_per_second": 927.389, "eval_steps_per_second": 29.001, "step": 14000 }, { "epoch": 2.87, "learning_rate": 2.154688840493632e-06, "loss": 0.4635, "step": 14500 }, { "epoch": 2.97, "learning_rate": 5.048505246485845e-07, "loss": 0.4792, "step": 15000 }, { "epoch": 2.97, "eval_COMMENT": { "f1": 0.6867325146823279, "number": 6900, "precision": 0.6364423552696685, "recall": 0.7456521739130435 }, "eval_NAME": { "f1": 0.8062162312705399, "number": 8840, "precision": 0.7941402392186986, "recall": 0.8186651583710407 }, "eval_QTY": { "f1": 0.9851495503032839, "number": 7169, "precision": 0.9848062447727907, "recall": 0.985493095271307 }, "eval_RANGE_END": { "f1": 0.7753303964757708, "number": 94, "precision": 0.6616541353383458, "recall": 0.9361702127659575 }, "eval_UNIT": { "f1": 0.9499538938720764, "number": 5778, "precision": 0.921151032352463, "recall": 0.9806161301488404 }, "eval_loss": 0.4792616069316864, "eval_overall_accuracy": 0.8303755438516144, "eval_overall_f1": 0.8480045763510331, "eval_overall_precision": 0.8220844886641657, "eval_overall_recall": 0.8756123831694521, "eval_runtime": 9.9787, "eval_samples_per_second": 852.419, "eval_steps_per_second": 26.657, "step": 15000 }, { "epoch": 3.0, "step": 15153, "total_flos": 468631722156180.0, "train_loss": 0.5153773278912515, "train_runtime": 629.31, "train_samples_per_second": 770.382, "train_steps_per_second": 24.079 } ], "logging_steps": 500, "max_steps": 15153, "num_train_epochs": 3, "save_steps": 500, "total_flos": 468631722156180.0, "trial_name": null, "trial_params": null }