napsternxg's picture
End of training
e419073
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 1000,
"global_step": 15153,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1,
"learning_rate": 4.835016168415495e-05,
"loss": 0.8808,
"step": 500
},
{
"epoch": 0.2,
"learning_rate": 4.670032336830991e-05,
"loss": 0.6038,
"step": 1000
},
{
"epoch": 0.2,
"eval_COMMENT": {
"f1": 0.5911795137092601,
"number": 6900,
"precision": 0.5337459131247081,
"recall": 0.662463768115942
},
"eval_NAME": {
"f1": 0.7920792079207921,
"number": 8840,
"precision": 0.7791639308382579,
"recall": 0.8054298642533937
},
"eval_QTY": {
"f1": 0.9777005904828066,
"number": 7169,
"precision": 0.9738444505950733,
"recall": 0.9815873901520435
},
"eval_RANGE_END": {
"f1": 0.5829145728643217,
"number": 94,
"precision": 0.5523809523809524,
"recall": 0.6170212765957447
},
"eval_UNIT": {
"f1": 0.9443229604709841,
"number": 5778,
"precision": 0.918520942408377,
"recall": 0.9716164762893735
},
"eval_loss": 0.5653780698776245,
"eval_overall_accuracy": 0.7991183879093199,
"eval_overall_f1": 0.8143376831425424,
"eval_overall_precision": 0.7834323326376625,
"eval_overall_recall": 0.8477815225322262,
"eval_runtime": 9.1463,
"eval_samples_per_second": 929.995,
"eval_steps_per_second": 29.083,
"step": 1000
},
{
"epoch": 0.3,
"learning_rate": 4.505048505246486e-05,
"loss": 0.5757,
"step": 1500
},
{
"epoch": 0.4,
"learning_rate": 4.3400646736619816e-05,
"loss": 0.5583,
"step": 2000
},
{
"epoch": 0.4,
"eval_COMMENT": {
"f1": 0.6243035287874767,
"number": 6900,
"precision": 0.575587084148728,
"recall": 0.6820289855072463
},
"eval_NAME": {
"f1": 0.7976755880873889,
"number": 8840,
"precision": 0.7881196864303853,
"recall": 0.8074660633484163
},
"eval_QTY": {
"f1": 0.9807208717518859,
"number": 7169,
"precision": 0.982230306422275,
"recall": 0.9792160691867764
},
"eval_RANGE_END": {
"f1": 0.6640625,
"number": 94,
"precision": 0.5246913580246914,
"recall": 0.9042553191489362
},
"eval_UNIT": {
"f1": 0.9456703443664326,
"number": 5778,
"precision": 0.9144843194309732,
"recall": 0.9790584977500866
},
"eval_loss": 0.5386040806770325,
"eval_overall_accuracy": 0.8086405617891764,
"eval_overall_f1": 0.8269673494765498,
"eval_overall_precision": 0.8007680291590732,
"eval_overall_recall": 0.8549390222716375,
"eval_runtime": 12.5089,
"eval_samples_per_second": 679.995,
"eval_steps_per_second": 21.265,
"step": 2000
},
{
"epoch": 0.49,
"learning_rate": 4.1750808420774766e-05,
"loss": 0.535,
"step": 2500
},
{
"epoch": 0.59,
"learning_rate": 4.010097010492972e-05,
"loss": 0.5351,
"step": 3000
},
{
"epoch": 0.59,
"eval_COMMENT": {
"f1": 0.6408767695218214,
"number": 6900,
"precision": 0.5827500296595088,
"recall": 0.7118840579710145
},
"eval_NAME": {
"f1": 0.7941633377718598,
"number": 8840,
"precision": 0.7792900696864111,
"recall": 0.8096153846153846
},
"eval_QTY": {
"f1": 0.9805879090404881,
"number": 7169,
"precision": 0.974776016540317,
"recall": 0.9864695215511229
},
"eval_RANGE_END": {
"f1": 0.7333333333333333,
"number": 94,
"precision": 0.6027397260273972,
"recall": 0.9361702127659575
},
"eval_UNIT": {
"f1": 0.946524064171123,
"number": 5778,
"precision": 0.9150242326332795,
"recall": 0.980269989615784
},
"eval_loss": 0.5204864740371704,
"eval_overall_accuracy": 0.8152240287000992,
"eval_overall_f1": 0.8299741602067184,
"eval_overall_precision": 0.797750288424561,
"eval_overall_recall": 0.8649108787047011,
"eval_runtime": 9.7563,
"eval_samples_per_second": 871.849,
"eval_steps_per_second": 27.264,
"step": 3000
},
{
"epoch": 0.69,
"learning_rate": 3.845113178908467e-05,
"loss": 0.5267,
"step": 3500
},
{
"epoch": 0.79,
"learning_rate": 3.680129347323962e-05,
"loss": 0.5238,
"step": 4000
},
{
"epoch": 0.79,
"eval_COMMENT": {
"f1": 0.64383198621969,
"number": 6900,
"precision": 0.592994874298267,
"recall": 0.7042028985507246
},
"eval_NAME": {
"f1": 0.8008923591745678,
"number": 8840,
"precision": 0.7898789878987899,
"recall": 0.8122171945701357
},
"eval_QTY": {
"f1": 0.9833298458533863,
"number": 7169,
"precision": 0.9833984375,
"recall": 0.983261263774585
},
"eval_RANGE_END": {
"f1": 0.7445887445887446,
"number": 94,
"precision": 0.6277372262773723,
"recall": 0.9148936170212766
},
"eval_UNIT": {
"f1": 0.948472641826116,
"number": 5778,
"precision": 0.9206581948517433,
"recall": 0.9780200761509172
},
"eval_loss": 0.507017195224762,
"eval_overall_accuracy": 0.820261812075414,
"eval_overall_f1": 0.83434160112926,
"eval_overall_precision": 0.8079213720831842,
"eval_overall_recall": 0.8625482088878079,
"eval_runtime": 7.298,
"eval_samples_per_second": 1165.532,
"eval_steps_per_second": 36.449,
"step": 4000
},
{
"epoch": 0.89,
"learning_rate": 3.515145515739457e-05,
"loss": 0.5177,
"step": 4500
},
{
"epoch": 0.99,
"learning_rate": 3.3501616841549535e-05,
"loss": 0.5134,
"step": 5000
},
{
"epoch": 0.99,
"eval_COMMENT": {
"f1": 0.6618449602209219,
"number": 6900,
"precision": 0.6057287278854254,
"recall": 0.7294202898550725
},
"eval_NAME": {
"f1": 0.8053840591801545,
"number": 8840,
"precision": 0.7922092132618448,
"recall": 0.8190045248868778
},
"eval_QTY": {
"f1": 0.9847285853116757,
"number": 7169,
"precision": 0.980102252314495,
"recall": 0.9893988003905705
},
"eval_RANGE_END": {
"f1": 0.7627906976744186,
"number": 94,
"precision": 0.6776859504132231,
"recall": 0.8723404255319149
},
"eval_UNIT": {
"f1": 0.9481246345334559,
"number": 5778,
"precision": 0.9163571774584208,
"recall": 0.9821737625475944
},
"eval_loss": 0.49935489892959595,
"eval_overall_accuracy": 0.8229906114037097,
"eval_overall_f1": 0.8405152224824355,
"eval_overall_precision": 0.8104454982418787,
"eval_overall_recall": 0.8729022619088982,
"eval_runtime": 9.399,
"eval_samples_per_second": 904.995,
"eval_steps_per_second": 28.301,
"step": 5000
},
{
"epoch": 1.09,
"learning_rate": 3.1851778525704485e-05,
"loss": 0.5019,
"step": 5500
},
{
"epoch": 1.19,
"learning_rate": 3.0201940209859435e-05,
"loss": 0.4954,
"step": 6000
},
{
"epoch": 1.19,
"eval_COMMENT": {
"f1": 0.6517109756914216,
"number": 6900,
"precision": 0.6057512759865554,
"recall": 0.7052173913043478
},
"eval_NAME": {
"f1": 0.7966298985643812,
"number": 8840,
"precision": 0.7810020649929356,
"recall": 0.81289592760181
},
"eval_QTY": {
"f1": 0.9840166782487838,
"number": 7169,
"precision": 0.9804736186123806,
"recall": 0.9875854372994839
},
"eval_RANGE_END": {
"f1": 0.7906976744186046,
"number": 94,
"precision": 0.7024793388429752,
"recall": 0.9042553191489362
},
"eval_UNIT": {
"f1": 0.9480010015858442,
"number": 5778,
"precision": 0.9155247460906013,
"recall": 0.9828660436137072
},
"eval_loss": 0.502167820930481,
"eval_overall_accuracy": 0.8203381421265552,
"eval_overall_f1": 0.8359973136333109,
"eval_overall_precision": 0.8088631859384645,
"eval_overall_recall": 0.8650151141377992,
"eval_runtime": 6.9755,
"eval_samples_per_second": 1219.408,
"eval_steps_per_second": 38.133,
"step": 6000
},
{
"epoch": 1.29,
"learning_rate": 2.855210189401439e-05,
"loss": 0.5008,
"step": 6500
},
{
"epoch": 1.39,
"learning_rate": 2.690226357816934e-05,
"loss": 0.4998,
"step": 7000
},
{
"epoch": 1.39,
"eval_COMMENT": {
"f1": 0.672593378889405,
"number": 6900,
"precision": 0.6202128961213753,
"recall": 0.7346376811594203
},
"eval_NAME": {
"f1": 0.8033342595165324,
"number": 8840,
"precision": 0.7895139268159476,
"recall": 0.8176470588235294
},
"eval_QTY": {
"f1": 0.9836797321802204,
"number": 7169,
"precision": 0.9836797321802204,
"recall": 0.9836797321802204
},
"eval_RANGE_END": {
"f1": 0.7565217391304346,
"number": 94,
"precision": 0.6397058823529411,
"recall": 0.925531914893617
},
"eval_UNIT": {
"f1": 0.9496971736204576,
"number": 5778,
"precision": 0.9238952536824877,
"recall": 0.976981654551748
},
"eval_loss": 0.4921141266822815,
"eval_overall_accuracy": 0.825166017861232,
"eval_overall_f1": 0.8427189032995095,
"eval_overall_precision": 0.8158279933643431,
"eval_overall_recall": 0.871442965845523,
"eval_runtime": 8.6591,
"eval_samples_per_second": 982.314,
"eval_steps_per_second": 30.719,
"step": 7000
},
{
"epoch": 1.48,
"learning_rate": 2.5252425262324292e-05,
"loss": 0.4981,
"step": 7500
},
{
"epoch": 1.58,
"learning_rate": 2.3602586946479245e-05,
"loss": 0.4912,
"step": 8000
},
{
"epoch": 1.58,
"eval_COMMENT": {
"f1": 0.6753212169629186,
"number": 6900,
"precision": 0.6245536264006896,
"recall": 0.735072463768116
},
"eval_NAME": {
"f1": 0.8065398136264716,
"number": 8840,
"precision": 0.7958374628344896,
"recall": 0.8175339366515837
},
"eval_QTY": {
"f1": 0.9842043003270474,
"number": 7169,
"precision": 0.9819494584837545,
"recall": 0.9864695215511229
},
"eval_RANGE_END": {
"f1": 0.7665198237885463,
"number": 94,
"precision": 0.6541353383458647,
"recall": 0.925531914893617
},
"eval_UNIT": {
"f1": 0.9495798319327731,
"number": 5778,
"precision": 0.9229010127409344,
"recall": 0.9778470058843891
},
"eval_loss": 0.4943971037864685,
"eval_overall_accuracy": 0.8256049156552935,
"eval_overall_f1": 0.8448183041722745,
"eval_overall_precision": 0.818943866401383,
"eval_overall_recall": 0.8723810847434071,
"eval_runtime": 8.4356,
"eval_samples_per_second": 1008.348,
"eval_steps_per_second": 31.533,
"step": 8000
},
{
"epoch": 1.68,
"learning_rate": 2.19527486306342e-05,
"loss": 0.4874,
"step": 8500
},
{
"epoch": 1.78,
"learning_rate": 2.0302910314789152e-05,
"loss": 0.4974,
"step": 9000
},
{
"epoch": 1.78,
"eval_COMMENT": {
"f1": 0.6773682124455014,
"number": 6900,
"precision": 0.6223597960670065,
"recall": 0.7430434782608696
},
"eval_NAME": {
"f1": 0.8045016435456014,
"number": 8840,
"precision": 0.7926226808650785,
"recall": 0.8167420814479638
},
"eval_QTY": {
"f1": 0.984450177811868,
"number": 7169,
"precision": 0.9842442833240379,
"recall": 0.9846561584600363
},
"eval_RANGE_END": {
"f1": 0.7531380753138076,
"number": 94,
"precision": 0.6206896551724138,
"recall": 0.9574468085106383
},
"eval_UNIT": {
"f1": 0.949092741935484,
"number": 5778,
"precision": 0.9221351616062684,
"recall": 0.9776739356178609
},
"eval_loss": 0.4863777756690979,
"eval_overall_accuracy": 0.8257766582703611,
"eval_overall_f1": 0.8442027160866864,
"eval_overall_precision": 0.8166612536537837,
"eval_overall_recall": 0.8736666550849519,
"eval_runtime": 8.6924,
"eval_samples_per_second": 978.559,
"eval_steps_per_second": 30.602,
"step": 9000
},
{
"epoch": 1.88,
"learning_rate": 1.8653071998944105e-05,
"loss": 0.4939,
"step": 9500
},
{
"epoch": 1.98,
"learning_rate": 1.700323368309906e-05,
"loss": 0.4872,
"step": 10000
},
{
"epoch": 1.98,
"eval_COMMENT": {
"f1": 0.6780160857908847,
"number": 6900,
"precision": 0.6306733167082295,
"recall": 0.7330434782608696
},
"eval_NAME": {
"f1": 0.8058138887341983,
"number": 8840,
"precision": 0.7935724470768893,
"recall": 0.8184389140271493
},
"eval_QTY": {
"f1": 0.9854324945981738,
"number": 7169,
"precision": 0.9848147116188354,
"recall": 0.9860510531454875
},
"eval_RANGE_END": {
"f1": 0.7733333333333333,
"number": 94,
"precision": 0.6641221374045801,
"recall": 0.925531914893617
},
"eval_UNIT": {
"f1": 0.949941206114564,
"number": 5778,
"precision": 0.922813315926893,
"recall": 0.9787123572170301
},
"eval_loss": 0.4847513437271118,
"eval_overall_accuracy": 0.8277421570872453,
"eval_overall_f1": 0.8458933535506697,
"eval_overall_precision": 0.8210898148753843,
"eval_overall_recall": 0.8722421041659428,
"eval_runtime": 7.9849,
"eval_samples_per_second": 1065.267,
"eval_steps_per_second": 33.313,
"step": 10000
},
{
"epoch": 2.08,
"learning_rate": 1.535339536725401e-05,
"loss": 0.4725,
"step": 10500
},
{
"epoch": 2.18,
"learning_rate": 1.3703557051408963e-05,
"loss": 0.4777,
"step": 11000
},
{
"epoch": 2.18,
"eval_COMMENT": {
"f1": 0.6794315831609847,
"number": 6900,
"precision": 0.6294968475707752,
"recall": 0.7379710144927536
},
"eval_NAME": {
"f1": 0.8072463768115943,
"number": 8840,
"precision": 0.7957142857142857,
"recall": 0.8191176470588235
},
"eval_QTY": {
"f1": 0.9849162011173185,
"number": 7169,
"precision": 0.9861557824080548,
"recall": 0.9836797321802204
},
"eval_RANGE_END": {
"f1": 0.7837837837837838,
"number": 94,
"precision": 0.6796875,
"recall": 0.925531914893617
},
"eval_UNIT": {
"f1": 0.949970570924073,
"number": 5778,
"precision": 0.9237939493049877,
"recall": 0.9776739356178609
},
"eval_loss": 0.4820418059825897,
"eval_overall_accuracy": 0.8306617815433937,
"eval_overall_f1": 0.8463378478539182,
"eval_overall_precision": 0.8214040480005231,
"eval_overall_recall": 0.872832771620166,
"eval_runtime": 7.6368,
"eval_samples_per_second": 1113.817,
"eval_steps_per_second": 34.831,
"step": 11000
},
{
"epoch": 2.28,
"learning_rate": 1.2053718735563915e-05,
"loss": 0.4827,
"step": 11500
},
{
"epoch": 2.38,
"learning_rate": 1.0403880419718868e-05,
"loss": 0.4781,
"step": 12000
},
{
"epoch": 2.38,
"eval_COMMENT": {
"f1": 0.6840429765220851,
"number": 6900,
"precision": 0.6305942773294204,
"recall": 0.7473913043478261
},
"eval_NAME": {
"f1": 0.8081213743864346,
"number": 8840,
"precision": 0.7970950704225352,
"recall": 0.8194570135746606
},
"eval_QTY": {
"f1": 0.9850746268656716,
"number": 7169,
"precision": 0.9850746268656716,
"recall": 0.9850746268656716
},
"eval_RANGE_END": {
"f1": 0.7705627705627707,
"number": 94,
"precision": 0.6496350364963503,
"recall": 0.9468085106382979
},
"eval_UNIT": {
"f1": 0.9505249895002099,
"number": 5778,
"precision": 0.923453566182471,
"recall": 0.9792315680166147
},
"eval_loss": 0.48209530115127563,
"eval_overall_accuracy": 0.8303755438516144,
"eval_overall_f1": 0.847679892400807,
"eval_overall_precision": 0.821199387602202,
"eval_overall_recall": 0.8759250894687467,
"eval_runtime": 9.3635,
"eval_samples_per_second": 908.425,
"eval_steps_per_second": 28.408,
"step": 12000
},
{
"epoch": 2.47,
"learning_rate": 8.75404210387382e-06,
"loss": 0.472,
"step": 12500
},
{
"epoch": 2.57,
"learning_rate": 7.104203788028774e-06,
"loss": 0.4804,
"step": 13000
},
{
"epoch": 2.57,
"eval_COMMENT": {
"f1": 0.6835392320534225,
"number": 6900,
"precision": 0.6338080495356038,
"recall": 0.7417391304347826
},
"eval_NAME": {
"f1": 0.8073610023492561,
"number": 8840,
"precision": 0.7985173710998008,
"recall": 0.8164027149321267
},
"eval_QTY": {
"f1": 0.9856365918281969,
"number": 7169,
"precision": 0.9853617733166039,
"recall": 0.9859115636769424
},
"eval_RANGE_END": {
"f1": 0.7659574468085107,
"number": 94,
"precision": 0.6382978723404256,
"recall": 0.9574468085106383
},
"eval_UNIT": {
"f1": 0.9502095557418274,
"number": 5778,
"precision": 0.9213263979193758,
"recall": 0.9809622706818969
},
"eval_loss": 0.4809066951274872,
"eval_overall_accuracy": 0.8298221509808411,
"eval_overall_f1": 0.8477425876010781,
"eval_overall_precision": 0.8228195820661238,
"eval_overall_recall": 0.8742225773948091,
"eval_runtime": 7.6878,
"eval_samples_per_second": 1106.424,
"eval_steps_per_second": 34.6,
"step": 13000
},
{
"epoch": 2.67,
"learning_rate": 5.4543654721837265e-06,
"loss": 0.4797,
"step": 13500
},
{
"epoch": 2.77,
"learning_rate": 3.804527156338679e-06,
"loss": 0.4721,
"step": 14000
},
{
"epoch": 2.77,
"eval_COMMENT": {
"f1": 0.6857370941990419,
"number": 6900,
"precision": 0.6337924249877029,
"recall": 0.7469565217391304
},
"eval_NAME": {
"f1": 0.8065774804905238,
"number": 8840,
"precision": 0.795054945054945,
"recall": 0.8184389140271493
},
"eval_QTY": {
"f1": 0.9850059278889741,
"number": 7169,
"precision": 0.9849372384937238,
"recall": 0.9850746268656716
},
"eval_RANGE_END": {
"f1": 0.7753303964757708,
"number": 94,
"precision": 0.6616541353383458,
"recall": 0.9361702127659575
},
"eval_UNIT": {
"f1": 0.9506711409395974,
"number": 5778,
"precision": 0.9225008140670792,
"recall": 0.9806161301488404
},
"eval_loss": 0.479864239692688,
"eval_overall_accuracy": 0.8301847187237615,
"eval_overall_f1": 0.8478253557132766,
"eval_overall_precision": 0.8216253219024025,
"eval_overall_recall": 0.8757513637469164,
"eval_runtime": 9.172,
"eval_samples_per_second": 927.389,
"eval_steps_per_second": 29.001,
"step": 14000
},
{
"epoch": 2.87,
"learning_rate": 2.154688840493632e-06,
"loss": 0.4635,
"step": 14500
},
{
"epoch": 2.97,
"learning_rate": 5.048505246485845e-07,
"loss": 0.4792,
"step": 15000
},
{
"epoch": 2.97,
"eval_COMMENT": {
"f1": 0.6867325146823279,
"number": 6900,
"precision": 0.6364423552696685,
"recall": 0.7456521739130435
},
"eval_NAME": {
"f1": 0.8062162312705399,
"number": 8840,
"precision": 0.7941402392186986,
"recall": 0.8186651583710407
},
"eval_QTY": {
"f1": 0.9851495503032839,
"number": 7169,
"precision": 0.9848062447727907,
"recall": 0.985493095271307
},
"eval_RANGE_END": {
"f1": 0.7753303964757708,
"number": 94,
"precision": 0.6616541353383458,
"recall": 0.9361702127659575
},
"eval_UNIT": {
"f1": 0.9499538938720764,
"number": 5778,
"precision": 0.921151032352463,
"recall": 0.9806161301488404
},
"eval_loss": 0.4792616069316864,
"eval_overall_accuracy": 0.8303755438516144,
"eval_overall_f1": 0.8480045763510331,
"eval_overall_precision": 0.8220844886641657,
"eval_overall_recall": 0.8756123831694521,
"eval_runtime": 9.9787,
"eval_samples_per_second": 852.419,
"eval_steps_per_second": 26.657,
"step": 15000
},
{
"epoch": 3.0,
"step": 15153,
"total_flos": 468631722156180.0,
"train_loss": 0.5153773278912515,
"train_runtime": 629.31,
"train_samples_per_second": 770.382,
"train_steps_per_second": 24.079
}
],
"logging_steps": 500,
"max_steps": 15153,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 468631722156180.0,
"trial_name": null,
"trial_params": null
}