napsternxg's picture
End of training
f9d4f2d
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 1000,
"global_step": 15153,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1,
"learning_rate": 4.835016168415495e-05,
"loss": 0.5803,
"step": 500
},
{
"epoch": 0.2,
"learning_rate": 4.670032336830991e-05,
"loss": 0.5473,
"step": 1000
},
{
"epoch": 0.2,
"eval_COMMENT": {
"f1": 0.5783916594727406,
"number": 6879,
"precision": 0.53239608801956,
"recall": 0.6330862043901729
},
"eval_NAME": {
"f1": 0.7901518890168339,
"number": 8827,
"precision": 0.7656748140276302,
"recall": 0.816245610060043
},
"eval_QTY": {
"f1": 0.9788678722372341,
"number": 7190,
"precision": 0.9752864835013116,
"recall": 0.9824756606397774
},
"eval_RANGE_END": {
"f1": 0.6629834254143646,
"number": 82,
"precision": 0.6060606060606061,
"recall": 0.7317073170731707
},
"eval_UNIT": {
"f1": 0.9520847343644923,
"number": 5762,
"precision": 0.923214867949136,
"recall": 0.9828184658104825
},
"eval_loss": 0.5438547134399414,
"eval_overall_accuracy": 0.8057357133291243,
"eval_overall_f1": 0.8141992442229877,
"eval_overall_precision": 0.783718534732505,
"eval_overall_recall": 0.8471468336812804,
"eval_runtime": 7.0491,
"eval_samples_per_second": 1206.676,
"eval_steps_per_second": 37.735,
"step": 1000
},
{
"epoch": 0.3,
"learning_rate": 4.505048505246486e-05,
"loss": 0.5755,
"step": 1500
},
{
"epoch": 0.4,
"learning_rate": 4.3400646736619816e-05,
"loss": 0.5634,
"step": 2000
},
{
"epoch": 0.4,
"eval_COMMENT": {
"f1": 0.6060124486822938,
"number": 6879,
"precision": 0.5564878997932629,
"recall": 0.6652129670010176
},
"eval_NAME": {
"f1": 0.8080031209942595,
"number": 8827,
"precision": 0.7951952610794208,
"recall": 0.8212303160756769
},
"eval_QTY": {
"f1": 0.9806960492631287,
"number": 7190,
"precision": 0.9757675891504888,
"recall": 0.9856745479833101
},
"eval_RANGE_END": {
"f1": 0.6893203883495146,
"number": 82,
"precision": 0.5725806451612904,
"recall": 0.8658536585365854
},
"eval_UNIT": {
"f1": 0.9526850995882007,
"number": 5762,
"precision": 0.9235782955841616,
"recall": 0.9836862200624783
},
"eval_loss": 0.5236877799034119,
"eval_overall_accuracy": 0.8120300751879699,
"eval_overall_f1": 0.8271731288693521,
"eval_overall_precision": 0.7987233904675501,
"eval_overall_recall": 0.8577244258872652,
"eval_runtime": 10.0134,
"eval_samples_per_second": 849.459,
"eval_steps_per_second": 26.564,
"step": 2000
},
{
"epoch": 0.49,
"learning_rate": 4.1750808420774766e-05,
"loss": 0.5454,
"step": 2500
},
{
"epoch": 0.59,
"learning_rate": 4.010097010492972e-05,
"loss": 0.5535,
"step": 3000
},
{
"epoch": 0.59,
"eval_COMMENT": {
"f1": 0.6490723804546643,
"number": 6879,
"precision": 0.5893937596393404,
"recall": 0.7221979938944614
},
"eval_NAME": {
"f1": 0.8041903488242506,
"number": 8827,
"precision": 0.7913148371531966,
"recall": 0.8174917865639515
},
"eval_QTY": {
"f1": 0.9824975691068204,
"number": 7190,
"precision": 0.9812708102108768,
"recall": 0.9837273991655077
},
"eval_RANGE_END": {
"f1": 0.7004608294930875,
"number": 82,
"precision": 0.562962962962963,
"recall": 0.926829268292683
},
"eval_UNIT": {
"f1": 0.9546377792823292,
"number": 5762,
"precision": 0.931615460852329,
"recall": 0.9788267962513016
},
"eval_loss": 0.5021980404853821,
"eval_overall_accuracy": 0.8213281294840153,
"eval_overall_f1": 0.8368266733685181,
"eval_overall_precision": 0.8070124414283406,
"eval_overall_recall": 0.86892832289492,
"eval_runtime": 7.7747,
"eval_samples_per_second": 1094.065,
"eval_steps_per_second": 34.214,
"step": 3000
},
{
"epoch": 0.69,
"learning_rate": 3.845113178908467e-05,
"loss": 0.5391,
"step": 3500
},
{
"epoch": 0.79,
"learning_rate": 3.680129347323962e-05,
"loss": 0.5366,
"step": 4000
},
{
"epoch": 0.79,
"eval_COMMENT": {
"f1": 0.6484485427744499,
"number": 6879,
"precision": 0.6037854098771622,
"recall": 0.7002471289431603
},
"eval_NAME": {
"f1": 0.8109129299327665,
"number": 8827,
"precision": 0.7957470010905126,
"recall": 0.826668177183641
},
"eval_QTY": {
"f1": 0.9822575077666552,
"number": 7190,
"precision": 0.9751884852638794,
"recall": 0.9894297635605007
},
"eval_RANGE_END": {
"f1": 0.7090909090909091,
"number": 82,
"precision": 0.5652173913043478,
"recall": 0.9512195121951219
},
"eval_UNIT": {
"f1": 0.955166020562953,
"number": 5762,
"precision": 0.9284076015727392,
"recall": 0.9835126692120791
},
"eval_loss": 0.4891820251941681,
"eval_overall_accuracy": 0.8250970938797375,
"eval_overall_f1": 0.8404880100967607,
"eval_overall_precision": 0.8138504155124654,
"eval_overall_recall": 0.86892832289492,
"eval_runtime": 9.7418,
"eval_samples_per_second": 873.148,
"eval_steps_per_second": 27.305,
"step": 4000
},
{
"epoch": 0.89,
"learning_rate": 3.515145515739457e-05,
"loss": 0.5234,
"step": 4500
},
{
"epoch": 0.99,
"learning_rate": 3.3501616841549535e-05,
"loss": 0.5256,
"step": 5000
},
{
"epoch": 0.99,
"eval_COMMENT": {
"f1": 0.6685729485303898,
"number": 6879,
"precision": 0.6161294276259346,
"recall": 0.730774821921791
},
"eval_NAME": {
"f1": 0.8137271260915513,
"number": 8827,
"precision": 0.7992788461538461,
"recall": 0.8287073750991277
},
"eval_QTY": {
"f1": 0.9845196959225985,
"number": 7190,
"precision": 0.9784340659340659,
"recall": 0.9906815020862308
},
"eval_RANGE_END": {
"f1": 0.7225130890052357,
"number": 82,
"precision": 0.6330275229357798,
"recall": 0.8414634146341463
},
"eval_UNIT": {
"f1": 0.9555686704325098,
"number": 5762,
"precision": 0.9291687161829808,
"recall": 0.9835126692120791
},
"eval_loss": 0.4813206195831299,
"eval_overall_accuracy": 0.8299374390173908,
"eval_overall_f1": 0.8465375636137659,
"eval_overall_precision": 0.8182408519757135,
"eval_overall_recall": 0.8768615170494085,
"eval_runtime": 8.6073,
"eval_samples_per_second": 988.232,
"eval_steps_per_second": 30.904,
"step": 5000
},
{
"epoch": 1.09,
"learning_rate": 3.1851778525704485e-05,
"loss": 0.5108,
"step": 5500
},
{
"epoch": 1.19,
"learning_rate": 3.0201940209859435e-05,
"loss": 0.5079,
"step": 6000
},
{
"epoch": 1.19,
"eval_COMMENT": {
"f1": 0.6735661347399347,
"number": 6879,
"precision": 0.6228698444060262,
"recall": 0.7332461113533943
},
"eval_NAME": {
"f1": 0.8162536280419737,
"number": 8827,
"precision": 0.8044889426779623,
"recall": 0.82836750877988
},
"eval_QTY": {
"f1": 0.9861911040177642,
"number": 7190,
"precision": 0.9840742279462679,
"recall": 0.988317107093185
},
"eval_RANGE_END": {
"f1": 0.7253886010362693,
"number": 82,
"precision": 0.6306306306306306,
"recall": 0.8536585365853658
},
"eval_UNIT": {
"f1": 0.9569530855893728,
"number": 5762,
"precision": 0.928082191780822,
"recall": 0.9876778896216591
},
"eval_loss": 0.4765673577785492,
"eval_overall_accuracy": 0.8312575331458417,
"eval_overall_f1": 0.8493879544038659,
"eval_overall_precision": 0.8229095298685198,
"eval_overall_recall": 0.8776270006958943,
"eval_runtime": 12.1667,
"eval_samples_per_second": 699.123,
"eval_steps_per_second": 21.863,
"step": 6000
},
{
"epoch": 1.29,
"learning_rate": 2.855210189401439e-05,
"loss": 0.5028,
"step": 6500
},
{
"epoch": 1.39,
"learning_rate": 2.690226357816934e-05,
"loss": 0.5047,
"step": 7000
},
{
"epoch": 1.39,
"eval_COMMENT": {
"f1": 0.6810760790534734,
"number": 6879,
"precision": 0.6244848484848485,
"recall": 0.7489460677424045
},
"eval_NAME": {
"f1": 0.8180240694094598,
"number": 8827,
"precision": 0.8084753263996459,
"recall": 0.8278010649144669
},
"eval_QTY": {
"f1": 0.9849878934624697,
"number": 7190,
"precision": 0.9799036476256022,
"recall": 0.990125173852573
},
"eval_RANGE_END": {
"f1": 0.7264150943396225,
"number": 82,
"precision": 0.5923076923076923,
"recall": 0.9390243902439024
},
"eval_UNIT": {
"f1": 0.9571404370658986,
"number": 5762,
"precision": 0.9348113831899404,
"recall": 0.9805623047552933
},
"eval_loss": 0.47799554467201233,
"eval_overall_accuracy": 0.8304731293883564,
"eval_overall_f1": 0.8510602519044176,
"eval_overall_precision": 0.8235428125101703,
"eval_overall_recall": 0.8804801670146137,
"eval_runtime": 8.0121,
"eval_samples_per_second": 1061.644,
"eval_steps_per_second": 33.2,
"step": 7000
},
{
"epoch": 1.48,
"learning_rate": 2.5252425262324292e-05,
"loss": 0.5058,
"step": 7500
},
{
"epoch": 1.58,
"learning_rate": 2.3602586946479245e-05,
"loss": 0.4912,
"step": 8000
},
{
"epoch": 1.58,
"eval_COMMENT": {
"f1": 0.6825715049452018,
"number": 6879,
"precision": 0.6316635745207174,
"recall": 0.7424044192469835
},
"eval_NAME": {
"f1": 0.8178567437702537,
"number": 8827,
"precision": 0.8068570168669386,
"recall": 0.8291605301914581
},
"eval_QTY": {
"f1": 0.9859722222222222,
"number": 7190,
"precision": 0.9846047156726768,
"recall": 0.9873435326842838
},
"eval_RANGE_END": {
"f1": 0.7422680412371134,
"number": 82,
"precision": 0.6428571428571429,
"recall": 0.8780487804878049
},
"eval_UNIT": {
"f1": 0.9566829597168379,
"number": 5762,
"precision": 0.9298820445609436,
"recall": 0.9850746268656716
},
"eval_loss": 0.4724733531475067,
"eval_overall_accuracy": 0.8342229619851155,
"eval_overall_f1": 0.8520616297495027,
"eval_overall_precision": 0.8264011510038585,
"eval_overall_recall": 0.8793667362560891,
"eval_runtime": 10.131,
"eval_samples_per_second": 839.6,
"eval_steps_per_second": 26.256,
"step": 8000
},
{
"epoch": 1.68,
"learning_rate": 2.19527486306342e-05,
"loss": 0.4994,
"step": 8500
},
{
"epoch": 1.78,
"learning_rate": 2.0302910314789152e-05,
"loss": 0.4955,
"step": 9000
},
{
"epoch": 1.78,
"eval_COMMENT": {
"f1": 0.688906860762906,
"number": 6879,
"precision": 0.6421661012690036,
"recall": 0.7429858991132432
},
"eval_NAME": {
"f1": 0.8154938237102454,
"number": 8827,
"precision": 0.8048323036187114,
"recall": 0.8264415996374759
},
"eval_QTY": {
"f1": 0.9862274205827393,
"number": 7190,
"precision": 0.9815401570464252,
"recall": 0.9909596662030598
},
"eval_RANGE_END": {
"f1": 0.7222222222222221,
"number": 82,
"precision": 0.582089552238806,
"recall": 0.9512195121951219
},
"eval_UNIT": {
"f1": 0.9569620253164556,
"number": 5762,
"precision": 0.9313403416557161,
"recall": 0.9840333217632766
},
"eval_loss": 0.47245046496391296,
"eval_overall_accuracy": 0.8331898448411104,
"eval_overall_f1": 0.8533859968942002,
"eval_overall_precision": 0.8287109887227905,
"eval_overall_recall": 0.8795755045233125,
"eval_runtime": 9.9308,
"eval_samples_per_second": 856.526,
"eval_steps_per_second": 26.785,
"step": 9000
},
{
"epoch": 1.88,
"learning_rate": 1.8653071998944105e-05,
"loss": 0.5006,
"step": 9500
},
{
"epoch": 1.98,
"learning_rate": 1.700323368309906e-05,
"loss": 0.4917,
"step": 10000
},
{
"epoch": 1.98,
"eval_COMMENT": {
"f1": 0.6934379363074265,
"number": 6879,
"precision": 0.6389365351629502,
"recall": 0.7581043756359936
},
"eval_NAME": {
"f1": 0.8204812534974818,
"number": 8827,
"precision": 0.8106822956983302,
"recall": 0.8305199954684491
},
"eval_QTY": {
"f1": 0.9863869981941935,
"number": 7190,
"precision": 0.9851553829078802,
"recall": 0.9876216968011127
},
"eval_RANGE_END": {
"f1": 0.7411167512690355,
"number": 82,
"precision": 0.6347826086956522,
"recall": 0.8902439024390244
},
"eval_UNIT": {
"f1": 0.9581293263548878,
"number": 5762,
"precision": 0.9327744904667982,
"recall": 0.9849010760152724
},
"eval_loss": 0.469653844833374,
"eval_overall_accuracy": 0.8341081711913371,
"eval_overall_f1": 0.8557420137484835,
"eval_overall_precision": 0.8295766366130929,
"eval_overall_recall": 0.8836116910229646,
"eval_runtime": 7.3483,
"eval_samples_per_second": 1157.542,
"eval_steps_per_second": 36.199,
"step": 10000
},
{
"epoch": 2.08,
"learning_rate": 1.535339536725401e-05,
"loss": 0.488,
"step": 10500
},
{
"epoch": 2.18,
"learning_rate": 1.3703557051408963e-05,
"loss": 0.4913,
"step": 11000
},
{
"epoch": 2.18,
"eval_COMMENT": {
"f1": 0.6905655320289467,
"number": 6879,
"precision": 0.6405220633934121,
"recall": 0.7490914377089694
},
"eval_NAME": {
"f1": 0.8180196253345228,
"number": 8827,
"precision": 0.8053573388955978,
"recall": 0.8310864393338621
},
"eval_QTY": {
"f1": 0.9862671660424469,
"number": 7190,
"precision": 0.9836745987825124,
"recall": 0.9888734353268428
},
"eval_RANGE_END": {
"f1": 0.7395833333333335,
"number": 82,
"precision": 0.6454545454545455,
"recall": 0.8658536585365854
},
"eval_UNIT": {
"f1": 0.9573139868398851,
"number": 5762,
"precision": 0.9313854235062377,
"recall": 0.9847275251648733
},
"eval_loss": 0.46854740381240845,
"eval_overall_accuracy": 0.8354856607166772,
"eval_overall_f1": 0.8544265390061359,
"eval_overall_precision": 0.8286685848809835,
"eval_overall_recall": 0.8818371607515657,
"eval_runtime": 10.1454,
"eval_samples_per_second": 838.411,
"eval_steps_per_second": 26.219,
"step": 11000
},
{
"epoch": 2.28,
"learning_rate": 1.2053718735563915e-05,
"loss": 0.4817,
"step": 11500
},
{
"epoch": 2.38,
"learning_rate": 1.0403880419718868e-05,
"loss": 0.4769,
"step": 12000
},
{
"epoch": 2.38,
"eval_COMMENT": {
"f1": 0.6879323081055672,
"number": 6879,
"precision": 0.6392910634048926,
"recall": 0.7445849687454572
},
"eval_NAME": {
"f1": 0.8153271236544146,
"number": 8827,
"precision": 0.8030103274005713,
"recall": 0.8280276424606321
},
"eval_QTY": {
"f1": 0.9868704411253908,
"number": 7190,
"precision": 0.9858431644691187,
"recall": 0.9878998609179416
},
"eval_RANGE_END": {
"f1": 0.7628865979381443,
"number": 82,
"precision": 0.6607142857142857,
"recall": 0.9024390243902439
},
"eval_UNIT": {
"f1": 0.958024275118004,
"number": 5762,
"precision": 0.9313339888561127,
"recall": 0.9862894828184658
},
"eval_loss": 0.465949147939682,
"eval_overall_accuracy": 0.8352943427270466,
"eval_overall_f1": 0.8533733740488925,
"eval_overall_precision": 0.8283169030229588,
"eval_overall_recall": 0.8799930410577592,
"eval_runtime": 7.683,
"eval_samples_per_second": 1107.123,
"eval_steps_per_second": 34.622,
"step": 12000
},
{
"epoch": 2.47,
"learning_rate": 8.75404210387382e-06,
"loss": 0.4792,
"step": 12500
},
{
"epoch": 2.57,
"learning_rate": 7.104203788028774e-06,
"loss": 0.4752,
"step": 13000
},
{
"epoch": 2.57,
"eval_COMMENT": {
"f1": 0.6899657235029236,
"number": 6879,
"precision": 0.641625,
"recall": 0.7461840383776712
},
"eval_NAME": {
"f1": 0.8208305425318152,
"number": 8827,
"precision": 0.8089998899768952,
"recall": 0.833012348476266
},
"eval_QTY": {
"f1": 0.9868759113950422,
"number": 7190,
"precision": 0.9854389127721537,
"recall": 0.988317107093185
},
"eval_RANGE_END": {
"f1": 0.7419354838709676,
"number": 82,
"precision": 0.6634615384615384,
"recall": 0.8414634146341463
},
"eval_UNIT": {
"f1": 0.95803428185426,
"number": 5762,
"precision": 0.932905772076961,
"recall": 0.9845539743144741
},
"eval_loss": 0.46514269709587097,
"eval_overall_accuracy": 0.8359448238917906,
"eval_overall_f1": 0.8555170958210215,
"eval_overall_precision": 0.831031654912252,
"eval_overall_recall": 0.8814892136395268,
"eval_runtime": 12.3762,
"eval_samples_per_second": 687.286,
"eval_steps_per_second": 21.493,
"step": 13000
},
{
"epoch": 2.67,
"learning_rate": 5.4543654721837265e-06,
"loss": 0.471,
"step": 13500
},
{
"epoch": 2.77,
"learning_rate": 3.804527156338679e-06,
"loss": 0.4834,
"step": 14000
},
{
"epoch": 2.77,
"eval_COMMENT": {
"f1": 0.694272653939231,
"number": 6879,
"precision": 0.6457421533074903,
"recall": 0.7506905073411834
},
"eval_NAME": {
"f1": 0.8180143981248955,
"number": 8827,
"precision": 0.8060932688077431,
"recall": 0.830293417922284
},
"eval_QTY": {
"f1": 0.9868311616301636,
"number": 7190,
"precision": 0.9835589941972921,
"recall": 0.990125173852573
},
"eval_RANGE_END": {
"f1": 0.7437185929648242,
"number": 82,
"precision": 0.6324786324786325,
"recall": 0.9024390243902439
},
"eval_UNIT": {
"f1": 0.9579080556727119,
"number": 5762,
"precision": 0.9318890530116527,
"recall": 0.9854217285664699
},
"eval_loss": 0.46282991766929626,
"eval_overall_accuracy": 0.8365187778606823,
"eval_overall_f1": 0.8557788012213844,
"eval_overall_precision": 0.8305989455414743,
"eval_overall_recall": 0.8825330549756437,
"eval_runtime": 8.3554,
"eval_samples_per_second": 1018.028,
"eval_steps_per_second": 31.836,
"step": 14000
},
{
"epoch": 2.87,
"learning_rate": 2.154688840493632e-06,
"loss": 0.4731,
"step": 14500
},
{
"epoch": 2.97,
"learning_rate": 5.048505246485845e-07,
"loss": 0.4784,
"step": 15000
},
{
"epoch": 2.97,
"eval_COMMENT": {
"f1": 0.6956345998383185,
"number": 6879,
"precision": 0.6482109227871939,
"recall": 0.7505451373746184
},
"eval_NAME": {
"f1": 0.8189838079285315,
"number": 8827,
"precision": 0.8074424749532093,
"recall": 0.8308598617876969
},
"eval_QTY": {
"f1": 0.9866888519134775,
"number": 7190,
"precision": 0.9836881393419962,
"recall": 0.9897079276773296
},
"eval_RANGE_END": {
"f1": 0.7487179487179487,
"number": 82,
"precision": 0.6460176991150443,
"recall": 0.8902439024390244
},
"eval_UNIT": {
"f1": 0.958502024291498,
"number": 5762,
"precision": 0.9323925172300623,
"recall": 0.9861159319680667
},
"eval_loss": 0.46261611580848694,
"eval_overall_accuracy": 0.8369970728347587,
"eval_overall_f1": 0.856607405156258,
"eval_overall_precision": 0.8320377841188625,
"eval_overall_recall": 0.8826722338204593,
"eval_runtime": 11.364,
"eval_samples_per_second": 748.503,
"eval_steps_per_second": 23.407,
"step": 15000
},
{
"epoch": 3.0,
"step": 15153,
"total_flos": 465391972741860.0,
"train_loss": 0.5097090245898089,
"train_runtime": 609.9382,
"train_samples_per_second": 794.849,
"train_steps_per_second": 24.843
}
],
"logging_steps": 500,
"max_steps": 15153,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 465391972741860.0,
"trial_name": null,
"trial_params": null
}