xlm-v-base-ner / trainer_state.json
Xmm's picture
Upload 10 files
f734060 verified
{
"best_metric": 0.267339825630188,
"best_model_checkpoint": "./checkpoint-xlm-v-base/checkpoint-62000",
"epoch": 3.032583672746591,
"eval_steps": 1000,
"global_step": 68500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04,
"grad_norm": 46.0612678527832,
"learning_rate": 1.9873510587163855e-05,
"loss": 0.943,
"step": 1000
},
{
"epoch": 0.04,
"eval_LOC_f1": 0.6007898672649095,
"eval_ORG_f1": 0.5205864729691723,
"eval_PER_f1": 0.6537866457692849,
"eval_loss": 0.5755352973937988,
"eval_overall_accuracy": 0.813493507822672,
"eval_overall_f1": 0.5959796923883924,
"eval_overall_precision": 0.571592844668358,
"eval_overall_recall": 0.622540194436182,
"eval_runtime": 909.7639,
"eval_samples_per_second": 72.217,
"eval_steps_per_second": 0.282,
"step": 1000
},
{
"epoch": 0.09,
"grad_norm": 5.543514251708984,
"learning_rate": 1.974702117432771e-05,
"loss": 0.5825,
"step": 2000
},
{
"epoch": 0.09,
"eval_LOC_f1": 0.6761863812260971,
"eval_ORG_f1": 0.573783382357224,
"eval_PER_f1": 0.7231398018028106,
"eval_loss": 0.5157074332237244,
"eval_overall_accuracy": 0.844698117312631,
"eval_overall_f1": 0.664661446599853,
"eval_overall_precision": 0.690302943433134,
"eval_overall_recall": 0.640856646367237,
"eval_runtime": 884.4485,
"eval_samples_per_second": 74.284,
"eval_steps_per_second": 0.291,
"step": 2000
},
{
"epoch": 0.13,
"grad_norm": 30.680952072143555,
"learning_rate": 1.9620531761491565e-05,
"loss": 0.5153,
"step": 3000
},
{
"epoch": 0.13,
"eval_LOC_f1": 0.6448332585613877,
"eval_ORG_f1": 0.5780655943179445,
"eval_PER_f1": 0.749264457627936,
"eval_loss": 0.4841216504573822,
"eval_overall_accuracy": 0.8415159100197845,
"eval_overall_f1": 0.6599932560127353,
"eval_overall_precision": 0.6612013701212998,
"eval_overall_recall": 0.6587895486638383,
"eval_runtime": 887.5751,
"eval_samples_per_second": 74.022,
"eval_steps_per_second": 0.29,
"step": 3000
},
{
"epoch": 0.18,
"grad_norm": 14.460062026977539,
"learning_rate": 1.949404234865542e-05,
"loss": 0.4744,
"step": 4000
},
{
"epoch": 0.18,
"eval_LOC_f1": 0.7135048963789569,
"eval_ORG_f1": 0.6044746860257756,
"eval_PER_f1": 0.7562344421814889,
"eval_loss": 0.4284209907054901,
"eval_overall_accuracy": 0.8650805108611886,
"eval_overall_f1": 0.6945276906141412,
"eval_overall_precision": 0.6951840204528166,
"eval_overall_recall": 0.6938725989010129,
"eval_runtime": 884.4505,
"eval_samples_per_second": 74.283,
"eval_steps_per_second": 0.291,
"step": 4000
},
{
"epoch": 0.22,
"grad_norm": 2.3655357360839844,
"learning_rate": 1.9367552935819272e-05,
"loss": 0.4385,
"step": 5000
},
{
"epoch": 0.22,
"eval_LOC_f1": 0.7295629535257298,
"eval_ORG_f1": 0.6200475216886777,
"eval_PER_f1": 0.7408250910983861,
"eval_loss": 0.4239448308944702,
"eval_overall_accuracy": 0.8635053351958905,
"eval_overall_f1": 0.7025319684063077,
"eval_overall_precision": 0.7043199823770524,
"eval_overall_recall": 0.7007530096904989,
"eval_runtime": 907.5971,
"eval_samples_per_second": 72.389,
"eval_steps_per_second": 0.283,
"step": 5000
},
{
"epoch": 0.27,
"grad_norm": 9.867854118347168,
"learning_rate": 1.924106352298313e-05,
"loss": 0.4279,
"step": 6000
},
{
"epoch": 0.27,
"eval_LOC_f1": 0.7370099725835874,
"eval_ORG_f1": 0.6403813434199981,
"eval_PER_f1": 0.7776639577500056,
"eval_loss": 0.38233184814453125,
"eval_overall_accuracy": 0.8816618781055326,
"eval_overall_f1": 0.7238370468534203,
"eval_overall_precision": 0.7385234418271267,
"eval_overall_recall": 0.7097233746105797,
"eval_runtime": 948.2573,
"eval_samples_per_second": 69.285,
"eval_steps_per_second": 0.271,
"step": 6000
},
{
"epoch": 0.31,
"grad_norm": 25.877347946166992,
"learning_rate": 1.9114574110146982e-05,
"loss": 0.4099,
"step": 7000
},
{
"epoch": 0.31,
"eval_LOC_f1": 0.7575684397708062,
"eval_ORG_f1": 0.630874803840732,
"eval_PER_f1": 0.7838224767358626,
"eval_loss": 0.38040244579315186,
"eval_overall_accuracy": 0.883136741379065,
"eval_overall_f1": 0.7288917006049582,
"eval_overall_precision": 0.7364569017865703,
"eval_overall_recall": 0.7214803450381201,
"eval_runtime": 907.1265,
"eval_samples_per_second": 72.427,
"eval_steps_per_second": 0.283,
"step": 7000
},
{
"epoch": 0.35,
"grad_norm": 30.637121200561523,
"learning_rate": 1.8988084697310836e-05,
"loss": 0.3874,
"step": 8000
},
{
"epoch": 0.35,
"eval_LOC_f1": 0.7474734456723695,
"eval_ORG_f1": 0.6407748343462335,
"eval_PER_f1": 0.7854664027017585,
"eval_loss": 0.37021398544311523,
"eval_overall_accuracy": 0.8872800498308584,
"eval_overall_f1": 0.7295473133392094,
"eval_overall_precision": 0.74022719948437,
"eval_overall_recall": 0.7191712196878376,
"eval_runtime": 884.9913,
"eval_samples_per_second": 74.238,
"eval_steps_per_second": 0.29,
"step": 8000
},
{
"epoch": 0.4,
"grad_norm": 20.109619140625,
"learning_rate": 1.8861595284474693e-05,
"loss": 0.3841,
"step": 9000
},
{
"epoch": 0.4,
"eval_LOC_f1": 0.7684967782745274,
"eval_ORG_f1": 0.655326947582435,
"eval_PER_f1": 0.789217873159736,
"eval_loss": 0.3808096945285797,
"eval_overall_accuracy": 0.8879015799879489,
"eval_overall_f1": 0.743771496693436,
"eval_overall_precision": 0.76602787456446,
"eval_overall_recall": 0.7227718897255663,
"eval_runtime": 885.0879,
"eval_samples_per_second": 74.23,
"eval_steps_per_second": 0.29,
"step": 9000
},
{
"epoch": 0.44,
"grad_norm": 10.265982627868652,
"learning_rate": 1.8735105871638546e-05,
"loss": 0.3764,
"step": 10000
},
{
"epoch": 0.44,
"eval_LOC_f1": 0.7831821749367751,
"eval_ORG_f1": 0.6622161847467495,
"eval_PER_f1": 0.7948864849077164,
"eval_loss": 0.34247785806655884,
"eval_overall_accuracy": 0.8911041208737209,
"eval_overall_f1": 0.7509862429761675,
"eval_overall_precision": 0.7436605881991772,
"eval_overall_recall": 0.7584576608168825,
"eval_runtime": 885.3889,
"eval_samples_per_second": 74.205,
"eval_steps_per_second": 0.29,
"step": 10000
},
{
"epoch": 0.49,
"grad_norm": 4.401586055755615,
"learning_rate": 1.86086164588024e-05,
"loss": 0.3564,
"step": 11000
},
{
"epoch": 0.49,
"eval_LOC_f1": 0.789610444706057,
"eval_ORG_f1": 0.6701892389389907,
"eval_PER_f1": 0.8054954166474735,
"eval_loss": 0.35062676668167114,
"eval_overall_accuracy": 0.8928101093201735,
"eval_overall_f1": 0.7598466310260445,
"eval_overall_precision": 0.7565214692509428,
"eval_overall_recall": 0.7632011522144121,
"eval_runtime": 885.0018,
"eval_samples_per_second": 74.237,
"eval_steps_per_second": 0.29,
"step": 11000
},
{
"epoch": 0.53,
"grad_norm": 1.0011909008026123,
"learning_rate": 1.8482127045966253e-05,
"loss": 0.3484,
"step": 12000
},
{
"epoch": 0.53,
"eval_LOC_f1": 0.7712024123633622,
"eval_ORG_f1": 0.6809512535185331,
"eval_PER_f1": 0.8100081183474653,
"eval_loss": 0.37064051628112793,
"eval_overall_accuracy": 0.8851477337194005,
"eval_overall_f1": 0.758193057536852,
"eval_overall_precision": 0.7375241450255696,
"eval_overall_recall": 0.7800538534996947,
"eval_runtime": 950.4008,
"eval_samples_per_second": 69.129,
"eval_steps_per_second": 0.27,
"step": 12000
},
{
"epoch": 0.58,
"grad_norm": 13.891754150390625,
"learning_rate": 1.8355637633130106e-05,
"loss": 0.3563,
"step": 13000
},
{
"epoch": 0.58,
"eval_LOC_f1": 0.7934942596408595,
"eval_ORG_f1": 0.6686377545091862,
"eval_PER_f1": 0.8152987398240509,
"eval_loss": 0.3389296531677246,
"eval_overall_accuracy": 0.8935766857734662,
"eval_overall_f1": 0.7638085016673694,
"eval_overall_precision": 0.7483700877298401,
"eval_overall_recall": 0.7798973026284891,
"eval_runtime": 970.0425,
"eval_samples_per_second": 67.729,
"eval_steps_per_second": 0.265,
"step": 13000
},
{
"epoch": 0.62,
"grad_norm": 19.86951446533203,
"learning_rate": 1.8229148220293963e-05,
"loss": 0.3396,
"step": 14000
},
{
"epoch": 0.62,
"eval_LOC_f1": 0.7943446440452429,
"eval_ORG_f1": 0.6860717813631874,
"eval_PER_f1": 0.808841180333809,
"eval_loss": 0.33801111578941345,
"eval_overall_accuracy": 0.8965501348456104,
"eval_overall_f1": 0.767065352823492,
"eval_overall_precision": 0.770362767931157,
"eval_overall_recall": 0.7637960455249934,
"eval_runtime": 892.9487,
"eval_samples_per_second": 73.576,
"eval_steps_per_second": 0.288,
"step": 14000
},
{
"epoch": 0.66,
"grad_norm": 6.008892059326172,
"learning_rate": 1.8102658807457817e-05,
"loss": 0.3513,
"step": 15000
},
{
"epoch": 0.66,
"eval_LOC_f1": 0.796750172086523,
"eval_ORG_f1": 0.6869723599718148,
"eval_PER_f1": 0.8222321051851345,
"eval_loss": 0.3108769357204437,
"eval_overall_accuracy": 0.900560597156957,
"eval_overall_f1": 0.773478314631055,
"eval_overall_precision": 0.7637860773210824,
"eval_overall_recall": 0.7834196972306151,
"eval_runtime": 885.3039,
"eval_samples_per_second": 74.212,
"eval_steps_per_second": 0.29,
"step": 15000
},
{
"epoch": 0.71,
"grad_norm": 36.22490692138672,
"learning_rate": 1.7976169394621674e-05,
"loss": 0.3332,
"step": 16000
},
{
"epoch": 0.71,
"eval_LOC_f1": 0.8066277986085525,
"eval_ORG_f1": 0.6983725665012176,
"eval_PER_f1": 0.8332057011578333,
"eval_loss": 0.31944143772125244,
"eval_overall_accuracy": 0.9018524580941595,
"eval_overall_f1": 0.7838363709114303,
"eval_overall_precision": 0.7856547450536315,
"eval_overall_recall": 0.7820263944768853,
"eval_runtime": 885.7636,
"eval_samples_per_second": 74.173,
"eval_steps_per_second": 0.29,
"step": 16000
},
{
"epoch": 0.75,
"grad_norm": 12.729876518249512,
"learning_rate": 1.7849679981785527e-05,
"loss": 0.3324,
"step": 17000
},
{
"epoch": 0.75,
"eval_LOC_f1": 0.8091387580118001,
"eval_ORG_f1": 0.7010727593385756,
"eval_PER_f1": 0.8321347376235326,
"eval_loss": 0.3180248737335205,
"eval_overall_accuracy": 0.9008195115735422,
"eval_overall_f1": 0.7850356248119096,
"eval_overall_precision": 0.7839540384210074,
"eval_overall_recall": 0.7861201997589117,
"eval_runtime": 884.7821,
"eval_samples_per_second": 74.256,
"eval_steps_per_second": 0.29,
"step": 17000
},
{
"epoch": 0.8,
"grad_norm": 8.6387939453125,
"learning_rate": 1.772319056894938e-05,
"loss": 0.3385,
"step": 18000
},
{
"epoch": 0.8,
"eval_LOC_f1": 0.7763382604086921,
"eval_ORG_f1": 0.6755694544225161,
"eval_PER_f1": 0.8256535378180551,
"eval_loss": 0.33690232038497925,
"eval_overall_accuracy": 0.8964328777930731,
"eval_overall_f1": 0.7651045588831604,
"eval_overall_precision": 0.755082284607938,
"eval_overall_recall": 0.7753964650813282,
"eval_runtime": 890.8661,
"eval_samples_per_second": 73.748,
"eval_steps_per_second": 0.288,
"step": 18000
},
{
"epoch": 0.84,
"grad_norm": 9.216795921325684,
"learning_rate": 1.7596701156113234e-05,
"loss": 0.3267,
"step": 19000
},
{
"epoch": 0.84,
"eval_LOC_f1": 0.8015086633534421,
"eval_ORG_f1": 0.70105107410561,
"eval_PER_f1": 0.8364990020855292,
"eval_loss": 0.3270108103752136,
"eval_overall_accuracy": 0.9019283701744147,
"eval_overall_f1": 0.7856358496296815,
"eval_overall_precision": 0.7816849022099619,
"eval_overall_recall": 0.789626939273917,
"eval_runtime": 950.9161,
"eval_samples_per_second": 69.091,
"eval_steps_per_second": 0.27,
"step": 19000
},
{
"epoch": 0.89,
"grad_norm": 10.781560897827148,
"learning_rate": 1.747021174327709e-05,
"loss": 0.3233,
"step": 20000
},
{
"epoch": 0.89,
"eval_LOC_f1": 0.8056958546374753,
"eval_ORG_f1": 0.6917519315097098,
"eval_PER_f1": 0.8345942771968653,
"eval_loss": 0.30491939187049866,
"eval_overall_accuracy": 0.9033815442821598,
"eval_overall_f1": 0.783789045768533,
"eval_overall_precision": 0.7798233379823338,
"eval_overall_recall": 0.7877952940808116,
"eval_runtime": 917.217,
"eval_samples_per_second": 71.63,
"eval_steps_per_second": 0.28,
"step": 20000
},
{
"epoch": 0.93,
"grad_norm": 15.803696632385254,
"learning_rate": 1.7343722330440944e-05,
"loss": 0.3112,
"step": 21000
},
{
"epoch": 0.93,
"eval_LOC_f1": 0.8185129588612579,
"eval_ORG_f1": 0.7198663442992582,
"eval_PER_f1": 0.8407628833170422,
"eval_loss": 0.3230852782726288,
"eval_overall_accuracy": 0.9054094812832666,
"eval_overall_f1": 0.7978603713205666,
"eval_overall_precision": 0.7936613663999752,
"eval_overall_recall": 0.8021040437090032,
"eval_runtime": 886.1519,
"eval_samples_per_second": 74.141,
"eval_steps_per_second": 0.29,
"step": 21000
},
{
"epoch": 0.97,
"grad_norm": 2.6037917137145996,
"learning_rate": 1.7217232917604798e-05,
"loss": 0.3256,
"step": 22000
},
{
"epoch": 0.97,
"eval_LOC_f1": 0.8135399708226737,
"eval_ORG_f1": 0.7164829968607569,
"eval_PER_f1": 0.8380035321776889,
"eval_loss": 0.3068985044956207,
"eval_overall_accuracy": 0.9048909746636654,
"eval_overall_f1": 0.7934551505253751,
"eval_overall_precision": 0.781072268142868,
"eval_overall_recall": 0.806236986708831,
"eval_runtime": 885.0947,
"eval_samples_per_second": 74.229,
"eval_steps_per_second": 0.29,
"step": 22000
},
{
"epoch": 1.02,
"grad_norm": 20.057506561279297,
"learning_rate": 1.709074350476865e-05,
"loss": 0.2931,
"step": 23000
},
{
"epoch": 1.02,
"eval_LOC_f1": 0.8090675101559834,
"eval_ORG_f1": 0.7063356635658061,
"eval_PER_f1": 0.8313781014023732,
"eval_loss": 0.3428688645362854,
"eval_overall_accuracy": 0.8993378704357015,
"eval_overall_f1": 0.7878607280931577,
"eval_overall_precision": 0.779137676333938,
"eval_overall_recall": 0.7967813140880129,
"eval_runtime": 913.4969,
"eval_samples_per_second": 71.921,
"eval_steps_per_second": 0.281,
"step": 23000
},
{
"epoch": 1.06,
"grad_norm": 10.215392112731934,
"learning_rate": 1.6964254091932504e-05,
"loss": 0.2936,
"step": 24000
},
{
"epoch": 1.06,
"eval_LOC_f1": 0.8279096724337782,
"eval_ORG_f1": 0.7213703143205346,
"eval_PER_f1": 0.8399587047500638,
"eval_loss": 0.3276561200618744,
"eval_overall_accuracy": 0.9048035402140856,
"eval_overall_f1": 0.8012747941254013,
"eval_overall_precision": 0.7938388261504187,
"eval_overall_recall": 0.8088513862579645,
"eval_runtime": 888.4065,
"eval_samples_per_second": 73.953,
"eval_steps_per_second": 0.289,
"step": 24000
},
{
"epoch": 1.11,
"grad_norm": 13.276248931884766,
"learning_rate": 1.683776467909636e-05,
"loss": 0.2797,
"step": 25000
},
{
"epoch": 1.11,
"eval_LOC_f1": 0.8137454037894649,
"eval_ORG_f1": 0.7157125069541951,
"eval_PER_f1": 0.8422392139071162,
"eval_loss": 0.30677318572998047,
"eval_overall_accuracy": 0.9050170429398038,
"eval_overall_f1": 0.7953167244565965,
"eval_overall_precision": 0.7764588335296837,
"eval_overall_recall": 0.8151134211061885,
"eval_runtime": 923.9779,
"eval_samples_per_second": 71.106,
"eval_steps_per_second": 0.278,
"step": 25000
},
{
"epoch": 1.15,
"grad_norm": 0.08989755064249039,
"learning_rate": 1.6711275266260215e-05,
"loss": 0.2792,
"step": 26000
},
{
"epoch": 1.15,
"eval_LOC_f1": 0.8237387838615486,
"eval_ORG_f1": 0.717827626918536,
"eval_PER_f1": 0.8449089917750373,
"eval_loss": 0.3350381851196289,
"eval_overall_accuracy": 0.9065115620198306,
"eval_overall_f1": 0.7990867935095444,
"eval_overall_precision": 0.7941203753807146,
"eval_overall_recall": 0.8041157224039952,
"eval_runtime": 950.4133,
"eval_samples_per_second": 69.128,
"eval_steps_per_second": 0.27,
"step": 26000
},
{
"epoch": 1.2,
"grad_norm": 32.182918548583984,
"learning_rate": 1.658478585342407e-05,
"loss": 0.2698,
"step": 27000
},
{
"epoch": 1.2,
"eval_LOC_f1": 0.8267147626869356,
"eval_ORG_f1": 0.7322118816415036,
"eval_PER_f1": 0.8463502705378134,
"eval_loss": 0.3303050696849823,
"eval_overall_accuracy": 0.9053356025623038,
"eval_overall_f1": 0.8061476513209491,
"eval_overall_precision": 0.803686040812516,
"eval_overall_recall": 0.8086243874947164,
"eval_runtime": 903.1604,
"eval_samples_per_second": 72.745,
"eval_steps_per_second": 0.285,
"step": 27000
},
{
"epoch": 1.24,
"grad_norm": 27.704275131225586,
"learning_rate": 1.6458296440587925e-05,
"loss": 0.2846,
"step": 28000
},
{
"epoch": 1.24,
"eval_LOC_f1": 0.8198407012516331,
"eval_ORG_f1": 0.7170252756930114,
"eval_PER_f1": 0.8497716275494053,
"eval_loss": 0.3040228486061096,
"eval_overall_accuracy": 0.9088763588770705,
"eval_overall_f1": 0.7998951624404026,
"eval_overall_precision": 0.7879266486958503,
"eval_overall_recall": 0.8122328850760054,
"eval_runtime": 883.6437,
"eval_samples_per_second": 74.351,
"eval_steps_per_second": 0.291,
"step": 28000
},
{
"epoch": 1.28,
"grad_norm": 1.532094120979309,
"learning_rate": 1.633180702775178e-05,
"loss": 0.2765,
"step": 29000
},
{
"epoch": 1.28,
"eval_LOC_f1": 0.8226323815533471,
"eval_ORG_f1": 0.7322743544720759,
"eval_PER_f1": 0.8458698818030955,
"eval_loss": 0.3010263741016388,
"eval_overall_accuracy": 0.9093941877102408,
"eval_overall_f1": 0.8048208514659728,
"eval_overall_precision": 0.799766577265244,
"eval_overall_recall": 0.8099394148128435,
"eval_runtime": 883.3843,
"eval_samples_per_second": 74.373,
"eval_steps_per_second": 0.291,
"step": 29000
},
{
"epoch": 1.33,
"grad_norm": 2.7648439407348633,
"learning_rate": 1.6205317614915632e-05,
"loss": 0.2758,
"step": 30000
},
{
"epoch": 1.33,
"eval_LOC_f1": 0.8158232882579698,
"eval_ORG_f1": 0.7211553763726063,
"eval_PER_f1": 0.8392668350824088,
"eval_loss": 0.2979504466056824,
"eval_overall_accuracy": 0.907270682822384,
"eval_overall_f1": 0.7967142515352101,
"eval_overall_precision": 0.7737616641463505,
"eval_overall_recall": 0.8210701817555615,
"eval_runtime": 884.2556,
"eval_samples_per_second": 74.3,
"eval_steps_per_second": 0.291,
"step": 30000
},
{
"epoch": 1.37,
"grad_norm": 2.9498727321624756,
"learning_rate": 1.607882820207949e-05,
"loss": 0.2745,
"step": 31000
},
{
"epoch": 1.37,
"eval_LOC_f1": 0.8269012485811577,
"eval_ORG_f1": 0.7277582167305856,
"eval_PER_f1": 0.856517895595802,
"eval_loss": 0.2944641709327698,
"eval_overall_accuracy": 0.9108385505943848,
"eval_overall_f1": 0.807803496021649,
"eval_overall_precision": 0.7947313807024321,
"eval_overall_recall": 0.8213128356059302,
"eval_runtime": 883.9066,
"eval_samples_per_second": 74.329,
"eval_steps_per_second": 0.291,
"step": 31000
},
{
"epoch": 1.42,
"grad_norm": 11.60289192199707,
"learning_rate": 1.5952338789243342e-05,
"loss": 0.2645,
"step": 32000
},
{
"epoch": 1.42,
"eval_LOC_f1": 0.8305319969159598,
"eval_ORG_f1": 0.7228604829282057,
"eval_PER_f1": 0.8315148384875288,
"eval_loss": 0.32325080037117004,
"eval_overall_accuracy": 0.9048618298471388,
"eval_overall_f1": 0.7998450483255535,
"eval_overall_precision": 0.7917570997998328,
"eval_overall_recall": 0.8080999420761776,
"eval_runtime": 933.3011,
"eval_samples_per_second": 70.395,
"eval_steps_per_second": 0.275,
"step": 32000
},
{
"epoch": 1.46,
"grad_norm": 42.618431091308594,
"learning_rate": 1.5825849376407196e-05,
"loss": 0.2779,
"step": 33000
},
{
"epoch": 1.46,
"eval_LOC_f1": 0.8264125401549256,
"eval_ORG_f1": 0.7424042624042624,
"eval_PER_f1": 0.8601716304896517,
"eval_loss": 0.2943771183490753,
"eval_overall_accuracy": 0.9127221190857203,
"eval_overall_f1": 0.8132353632361465,
"eval_overall_precision": 0.8138473840171838,
"eval_overall_recall": 0.8126242622540194,
"eval_runtime": 953.7502,
"eval_samples_per_second": 68.886,
"eval_steps_per_second": 0.269,
"step": 33000
},
{
"epoch": 1.51,
"grad_norm": 15.319729804992676,
"learning_rate": 1.569935996357105e-05,
"loss": 0.2709,
"step": 34000
},
{
"epoch": 1.51,
"eval_LOC_f1": 0.832774509183695,
"eval_ORG_f1": 0.7316936984844457,
"eval_PER_f1": 0.8539732494099136,
"eval_loss": 0.2914768159389496,
"eval_overall_accuracy": 0.9130203451152948,
"eval_overall_f1": 0.8107029247351679,
"eval_overall_precision": 0.7998217523118878,
"eval_overall_recall": 0.8218842462858306,
"eval_runtime": 894.9182,
"eval_samples_per_second": 73.415,
"eval_steps_per_second": 0.287,
"step": 34000
},
{
"epoch": 1.55,
"grad_norm": 1.931920051574707,
"learning_rate": 1.5572870550734906e-05,
"loss": 0.2631,
"step": 35000
},
{
"epoch": 1.55,
"eval_LOC_f1": 0.8323614548810673,
"eval_ORG_f1": 0.7279775567457282,
"eval_PER_f1": 0.8522675037838443,
"eval_loss": 0.3124816417694092,
"eval_overall_accuracy": 0.9096829247297835,
"eval_overall_f1": 0.8079843932416348,
"eval_overall_precision": 0.7857095311702623,
"eval_overall_recall": 0.8315590901263366,
"eval_runtime": 886.0992,
"eval_samples_per_second": 74.145,
"eval_steps_per_second": 0.29,
"step": 35000
},
{
"epoch": 1.59,
"grad_norm": 2.4540863037109375,
"learning_rate": 1.544638113789876e-05,
"loss": 0.2684,
"step": 36000
},
{
"epoch": 1.59,
"eval_LOC_f1": 0.8353469255313396,
"eval_ORG_f1": 0.743517370545253,
"eval_PER_f1": 0.8544175455688603,
"eval_loss": 0.31003931164741516,
"eval_overall_accuracy": 0.9140255023922472,
"eval_overall_f1": 0.8147700607298496,
"eval_overall_precision": 0.8114867383067271,
"eval_overall_recall": 0.8180800601155346,
"eval_runtime": 886.9365,
"eval_samples_per_second": 74.075,
"eval_steps_per_second": 0.29,
"step": 36000
},
{
"epoch": 1.64,
"grad_norm": 1.3480443954467773,
"learning_rate": 1.5319891725062616e-05,
"loss": 0.2546,
"step": 37000
},
{
"epoch": 1.64,
"eval_LOC_f1": 0.8268920250802105,
"eval_ORG_f1": 0.7359205250232403,
"eval_PER_f1": 0.8566762684569846,
"eval_loss": 0.3172565698623657,
"eval_overall_accuracy": 0.9102766656432092,
"eval_overall_f1": 0.8115168704156479,
"eval_overall_precision": 0.8111456076827428,
"eval_overall_recall": 0.8118884731593531,
"eval_runtime": 886.1348,
"eval_samples_per_second": 74.142,
"eval_steps_per_second": 0.29,
"step": 37000
},
{
"epoch": 1.68,
"grad_norm": 2.66180419921875,
"learning_rate": 1.5193402312226468e-05,
"loss": 0.2642,
"step": 38000
},
{
"epoch": 1.68,
"eval_LOC_f1": 0.8459391601383606,
"eval_ORG_f1": 0.7362593503366764,
"eval_PER_f1": 0.8645872824401172,
"eval_loss": 0.2804827094078064,
"eval_overall_accuracy": 0.913356527184997,
"eval_overall_f1": 0.8191614534186092,
"eval_overall_precision": 0.8128202954617264,
"eval_overall_recall": 0.8256023294769635,
"eval_runtime": 901.1127,
"eval_samples_per_second": 72.91,
"eval_steps_per_second": 0.285,
"step": 38000
},
{
"epoch": 1.73,
"grad_norm": 47.826175689697266,
"learning_rate": 1.5066912899390323e-05,
"loss": 0.2776,
"step": 39000
},
{
"epoch": 1.73,
"eval_LOC_f1": 0.8417204029165086,
"eval_ORG_f1": 0.7461313828771049,
"eval_PER_f1": 0.8621870343195805,
"eval_loss": 0.2955803871154785,
"eval_overall_accuracy": 0.9141780043391887,
"eval_overall_f1": 0.8213932893138981,
"eval_overall_precision": 0.8195021231836067,
"eval_overall_recall": 0.8232932041266809,
"eval_runtime": 949.1162,
"eval_samples_per_second": 69.222,
"eval_steps_per_second": 0.271,
"step": 39000
},
{
"epoch": 1.77,
"grad_norm": 14.713150024414062,
"learning_rate": 1.4940423486554176e-05,
"loss": 0.2616,
"step": 40000
},
{
"epoch": 1.77,
"eval_LOC_f1": 0.8312933303965682,
"eval_ORG_f1": 0.738299968952903,
"eval_PER_f1": 0.8513267743278481,
"eval_loss": 0.29292425513267517,
"eval_overall_accuracy": 0.9104176452208262,
"eval_overall_f1": 0.8118819476942669,
"eval_overall_precision": 0.796034420507883,
"eval_overall_recall": 0.8283732798973026,
"eval_runtime": 908.7427,
"eval_samples_per_second": 72.298,
"eval_steps_per_second": 0.283,
"step": 40000
},
{
"epoch": 1.82,
"grad_norm": 16.184900283813477,
"learning_rate": 1.481393407371803e-05,
"loss": 0.2701,
"step": 41000
},
{
"epoch": 1.82,
"eval_LOC_f1": 0.8392716598242965,
"eval_ORG_f1": 0.74401776384535,
"eval_PER_f1": 0.8639952804501724,
"eval_loss": 0.2767677903175354,
"eval_overall_accuracy": 0.9163774213073009,
"eval_overall_f1": 0.8195078963845922,
"eval_overall_precision": 0.8094401856885441,
"eval_overall_recall": 0.8298292029995147,
"eval_runtime": 885.7415,
"eval_samples_per_second": 74.175,
"eval_steps_per_second": 0.29,
"step": 41000
},
{
"epoch": 1.86,
"grad_norm": 16.436620712280273,
"learning_rate": 1.4687444660881885e-05,
"loss": 0.2669,
"step": 42000
},
{
"epoch": 1.86,
"eval_LOC_f1": 0.8361988121287902,
"eval_ORG_f1": 0.7500968409804315,
"eval_PER_f1": 0.8611851501962505,
"eval_loss": 0.29421770572662354,
"eval_overall_accuracy": 0.9147995344962793,
"eval_overall_f1": 0.819935938895562,
"eval_overall_precision": 0.8067546477976939,
"eval_overall_recall": 0.833555113734208,
"eval_runtime": 886.1039,
"eval_samples_per_second": 74.145,
"eval_steps_per_second": 0.29,
"step": 42000
},
{
"epoch": 1.9,
"grad_norm": 4.060434341430664,
"learning_rate": 1.456095524804574e-05,
"loss": 0.2422,
"step": 43000
},
{
"epoch": 1.9,
"eval_LOC_f1": 0.8396598172309967,
"eval_ORG_f1": 0.752934357339516,
"eval_PER_f1": 0.8587656968190062,
"eval_loss": 0.29513150453567505,
"eval_overall_accuracy": 0.9147182001245772,
"eval_overall_f1": 0.8206534155814486,
"eval_overall_precision": 0.8120531232517684,
"eval_overall_recall": 0.8294378258215007,
"eval_runtime": 885.9872,
"eval_samples_per_second": 74.155,
"eval_steps_per_second": 0.29,
"step": 43000
},
{
"epoch": 1.95,
"grad_norm": 4.084081172943115,
"learning_rate": 1.4434465835209595e-05,
"loss": 0.2616,
"step": 44000
},
{
"epoch": 1.95,
"eval_LOC_f1": 0.8452747626229368,
"eval_ORG_f1": 0.7507735621040889,
"eval_PER_f1": 0.8679754713527367,
"eval_loss": 0.29186713695526123,
"eval_overall_accuracy": 0.915251618045657,
"eval_overall_f1": 0.8252959748971241,
"eval_overall_precision": 0.820965230928905,
"eval_overall_recall": 0.8296726521283091,
"eval_runtime": 886.49,
"eval_samples_per_second": 74.113,
"eval_steps_per_second": 0.29,
"step": 44000
},
{
"epoch": 1.99,
"grad_norm": 12.051443099975586,
"learning_rate": 1.4307976422373449e-05,
"loss": 0.2449,
"step": 45000
},
{
"epoch": 1.99,
"eval_LOC_f1": 0.8420882739030321,
"eval_ORG_f1": 0.7511743283897188,
"eval_PER_f1": 0.8660442600276625,
"eval_loss": 0.28106340765953064,
"eval_overall_accuracy": 0.9165787238772637,
"eval_overall_f1": 0.8232545031821703,
"eval_overall_precision": 0.823325400056368,
"eval_overall_recall": 0.8231836185168371,
"eval_runtime": 931.6188,
"eval_samples_per_second": 70.522,
"eval_steps_per_second": 0.276,
"step": 45000
},
{
"epoch": 2.04,
"grad_norm": 16.095355987548828,
"learning_rate": 1.4181487009537302e-05,
"loss": 0.2379,
"step": 46000
},
{
"epoch": 2.04,
"eval_LOC_f1": 0.8334692878701362,
"eval_ORG_f1": 0.7499450670182377,
"eval_PER_f1": 0.8676157711285138,
"eval_loss": 0.2910194396972656,
"eval_overall_accuracy": 0.914796145564125,
"eval_overall_f1": 0.821648434727601,
"eval_overall_precision": 0.8099514821518198,
"eval_overall_recall": 0.8336881819747327,
"eval_runtime": 955.4465,
"eval_samples_per_second": 68.764,
"eval_steps_per_second": 0.269,
"step": 46000
},
{
"epoch": 2.08,
"grad_norm": 6.1998419761657715,
"learning_rate": 1.4054997596701157e-05,
"loss": 0.2128,
"step": 47000
},
{
"epoch": 2.08,
"eval_LOC_f1": 0.8394146138221968,
"eval_ORG_f1": 0.7394133361546803,
"eval_PER_f1": 0.864682724271338,
"eval_loss": 0.30839666724205017,
"eval_overall_accuracy": 0.9148042790012952,
"eval_overall_f1": 0.8188789651986448,
"eval_overall_precision": 0.8056906504249807,
"eval_overall_recall": 0.8325062228971304,
"eval_runtime": 901.3734,
"eval_samples_per_second": 72.889,
"eval_steps_per_second": 0.285,
"step": 47000
},
{
"epoch": 2.13,
"grad_norm": 5.403193950653076,
"learning_rate": 1.3928508183865012e-05,
"loss": 0.2237,
"step": 48000
},
{
"epoch": 2.13,
"eval_LOC_f1": 0.8372631513660468,
"eval_ORG_f1": 0.7524156839779593,
"eval_PER_f1": 0.8648470673721019,
"eval_loss": 0.3043561279773712,
"eval_overall_accuracy": 0.9152800850757528,
"eval_overall_f1": 0.8220857007666829,
"eval_overall_precision": 0.8082709895080826,
"eval_overall_recall": 0.836380856959469,
"eval_runtime": 884.5053,
"eval_samples_per_second": 74.279,
"eval_steps_per_second": 0.291,
"step": 48000
},
{
"epoch": 2.17,
"grad_norm": 13.765303611755371,
"learning_rate": 1.3802018771028867e-05,
"loss": 0.2246,
"step": 49000
},
{
"epoch": 2.17,
"eval_LOC_f1": 0.8349439826902872,
"eval_ORG_f1": 0.7424747298710351,
"eval_PER_f1": 0.859314059653789,
"eval_loss": 0.28388652205467224,
"eval_overall_accuracy": 0.9144118406578324,
"eval_overall_f1": 0.817645207294658,
"eval_overall_precision": 0.8118686576378439,
"eval_overall_recall": 0.8235045478028086,
"eval_runtime": 886.1908,
"eval_samples_per_second": 74.138,
"eval_steps_per_second": 0.29,
"step": 49000
},
{
"epoch": 2.21,
"grad_norm": 26.609722137451172,
"learning_rate": 1.3675529358192721e-05,
"loss": 0.2231,
"step": 50000
},
{
"epoch": 2.21,
"eval_LOC_f1": 0.8453938301706774,
"eval_ORG_f1": 0.7530178399743618,
"eval_PER_f1": 0.8633811603243918,
"eval_loss": 0.30370599031448364,
"eval_overall_accuracy": 0.9166329467917318,
"eval_overall_f1": 0.8246776205110672,
"eval_overall_precision": 0.8187134051793966,
"eval_overall_recall": 0.8307293705089469,
"eval_runtime": 885.5464,
"eval_samples_per_second": 74.191,
"eval_steps_per_second": 0.29,
"step": 50000
},
{
"epoch": 2.26,
"grad_norm": 18.287857055664062,
"learning_rate": 1.3549039945356574e-05,
"loss": 0.2156,
"step": 51000
},
{
"epoch": 2.26,
"eval_LOC_f1": 0.8369090369642839,
"eval_ORG_f1": 0.7570827451034141,
"eval_PER_f1": 0.8699436414871374,
"eval_loss": 0.2922073304653168,
"eval_overall_accuracy": 0.916367932297269,
"eval_overall_f1": 0.8256348807545127,
"eval_overall_precision": 0.8155410977732979,
"eval_overall_recall": 0.8359816522378947,
"eval_runtime": 885.263,
"eval_samples_per_second": 74.215,
"eval_steps_per_second": 0.29,
"step": 51000
},
{
"epoch": 2.3,
"grad_norm": 35.76387405395508,
"learning_rate": 1.3422550532520428e-05,
"loss": 0.2279,
"step": 52000
},
{
"epoch": 2.3,
"eval_LOC_f1": 0.8493380871850663,
"eval_ORG_f1": 0.7652859960552268,
"eval_PER_f1": 0.8658015544747966,
"eval_loss": 0.30765289068222046,
"eval_overall_accuracy": 0.9169528619870936,
"eval_overall_f1": 0.8303520832274882,
"eval_overall_precision": 0.8291563575626546,
"eval_overall_recall": 0.8315512625827762,
"eval_runtime": 939.0823,
"eval_samples_per_second": 69.962,
"eval_steps_per_second": 0.274,
"step": 52000
},
{
"epoch": 2.35,
"grad_norm": 12.871335983276367,
"learning_rate": 1.3296061119684283e-05,
"loss": 0.2192,
"step": 53000
},
{
"epoch": 2.35,
"eval_LOC_f1": 0.8450333357909482,
"eval_ORG_f1": 0.7589152754918096,
"eval_PER_f1": 0.8706159740642501,
"eval_loss": 0.29916831851005554,
"eval_overall_accuracy": 0.9182454007107268,
"eval_overall_f1": 0.8283387559440156,
"eval_overall_precision": 0.8151347746682732,
"eval_overall_recall": 0.8419775506050691,
"eval_runtime": 927.2359,
"eval_samples_per_second": 70.856,
"eval_steps_per_second": 0.277,
"step": 53000
},
{
"epoch": 2.39,
"grad_norm": 12.074441909790039,
"learning_rate": 1.3169571706848138e-05,
"loss": 0.2199,
"step": 54000
},
{
"epoch": 2.39,
"eval_LOC_f1": 0.845725804758205,
"eval_ORG_f1": 0.7583743578767123,
"eval_PER_f1": 0.8723780235920504,
"eval_loss": 0.29886308312416077,
"eval_overall_accuracy": 0.9192946141056846,
"eval_overall_f1": 0.8293654188671028,
"eval_overall_precision": 0.8241158649684679,
"eval_overall_recall": 0.8346822800068883,
"eval_runtime": 890.1649,
"eval_samples_per_second": 73.807,
"eval_steps_per_second": 0.289,
"step": 54000
},
{
"epoch": 2.43,
"grad_norm": 23.149980545043945,
"learning_rate": 1.3043082294011993e-05,
"loss": 0.2255,
"step": 55000
},
{
"epoch": 2.43,
"eval_LOC_f1": 0.8466151994355207,
"eval_ORG_f1": 0.7544473410506125,
"eval_PER_f1": 0.870403734801872,
"eval_loss": 0.2841680943965912,
"eval_overall_accuracy": 0.9179688638469395,
"eval_overall_f1": 0.8283511691203761,
"eval_overall_precision": 0.8183908572825472,
"eval_overall_recall": 0.8385569140692268,
"eval_runtime": 885.5261,
"eval_samples_per_second": 74.193,
"eval_steps_per_second": 0.29,
"step": 55000
},
{
"epoch": 2.48,
"grad_norm": 9.740825653076172,
"learning_rate": 1.2916592881175847e-05,
"loss": 0.2166,
"step": 56000
},
{
"epoch": 2.48,
"eval_LOC_f1": 0.8564384031559538,
"eval_ORG_f1": 0.7616027673681177,
"eval_PER_f1": 0.8733214429549507,
"eval_loss": 0.2920551300048828,
"eval_overall_accuracy": 0.9202286038073975,
"eval_overall_f1": 0.8339421536254372,
"eval_overall_precision": 0.8307068573159461,
"eval_overall_recall": 0.8372027490332984,
"eval_runtime": 885.9705,
"eval_samples_per_second": 74.156,
"eval_steps_per_second": 0.29,
"step": 56000
},
{
"epoch": 2.52,
"grad_norm": 7.81465482711792,
"learning_rate": 1.27901034683397e-05,
"loss": 0.2195,
"step": 57000
},
{
"epoch": 2.52,
"eval_LOC_f1": 0.8524520572659642,
"eval_ORG_f1": 0.7654914529914529,
"eval_PER_f1": 0.8711133515111243,
"eval_loss": 0.2894265651702881,
"eval_overall_accuracy": 0.9196877302355783,
"eval_overall_f1": 0.8334080883643471,
"eval_overall_precision": 0.8305412821928031,
"eval_overall_recall": 0.836294753980306,
"eval_runtime": 886.104,
"eval_samples_per_second": 74.145,
"eval_steps_per_second": 0.29,
"step": 57000
},
{
"epoch": 2.57,
"grad_norm": 4.749297618865967,
"learning_rate": 1.2663614055503555e-05,
"loss": 0.2198,
"step": 58000
},
{
"epoch": 2.57,
"eval_LOC_f1": 0.8479476339833629,
"eval_ORG_f1": 0.7569071497897121,
"eval_PER_f1": 0.8715211159515157,
"eval_loss": 0.2978798449039459,
"eval_overall_accuracy": 0.917242276793067,
"eval_overall_f1": 0.8290111404616975,
"eval_overall_precision": 0.8113011029852536,
"eval_overall_recall": 0.847511623902187,
"eval_runtime": 912.438,
"eval_samples_per_second": 72.005,
"eval_steps_per_second": 0.282,
"step": 58000
},
{
"epoch": 2.61,
"grad_norm": 8.958308219909668,
"learning_rate": 1.2537124642667409e-05,
"loss": 0.2186,
"step": 59000
},
{
"epoch": 2.61,
"eval_LOC_f1": 0.8410615339749197,
"eval_ORG_f1": 0.7623071419893903,
"eval_PER_f1": 0.8698487455846391,
"eval_loss": 0.2916683554649353,
"eval_overall_accuracy": 0.9165563569250457,
"eval_overall_f1": 0.828324104278644,
"eval_overall_precision": 0.8190848632805027,
"eval_overall_recall": 0.8377741597131988,
"eval_runtime": 949.686,
"eval_samples_per_second": 69.181,
"eval_steps_per_second": 0.271,
"step": 59000
},
{
"epoch": 2.66,
"grad_norm": 2.5494885444641113,
"learning_rate": 1.2410635229831265e-05,
"loss": 0.2105,
"step": 60000
},
{
"epoch": 2.66,
"eval_LOC_f1": 0.8486577670408396,
"eval_ORG_f1": 0.7658020018726344,
"eval_PER_f1": 0.8764428548203543,
"eval_loss": 0.28897759318351746,
"eval_overall_accuracy": 0.9185185486423599,
"eval_overall_f1": 0.8335781872027352,
"eval_overall_precision": 0.8256759558603319,
"eval_overall_recall": 0.8416331386884168,
"eval_runtime": 906.7935,
"eval_samples_per_second": 72.453,
"eval_steps_per_second": 0.283,
"step": 60000
},
{
"epoch": 2.7,
"grad_norm": 2.0383992195129395,
"learning_rate": 1.2284145816995119e-05,
"loss": 0.2117,
"step": 61000
},
{
"epoch": 2.7,
"eval_LOC_f1": 0.8508689748097309,
"eval_ORG_f1": 0.7658278739306382,
"eval_PER_f1": 0.8731916232956014,
"eval_loss": 0.28091031312942505,
"eval_overall_accuracy": 0.9196301183889559,
"eval_overall_f1": 0.833681650059079,
"eval_overall_precision": 0.8200686015431561,
"eval_overall_recall": 0.8477542777525557,
"eval_runtime": 885.0192,
"eval_samples_per_second": 74.236,
"eval_steps_per_second": 0.29,
"step": 61000
},
{
"epoch": 2.74,
"grad_norm": 1.6501883268356323,
"learning_rate": 1.2157656404158972e-05,
"loss": 0.1994,
"step": 62000
},
{
"epoch": 2.74,
"eval_LOC_f1": 0.8548620423851409,
"eval_ORG_f1": 0.7666146057733736,
"eval_PER_f1": 0.8740409497434966,
"eval_loss": 0.267339825630188,
"eval_overall_accuracy": 0.9214282857900041,
"eval_overall_f1": 0.835940143844595,
"eval_overall_precision": 0.8308500027062763,
"eval_overall_recall": 0.8410930381827575,
"eval_runtime": 886.6936,
"eval_samples_per_second": 74.095,
"eval_steps_per_second": 0.29,
"step": 62000
},
{
"epoch": 2.79,
"grad_norm": 3.6416823863983154,
"learning_rate": 1.2031166991322827e-05,
"loss": 0.2075,
"step": 63000
},
{
"epoch": 2.79,
"eval_LOC_f1": 0.8514531524204939,
"eval_ORG_f1": 0.7741699019900634,
"eval_PER_f1": 0.8762775257778839,
"eval_loss": 0.2862880229949951,
"eval_overall_accuracy": 0.9219467924096053,
"eval_overall_f1": 0.8376946930582835,
"eval_overall_precision": 0.8390993269298734,
"eval_overall_recall": 0.836294753980306,
"eval_runtime": 883.1341,
"eval_samples_per_second": 74.394,
"eval_steps_per_second": 0.291,
"step": 63000
},
{
"epoch": 2.83,
"grad_norm": 3.767646312713623,
"learning_rate": 1.1904677578486681e-05,
"loss": 0.2144,
"step": 64000
},
{
"epoch": 2.83,
"eval_LOC_f1": 0.8542264412564663,
"eval_ORG_f1": 0.762882333169584,
"eval_PER_f1": 0.8737852991218755,
"eval_loss": 0.2919914424419403,
"eval_overall_accuracy": 0.9192736027263282,
"eval_overall_f1": 0.8335330637616842,
"eval_overall_precision": 0.8234296712697055,
"eval_overall_recall": 0.8438874712337774,
"eval_runtime": 882.9236,
"eval_samples_per_second": 74.412,
"eval_steps_per_second": 0.291,
"step": 64000
},
{
"epoch": 2.88,
"grad_norm": 1.2373511791229248,
"learning_rate": 1.1778188165650538e-05,
"loss": 0.2107,
"step": 65000
},
{
"epoch": 2.88,
"eval_LOC_f1": 0.8536771728748805,
"eval_ORG_f1": 0.7691218130311614,
"eval_PER_f1": 0.8777075297286194,
"eval_loss": 0.2956686019897461,
"eval_overall_accuracy": 0.92124189452152,
"eval_overall_f1": 0.8368564609614728,
"eval_overall_precision": 0.8285232067510548,
"eval_overall_recall": 0.84535904942311,
"eval_runtime": 930.5121,
"eval_samples_per_second": 70.606,
"eval_steps_per_second": 0.276,
"step": 65000
},
{
"epoch": 2.92,
"grad_norm": 13.250840187072754,
"learning_rate": 1.1651698752814391e-05,
"loss": 0.2133,
"step": 66000
},
{
"epoch": 2.92,
"eval_LOC_f1": 0.8533000763334159,
"eval_ORG_f1": 0.7736413979491799,
"eval_PER_f1": 0.8740390436699679,
"eval_loss": 0.2793155908584595,
"eval_overall_accuracy": 0.9226618570941534,
"eval_overall_f1": 0.8369070216139791,
"eval_overall_precision": 0.8351781983723613,
"eval_overall_recall": 0.8386430170483898,
"eval_runtime": 953.5624,
"eval_samples_per_second": 68.9,
"eval_steps_per_second": 0.27,
"step": 66000
},
{
"epoch": 2.97,
"grad_norm": 25.819507598876953,
"learning_rate": 1.1525209339978245e-05,
"loss": 0.2112,
"step": 67000
},
{
"epoch": 2.97,
"eval_LOC_f1": 0.8548619072433559,
"eval_ORG_f1": 0.7661784507158363,
"eval_PER_f1": 0.8776364551402296,
"eval_loss": 0.2820794880390167,
"eval_overall_accuracy": 0.9220823496957755,
"eval_overall_f1": 0.8374375390381013,
"eval_overall_precision": 0.8353114340451381,
"eval_overall_recall": 0.8395744947320631,
"eval_runtime": 899.3376,
"eval_samples_per_second": 73.054,
"eval_steps_per_second": 0.286,
"step": 67000
},
{
"epoch": 3.01,
"grad_norm": 13.493629455566406,
"learning_rate": 1.13987199271421e-05,
"loss": 0.1983,
"step": 68000
},
{
"epoch": 3.01,
"eval_LOC_f1": 0.8558231253148143,
"eval_ORG_f1": 0.7679850431851696,
"eval_PER_f1": 0.8760919620026149,
"eval_loss": 0.2852949798107147,
"eval_overall_accuracy": 0.9224734324663767,
"eval_overall_f1": 0.8365820844153812,
"eval_overall_precision": 0.8359935591789517,
"eval_overall_recall": 0.8371714388590572,
"eval_runtime": 881.1865,
"eval_samples_per_second": 74.559,
"eval_steps_per_second": 0.292,
"step": 68000
}
],
"logging_steps": 1000,
"max_steps": 158116,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 500,
"total_flos": 2.1809439865622904e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}