nerui-base-3 / trainer_state.json
apwic's picture
End of training
f8e8ed9 verified
raw
history blame contribute delete
No virus
110 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 100.0,
"eval_steps": 500,
"global_step": 9600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 7.524846076965332,
"learning_rate": 4.9500000000000004e-05,
"loss": 0.2442,
"step": 96
},
{
"epoch": 1.0,
"eval_LOCATION_f1": 0.8972972972972972,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8383838383838383,
"eval_LOCATION_recall": 0.9651162790697675,
"eval_ORGANIZATION_f1": 0.898936170212766,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.8535353535353535,
"eval_ORGANIZATION_recall": 0.949438202247191,
"eval_PERSON_f1": 0.9727626459143969,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9689922480620154,
"eval_PERSON_recall": 0.9765625,
"eval_loss": 0.05805998295545578,
"eval_overall_accuracy": 0.9821862348178138,
"eval_overall_f1": 0.9217603911980441,
"eval_overall_precision": 0.8849765258215962,
"eval_overall_recall": 0.9617346938775511,
"eval_runtime": 0.2705,
"eval_samples_per_second": 628.537,
"eval_steps_per_second": 11.092,
"step": 96
},
{
"epoch": 2.0,
"grad_norm": 2.2480790615081787,
"learning_rate": 4.9e-05,
"loss": 0.0581,
"step": 192
},
{
"epoch": 2.0,
"eval_LOCATION_f1": 0.8864864864864866,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8282828282828283,
"eval_LOCATION_recall": 0.9534883720930233,
"eval_ORGANIZATION_f1": 0.9190751445086704,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9464285714285714,
"eval_ORGANIZATION_recall": 0.8932584269662921,
"eval_PERSON_f1": 0.9727626459143969,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9689922480620154,
"eval_PERSON_recall": 0.9765625,
"eval_loss": 0.05479570850729942,
"eval_overall_accuracy": 0.9851551956815114,
"eval_overall_f1": 0.9289340101522842,
"eval_overall_precision": 0.9242424242424242,
"eval_overall_recall": 0.9336734693877551,
"eval_runtime": 0.2786,
"eval_samples_per_second": 610.099,
"eval_steps_per_second": 10.766,
"step": 192
},
{
"epoch": 3.0,
"grad_norm": 0.7553579807281494,
"learning_rate": 4.85e-05,
"loss": 0.0357,
"step": 288
},
{
"epoch": 3.0,
"eval_LOCATION_f1": 0.9010989010989011,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8541666666666666,
"eval_LOCATION_recall": 0.9534883720930233,
"eval_ORGANIZATION_f1": 0.9204545454545454,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9310344827586207,
"eval_ORGANIZATION_recall": 0.9101123595505618,
"eval_PERSON_f1": 0.9763779527559054,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9841269841269841,
"eval_PERSON_recall": 0.96875,
"eval_loss": 0.05141273885965347,
"eval_overall_accuracy": 0.9856950067476383,
"eval_overall_f1": 0.934010152284264,
"eval_overall_precision": 0.9292929292929293,
"eval_overall_recall": 0.9387755102040817,
"eval_runtime": 0.2743,
"eval_samples_per_second": 619.871,
"eval_steps_per_second": 10.939,
"step": 288
},
{
"epoch": 4.0,
"grad_norm": 2.5006167888641357,
"learning_rate": 4.8e-05,
"loss": 0.0251,
"step": 384
},
{
"epoch": 4.0,
"eval_LOCATION_f1": 0.9142857142857143,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.898876404494382,
"eval_LOCATION_recall": 0.9302325581395349,
"eval_ORGANIZATION_f1": 0.9209809264305177,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.8941798941798942,
"eval_ORGANIZATION_recall": 0.949438202247191,
"eval_PERSON_f1": 0.9641434262948206,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.983739837398374,
"eval_PERSON_recall": 0.9453125,
"eval_loss": 0.06069042533636093,
"eval_overall_accuracy": 0.9851551956815114,
"eval_overall_f1": 0.9331651954602775,
"eval_overall_precision": 0.9226932668329177,
"eval_overall_recall": 0.9438775510204082,
"eval_runtime": 0.2776,
"eval_samples_per_second": 612.329,
"eval_steps_per_second": 10.806,
"step": 384
},
{
"epoch": 5.0,
"grad_norm": 0.45395660400390625,
"learning_rate": 4.75e-05,
"loss": 0.0146,
"step": 480
},
{
"epoch": 5.0,
"eval_LOCATION_f1": 0.9101123595505618,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8804347826086957,
"eval_LOCATION_recall": 0.9418604651162791,
"eval_ORGANIZATION_f1": 0.9333333333333335,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9230769230769231,
"eval_ORGANIZATION_recall": 0.9438202247191011,
"eval_PERSON_f1": 0.9644268774703557,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.976,
"eval_PERSON_recall": 0.953125,
"eval_loss": 0.06169137358665466,
"eval_overall_accuracy": 0.9865047233468286,
"eval_overall_f1": 0.9380530973451328,
"eval_overall_precision": 0.9298245614035088,
"eval_overall_recall": 0.9464285714285714,
"eval_runtime": 0.2738,
"eval_samples_per_second": 620.856,
"eval_steps_per_second": 10.956,
"step": 480
},
{
"epoch": 6.0,
"grad_norm": 0.7863659262657166,
"learning_rate": 4.7e-05,
"loss": 0.0117,
"step": 576
},
{
"epoch": 6.0,
"eval_LOCATION_f1": 0.888888888888889,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.851063829787234,
"eval_LOCATION_recall": 0.9302325581395349,
"eval_ORGANIZATION_f1": 0.9166666666666666,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9065934065934066,
"eval_ORGANIZATION_recall": 0.9269662921348315,
"eval_PERSON_f1": 0.9603174603174603,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9758064516129032,
"eval_PERSON_recall": 0.9453125,
"eval_loss": 0.0706215351819992,
"eval_overall_accuracy": 0.9856950067476383,
"eval_overall_f1": 0.9242424242424242,
"eval_overall_precision": 0.915,
"eval_overall_recall": 0.9336734693877551,
"eval_runtime": 0.28,
"eval_samples_per_second": 607.104,
"eval_steps_per_second": 10.714,
"step": 576
},
{
"epoch": 7.0,
"grad_norm": 0.042494997382164,
"learning_rate": 4.6500000000000005e-05,
"loss": 0.0083,
"step": 672
},
{
"epoch": 7.0,
"eval_LOCATION_f1": 0.8526315789473684,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.7788461538461539,
"eval_LOCATION_recall": 0.9418604651162791,
"eval_ORGANIZATION_f1": 0.9187675070028011,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9162011173184358,
"eval_ORGANIZATION_recall": 0.9213483146067416,
"eval_PERSON_f1": 0.9534883720930233,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9461538461538461,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.0925956517457962,
"eval_overall_accuracy": 0.9819163292847504,
"eval_overall_f1": 0.9142857142857143,
"eval_overall_precision": 0.8910411622276029,
"eval_overall_recall": 0.9387755102040817,
"eval_runtime": 0.2741,
"eval_samples_per_second": 620.296,
"eval_steps_per_second": 10.946,
"step": 672
},
{
"epoch": 8.0,
"grad_norm": 0.1783752143383026,
"learning_rate": 4.600000000000001e-05,
"loss": 0.008,
"step": 768
},
{
"epoch": 8.0,
"eval_LOCATION_f1": 0.8999999999999999,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8617021276595744,
"eval_LOCATION_recall": 0.9418604651162791,
"eval_ORGANIZATION_f1": 0.9371428571428573,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9534883720930233,
"eval_ORGANIZATION_recall": 0.9213483146067416,
"eval_PERSON_f1": 0.9723320158102766,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.07805542647838593,
"eval_overall_accuracy": 0.9856950067476383,
"eval_overall_f1": 0.9399744572158366,
"eval_overall_precision": 0.9411764705882353,
"eval_overall_recall": 0.9387755102040817,
"eval_runtime": 0.2886,
"eval_samples_per_second": 589.03,
"eval_steps_per_second": 10.395,
"step": 768
},
{
"epoch": 9.0,
"grad_norm": 0.3219904899597168,
"learning_rate": 4.55e-05,
"loss": 0.0042,
"step": 864
},
{
"epoch": 9.0,
"eval_LOCATION_f1": 0.8914285714285715,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8764044943820225,
"eval_LOCATION_recall": 0.9069767441860465,
"eval_ORGANIZATION_f1": 0.9662921348314607,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9662921348314607,
"eval_ORGANIZATION_recall": 0.9662921348314607,
"eval_PERSON_f1": 0.9725490196078432,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9763779527559056,
"eval_PERSON_recall": 0.96875,
"eval_loss": 0.06586796045303345,
"eval_overall_accuracy": 0.9889338731443995,
"eval_overall_f1": 0.9516539440203563,
"eval_overall_precision": 0.949238578680203,
"eval_overall_recall": 0.9540816326530612,
"eval_runtime": 0.2757,
"eval_samples_per_second": 616.613,
"eval_steps_per_second": 10.881,
"step": 864
},
{
"epoch": 10.0,
"grad_norm": 0.016961606219410896,
"learning_rate": 4.5e-05,
"loss": 0.0044,
"step": 960
},
{
"epoch": 10.0,
"eval_LOCATION_f1": 0.8926553672316384,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8681318681318682,
"eval_LOCATION_recall": 0.9186046511627907,
"eval_ORGANIZATION_f1": 0.9441340782122906,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9388888888888889,
"eval_ORGANIZATION_recall": 0.949438202247191,
"eval_PERSON_f1": 0.9494163424124514,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9457364341085271,
"eval_PERSON_recall": 0.953125,
"eval_loss": 0.07118100672960281,
"eval_overall_accuracy": 0.9873144399460189,
"eval_overall_f1": 0.9343434343434343,
"eval_overall_precision": 0.925,
"eval_overall_recall": 0.9438775510204082,
"eval_runtime": 0.2777,
"eval_samples_per_second": 612.13,
"eval_steps_per_second": 10.802,
"step": 960
},
{
"epoch": 11.0,
"grad_norm": 0.9713481664657593,
"learning_rate": 4.4500000000000004e-05,
"loss": 0.005,
"step": 1056
},
{
"epoch": 11.0,
"eval_LOCATION_f1": 0.8972972972972972,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8383838383838383,
"eval_LOCATION_recall": 0.9651162790697675,
"eval_ORGANIZATION_f1": 0.9438202247191011,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9438202247191011,
"eval_ORGANIZATION_recall": 0.9438202247191011,
"eval_PERSON_f1": 0.968503937007874,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.08548479527235031,
"eval_overall_accuracy": 0.9870445344129555,
"eval_overall_f1": 0.9408805031446542,
"eval_overall_precision": 0.9280397022332506,
"eval_overall_recall": 0.9540816326530612,
"eval_runtime": 0.2746,
"eval_samples_per_second": 619.083,
"eval_steps_per_second": 10.925,
"step": 1056
},
{
"epoch": 12.0,
"grad_norm": 0.007533730939030647,
"learning_rate": 4.4000000000000006e-05,
"loss": 0.0036,
"step": 1152
},
{
"epoch": 12.0,
"eval_LOCATION_f1": 0.9050279329608939,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8709677419354839,
"eval_LOCATION_recall": 0.9418604651162791,
"eval_ORGANIZATION_f1": 0.9408450704225352,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.943502824858757,
"eval_ORGANIZATION_recall": 0.9382022471910112,
"eval_PERSON_f1": 0.9723320158102766,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.08587783575057983,
"eval_overall_accuracy": 0.9873144399460189,
"eval_overall_f1": 0.9428208386277002,
"eval_overall_precision": 0.9392405063291139,
"eval_overall_recall": 0.9464285714285714,
"eval_runtime": 0.2742,
"eval_samples_per_second": 619.889,
"eval_steps_per_second": 10.939,
"step": 1152
},
{
"epoch": 13.0,
"grad_norm": 0.012886933982372284,
"learning_rate": 4.35e-05,
"loss": 0.0042,
"step": 1248
},
{
"epoch": 13.0,
"eval_LOCATION_f1": 0.9152542372881357,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8901098901098901,
"eval_LOCATION_recall": 0.9418604651162791,
"eval_ORGANIZATION_f1": 0.9526462395543176,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9447513812154696,
"eval_ORGANIZATION_recall": 0.9606741573033708,
"eval_PERSON_f1": 0.9723320158102766,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.07611611485481262,
"eval_overall_accuracy": 0.9889338731443995,
"eval_overall_f1": 0.9505703422053231,
"eval_overall_precision": 0.9445843828715366,
"eval_overall_recall": 0.9566326530612245,
"eval_runtime": 0.2752,
"eval_samples_per_second": 617.726,
"eval_steps_per_second": 10.901,
"step": 1248
},
{
"epoch": 14.0,
"grad_norm": 0.020214928314089775,
"learning_rate": 4.3e-05,
"loss": 0.0036,
"step": 1344
},
{
"epoch": 14.0,
"eval_LOCATION_f1": 0.9028571428571427,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8876404494382022,
"eval_LOCATION_recall": 0.9186046511627907,
"eval_ORGANIZATION_f1": 0.9401709401709402,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.953757225433526,
"eval_ORGANIZATION_recall": 0.9269662921348315,
"eval_PERSON_f1": 0.9763779527559054,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9841269841269841,
"eval_PERSON_recall": 0.96875,
"eval_loss": 0.08433674275875092,
"eval_overall_accuracy": 0.9862348178137652,
"eval_overall_f1": 0.9435897435897437,
"eval_overall_precision": 0.9484536082474226,
"eval_overall_recall": 0.9387755102040817,
"eval_runtime": 0.2767,
"eval_samples_per_second": 614.444,
"eval_steps_per_second": 10.843,
"step": 1344
},
{
"epoch": 15.0,
"grad_norm": 0.005007833708077669,
"learning_rate": 4.25e-05,
"loss": 0.0028,
"step": 1440
},
{
"epoch": 15.0,
"eval_LOCATION_f1": 0.9111111111111112,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8723404255319149,
"eval_LOCATION_recall": 0.9534883720930233,
"eval_ORGANIZATION_f1": 0.934844192634561,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9428571428571428,
"eval_ORGANIZATION_recall": 0.9269662921348315,
"eval_PERSON_f1": 0.9723320158102766,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.09056564420461655,
"eval_overall_accuracy": 0.9867746288798921,
"eval_overall_f1": 0.94147582697201,
"eval_overall_precision": 0.9390862944162437,
"eval_overall_recall": 0.9438775510204082,
"eval_runtime": 0.2739,
"eval_samples_per_second": 620.731,
"eval_steps_per_second": 10.954,
"step": 1440
},
{
"epoch": 16.0,
"grad_norm": 0.002987402491271496,
"learning_rate": 4.2e-05,
"loss": 0.0017,
"step": 1536
},
{
"epoch": 16.0,
"eval_LOCATION_f1": 0.8950276243093923,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8526315789473684,
"eval_LOCATION_recall": 0.9418604651162791,
"eval_ORGANIZATION_f1": 0.9394812680115273,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9644970414201184,
"eval_ORGANIZATION_recall": 0.9157303370786517,
"eval_PERSON_f1": 0.9606299212598425,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9682539682539683,
"eval_PERSON_recall": 0.953125,
"eval_loss": 0.09138376265764236,
"eval_overall_accuracy": 0.9862348178137652,
"eval_overall_f1": 0.9360613810741688,
"eval_overall_precision": 0.9384615384615385,
"eval_overall_recall": 0.9336734693877551,
"eval_runtime": 0.2746,
"eval_samples_per_second": 619.018,
"eval_steps_per_second": 10.924,
"step": 1536
},
{
"epoch": 17.0,
"grad_norm": 0.003726888680830598,
"learning_rate": 4.15e-05,
"loss": 0.002,
"step": 1632
},
{
"epoch": 17.0,
"eval_LOCATION_f1": 0.8876404494382023,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8586956521739131,
"eval_LOCATION_recall": 0.9186046511627907,
"eval_ORGANIZATION_f1": 0.9491525423728814,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9545454545454546,
"eval_ORGANIZATION_recall": 0.9438202247191011,
"eval_PERSON_f1": 0.968503937007874,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.08281037956476212,
"eval_overall_accuracy": 0.9883940620782726,
"eval_overall_f1": 0.94147582697201,
"eval_overall_precision": 0.9390862944162437,
"eval_overall_recall": 0.9438775510204082,
"eval_runtime": 0.2742,
"eval_samples_per_second": 619.907,
"eval_steps_per_second": 10.94,
"step": 1632
},
{
"epoch": 18.0,
"grad_norm": 0.003566289786249399,
"learning_rate": 4.1e-05,
"loss": 0.0033,
"step": 1728
},
{
"epoch": 18.0,
"eval_LOCATION_f1": 0.9120879120879122,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8645833333333334,
"eval_LOCATION_recall": 0.9651162790697675,
"eval_ORGANIZATION_f1": 0.925207756232687,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.912568306010929,
"eval_ORGANIZATION_recall": 0.9382022471910112,
"eval_PERSON_f1": 0.9763779527559054,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9841269841269841,
"eval_PERSON_recall": 0.96875,
"eval_loss": 0.0640731155872345,
"eval_overall_accuracy": 0.988663967611336,
"eval_overall_f1": 0.9385194479297364,
"eval_overall_precision": 0.9234567901234568,
"eval_overall_recall": 0.9540816326530612,
"eval_runtime": 0.2758,
"eval_samples_per_second": 616.283,
"eval_steps_per_second": 10.876,
"step": 1728
},
{
"epoch": 19.0,
"grad_norm": 0.016631081700325012,
"learning_rate": 4.05e-05,
"loss": 0.0024,
"step": 1824
},
{
"epoch": 19.0,
"eval_LOCATION_f1": 0.8863636363636364,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8666666666666667,
"eval_LOCATION_recall": 0.9069767441860465,
"eval_ORGANIZATION_f1": 0.9476584022038568,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9297297297297298,
"eval_ORGANIZATION_recall": 0.9662921348314607,
"eval_PERSON_f1": 0.9606299212598425,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9682539682539683,
"eval_PERSON_recall": 0.953125,
"eval_loss": 0.09817531704902649,
"eval_overall_accuracy": 0.9867746288798921,
"eval_overall_f1": 0.9382093316519546,
"eval_overall_precision": 0.9276807980049875,
"eval_overall_recall": 0.9489795918367347,
"eval_runtime": 0.2771,
"eval_samples_per_second": 613.515,
"eval_steps_per_second": 10.827,
"step": 1824
},
{
"epoch": 20.0,
"grad_norm": 0.35814717411994934,
"learning_rate": 4e-05,
"loss": 0.0037,
"step": 1920
},
{
"epoch": 20.0,
"eval_LOCATION_f1": 0.8864864864864866,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8282828282828283,
"eval_LOCATION_recall": 0.9534883720930233,
"eval_ORGANIZATION_f1": 0.96045197740113,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9659090909090909,
"eval_ORGANIZATION_recall": 0.9550561797752809,
"eval_PERSON_f1": 0.9723320158102766,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.0904369205236435,
"eval_overall_accuracy": 0.988663967611336,
"eval_overall_f1": 0.9469696969696969,
"eval_overall_precision": 0.9375,
"eval_overall_recall": 0.9566326530612245,
"eval_runtime": 0.2871,
"eval_samples_per_second": 592.205,
"eval_steps_per_second": 10.451,
"step": 1920
},
{
"epoch": 21.0,
"grad_norm": 0.010785204358398914,
"learning_rate": 3.9500000000000005e-05,
"loss": 0.0038,
"step": 2016
},
{
"epoch": 21.0,
"eval_LOCATION_f1": 0.9273743016759777,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8924731182795699,
"eval_LOCATION_recall": 0.9651162790697675,
"eval_ORGANIZATION_f1": 0.9411764705882353,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9385474860335196,
"eval_ORGANIZATION_recall": 0.9438202247191011,
"eval_PERSON_f1": 0.9609375,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9609375,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.0786675289273262,
"eval_overall_accuracy": 0.9878542510121457,
"eval_overall_f1": 0.9444444444444445,
"eval_overall_precision": 0.935,
"eval_overall_recall": 0.9540816326530612,
"eval_runtime": 0.2796,
"eval_samples_per_second": 607.994,
"eval_steps_per_second": 10.729,
"step": 2016
},
{
"epoch": 22.0,
"grad_norm": 0.02177988551557064,
"learning_rate": 3.9000000000000006e-05,
"loss": 0.0024,
"step": 2112
},
{
"epoch": 22.0,
"eval_LOCATION_f1": 0.8950276243093923,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8526315789473684,
"eval_LOCATION_recall": 0.9418604651162791,
"eval_ORGANIZATION_f1": 0.9388888888888889,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9285714285714286,
"eval_ORGANIZATION_recall": 0.949438202247191,
"eval_PERSON_f1": 0.9523809523809523,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.967741935483871,
"eval_PERSON_recall": 0.9375,
"eval_loss": 0.06972306966781616,
"eval_overall_accuracy": 0.9889338731443995,
"eval_overall_f1": 0.9331651954602775,
"eval_overall_precision": 0.9226932668329177,
"eval_overall_recall": 0.9438775510204082,
"eval_runtime": 0.2769,
"eval_samples_per_second": 613.91,
"eval_steps_per_second": 10.834,
"step": 2112
},
{
"epoch": 23.0,
"grad_norm": 0.1288156807422638,
"learning_rate": 3.85e-05,
"loss": 0.0041,
"step": 2208
},
{
"epoch": 23.0,
"eval_LOCATION_f1": 0.9265536723163842,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9010989010989011,
"eval_LOCATION_recall": 0.9534883720930233,
"eval_ORGANIZATION_f1": 0.9467787114845938,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9441340782122905,
"eval_ORGANIZATION_recall": 0.949438202247191,
"eval_PERSON_f1": 0.9647058823529412,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.968503937007874,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.0794149860739708,
"eval_overall_accuracy": 0.9875843454790824,
"eval_overall_f1": 0.9480354879594423,
"eval_overall_precision": 0.9420654911838791,
"eval_overall_recall": 0.9540816326530612,
"eval_runtime": 0.2829,
"eval_samples_per_second": 600.817,
"eval_steps_per_second": 10.603,
"step": 2208
},
{
"epoch": 24.0,
"grad_norm": 0.0011946976883336902,
"learning_rate": 3.8e-05,
"loss": 0.0033,
"step": 2304
},
{
"epoch": 24.0,
"eval_LOCATION_f1": 0.9204545454545455,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9,
"eval_LOCATION_recall": 0.9418604651162791,
"eval_ORGANIZATION_f1": 0.9333333333333335,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9230769230769231,
"eval_ORGANIZATION_recall": 0.9438202247191011,
"eval_PERSON_f1": 0.9603174603174603,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9758064516129032,
"eval_PERSON_recall": 0.9453125,
"eval_loss": 0.08298368752002716,
"eval_overall_accuracy": 0.9881241565452091,
"eval_overall_f1": 0.9390862944162437,
"eval_overall_precision": 0.9343434343434344,
"eval_overall_recall": 0.9438775510204082,
"eval_runtime": 0.2768,
"eval_samples_per_second": 614.247,
"eval_steps_per_second": 10.84,
"step": 2304
},
{
"epoch": 25.0,
"grad_norm": 0.008975312113761902,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.0034,
"step": 2400
},
{
"epoch": 25.0,
"eval_LOCATION_f1": 0.9060773480662985,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8631578947368421,
"eval_LOCATION_recall": 0.9534883720930233,
"eval_ORGANIZATION_f1": 0.9526462395543176,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9447513812154696,
"eval_ORGANIZATION_recall": 0.9606741573033708,
"eval_PERSON_f1": 0.9763779527559054,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9841269841269841,
"eval_PERSON_recall": 0.96875,
"eval_loss": 0.08038989454507828,
"eval_overall_accuracy": 0.9881241565452091,
"eval_overall_f1": 0.9496221662468515,
"eval_overall_precision": 0.9378109452736318,
"eval_overall_recall": 0.9617346938775511,
"eval_runtime": 0.2782,
"eval_samples_per_second": 611.154,
"eval_steps_per_second": 10.785,
"step": 2400
},
{
"epoch": 26.0,
"grad_norm": 0.003603309392929077,
"learning_rate": 3.7e-05,
"loss": 0.0012,
"step": 2496
},
{
"epoch": 26.0,
"eval_LOCATION_f1": 0.9265536723163842,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9010989010989011,
"eval_LOCATION_recall": 0.9534883720930233,
"eval_ORGANIZATION_f1": 0.9444444444444444,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9340659340659341,
"eval_ORGANIZATION_recall": 0.9550561797752809,
"eval_PERSON_f1": 0.9763779527559054,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9841269841269841,
"eval_PERSON_recall": 0.96875,
"eval_loss": 0.07283078134059906,
"eval_overall_accuracy": 0.9902834008097166,
"eval_overall_f1": 0.9506953223767383,
"eval_overall_precision": 0.9423558897243107,
"eval_overall_recall": 0.9591836734693877,
"eval_runtime": 0.2753,
"eval_samples_per_second": 617.432,
"eval_steps_per_second": 10.896,
"step": 2496
},
{
"epoch": 27.0,
"grad_norm": 0.0058512561954557896,
"learning_rate": 3.65e-05,
"loss": 0.0015,
"step": 2592
},
{
"epoch": 27.0,
"eval_LOCATION_f1": 0.9257142857142857,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9101123595505618,
"eval_LOCATION_recall": 0.9418604651162791,
"eval_ORGANIZATION_f1": 0.9505494505494506,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9301075268817204,
"eval_ORGANIZATION_recall": 0.9719101123595506,
"eval_PERSON_f1": 0.968503937007874,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.09567292034626007,
"eval_overall_accuracy": 0.9881241565452091,
"eval_overall_f1": 0.9508196721311475,
"eval_overall_precision": 0.940149625935162,
"eval_overall_recall": 0.9617346938775511,
"eval_runtime": 0.2824,
"eval_samples_per_second": 601.88,
"eval_steps_per_second": 10.621,
"step": 2592
},
{
"epoch": 28.0,
"grad_norm": 1.171476125717163,
"learning_rate": 3.6e-05,
"loss": 0.0029,
"step": 2688
},
{
"epoch": 28.0,
"eval_LOCATION_f1": 0.9222222222222223,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8829787234042553,
"eval_LOCATION_recall": 0.9651162790697675,
"eval_ORGANIZATION_f1": 0.9491525423728814,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9545454545454546,
"eval_ORGANIZATION_recall": 0.9438202247191011,
"eval_PERSON_f1": 0.9763779527559054,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9841269841269841,
"eval_PERSON_recall": 0.96875,
"eval_loss": 0.07663165777921677,
"eval_overall_accuracy": 0.9881241565452091,
"eval_overall_f1": 0.9517766497461929,
"eval_overall_precision": 0.946969696969697,
"eval_overall_recall": 0.9566326530612245,
"eval_runtime": 0.2755,
"eval_samples_per_second": 617.074,
"eval_steps_per_second": 10.89,
"step": 2688
},
{
"epoch": 29.0,
"grad_norm": 0.40970727801322937,
"learning_rate": 3.55e-05,
"loss": 0.0031,
"step": 2784
},
{
"epoch": 29.0,
"eval_LOCATION_f1": 0.9130434782608695,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8571428571428571,
"eval_LOCATION_recall": 0.9767441860465116,
"eval_ORGANIZATION_f1": 0.9455587392550143,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9649122807017544,
"eval_ORGANIZATION_recall": 0.9269662921348315,
"eval_PERSON_f1": 0.9725490196078432,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9763779527559056,
"eval_PERSON_recall": 0.96875,
"eval_loss": 0.08022492378950119,
"eval_overall_accuracy": 0.9878542510121457,
"eval_overall_f1": 0.9467005076142132,
"eval_overall_precision": 0.9419191919191919,
"eval_overall_recall": 0.951530612244898,
"eval_runtime": 0.2752,
"eval_samples_per_second": 617.806,
"eval_steps_per_second": 10.902,
"step": 2784
},
{
"epoch": 30.0,
"grad_norm": 0.08574865758419037,
"learning_rate": 3.5e-05,
"loss": 0.0018,
"step": 2880
},
{
"epoch": 30.0,
"eval_LOCATION_f1": 0.9050279329608939,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8709677419354839,
"eval_LOCATION_recall": 0.9418604651162791,
"eval_ORGANIZATION_f1": 0.9577464788732394,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.96045197740113,
"eval_ORGANIZATION_recall": 0.9550561797752809,
"eval_PERSON_f1": 0.9763779527559054,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9841269841269841,
"eval_PERSON_recall": 0.96875,
"eval_loss": 0.08365346491336823,
"eval_overall_accuracy": 0.9892037786774629,
"eval_overall_f1": 0.9517766497461929,
"eval_overall_precision": 0.946969696969697,
"eval_overall_recall": 0.9566326530612245,
"eval_runtime": 0.2756,
"eval_samples_per_second": 616.825,
"eval_steps_per_second": 10.885,
"step": 2880
},
{
"epoch": 31.0,
"grad_norm": 0.002840681467205286,
"learning_rate": 3.45e-05,
"loss": 0.0017,
"step": 2976
},
{
"epoch": 31.0,
"eval_LOCATION_f1": 0.9431818181818181,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9222222222222223,
"eval_LOCATION_recall": 0.9651162790697675,
"eval_ORGANIZATION_f1": 0.961111111111111,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9505494505494505,
"eval_ORGANIZATION_recall": 0.9719101123595506,
"eval_PERSON_f1": 0.9606299212598425,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9682539682539683,
"eval_PERSON_recall": 0.953125,
"eval_loss": 0.07920122146606445,
"eval_overall_accuracy": 0.9902834008097166,
"eval_overall_f1": 0.9569620253164557,
"eval_overall_precision": 0.949748743718593,
"eval_overall_recall": 0.9642857142857143,
"eval_runtime": 0.2801,
"eval_samples_per_second": 606.828,
"eval_steps_per_second": 10.709,
"step": 2976
},
{
"epoch": 32.0,
"grad_norm": 0.001423178124241531,
"learning_rate": 3.4000000000000007e-05,
"loss": 0.0017,
"step": 3072
},
{
"epoch": 32.0,
"eval_LOCATION_f1": 0.9171270718232045,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8736842105263158,
"eval_LOCATION_recall": 0.9651162790697675,
"eval_ORGANIZATION_f1": 0.9633802816901408,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9661016949152542,
"eval_ORGANIZATION_recall": 0.9606741573033708,
"eval_PERSON_f1": 0.9644268774703557,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.976,
"eval_PERSON_recall": 0.953125,
"eval_loss": 0.06753446161746979,
"eval_overall_accuracy": 0.99055330634278,
"eval_overall_f1": 0.9531051964512041,
"eval_overall_precision": 0.947103274559194,
"eval_overall_recall": 0.9591836734693877,
"eval_runtime": 0.2741,
"eval_samples_per_second": 620.297,
"eval_steps_per_second": 10.946,
"step": 3072
},
{
"epoch": 33.0,
"grad_norm": 0.001040176604874432,
"learning_rate": 3.35e-05,
"loss": 0.0012,
"step": 3168
},
{
"epoch": 33.0,
"eval_LOCATION_f1": 0.9273743016759777,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8924731182795699,
"eval_LOCATION_recall": 0.9651162790697675,
"eval_ORGANIZATION_f1": 0.9542857142857143,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9709302325581395,
"eval_ORGANIZATION_recall": 0.9382022471910112,
"eval_PERSON_f1": 0.9723320158102766,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.09092291444540024,
"eval_overall_accuracy": 0.9897435897435898,
"eval_overall_f1": 0.9539641943734015,
"eval_overall_precision": 0.9564102564102565,
"eval_overall_recall": 0.951530612244898,
"eval_runtime": 0.2779,
"eval_samples_per_second": 611.82,
"eval_steps_per_second": 10.797,
"step": 3168
},
{
"epoch": 34.0,
"grad_norm": 0.21219216287136078,
"learning_rate": 3.3e-05,
"loss": 0.002,
"step": 3264
},
{
"epoch": 34.0,
"eval_LOCATION_f1": 0.9257142857142857,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9101123595505618,
"eval_LOCATION_recall": 0.9418604651162791,
"eval_ORGANIZATION_f1": 0.9287749287749287,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9421965317919075,
"eval_ORGANIZATION_recall": 0.9157303370786517,
"eval_PERSON_f1": 0.9565217391304348,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.968,
"eval_PERSON_recall": 0.9453125,
"eval_loss": 0.10773035138845444,
"eval_overall_accuracy": 0.9846153846153847,
"eval_overall_f1": 0.9370988446726572,
"eval_overall_precision": 0.9431524547803618,
"eval_overall_recall": 0.9311224489795918,
"eval_runtime": 0.2753,
"eval_samples_per_second": 617.461,
"eval_steps_per_second": 10.896,
"step": 3264
},
{
"epoch": 35.0,
"grad_norm": 0.002467579208314419,
"learning_rate": 3.2500000000000004e-05,
"loss": 0.0023,
"step": 3360
},
{
"epoch": 35.0,
"eval_LOCATION_f1": 0.9213483146067417,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8913043478260869,
"eval_LOCATION_recall": 0.9534883720930233,
"eval_ORGANIZATION_f1": 0.95,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9395604395604396,
"eval_ORGANIZATION_recall": 0.9606741573033708,
"eval_PERSON_f1": 0.968503937007874,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.091239333152771,
"eval_overall_accuracy": 0.9881241565452091,
"eval_overall_f1": 0.9494949494949495,
"eval_overall_precision": 0.94,
"eval_overall_recall": 0.9591836734693877,
"eval_runtime": 0.2753,
"eval_samples_per_second": 617.528,
"eval_steps_per_second": 10.898,
"step": 3360
},
{
"epoch": 36.0,
"grad_norm": 0.01504553947597742,
"learning_rate": 3.2000000000000005e-05,
"loss": 0.0016,
"step": 3456
},
{
"epoch": 36.0,
"eval_LOCATION_f1": 0.9273743016759777,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8924731182795699,
"eval_LOCATION_recall": 0.9651162790697675,
"eval_ORGANIZATION_f1": 0.9545454545454545,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9655172413793104,
"eval_ORGANIZATION_recall": 0.9438202247191011,
"eval_PERSON_f1": 0.9723320158102766,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.08393337577581406,
"eval_overall_accuracy": 0.9892037786774629,
"eval_overall_f1": 0.9540816326530612,
"eval_overall_precision": 0.9540816326530612,
"eval_overall_recall": 0.9540816326530612,
"eval_runtime": 0.275,
"eval_samples_per_second": 618.195,
"eval_steps_per_second": 10.909,
"step": 3456
},
{
"epoch": 37.0,
"grad_norm": 0.003291564527899027,
"learning_rate": 3.15e-05,
"loss": 0.0012,
"step": 3552
},
{
"epoch": 37.0,
"eval_LOCATION_f1": 0.9162011173184358,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8817204301075269,
"eval_LOCATION_recall": 0.9534883720930233,
"eval_ORGANIZATION_f1": 0.9344729344729344,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9479768786127167,
"eval_ORGANIZATION_recall": 0.9213483146067416,
"eval_PERSON_f1": 0.9644268774703557,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.976,
"eval_PERSON_recall": 0.953125,
"eval_loss": 0.10695616900920868,
"eval_overall_accuracy": 0.9856950067476383,
"eval_overall_f1": 0.9399744572158366,
"eval_overall_precision": 0.9411764705882353,
"eval_overall_recall": 0.9387755102040817,
"eval_runtime": 0.2752,
"eval_samples_per_second": 617.842,
"eval_steps_per_second": 10.903,
"step": 3552
},
{
"epoch": 38.0,
"grad_norm": 0.0029779509641230106,
"learning_rate": 3.1e-05,
"loss": 0.0009,
"step": 3648
},
{
"epoch": 38.0,
"eval_LOCATION_f1": 0.9392265193370165,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8947368421052632,
"eval_LOCATION_recall": 0.9883720930232558,
"eval_ORGANIZATION_f1": 0.9431818181818182,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9540229885057471,
"eval_ORGANIZATION_recall": 0.9325842696629213,
"eval_PERSON_f1": 0.9723320158102766,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.08564214408397675,
"eval_overall_accuracy": 0.9883940620782726,
"eval_overall_f1": 0.9516539440203563,
"eval_overall_precision": 0.949238578680203,
"eval_overall_recall": 0.9540816326530612,
"eval_runtime": 0.2745,
"eval_samples_per_second": 619.218,
"eval_steps_per_second": 10.927,
"step": 3648
},
{
"epoch": 39.0,
"grad_norm": 0.0008915510843507946,
"learning_rate": 3.05e-05,
"loss": 0.0006,
"step": 3744
},
{
"epoch": 39.0,
"eval_LOCATION_f1": 0.9333333333333332,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8936170212765957,
"eval_LOCATION_recall": 0.9767441860465116,
"eval_ORGANIZATION_f1": 0.9375000000000001,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9482758620689655,
"eval_ORGANIZATION_recall": 0.9269662921348315,
"eval_PERSON_f1": 0.9647058823529412,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.968503937007874,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.09636305272579193,
"eval_overall_accuracy": 0.9862348178137652,
"eval_overall_f1": 0.9453621346886911,
"eval_overall_precision": 0.9417721518987342,
"eval_overall_recall": 0.9489795918367347,
"eval_runtime": 0.2929,
"eval_samples_per_second": 580.423,
"eval_steps_per_second": 10.243,
"step": 3744
},
{
"epoch": 40.0,
"grad_norm": 0.0008046123548410833,
"learning_rate": 3e-05,
"loss": 0.0011,
"step": 3840
},
{
"epoch": 40.0,
"eval_LOCATION_f1": 0.9265536723163842,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9010989010989011,
"eval_LOCATION_recall": 0.9534883720930233,
"eval_ORGANIZATION_f1": 0.9464788732394366,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9491525423728814,
"eval_ORGANIZATION_recall": 0.9438202247191011,
"eval_PERSON_f1": 0.968503937007874,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.09920275211334229,
"eval_overall_accuracy": 0.9870445344129555,
"eval_overall_f1": 0.9491094147582698,
"eval_overall_precision": 0.9467005076142132,
"eval_overall_recall": 0.951530612244898,
"eval_runtime": 0.2781,
"eval_samples_per_second": 611.234,
"eval_steps_per_second": 10.786,
"step": 3840
},
{
"epoch": 41.0,
"grad_norm": 0.000929164991248399,
"learning_rate": 2.95e-05,
"loss": 0.0009,
"step": 3936
},
{
"epoch": 41.0,
"eval_LOCATION_f1": 0.9385474860335195,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9032258064516129,
"eval_LOCATION_recall": 0.9767441860465116,
"eval_ORGANIZATION_f1": 0.9435028248587571,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9488636363636364,
"eval_ORGANIZATION_recall": 0.9382022471910112,
"eval_PERSON_f1": 0.9644268774703557,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.976,
"eval_PERSON_recall": 0.953125,
"eval_loss": 0.1071603074669838,
"eval_overall_accuracy": 0.9859649122807017,
"eval_overall_f1": 0.9491094147582698,
"eval_overall_precision": 0.9467005076142132,
"eval_overall_recall": 0.951530612244898,
"eval_runtime": 0.2831,
"eval_samples_per_second": 600.488,
"eval_steps_per_second": 10.597,
"step": 3936
},
{
"epoch": 42.0,
"grad_norm": 0.00026114823413081467,
"learning_rate": 2.9e-05,
"loss": 0.0007,
"step": 4032
},
{
"epoch": 42.0,
"eval_LOCATION_f1": 0.9333333333333332,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8936170212765957,
"eval_LOCATION_recall": 0.9767441860465116,
"eval_ORGANIZATION_f1": 0.9458689458689458,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9595375722543352,
"eval_ORGANIZATION_recall": 0.9325842696629213,
"eval_PERSON_f1": 0.9682539682539683,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9838709677419355,
"eval_PERSON_recall": 0.953125,
"eval_loss": 0.1193474680185318,
"eval_overall_accuracy": 0.9865047233468286,
"eval_overall_f1": 0.9501915708812262,
"eval_overall_precision": 0.9514066496163683,
"eval_overall_recall": 0.9489795918367347,
"eval_runtime": 0.2756,
"eval_samples_per_second": 616.891,
"eval_steps_per_second": 10.886,
"step": 4032
},
{
"epoch": 43.0,
"grad_norm": 0.0013629100285470486,
"learning_rate": 2.8499999999999998e-05,
"loss": 0.0014,
"step": 4128
},
{
"epoch": 43.0,
"eval_LOCATION_f1": 0.9385474860335195,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9032258064516129,
"eval_LOCATION_recall": 0.9767441860465116,
"eval_ORGANIZATION_f1": 0.9435028248587571,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9488636363636364,
"eval_ORGANIZATION_recall": 0.9382022471910112,
"eval_PERSON_f1": 0.9606299212598425,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9682539682539683,
"eval_PERSON_recall": 0.953125,
"eval_loss": 0.11290433257818222,
"eval_overall_accuracy": 0.9867746288798921,
"eval_overall_f1": 0.9479034307496824,
"eval_overall_precision": 0.9443037974683545,
"eval_overall_recall": 0.951530612244898,
"eval_runtime": 0.2779,
"eval_samples_per_second": 611.663,
"eval_steps_per_second": 10.794,
"step": 4128
},
{
"epoch": 44.0,
"grad_norm": 0.0006920368759892881,
"learning_rate": 2.8000000000000003e-05,
"loss": 0.0007,
"step": 4224
},
{
"epoch": 44.0,
"eval_LOCATION_f1": 0.9438202247191011,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9130434782608695,
"eval_LOCATION_recall": 0.9767441860465116,
"eval_ORGANIZATION_f1": 0.9464788732394366,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9491525423728814,
"eval_ORGANIZATION_recall": 0.9438202247191011,
"eval_PERSON_f1": 0.9609375,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9609375,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.12893009185791016,
"eval_overall_accuracy": 0.9848852901484481,
"eval_overall_f1": 0.9505703422053231,
"eval_overall_precision": 0.9445843828715366,
"eval_overall_recall": 0.9566326530612245,
"eval_runtime": 0.2752,
"eval_samples_per_second": 617.77,
"eval_steps_per_second": 10.902,
"step": 4224
},
{
"epoch": 45.0,
"grad_norm": 0.0005794434691779315,
"learning_rate": 2.7500000000000004e-05,
"loss": 0.0006,
"step": 4320
},
{
"epoch": 45.0,
"eval_LOCATION_f1": 0.9281767955801105,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8842105263157894,
"eval_LOCATION_recall": 0.9767441860465116,
"eval_ORGANIZATION_f1": 0.9470752089136492,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9392265193370166,
"eval_ORGANIZATION_recall": 0.9550561797752809,
"eval_PERSON_f1": 0.96875,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.96875,
"eval_PERSON_recall": 0.96875,
"eval_loss": 0.11674495786428452,
"eval_overall_accuracy": 0.9867746288798921,
"eval_overall_f1": 0.949748743718593,
"eval_overall_precision": 0.9356435643564357,
"eval_overall_recall": 0.9642857142857143,
"eval_runtime": 0.2809,
"eval_samples_per_second": 605.289,
"eval_steps_per_second": 10.682,
"step": 4320
},
{
"epoch": 46.0,
"grad_norm": 0.004815615713596344,
"learning_rate": 2.7000000000000002e-05,
"loss": 0.0014,
"step": 4416
},
{
"epoch": 46.0,
"eval_LOCATION_f1": 0.9120879120879122,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8645833333333334,
"eval_LOCATION_recall": 0.9651162790697675,
"eval_ORGANIZATION_f1": 0.9461756373937678,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9542857142857143,
"eval_ORGANIZATION_recall": 0.9382022471910112,
"eval_PERSON_f1": 0.9682539682539683,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9838709677419355,
"eval_PERSON_recall": 0.953125,
"eval_loss": 0.11675991863012314,
"eval_overall_accuracy": 0.9873144399460189,
"eval_overall_f1": 0.9453621346886911,
"eval_overall_precision": 0.9417721518987342,
"eval_overall_recall": 0.9489795918367347,
"eval_runtime": 0.2778,
"eval_samples_per_second": 612.001,
"eval_steps_per_second": 10.8,
"step": 4416
},
{
"epoch": 47.0,
"grad_norm": 0.0005003380356356502,
"learning_rate": 2.6500000000000004e-05,
"loss": 0.0022,
"step": 4512
},
{
"epoch": 47.0,
"eval_LOCATION_f1": 0.9171270718232045,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8736842105263158,
"eval_LOCATION_recall": 0.9651162790697675,
"eval_ORGANIZATION_f1": 0.9421965317919075,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9702380952380952,
"eval_ORGANIZATION_recall": 0.9157303370786517,
"eval_PERSON_f1": 0.9763779527559054,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9841269841269841,
"eval_PERSON_recall": 0.96875,
"eval_loss": 0.10903999209403992,
"eval_overall_accuracy": 0.9867746288798921,
"eval_overall_f1": 0.9475032010243277,
"eval_overall_precision": 0.9511568123393316,
"eval_overall_recall": 0.9438775510204082,
"eval_runtime": 0.2786,
"eval_samples_per_second": 610.178,
"eval_steps_per_second": 10.768,
"step": 4512
},
{
"epoch": 48.0,
"grad_norm": 34.43635559082031,
"learning_rate": 2.6000000000000002e-05,
"loss": 0.0033,
"step": 4608
},
{
"epoch": 48.0,
"eval_LOCATION_f1": 0.9431818181818181,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9222222222222223,
"eval_LOCATION_recall": 0.9651162790697675,
"eval_ORGANIZATION_f1": 0.9385474860335196,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9333333333333333,
"eval_ORGANIZATION_recall": 0.9438202247191011,
"eval_PERSON_f1": 0.9603174603174603,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9758064516129032,
"eval_PERSON_recall": 0.9453125,
"eval_loss": 0.08986053615808487,
"eval_overall_accuracy": 0.9889338731443995,
"eval_overall_f1": 0.9465648854961832,
"eval_overall_precision": 0.9441624365482234,
"eval_overall_recall": 0.9489795918367347,
"eval_runtime": 0.2751,
"eval_samples_per_second": 617.952,
"eval_steps_per_second": 10.905,
"step": 4608
},
{
"epoch": 49.0,
"grad_norm": 0.011161034926772118,
"learning_rate": 2.5500000000000003e-05,
"loss": 0.001,
"step": 4704
},
{
"epoch": 49.0,
"eval_LOCATION_f1": 0.9222222222222223,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8829787234042553,
"eval_LOCATION_recall": 0.9651162790697675,
"eval_ORGANIZATION_f1": 0.9452449567723343,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9704142011834319,
"eval_ORGANIZATION_recall": 0.9213483146067416,
"eval_PERSON_f1": 0.9682539682539683,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9838709677419355,
"eval_PERSON_recall": 0.953125,
"eval_loss": 0.11230127513408661,
"eval_overall_accuracy": 0.9870445344129555,
"eval_overall_f1": 0.9473684210526317,
"eval_overall_precision": 0.9534883720930233,
"eval_overall_recall": 0.9413265306122449,
"eval_runtime": 0.2762,
"eval_samples_per_second": 615.553,
"eval_steps_per_second": 10.863,
"step": 4704
},
{
"epoch": 50.0,
"grad_norm": 0.007018107455223799,
"learning_rate": 2.5e-05,
"loss": 0.0007,
"step": 4800
},
{
"epoch": 50.0,
"eval_LOCATION_f1": 0.9265536723163842,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9010989010989011,
"eval_LOCATION_recall": 0.9534883720930233,
"eval_ORGANIZATION_f1": 0.9405099150141643,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9485714285714286,
"eval_ORGANIZATION_recall": 0.9325842696629213,
"eval_PERSON_f1": 0.9723320158102766,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.09373489022254944,
"eval_overall_accuracy": 0.988663967611336,
"eval_overall_f1": 0.9476372924648786,
"eval_overall_precision": 0.948849104859335,
"eval_overall_recall": 0.9464285714285714,
"eval_runtime": 0.2788,
"eval_samples_per_second": 609.778,
"eval_steps_per_second": 10.761,
"step": 4800
},
{
"epoch": 51.0,
"grad_norm": 0.0013735599350184202,
"learning_rate": 2.45e-05,
"loss": 0.0011,
"step": 4896
},
{
"epoch": 51.0,
"eval_LOCATION_f1": 0.9385474860335195,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9032258064516129,
"eval_LOCATION_recall": 0.9767441860465116,
"eval_ORGANIZATION_f1": 0.9329608938547486,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9277777777777778,
"eval_ORGANIZATION_recall": 0.9382022471910112,
"eval_PERSON_f1": 0.9763779527559054,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9841269841269841,
"eval_PERSON_recall": 0.96875,
"eval_loss": 0.10816428065299988,
"eval_overall_accuracy": 0.9865047233468286,
"eval_overall_f1": 0.9481668773704172,
"eval_overall_precision": 0.9398496240601504,
"eval_overall_recall": 0.9566326530612245,
"eval_runtime": 0.2867,
"eval_samples_per_second": 593.04,
"eval_steps_per_second": 10.465,
"step": 4896
},
{
"epoch": 52.0,
"grad_norm": 2.9921071529388428,
"learning_rate": 2.4e-05,
"loss": 0.0015,
"step": 4992
},
{
"epoch": 52.0,
"eval_LOCATION_f1": 0.9265536723163842,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9010989010989011,
"eval_LOCATION_recall": 0.9534883720930233,
"eval_ORGANIZATION_f1": 0.9394812680115273,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9644970414201184,
"eval_ORGANIZATION_recall": 0.9157303370786517,
"eval_PERSON_f1": 0.968503937007874,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.11124741286039352,
"eval_overall_accuracy": 0.9878542510121457,
"eval_overall_f1": 0.9460154241645244,
"eval_overall_precision": 0.9533678756476683,
"eval_overall_recall": 0.9387755102040817,
"eval_runtime": 0.2821,
"eval_samples_per_second": 602.698,
"eval_steps_per_second": 10.636,
"step": 4992
},
{
"epoch": 53.0,
"grad_norm": 0.00152446492575109,
"learning_rate": 2.35e-05,
"loss": 0.0009,
"step": 5088
},
{
"epoch": 53.0,
"eval_LOCATION_f1": 0.9273743016759777,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8924731182795699,
"eval_LOCATION_recall": 0.9651162790697675,
"eval_ORGANIZATION_f1": 0.9444444444444444,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9340659340659341,
"eval_ORGANIZATION_recall": 0.9550561797752809,
"eval_PERSON_f1": 0.9723320158102766,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.10318152606487274,
"eval_overall_accuracy": 0.9881241565452091,
"eval_overall_f1": 0.9494949494949495,
"eval_overall_precision": 0.94,
"eval_overall_recall": 0.9591836734693877,
"eval_runtime": 0.2786,
"eval_samples_per_second": 610.124,
"eval_steps_per_second": 10.767,
"step": 5088
},
{
"epoch": 54.0,
"grad_norm": 1.4761940240859985,
"learning_rate": 2.3000000000000003e-05,
"loss": 0.0033,
"step": 5184
},
{
"epoch": 54.0,
"eval_LOCATION_f1": 0.9273743016759777,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8924731182795699,
"eval_LOCATION_recall": 0.9651162790697675,
"eval_ORGANIZATION_f1": 0.942857142857143,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9593023255813954,
"eval_ORGANIZATION_recall": 0.9269662921348315,
"eval_PERSON_f1": 0.9723320158102766,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.11812768876552582,
"eval_overall_accuracy": 0.9870445344129555,
"eval_overall_f1": 0.9488491048593349,
"eval_overall_precision": 0.9512820512820512,
"eval_overall_recall": 0.9464285714285714,
"eval_runtime": 0.2746,
"eval_samples_per_second": 619.118,
"eval_steps_per_second": 10.926,
"step": 5184
},
{
"epoch": 55.0,
"grad_norm": 0.4280019700527191,
"learning_rate": 2.25e-05,
"loss": 0.0008,
"step": 5280
},
{
"epoch": 55.0,
"eval_LOCATION_f1": 0.9325842696629213,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9021739130434783,
"eval_LOCATION_recall": 0.9651162790697675,
"eval_ORGANIZATION_f1": 0.9485714285714285,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9651162790697675,
"eval_ORGANIZATION_recall": 0.9325842696629213,
"eval_PERSON_f1": 0.96875,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.96875,
"eval_PERSON_recall": 0.96875,
"eval_loss": 0.12067463994026184,
"eval_overall_accuracy": 0.9865047233468286,
"eval_overall_f1": 0.951530612244898,
"eval_overall_precision": 0.951530612244898,
"eval_overall_recall": 0.951530612244898,
"eval_runtime": 0.2807,
"eval_samples_per_second": 605.66,
"eval_steps_per_second": 10.688,
"step": 5280
},
{
"epoch": 56.0,
"grad_norm": 0.0007758406572975218,
"learning_rate": 2.2000000000000003e-05,
"loss": 0.0009,
"step": 5376
},
{
"epoch": 56.0,
"eval_LOCATION_f1": 0.9060773480662985,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8631578947368421,
"eval_LOCATION_recall": 0.9534883720930233,
"eval_ORGANIZATION_f1": 0.9421965317919075,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9702380952380952,
"eval_ORGANIZATION_recall": 0.9157303370786517,
"eval_PERSON_f1": 0.9723320158102766,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.13788987696170807,
"eval_overall_accuracy": 0.9856950067476383,
"eval_overall_f1": 0.9435897435897437,
"eval_overall_precision": 0.9484536082474226,
"eval_overall_recall": 0.9387755102040817,
"eval_runtime": 0.2774,
"eval_samples_per_second": 612.89,
"eval_steps_per_second": 10.816,
"step": 5376
},
{
"epoch": 57.0,
"grad_norm": 0.0007105050608515739,
"learning_rate": 2.15e-05,
"loss": 0.001,
"step": 5472
},
{
"epoch": 57.0,
"eval_LOCATION_f1": 0.9273743016759777,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8924731182795699,
"eval_LOCATION_recall": 0.9651162790697675,
"eval_ORGANIZATION_f1": 0.9512893982808023,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9707602339181286,
"eval_ORGANIZATION_recall": 0.9325842696629213,
"eval_PERSON_f1": 0.9723320158102766,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.11200056970119476,
"eval_overall_accuracy": 0.9881241565452091,
"eval_overall_f1": 0.9526248399487837,
"eval_overall_precision": 0.9562982005141388,
"eval_overall_recall": 0.9489795918367347,
"eval_runtime": 0.2759,
"eval_samples_per_second": 616.121,
"eval_steps_per_second": 10.873,
"step": 5472
},
{
"epoch": 58.0,
"grad_norm": 0.0010843342170119286,
"learning_rate": 2.1e-05,
"loss": 0.0013,
"step": 5568
},
{
"epoch": 58.0,
"eval_LOCATION_f1": 0.9222222222222223,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8829787234042553,
"eval_LOCATION_recall": 0.9651162790697675,
"eval_ORGANIZATION_f1": 0.9375000000000001,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9482758620689655,
"eval_ORGANIZATION_recall": 0.9269662921348315,
"eval_PERSON_f1": 0.9763779527559054,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9841269841269841,
"eval_PERSON_recall": 0.96875,
"eval_loss": 0.1086345985531807,
"eval_overall_accuracy": 0.9862348178137652,
"eval_overall_f1": 0.9465648854961832,
"eval_overall_precision": 0.9441624365482234,
"eval_overall_recall": 0.9489795918367347,
"eval_runtime": 0.2775,
"eval_samples_per_second": 612.711,
"eval_steps_per_second": 10.813,
"step": 5568
},
{
"epoch": 59.0,
"grad_norm": 0.0003581370983738452,
"learning_rate": 2.05e-05,
"loss": 0.0005,
"step": 5664
},
{
"epoch": 59.0,
"eval_LOCATION_f1": 0.9180327868852459,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.865979381443299,
"eval_LOCATION_recall": 0.9767441860465116,
"eval_ORGANIZATION_f1": 0.9333333333333333,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9640718562874252,
"eval_ORGANIZATION_recall": 0.9044943820224719,
"eval_PERSON_f1": 0.9612403100775193,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9538461538461539,
"eval_PERSON_recall": 0.96875,
"eval_loss": 0.12184558063745499,
"eval_overall_accuracy": 0.9854251012145749,
"eval_overall_f1": 0.9389312977099236,
"eval_overall_precision": 0.9365482233502538,
"eval_overall_recall": 0.9413265306122449,
"eval_runtime": 0.2757,
"eval_samples_per_second": 616.564,
"eval_steps_per_second": 10.881,
"step": 5664
},
{
"epoch": 60.0,
"grad_norm": 0.0011471403995528817,
"learning_rate": 2e-05,
"loss": 0.0007,
"step": 5760
},
{
"epoch": 60.0,
"eval_LOCATION_f1": 0.9213483146067417,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8913043478260869,
"eval_LOCATION_recall": 0.9534883720930233,
"eval_ORGANIZATION_f1": 0.9392265193370166,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9239130434782609,
"eval_ORGANIZATION_recall": 0.9550561797752809,
"eval_PERSON_f1": 0.9682539682539683,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9838709677419355,
"eval_PERSON_recall": 0.953125,
"eval_loss": 0.09577618539333344,
"eval_overall_accuracy": 0.9881241565452091,
"eval_overall_f1": 0.9444444444444445,
"eval_overall_precision": 0.935,
"eval_overall_recall": 0.9540816326530612,
"eval_runtime": 0.2772,
"eval_samples_per_second": 613.208,
"eval_steps_per_second": 10.821,
"step": 5760
},
{
"epoch": 61.0,
"grad_norm": 0.1609802097082138,
"learning_rate": 1.9500000000000003e-05,
"loss": 0.0002,
"step": 5856
},
{
"epoch": 61.0,
"eval_LOCATION_f1": 0.9162011173184358,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8817204301075269,
"eval_LOCATION_recall": 0.9534883720930233,
"eval_ORGANIZATION_f1": 0.942857142857143,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9593023255813954,
"eval_ORGANIZATION_recall": 0.9269662921348315,
"eval_PERSON_f1": 0.9644268774703557,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.976,
"eval_PERSON_recall": 0.953125,
"eval_loss": 0.10759799927473068,
"eval_overall_accuracy": 0.9878542510121457,
"eval_overall_f1": 0.9437340153452686,
"eval_overall_precision": 0.9461538461538461,
"eval_overall_recall": 0.9413265306122449,
"eval_runtime": 0.278,
"eval_samples_per_second": 611.566,
"eval_steps_per_second": 10.792,
"step": 5856
},
{
"epoch": 62.0,
"grad_norm": 12.48816204071045,
"learning_rate": 1.9e-05,
"loss": 0.0023,
"step": 5952
},
{
"epoch": 62.0,
"eval_LOCATION_f1": 0.9497206703910613,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9139784946236559,
"eval_LOCATION_recall": 0.9883720930232558,
"eval_ORGANIZATION_f1": 0.949438202247191,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.949438202247191,
"eval_ORGANIZATION_recall": 0.949438202247191,
"eval_PERSON_f1": 0.9725490196078432,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9763779527559056,
"eval_PERSON_recall": 0.96875,
"eval_loss": 0.08770798146724701,
"eval_overall_accuracy": 0.9894736842105263,
"eval_overall_f1": 0.9569620253164557,
"eval_overall_precision": 0.949748743718593,
"eval_overall_recall": 0.9642857142857143,
"eval_runtime": 0.2765,
"eval_samples_per_second": 614.816,
"eval_steps_per_second": 10.85,
"step": 5952
},
{
"epoch": 63.0,
"grad_norm": 0.0009683805401436985,
"learning_rate": 1.85e-05,
"loss": 0.0013,
"step": 6048
},
{
"epoch": 63.0,
"eval_LOCATION_f1": 0.9385474860335195,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9032258064516129,
"eval_LOCATION_recall": 0.9767441860465116,
"eval_ORGANIZATION_f1": 0.9526462395543176,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9447513812154696,
"eval_ORGANIZATION_recall": 0.9606741573033708,
"eval_PERSON_f1": 0.9763779527559054,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9841269841269841,
"eval_PERSON_recall": 0.96875,
"eval_loss": 0.08852725476026535,
"eval_overall_accuracy": 0.9894736842105263,
"eval_overall_f1": 0.9570707070707071,
"eval_overall_precision": 0.9475,
"eval_overall_recall": 0.9668367346938775,
"eval_runtime": 0.2742,
"eval_samples_per_second": 620.036,
"eval_steps_per_second": 10.942,
"step": 6048
},
{
"epoch": 64.0,
"grad_norm": 0.0008049598545767367,
"learning_rate": 1.8e-05,
"loss": 0.0009,
"step": 6144
},
{
"epoch": 64.0,
"eval_LOCATION_f1": 0.9385474860335195,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9032258064516129,
"eval_LOCATION_recall": 0.9767441860465116,
"eval_ORGANIZATION_f1": 0.9577464788732394,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.96045197740113,
"eval_ORGANIZATION_recall": 0.9550561797752809,
"eval_PERSON_f1": 0.9763779527559054,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9841269841269841,
"eval_PERSON_recall": 0.96875,
"eval_loss": 0.08247757703065872,
"eval_overall_accuracy": 0.9900134952766532,
"eval_overall_f1": 0.9593908629441624,
"eval_overall_precision": 0.9545454545454546,
"eval_overall_recall": 0.9642857142857143,
"eval_runtime": 0.2795,
"eval_samples_per_second": 608.296,
"eval_steps_per_second": 10.735,
"step": 6144
},
{
"epoch": 65.0,
"grad_norm": 0.0007374592823907733,
"learning_rate": 1.75e-05,
"loss": 0.0003,
"step": 6240
},
{
"epoch": 65.0,
"eval_LOCATION_f1": 0.9431818181818181,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9222222222222223,
"eval_LOCATION_recall": 0.9651162790697675,
"eval_ORGANIZATION_f1": 0.951841359773371,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.96,
"eval_ORGANIZATION_recall": 0.9438202247191011,
"eval_PERSON_f1": 0.9763779527559054,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9841269841269841,
"eval_PERSON_recall": 0.96875,
"eval_loss": 0.0837675929069519,
"eval_overall_accuracy": 0.9883940620782726,
"eval_overall_f1": 0.9578544061302682,
"eval_overall_precision": 0.959079283887468,
"eval_overall_recall": 0.9566326530612245,
"eval_runtime": 0.2762,
"eval_samples_per_second": 615.537,
"eval_steps_per_second": 10.862,
"step": 6240
},
{
"epoch": 66.0,
"grad_norm": 0.0010951802833005786,
"learning_rate": 1.7000000000000003e-05,
"loss": 0.0006,
"step": 6336
},
{
"epoch": 66.0,
"eval_LOCATION_f1": 0.9385474860335195,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9032258064516129,
"eval_LOCATION_recall": 0.9767441860465116,
"eval_ORGANIZATION_f1": 0.951841359773371,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.96,
"eval_ORGANIZATION_recall": 0.9438202247191011,
"eval_PERSON_f1": 0.9763779527559054,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9841269841269841,
"eval_PERSON_recall": 0.96875,
"eval_loss": 0.09569484740495682,
"eval_overall_accuracy": 0.988663967611336,
"eval_overall_f1": 0.9567430025445292,
"eval_overall_precision": 0.9543147208121827,
"eval_overall_recall": 0.9591836734693877,
"eval_runtime": 0.2788,
"eval_samples_per_second": 609.729,
"eval_steps_per_second": 10.76,
"step": 6336
},
{
"epoch": 67.0,
"grad_norm": 0.005136103834956884,
"learning_rate": 1.65e-05,
"loss": 0.0004,
"step": 6432
},
{
"epoch": 67.0,
"eval_LOCATION_f1": 0.9273743016759777,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8924731182795699,
"eval_LOCATION_recall": 0.9651162790697675,
"eval_ORGANIZATION_f1": 0.9455587392550143,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9649122807017544,
"eval_ORGANIZATION_recall": 0.9269662921348315,
"eval_PERSON_f1": 0.9763779527559054,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9841269841269841,
"eval_PERSON_recall": 0.96875,
"eval_loss": 0.11290714144706726,
"eval_overall_accuracy": 0.9878542510121457,
"eval_overall_f1": 0.9514066496163682,
"eval_overall_precision": 0.9538461538461539,
"eval_overall_recall": 0.9489795918367347,
"eval_runtime": 0.2755,
"eval_samples_per_second": 617.088,
"eval_steps_per_second": 10.89,
"step": 6432
},
{
"epoch": 68.0,
"grad_norm": 0.0017895177006721497,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.0003,
"step": 6528
},
{
"epoch": 68.0,
"eval_LOCATION_f1": 0.9333333333333332,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8936170212765957,
"eval_LOCATION_recall": 0.9767441860465116,
"eval_ORGANIZATION_f1": 0.9401709401709402,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.953757225433526,
"eval_ORGANIZATION_recall": 0.9269662921348315,
"eval_PERSON_f1": 0.9725490196078432,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9763779527559056,
"eval_PERSON_recall": 0.96875,
"eval_loss": 0.1161164864897728,
"eval_overall_accuracy": 0.9870445344129555,
"eval_overall_f1": 0.9491094147582698,
"eval_overall_precision": 0.9467005076142132,
"eval_overall_recall": 0.951530612244898,
"eval_runtime": 0.2777,
"eval_samples_per_second": 612.091,
"eval_steps_per_second": 10.802,
"step": 6528
},
{
"epoch": 69.0,
"grad_norm": 0.00024917226983234286,
"learning_rate": 1.55e-05,
"loss": 0.0002,
"step": 6624
},
{
"epoch": 69.0,
"eval_LOCATION_f1": 0.9333333333333332,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8936170212765957,
"eval_LOCATION_recall": 0.9767441860465116,
"eval_ORGANIZATION_f1": 0.9394812680115273,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9644970414201184,
"eval_ORGANIZATION_recall": 0.9157303370786517,
"eval_PERSON_f1": 0.96875,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.96875,
"eval_PERSON_recall": 0.96875,
"eval_loss": 0.12338589131832123,
"eval_overall_accuracy": 0.9862348178137652,
"eval_overall_f1": 0.9476372924648786,
"eval_overall_precision": 0.948849104859335,
"eval_overall_recall": 0.9464285714285714,
"eval_runtime": 0.2765,
"eval_samples_per_second": 614.72,
"eval_steps_per_second": 10.848,
"step": 6624
},
{
"epoch": 70.0,
"grad_norm": 0.002585263457149267,
"learning_rate": 1.5e-05,
"loss": 0.0006,
"step": 6720
},
{
"epoch": 70.0,
"eval_LOCATION_f1": 0.9491525423728814,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9230769230769231,
"eval_LOCATION_recall": 0.9767441860465116,
"eval_ORGANIZATION_f1": 0.9485714285714285,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9651162790697675,
"eval_ORGANIZATION_recall": 0.9325842696629213,
"eval_PERSON_f1": 0.9763779527559054,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9841269841269841,
"eval_PERSON_recall": 0.96875,
"eval_loss": 0.11618266254663467,
"eval_overall_accuracy": 0.9883940620782726,
"eval_overall_f1": 0.9577464788732394,
"eval_overall_precision": 0.961439588688946,
"eval_overall_recall": 0.9540816326530612,
"eval_runtime": 0.2791,
"eval_samples_per_second": 609.103,
"eval_steps_per_second": 10.749,
"step": 6720
},
{
"epoch": 71.0,
"grad_norm": 0.11092416942119598,
"learning_rate": 1.45e-05,
"loss": 0.0002,
"step": 6816
},
{
"epoch": 71.0,
"eval_LOCATION_f1": 0.9545454545454545,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9333333333333333,
"eval_LOCATION_recall": 0.9767441860465116,
"eval_ORGANIZATION_f1": 0.951841359773371,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.96,
"eval_ORGANIZATION_recall": 0.9438202247191011,
"eval_PERSON_f1": 0.9763779527559054,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9841269841269841,
"eval_PERSON_recall": 0.96875,
"eval_loss": 0.11071506142616272,
"eval_overall_accuracy": 0.9878542510121457,
"eval_overall_f1": 0.9604086845466155,
"eval_overall_precision": 0.9616368286445013,
"eval_overall_recall": 0.9591836734693877,
"eval_runtime": 0.2755,
"eval_samples_per_second": 617.089,
"eval_steps_per_second": 10.89,
"step": 6816
},
{
"epoch": 72.0,
"grad_norm": 0.000154004359501414,
"learning_rate": 1.4000000000000001e-05,
"loss": 0.0002,
"step": 6912
},
{
"epoch": 72.0,
"eval_LOCATION_f1": 0.9491525423728814,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9230769230769231,
"eval_LOCATION_recall": 0.9767441860465116,
"eval_ORGANIZATION_f1": 0.9488636363636365,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9597701149425287,
"eval_ORGANIZATION_recall": 0.9382022471910112,
"eval_PERSON_f1": 0.9763779527559054,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9841269841269841,
"eval_PERSON_recall": 0.96875,
"eval_loss": 0.1120668277144432,
"eval_overall_accuracy": 0.9878542510121457,
"eval_overall_f1": 0.9578544061302682,
"eval_overall_precision": 0.959079283887468,
"eval_overall_recall": 0.9566326530612245,
"eval_runtime": 0.2766,
"eval_samples_per_second": 614.651,
"eval_steps_per_second": 10.847,
"step": 6912
},
{
"epoch": 73.0,
"grad_norm": 0.00016238982789218426,
"learning_rate": 1.3500000000000001e-05,
"loss": 0.0002,
"step": 7008
},
{
"epoch": 73.0,
"eval_LOCATION_f1": 0.9491525423728814,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9230769230769231,
"eval_LOCATION_recall": 0.9767441860465116,
"eval_ORGANIZATION_f1": 0.9461756373937678,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9542857142857143,
"eval_ORGANIZATION_recall": 0.9382022471910112,
"eval_PERSON_f1": 0.9763779527559054,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9841269841269841,
"eval_PERSON_recall": 0.96875,
"eval_loss": 0.11216197162866592,
"eval_overall_accuracy": 0.9881241565452091,
"eval_overall_f1": 0.9566326530612245,
"eval_overall_precision": 0.9566326530612245,
"eval_overall_recall": 0.9566326530612245,
"eval_runtime": 0.285,
"eval_samples_per_second": 596.506,
"eval_steps_per_second": 10.527,
"step": 7008
},
{
"epoch": 74.0,
"grad_norm": 0.00033696964965201914,
"learning_rate": 1.3000000000000001e-05,
"loss": 0.0005,
"step": 7104
},
{
"epoch": 74.0,
"eval_LOCATION_f1": 0.9491525423728814,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9230769230769231,
"eval_LOCATION_recall": 0.9767441860465116,
"eval_ORGANIZATION_f1": 0.9461756373937678,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9542857142857143,
"eval_ORGANIZATION_recall": 0.9382022471910112,
"eval_PERSON_f1": 0.9763779527559054,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9841269841269841,
"eval_PERSON_recall": 0.96875,
"eval_loss": 0.1126818060874939,
"eval_overall_accuracy": 0.9873144399460189,
"eval_overall_f1": 0.9566326530612245,
"eval_overall_precision": 0.9566326530612245,
"eval_overall_recall": 0.9566326530612245,
"eval_runtime": 0.2829,
"eval_samples_per_second": 600.942,
"eval_steps_per_second": 10.605,
"step": 7104
},
{
"epoch": 75.0,
"grad_norm": 0.0003657756024040282,
"learning_rate": 1.25e-05,
"loss": 0.0004,
"step": 7200
},
{
"epoch": 75.0,
"eval_LOCATION_f1": 0.9438202247191011,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9130434782608695,
"eval_LOCATION_recall": 0.9767441860465116,
"eval_ORGANIZATION_f1": 0.9431818181818182,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9540229885057471,
"eval_ORGANIZATION_recall": 0.9325842696629213,
"eval_PERSON_f1": 0.96875,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.96875,
"eval_PERSON_recall": 0.96875,
"eval_loss": 0.11697709560394287,
"eval_overall_accuracy": 0.9862348178137652,
"eval_overall_f1": 0.9516539440203563,
"eval_overall_precision": 0.949238578680203,
"eval_overall_recall": 0.9540816326530612,
"eval_runtime": 0.2786,
"eval_samples_per_second": 610.145,
"eval_steps_per_second": 10.767,
"step": 7200
},
{
"epoch": 76.0,
"grad_norm": 0.00010258240217808634,
"learning_rate": 1.2e-05,
"loss": 0.0003,
"step": 7296
},
{
"epoch": 76.0,
"eval_LOCATION_f1": 0.9545454545454545,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9333333333333333,
"eval_LOCATION_recall": 0.9767441860465116,
"eval_ORGANIZATION_f1": 0.9497206703910613,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9444444444444444,
"eval_ORGANIZATION_recall": 0.9550561797752809,
"eval_PERSON_f1": 0.968503937007874,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.10894415527582169,
"eval_overall_accuracy": 0.9892037786774629,
"eval_overall_f1": 0.9568527918781725,
"eval_overall_precision": 0.952020202020202,
"eval_overall_recall": 0.9617346938775511,
"eval_runtime": 0.2762,
"eval_samples_per_second": 615.53,
"eval_steps_per_second": 10.862,
"step": 7296
},
{
"epoch": 77.0,
"grad_norm": 0.0007157445070333779,
"learning_rate": 1.1500000000000002e-05,
"loss": 0.001,
"step": 7392
},
{
"epoch": 77.0,
"eval_LOCATION_f1": 0.9491525423728814,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9230769230769231,
"eval_LOCATION_recall": 0.9767441860465116,
"eval_ORGANIZATION_f1": 0.9582172701949861,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9502762430939227,
"eval_ORGANIZATION_recall": 0.9662921348314607,
"eval_PERSON_f1": 0.9725490196078432,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9763779527559056,
"eval_PERSON_recall": 0.96875,
"eval_loss": 0.10824745148420334,
"eval_overall_accuracy": 0.9894736842105263,
"eval_overall_f1": 0.9608091024020227,
"eval_overall_precision": 0.9523809523809523,
"eval_overall_recall": 0.9693877551020408,
"eval_runtime": 0.2772,
"eval_samples_per_second": 613.187,
"eval_steps_per_second": 10.821,
"step": 7392
},
{
"epoch": 78.0,
"grad_norm": 0.013988692313432693,
"learning_rate": 1.1000000000000001e-05,
"loss": 0.0012,
"step": 7488
},
{
"epoch": 78.0,
"eval_LOCATION_f1": 0.9325842696629213,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9021739130434783,
"eval_LOCATION_recall": 0.9651162790697675,
"eval_ORGANIZATION_f1": 0.9355742296918768,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9329608938547486,
"eval_ORGANIZATION_recall": 0.9382022471910112,
"eval_PERSON_f1": 0.96875,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.96875,
"eval_PERSON_recall": 0.96875,
"eval_loss": 0.10093524307012558,
"eval_overall_accuracy": 0.9862348178137652,
"eval_overall_f1": 0.9456384323640962,
"eval_overall_precision": 0.9373433583959899,
"eval_overall_recall": 0.9540816326530612,
"eval_runtime": 0.278,
"eval_samples_per_second": 611.532,
"eval_steps_per_second": 10.792,
"step": 7488
},
{
"epoch": 79.0,
"grad_norm": 0.0004660775884985924,
"learning_rate": 1.05e-05,
"loss": 0.0002,
"step": 7584
},
{
"epoch": 79.0,
"eval_LOCATION_f1": 0.9060773480662985,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8631578947368421,
"eval_LOCATION_recall": 0.9534883720930233,
"eval_ORGANIZATION_f1": 0.9435028248587571,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9488636363636364,
"eval_ORGANIZATION_recall": 0.9382022471910112,
"eval_PERSON_f1": 0.9644268774703557,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.976,
"eval_PERSON_recall": 0.953125,
"eval_loss": 0.10507776588201523,
"eval_overall_accuracy": 0.9865047233468286,
"eval_overall_f1": 0.9416243654822335,
"eval_overall_precision": 0.9368686868686869,
"eval_overall_recall": 0.9464285714285714,
"eval_runtime": 0.2738,
"eval_samples_per_second": 620.982,
"eval_steps_per_second": 10.959,
"step": 7584
},
{
"epoch": 80.0,
"grad_norm": 0.0003681587695609778,
"learning_rate": 1e-05,
"loss": 0.0002,
"step": 7680
},
{
"epoch": 80.0,
"eval_LOCATION_f1": 0.9111111111111112,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8723404255319149,
"eval_LOCATION_recall": 0.9534883720930233,
"eval_ORGANIZATION_f1": 0.9431818181818182,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9540229885057471,
"eval_ORGANIZATION_recall": 0.9325842696629213,
"eval_PERSON_f1": 0.9644268774703557,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.976,
"eval_PERSON_recall": 0.953125,
"eval_loss": 0.11081729084253311,
"eval_overall_accuracy": 0.9865047233468286,
"eval_overall_f1": 0.9426751592356687,
"eval_overall_precision": 0.9414758269720102,
"eval_overall_recall": 0.9438775510204082,
"eval_runtime": 0.2753,
"eval_samples_per_second": 617.505,
"eval_steps_per_second": 10.897,
"step": 7680
},
{
"epoch": 81.0,
"grad_norm": 0.0002929773472715169,
"learning_rate": 9.5e-06,
"loss": 0.0005,
"step": 7776
},
{
"epoch": 81.0,
"eval_LOCATION_f1": 0.9213483146067417,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8913043478260869,
"eval_LOCATION_recall": 0.9534883720930233,
"eval_ORGANIZATION_f1": 0.9461756373937678,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9542857142857143,
"eval_ORGANIZATION_recall": 0.9382022471910112,
"eval_PERSON_f1": 0.968503937007874,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.10365120321512222,
"eval_overall_accuracy": 0.9870445344129555,
"eval_overall_f1": 0.9477707006369427,
"eval_overall_precision": 0.9465648854961832,
"eval_overall_recall": 0.9489795918367347,
"eval_runtime": 0.2735,
"eval_samples_per_second": 621.671,
"eval_steps_per_second": 10.971,
"step": 7776
},
{
"epoch": 82.0,
"grad_norm": 0.00024136666615959257,
"learning_rate": 9e-06,
"loss": 0.0003,
"step": 7872
},
{
"epoch": 82.0,
"eval_LOCATION_f1": 0.9050279329608939,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8709677419354839,
"eval_LOCATION_recall": 0.9418604651162791,
"eval_ORGANIZATION_f1": 0.9431818181818182,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9540229885057471,
"eval_ORGANIZATION_recall": 0.9325842696629213,
"eval_PERSON_f1": 0.9644268774703557,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.976,
"eval_PERSON_recall": 0.953125,
"eval_loss": 0.1031210720539093,
"eval_overall_accuracy": 0.9867746288798921,
"eval_overall_f1": 0.9413265306122449,
"eval_overall_precision": 0.9413265306122449,
"eval_overall_recall": 0.9413265306122449,
"eval_runtime": 0.2763,
"eval_samples_per_second": 615.215,
"eval_steps_per_second": 10.857,
"step": 7872
},
{
"epoch": 83.0,
"grad_norm": 0.00017047034634742886,
"learning_rate": 8.500000000000002e-06,
"loss": 0.0003,
"step": 7968
},
{
"epoch": 83.0,
"eval_LOCATION_f1": 0.9378531073446328,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9120879120879121,
"eval_LOCATION_recall": 0.9651162790697675,
"eval_ORGANIZATION_f1": 0.9548022598870057,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9602272727272727,
"eval_ORGANIZATION_recall": 0.949438202247191,
"eval_PERSON_f1": 0.9647058823529412,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.968503937007874,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.09963062405586243,
"eval_overall_accuracy": 0.988663967611336,
"eval_overall_f1": 0.9541984732824428,
"eval_overall_precision": 0.9517766497461929,
"eval_overall_recall": 0.9566326530612245,
"eval_runtime": 0.2758,
"eval_samples_per_second": 616.456,
"eval_steps_per_second": 10.879,
"step": 7968
},
{
"epoch": 84.0,
"grad_norm": 0.0007244854350574315,
"learning_rate": 8.000000000000001e-06,
"loss": 0.0002,
"step": 8064
},
{
"epoch": 84.0,
"eval_LOCATION_f1": 0.9431818181818181,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9222222222222223,
"eval_LOCATION_recall": 0.9651162790697675,
"eval_ORGANIZATION_f1": 0.9548022598870057,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9602272727272727,
"eval_ORGANIZATION_recall": 0.949438202247191,
"eval_PERSON_f1": 0.9647058823529412,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.968503937007874,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.09869366884231567,
"eval_overall_accuracy": 0.988663967611336,
"eval_overall_f1": 0.9554140127388535,
"eval_overall_precision": 0.9541984732824428,
"eval_overall_recall": 0.9566326530612245,
"eval_runtime": 0.2758,
"eval_samples_per_second": 616.49,
"eval_steps_per_second": 10.879,
"step": 8064
},
{
"epoch": 85.0,
"grad_norm": 0.00027018680702894926,
"learning_rate": 7.5e-06,
"loss": 0.0004,
"step": 8160
},
{
"epoch": 85.0,
"eval_LOCATION_f1": 0.9431818181818181,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9222222222222223,
"eval_LOCATION_recall": 0.9651162790697675,
"eval_ORGANIZATION_f1": 0.9548022598870057,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9602272727272727,
"eval_ORGANIZATION_recall": 0.949438202247191,
"eval_PERSON_f1": 0.9647058823529412,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.968503937007874,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.10174024105072021,
"eval_overall_accuracy": 0.988663967611336,
"eval_overall_f1": 0.9554140127388535,
"eval_overall_precision": 0.9541984732824428,
"eval_overall_recall": 0.9566326530612245,
"eval_runtime": 0.2746,
"eval_samples_per_second": 618.994,
"eval_steps_per_second": 10.923,
"step": 8160
},
{
"epoch": 86.0,
"grad_norm": 0.00015681206423323601,
"learning_rate": 7.000000000000001e-06,
"loss": 0.0002,
"step": 8256
},
{
"epoch": 86.0,
"eval_LOCATION_f1": 0.9431818181818181,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9222222222222223,
"eval_LOCATION_recall": 0.9651162790697675,
"eval_ORGANIZATION_f1": 0.9548022598870057,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9602272727272727,
"eval_ORGANIZATION_recall": 0.949438202247191,
"eval_PERSON_f1": 0.9647058823529412,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.968503937007874,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.10175755620002747,
"eval_overall_accuracy": 0.988663967611336,
"eval_overall_f1": 0.9554140127388535,
"eval_overall_precision": 0.9541984732824428,
"eval_overall_recall": 0.9566326530612245,
"eval_runtime": 0.2831,
"eval_samples_per_second": 600.454,
"eval_steps_per_second": 10.596,
"step": 8256
},
{
"epoch": 87.0,
"grad_norm": 0.00022313217050395906,
"learning_rate": 6.5000000000000004e-06,
"loss": 0.0001,
"step": 8352
},
{
"epoch": 87.0,
"eval_LOCATION_f1": 0.9431818181818181,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9222222222222223,
"eval_LOCATION_recall": 0.9651162790697675,
"eval_ORGANIZATION_f1": 0.957983193277311,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9553072625698324,
"eval_ORGANIZATION_recall": 0.9606741573033708,
"eval_PERSON_f1": 0.9647058823529412,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.968503937007874,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.10168375074863434,
"eval_overall_accuracy": 0.9889338731443995,
"eval_overall_f1": 0.9568527918781725,
"eval_overall_precision": 0.952020202020202,
"eval_overall_recall": 0.9617346938775511,
"eval_runtime": 0.2755,
"eval_samples_per_second": 617.136,
"eval_steps_per_second": 10.891,
"step": 8352
},
{
"epoch": 88.0,
"grad_norm": 0.0003491460520308465,
"learning_rate": 6e-06,
"loss": 0.0002,
"step": 8448
},
{
"epoch": 88.0,
"eval_LOCATION_f1": 0.9431818181818181,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9222222222222223,
"eval_LOCATION_recall": 0.9651162790697675,
"eval_ORGANIZATION_f1": 0.9548022598870057,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9602272727272727,
"eval_ORGANIZATION_recall": 0.949438202247191,
"eval_PERSON_f1": 0.9647058823529412,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.968503937007874,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.10284104943275452,
"eval_overall_accuracy": 0.988663967611336,
"eval_overall_f1": 0.9554140127388535,
"eval_overall_precision": 0.9541984732824428,
"eval_overall_recall": 0.9566326530612245,
"eval_runtime": 0.2864,
"eval_samples_per_second": 593.603,
"eval_steps_per_second": 10.475,
"step": 8448
},
{
"epoch": 89.0,
"grad_norm": 0.00023058451188262552,
"learning_rate": 5.500000000000001e-06,
"loss": 0.0001,
"step": 8544
},
{
"epoch": 89.0,
"eval_LOCATION_f1": 0.9431818181818181,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9222222222222223,
"eval_LOCATION_recall": 0.9651162790697675,
"eval_ORGANIZATION_f1": 0.9548022598870057,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9602272727272727,
"eval_ORGANIZATION_recall": 0.949438202247191,
"eval_PERSON_f1": 0.9647058823529412,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.968503937007874,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.10325466841459274,
"eval_overall_accuracy": 0.988663967611336,
"eval_overall_f1": 0.9554140127388535,
"eval_overall_precision": 0.9541984732824428,
"eval_overall_recall": 0.9566326530612245,
"eval_runtime": 0.2769,
"eval_samples_per_second": 613.882,
"eval_steps_per_second": 10.833,
"step": 8544
},
{
"epoch": 90.0,
"grad_norm": 0.00042677970486693084,
"learning_rate": 5e-06,
"loss": 0.0002,
"step": 8640
},
{
"epoch": 90.0,
"eval_LOCATION_f1": 0.9371428571428573,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9213483146067416,
"eval_LOCATION_recall": 0.9534883720930233,
"eval_ORGANIZATION_f1": 0.9491525423728814,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9545454545454546,
"eval_ORGANIZATION_recall": 0.9438202247191011,
"eval_PERSON_f1": 0.968503937007874,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.1025572419166565,
"eval_overall_accuracy": 0.9878542510121457,
"eval_overall_f1": 0.9527458492975734,
"eval_overall_precision": 0.9539641943734015,
"eval_overall_recall": 0.951530612244898,
"eval_runtime": 0.2749,
"eval_samples_per_second": 618.381,
"eval_steps_per_second": 10.913,
"step": 8640
},
{
"epoch": 91.0,
"grad_norm": 0.00043625899706967175,
"learning_rate": 4.5e-06,
"loss": 0.0002,
"step": 8736
},
{
"epoch": 91.0,
"eval_LOCATION_f1": 0.9371428571428573,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9213483146067416,
"eval_LOCATION_recall": 0.9534883720930233,
"eval_ORGANIZATION_f1": 0.9491525423728814,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9545454545454546,
"eval_ORGANIZATION_recall": 0.9438202247191011,
"eval_PERSON_f1": 0.968503937007874,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.10244476050138474,
"eval_overall_accuracy": 0.9878542510121457,
"eval_overall_f1": 0.9527458492975734,
"eval_overall_precision": 0.9539641943734015,
"eval_overall_recall": 0.951530612244898,
"eval_runtime": 0.2764,
"eval_samples_per_second": 615.112,
"eval_steps_per_second": 10.855,
"step": 8736
},
{
"epoch": 92.0,
"grad_norm": 0.0001665508607402444,
"learning_rate": 4.000000000000001e-06,
"loss": 0.0002,
"step": 8832
},
{
"epoch": 92.0,
"eval_LOCATION_f1": 0.9371428571428573,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9213483146067416,
"eval_LOCATION_recall": 0.9534883720930233,
"eval_ORGANIZATION_f1": 0.9491525423728814,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9545454545454546,
"eval_ORGANIZATION_recall": 0.9438202247191011,
"eval_PERSON_f1": 0.968503937007874,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.10252244770526886,
"eval_overall_accuracy": 0.9878542510121457,
"eval_overall_f1": 0.9527458492975734,
"eval_overall_precision": 0.9539641943734015,
"eval_overall_recall": 0.951530612244898,
"eval_runtime": 0.2809,
"eval_samples_per_second": 605.297,
"eval_steps_per_second": 10.682,
"step": 8832
},
{
"epoch": 93.0,
"grad_norm": 0.0002152398374164477,
"learning_rate": 3.5000000000000004e-06,
"loss": 0.0002,
"step": 8928
},
{
"epoch": 93.0,
"eval_LOCATION_f1": 0.9371428571428573,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9213483146067416,
"eval_LOCATION_recall": 0.9534883720930233,
"eval_ORGANIZATION_f1": 0.9491525423728814,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9545454545454546,
"eval_ORGANIZATION_recall": 0.9438202247191011,
"eval_PERSON_f1": 0.968503937007874,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.10386810451745987,
"eval_overall_accuracy": 0.9878542510121457,
"eval_overall_f1": 0.9527458492975734,
"eval_overall_precision": 0.9539641943734015,
"eval_overall_recall": 0.951530612244898,
"eval_runtime": 0.2851,
"eval_samples_per_second": 596.31,
"eval_steps_per_second": 10.523,
"step": 8928
},
{
"epoch": 94.0,
"grad_norm": 0.00013109896099194884,
"learning_rate": 3e-06,
"loss": 0.0001,
"step": 9024
},
{
"epoch": 94.0,
"eval_LOCATION_f1": 0.9371428571428573,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9213483146067416,
"eval_LOCATION_recall": 0.9534883720930233,
"eval_ORGANIZATION_f1": 0.9491525423728814,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9545454545454546,
"eval_ORGANIZATION_recall": 0.9438202247191011,
"eval_PERSON_f1": 0.968503937007874,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.10342691838741302,
"eval_overall_accuracy": 0.9878542510121457,
"eval_overall_f1": 0.9527458492975734,
"eval_overall_precision": 0.9539641943734015,
"eval_overall_recall": 0.951530612244898,
"eval_runtime": 0.2864,
"eval_samples_per_second": 593.558,
"eval_steps_per_second": 10.475,
"step": 9024
},
{
"epoch": 95.0,
"grad_norm": 8.776304457569495e-05,
"learning_rate": 2.5e-06,
"loss": 0.0001,
"step": 9120
},
{
"epoch": 95.0,
"eval_LOCATION_f1": 0.9371428571428573,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.9213483146067416,
"eval_LOCATION_recall": 0.9534883720930233,
"eval_ORGANIZATION_f1": 0.9491525423728814,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.9545454545454546,
"eval_ORGANIZATION_recall": 0.9438202247191011,
"eval_PERSON_f1": 0.968503937007874,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.1035594493150711,
"eval_overall_accuracy": 0.9878542510121457,
"eval_overall_f1": 0.9527458492975734,
"eval_overall_precision": 0.9539641943734015,
"eval_overall_recall": 0.951530612244898,
"eval_runtime": 0.2771,
"eval_samples_per_second": 613.457,
"eval_steps_per_second": 10.826,
"step": 9120
},
{
"epoch": 96.0,
"grad_norm": 0.00011784955131588504,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.0001,
"step": 9216
},
{
"epoch": 96.0,
"eval_LOCATION_f1": 0.9273743016759777,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8924731182795699,
"eval_LOCATION_recall": 0.9651162790697675,
"eval_ORGANIZATION_f1": 0.9401709401709402,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.953757225433526,
"eval_ORGANIZATION_recall": 0.9269662921348315,
"eval_PERSON_f1": 0.968503937007874,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.10866863280534744,
"eval_overall_accuracy": 0.9873144399460189,
"eval_overall_f1": 0.9464285714285714,
"eval_overall_precision": 0.9464285714285714,
"eval_overall_recall": 0.9464285714285714,
"eval_runtime": 0.2796,
"eval_samples_per_second": 608.08,
"eval_steps_per_second": 10.731,
"step": 9216
},
{
"epoch": 97.0,
"grad_norm": 0.0001753137621562928,
"learning_rate": 1.5e-06,
"loss": 0.0005,
"step": 9312
},
{
"epoch": 97.0,
"eval_LOCATION_f1": 0.9273743016759777,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8924731182795699,
"eval_LOCATION_recall": 0.9651162790697675,
"eval_ORGANIZATION_f1": 0.9401709401709402,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.953757225433526,
"eval_ORGANIZATION_recall": 0.9269662921348315,
"eval_PERSON_f1": 0.9647058823529412,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.968503937007874,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.10562655329704285,
"eval_overall_accuracy": 0.9875843454790824,
"eval_overall_f1": 0.9452229299363057,
"eval_overall_precision": 0.9440203562340967,
"eval_overall_recall": 0.9464285714285714,
"eval_runtime": 0.2763,
"eval_samples_per_second": 615.195,
"eval_steps_per_second": 10.856,
"step": 9312
},
{
"epoch": 98.0,
"grad_norm": 0.000347200024407357,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.0003,
"step": 9408
},
{
"epoch": 98.0,
"eval_LOCATION_f1": 0.9273743016759777,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8924731182795699,
"eval_LOCATION_recall": 0.9651162790697675,
"eval_ORGANIZATION_f1": 0.9401709401709402,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.953757225433526,
"eval_ORGANIZATION_recall": 0.9269662921348315,
"eval_PERSON_f1": 0.9647058823529412,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.968503937007874,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.10447113960981369,
"eval_overall_accuracy": 0.9875843454790824,
"eval_overall_f1": 0.9452229299363057,
"eval_overall_precision": 0.9440203562340967,
"eval_overall_recall": 0.9464285714285714,
"eval_runtime": 0.2767,
"eval_samples_per_second": 614.345,
"eval_steps_per_second": 10.841,
"step": 9408
},
{
"epoch": 99.0,
"grad_norm": 0.0003261720994487405,
"learning_rate": 5.000000000000001e-07,
"loss": 0.0001,
"step": 9504
},
{
"epoch": 99.0,
"eval_LOCATION_f1": 0.9273743016759777,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8924731182795699,
"eval_LOCATION_recall": 0.9651162790697675,
"eval_ORGANIZATION_f1": 0.9401709401709402,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.953757225433526,
"eval_ORGANIZATION_recall": 0.9269662921348315,
"eval_PERSON_f1": 0.9647058823529412,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.968503937007874,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.10466174781322479,
"eval_overall_accuracy": 0.9875843454790824,
"eval_overall_f1": 0.9452229299363057,
"eval_overall_precision": 0.9440203562340967,
"eval_overall_recall": 0.9464285714285714,
"eval_runtime": 0.2746,
"eval_samples_per_second": 619.061,
"eval_steps_per_second": 10.925,
"step": 9504
},
{
"epoch": 100.0,
"grad_norm": 9.912410314427689e-05,
"learning_rate": 0.0,
"loss": 0.0002,
"step": 9600
},
{
"epoch": 100.0,
"eval_LOCATION_f1": 0.9273743016759777,
"eval_LOCATION_number": 86,
"eval_LOCATION_precision": 0.8924731182795699,
"eval_LOCATION_recall": 0.9651162790697675,
"eval_ORGANIZATION_f1": 0.9401709401709402,
"eval_ORGANIZATION_number": 178,
"eval_ORGANIZATION_precision": 0.953757225433526,
"eval_ORGANIZATION_recall": 0.9269662921348315,
"eval_PERSON_f1": 0.9647058823529412,
"eval_PERSON_number": 128,
"eval_PERSON_precision": 0.968503937007874,
"eval_PERSON_recall": 0.9609375,
"eval_loss": 0.10467950254678726,
"eval_overall_accuracy": 0.9875843454790824,
"eval_overall_f1": 0.9452229299363057,
"eval_overall_precision": 0.9440203562340967,
"eval_overall_recall": 0.9464285714285714,
"eval_runtime": 0.2862,
"eval_samples_per_second": 593.895,
"eval_steps_per_second": 10.48,
"step": 9600
},
{
"epoch": 100.0,
"step": 9600,
"total_flos": 3867927199316004.0,
"train_loss": 0.005292673466804748,
"train_runtime": 863.5274,
"train_samples_per_second": 177.296,
"train_steps_per_second": 11.117
}
],
"logging_steps": 500,
"max_steps": 9600,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 500,
"total_flos": 3867927199316004.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}