NLP-at-home / last-checkpoint /trainer_state.json
TeamNL's picture
Training in progress, epoch 69, checkpoint
15af97e verified
{
"best_metric": 0.8533899487562311,
"best_model_checkpoint": "./results/checkpoint-1200",
"epoch": 69.0,
"eval_steps": 500,
"global_step": 1656,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 4.4191670417785645,
"learning_rate": 4.9500000000000004e-05,
"loss": 1.1717,
"step": 24
},
{
"epoch": 1.0,
"eval_f1": 0.5559615812547036,
"eval_loss": 0.852466344833374,
"eval_runtime": 0.2518,
"eval_samples_per_second": 865.826,
"eval_steps_per_second": 15.887,
"step": 24
},
{
"epoch": 2.0,
"grad_norm": 6.028530597686768,
"learning_rate": 4.9e-05,
"loss": 0.714,
"step": 48
},
{
"epoch": 2.0,
"eval_f1": 0.7746990836466595,
"eval_loss": 0.6411612033843994,
"eval_runtime": 0.2498,
"eval_samples_per_second": 872.757,
"eval_steps_per_second": 16.014,
"step": 48
},
{
"epoch": 3.0,
"grad_norm": 6.396097660064697,
"learning_rate": 4.85e-05,
"loss": 0.4376,
"step": 72
},
{
"epoch": 3.0,
"eval_f1": 0.7904679173662312,
"eval_loss": 0.578528881072998,
"eval_runtime": 0.2466,
"eval_samples_per_second": 883.847,
"eval_steps_per_second": 16.217,
"step": 72
},
{
"epoch": 4.0,
"grad_norm": 13.205650329589844,
"learning_rate": 4.8e-05,
"loss": 0.262,
"step": 96
},
{
"epoch": 4.0,
"eval_f1": 0.8032839961396288,
"eval_loss": 0.6008332967758179,
"eval_runtime": 0.2457,
"eval_samples_per_second": 887.351,
"eval_steps_per_second": 16.282,
"step": 96
},
{
"epoch": 5.0,
"grad_norm": 9.777926445007324,
"learning_rate": 4.75e-05,
"loss": 0.1384,
"step": 120
},
{
"epoch": 5.0,
"eval_f1": 0.7854101778464291,
"eval_loss": 0.6125118732452393,
"eval_runtime": 0.2485,
"eval_samples_per_second": 877.32,
"eval_steps_per_second": 16.098,
"step": 120
},
{
"epoch": 6.0,
"grad_norm": 4.165589332580566,
"learning_rate": 4.7e-05,
"loss": 0.0982,
"step": 144
},
{
"epoch": 6.0,
"eval_f1": 0.7672888039973402,
"eval_loss": 0.7653169631958008,
"eval_runtime": 0.2569,
"eval_samples_per_second": 848.674,
"eval_steps_per_second": 15.572,
"step": 144
},
{
"epoch": 7.0,
"grad_norm": 3.7239115238189697,
"learning_rate": 4.6500000000000005e-05,
"loss": 0.0657,
"step": 168
},
{
"epoch": 7.0,
"eval_f1": 0.8147995774570269,
"eval_loss": 0.7002198100090027,
"eval_runtime": 0.2474,
"eval_samples_per_second": 881.237,
"eval_steps_per_second": 16.169,
"step": 168
},
{
"epoch": 8.0,
"grad_norm": 0.3135942220687866,
"learning_rate": 4.600000000000001e-05,
"loss": 0.0395,
"step": 192
},
{
"epoch": 8.0,
"eval_f1": 0.8087986139563679,
"eval_loss": 0.7771649956703186,
"eval_runtime": 0.244,
"eval_samples_per_second": 893.39,
"eval_steps_per_second": 16.392,
"step": 192
},
{
"epoch": 9.0,
"grad_norm": 0.4529080092906952,
"learning_rate": 4.55e-05,
"loss": 0.0214,
"step": 216
},
{
"epoch": 9.0,
"eval_f1": 0.8392522702480626,
"eval_loss": 0.7208238244056702,
"eval_runtime": 0.2501,
"eval_samples_per_second": 871.738,
"eval_steps_per_second": 15.995,
"step": 216
},
{
"epoch": 10.0,
"grad_norm": 8.327765464782715,
"learning_rate": 4.5e-05,
"loss": 0.0237,
"step": 240
},
{
"epoch": 10.0,
"eval_f1": 0.8424386724386725,
"eval_loss": 0.7019856572151184,
"eval_runtime": 0.2469,
"eval_samples_per_second": 883.057,
"eval_steps_per_second": 16.203,
"step": 240
},
{
"epoch": 11.0,
"grad_norm": 2.6359758377075195,
"learning_rate": 4.4500000000000004e-05,
"loss": 0.0159,
"step": 264
},
{
"epoch": 11.0,
"eval_f1": 0.8074501800720288,
"eval_loss": 0.9096614122390747,
"eval_runtime": 0.2471,
"eval_samples_per_second": 882.066,
"eval_steps_per_second": 16.185,
"step": 264
},
{
"epoch": 12.0,
"grad_norm": 18.37872886657715,
"learning_rate": 4.4000000000000006e-05,
"loss": 0.0322,
"step": 288
},
{
"epoch": 12.0,
"eval_f1": 0.7848399290035054,
"eval_loss": 1.0800021886825562,
"eval_runtime": 0.2488,
"eval_samples_per_second": 876.232,
"eval_steps_per_second": 16.078,
"step": 288
},
{
"epoch": 13.0,
"grad_norm": 13.307744026184082,
"learning_rate": 4.35e-05,
"loss": 0.029,
"step": 312
},
{
"epoch": 13.0,
"eval_f1": 0.798355010551357,
"eval_loss": 1.0241613388061523,
"eval_runtime": 0.2493,
"eval_samples_per_second": 874.372,
"eval_steps_per_second": 16.044,
"step": 312
},
{
"epoch": 14.0,
"grad_norm": 0.41816067695617676,
"learning_rate": 4.3e-05,
"loss": 0.0121,
"step": 336
},
{
"epoch": 14.0,
"eval_f1": 0.8120306519736268,
"eval_loss": 1.022652268409729,
"eval_runtime": 0.2454,
"eval_samples_per_second": 888.194,
"eval_steps_per_second": 16.297,
"step": 336
},
{
"epoch": 15.0,
"grad_norm": 10.153714179992676,
"learning_rate": 4.25e-05,
"loss": 0.0082,
"step": 360
},
{
"epoch": 15.0,
"eval_f1": 0.7906564533144493,
"eval_loss": 1.096178650856018,
"eval_runtime": 0.2461,
"eval_samples_per_second": 885.781,
"eval_steps_per_second": 16.253,
"step": 360
},
{
"epoch": 16.0,
"grad_norm": 0.026447944343090057,
"learning_rate": 4.2e-05,
"loss": 0.0035,
"step": 384
},
{
"epoch": 16.0,
"eval_f1": 0.7900391731356858,
"eval_loss": 1.2207499742507935,
"eval_runtime": 0.2461,
"eval_samples_per_second": 885.87,
"eval_steps_per_second": 16.254,
"step": 384
},
{
"epoch": 17.0,
"grad_norm": 0.008760841563344002,
"learning_rate": 4.15e-05,
"loss": 0.0064,
"step": 408
},
{
"epoch": 17.0,
"eval_f1": 0.7941827325696338,
"eval_loss": 1.120672345161438,
"eval_runtime": 0.2456,
"eval_samples_per_second": 887.604,
"eval_steps_per_second": 16.286,
"step": 408
},
{
"epoch": 18.0,
"grad_norm": 0.2175062894821167,
"learning_rate": 4.1e-05,
"loss": 0.0089,
"step": 432
},
{
"epoch": 18.0,
"eval_f1": 0.8121032920020557,
"eval_loss": 1.1158227920532227,
"eval_runtime": 0.2466,
"eval_samples_per_second": 884.124,
"eval_steps_per_second": 16.222,
"step": 432
},
{
"epoch": 19.0,
"grad_norm": 0.026284487918019295,
"learning_rate": 4.05e-05,
"loss": 0.0011,
"step": 456
},
{
"epoch": 19.0,
"eval_f1": 0.8106351612230018,
"eval_loss": 1.1229100227355957,
"eval_runtime": 0.2466,
"eval_samples_per_second": 883.852,
"eval_steps_per_second": 16.217,
"step": 456
},
{
"epoch": 20.0,
"grad_norm": 7.1518473625183105,
"learning_rate": 4e-05,
"loss": 0.0036,
"step": 480
},
{
"epoch": 20.0,
"eval_f1": 0.8215225082145855,
"eval_loss": 1.0700623989105225,
"eval_runtime": 0.2447,
"eval_samples_per_second": 890.866,
"eval_steps_per_second": 16.346,
"step": 480
},
{
"epoch": 21.0,
"grad_norm": 0.030479425564408302,
"learning_rate": 3.9500000000000005e-05,
"loss": 0.0038,
"step": 504
},
{
"epoch": 21.0,
"eval_f1": 0.79625307224709,
"eval_loss": 1.1593633890151978,
"eval_runtime": 0.2451,
"eval_samples_per_second": 889.358,
"eval_steps_per_second": 16.319,
"step": 504
},
{
"epoch": 22.0,
"grad_norm": 0.8992727994918823,
"learning_rate": 3.9000000000000006e-05,
"loss": 0.013,
"step": 528
},
{
"epoch": 22.0,
"eval_f1": 0.8198287286484007,
"eval_loss": 1.127288579940796,
"eval_runtime": 0.2483,
"eval_samples_per_second": 878.117,
"eval_steps_per_second": 16.112,
"step": 528
},
{
"epoch": 23.0,
"grad_norm": 0.008566158823668957,
"learning_rate": 3.85e-05,
"loss": 0.0051,
"step": 552
},
{
"epoch": 23.0,
"eval_f1": 0.8245210270785887,
"eval_loss": 1.0870376825332642,
"eval_runtime": 0.2503,
"eval_samples_per_second": 870.992,
"eval_steps_per_second": 15.982,
"step": 552
},
{
"epoch": 24.0,
"grad_norm": 0.0786171406507492,
"learning_rate": 3.8e-05,
"loss": 0.0037,
"step": 576
},
{
"epoch": 24.0,
"eval_f1": 0.8210009216488835,
"eval_loss": 1.1669812202453613,
"eval_runtime": 0.2462,
"eval_samples_per_second": 885.624,
"eval_steps_per_second": 16.25,
"step": 576
},
{
"epoch": 25.0,
"grad_norm": 0.02009885385632515,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.0035,
"step": 600
},
{
"epoch": 25.0,
"eval_f1": 0.823341564899699,
"eval_loss": 1.1029468774795532,
"eval_runtime": 0.246,
"eval_samples_per_second": 886.238,
"eval_steps_per_second": 16.261,
"step": 600
},
{
"epoch": 26.0,
"grad_norm": 0.10086794197559357,
"learning_rate": 3.7e-05,
"loss": 0.0051,
"step": 624
},
{
"epoch": 26.0,
"eval_f1": 0.8138320306415473,
"eval_loss": 1.1733108758926392,
"eval_runtime": 0.2405,
"eval_samples_per_second": 906.567,
"eval_steps_per_second": 16.634,
"step": 624
},
{
"epoch": 27.0,
"grad_norm": 24.029403686523438,
"learning_rate": 3.65e-05,
"loss": 0.0032,
"step": 648
},
{
"epoch": 27.0,
"eval_f1": 0.8293087494328629,
"eval_loss": 1.1072059869766235,
"eval_runtime": 0.244,
"eval_samples_per_second": 893.364,
"eval_steps_per_second": 16.392,
"step": 648
},
{
"epoch": 28.0,
"grad_norm": 19.329952239990234,
"learning_rate": 3.6e-05,
"loss": 0.024,
"step": 672
},
{
"epoch": 28.0,
"eval_f1": 0.7828629987222324,
"eval_loss": 1.3776278495788574,
"eval_runtime": 0.2463,
"eval_samples_per_second": 884.934,
"eval_steps_per_second": 16.237,
"step": 672
},
{
"epoch": 29.0,
"grad_norm": 0.0337708480656147,
"learning_rate": 3.55e-05,
"loss": 0.0097,
"step": 696
},
{
"epoch": 29.0,
"eval_f1": 0.8245008772063647,
"eval_loss": 1.161023736000061,
"eval_runtime": 0.246,
"eval_samples_per_second": 886.218,
"eval_steps_per_second": 16.261,
"step": 696
},
{
"epoch": 30.0,
"grad_norm": 0.05386161431670189,
"learning_rate": 3.5e-05,
"loss": 0.0093,
"step": 720
},
{
"epoch": 30.0,
"eval_f1": 0.8331553578084018,
"eval_loss": 1.0987571477890015,
"eval_runtime": 0.2499,
"eval_samples_per_second": 872.378,
"eval_steps_per_second": 16.007,
"step": 720
},
{
"epoch": 31.0,
"grad_norm": 0.011382571421563625,
"learning_rate": 3.45e-05,
"loss": 0.0048,
"step": 744
},
{
"epoch": 31.0,
"eval_f1": 0.8253722493039259,
"eval_loss": 1.1277176141738892,
"eval_runtime": 0.2469,
"eval_samples_per_second": 882.906,
"eval_steps_per_second": 16.2,
"step": 744
},
{
"epoch": 32.0,
"grad_norm": 0.0046822689473629,
"learning_rate": 3.4000000000000007e-05,
"loss": 0.0041,
"step": 768
},
{
"epoch": 32.0,
"eval_f1": 0.8187101207450439,
"eval_loss": 1.1826363801956177,
"eval_runtime": 0.2504,
"eval_samples_per_second": 870.593,
"eval_steps_per_second": 15.974,
"step": 768
},
{
"epoch": 33.0,
"grad_norm": 0.003356009954586625,
"learning_rate": 3.35e-05,
"loss": 0.0033,
"step": 792
},
{
"epoch": 33.0,
"eval_f1": 0.8126497250026661,
"eval_loss": 1.1944386959075928,
"eval_runtime": 0.2453,
"eval_samples_per_second": 888.767,
"eval_steps_per_second": 16.308,
"step": 792
},
{
"epoch": 34.0,
"grad_norm": 0.005327207036316395,
"learning_rate": 3.3e-05,
"loss": 0.0087,
"step": 816
},
{
"epoch": 34.0,
"eval_f1": 0.8491252629565882,
"eval_loss": 1.03484046459198,
"eval_runtime": 0.2487,
"eval_samples_per_second": 876.726,
"eval_steps_per_second": 16.087,
"step": 816
},
{
"epoch": 35.0,
"grad_norm": 0.003193259472027421,
"learning_rate": 3.2500000000000004e-05,
"loss": 0.0056,
"step": 840
},
{
"epoch": 35.0,
"eval_f1": 0.831069428937251,
"eval_loss": 1.0799349546432495,
"eval_runtime": 0.2433,
"eval_samples_per_second": 895.888,
"eval_steps_per_second": 16.438,
"step": 840
},
{
"epoch": 36.0,
"grad_norm": 0.0025981140788644552,
"learning_rate": 3.2000000000000005e-05,
"loss": 0.0056,
"step": 864
},
{
"epoch": 36.0,
"eval_f1": 0.8005370004504828,
"eval_loss": 1.2397780418395996,
"eval_runtime": 0.2558,
"eval_samples_per_second": 852.208,
"eval_steps_per_second": 15.637,
"step": 864
},
{
"epoch": 37.0,
"grad_norm": 0.004587420262396336,
"learning_rate": 3.15e-05,
"loss": 0.0043,
"step": 888
},
{
"epoch": 37.0,
"eval_f1": 0.7960422975955538,
"eval_loss": 1.3035740852355957,
"eval_runtime": 0.2431,
"eval_samples_per_second": 896.654,
"eval_steps_per_second": 16.452,
"step": 888
},
{
"epoch": 38.0,
"grad_norm": 0.0024126123171299696,
"learning_rate": 3.1e-05,
"loss": 0.004,
"step": 912
},
{
"epoch": 38.0,
"eval_f1": 0.8348215283066768,
"eval_loss": 1.1289474964141846,
"eval_runtime": 0.2451,
"eval_samples_per_second": 889.465,
"eval_steps_per_second": 16.32,
"step": 912
},
{
"epoch": 39.0,
"grad_norm": 0.002631419338285923,
"learning_rate": 3.05e-05,
"loss": 0.0002,
"step": 936
},
{
"epoch": 39.0,
"eval_f1": 0.8470719620834265,
"eval_loss": 1.0962085723876953,
"eval_runtime": 0.2555,
"eval_samples_per_second": 853.196,
"eval_steps_per_second": 15.655,
"step": 936
},
{
"epoch": 40.0,
"grad_norm": 0.0036367273423820734,
"learning_rate": 3e-05,
"loss": 0.0002,
"step": 960
},
{
"epoch": 40.0,
"eval_f1": 0.8475274660470973,
"eval_loss": 1.1172648668289185,
"eval_runtime": 0.2665,
"eval_samples_per_second": 818.091,
"eval_steps_per_second": 15.011,
"step": 960
},
{
"epoch": 41.0,
"grad_norm": 0.002203166950494051,
"learning_rate": 2.95e-05,
"loss": 0.0002,
"step": 984
},
{
"epoch": 41.0,
"eval_f1": 0.8463311286594244,
"eval_loss": 1.1128673553466797,
"eval_runtime": 0.2476,
"eval_samples_per_second": 880.33,
"eval_steps_per_second": 16.153,
"step": 984
},
{
"epoch": 42.0,
"grad_norm": 0.0019745519384741783,
"learning_rate": 2.9e-05,
"loss": 0.0002,
"step": 1008
},
{
"epoch": 42.0,
"eval_f1": 0.8524668435013261,
"eval_loss": 1.1187357902526855,
"eval_runtime": 0.2433,
"eval_samples_per_second": 896.105,
"eval_steps_per_second": 16.442,
"step": 1008
},
{
"epoch": 43.0,
"grad_norm": 0.0018844620790332556,
"learning_rate": 2.8499999999999998e-05,
"loss": 0.0002,
"step": 1032
},
{
"epoch": 43.0,
"eval_f1": 0.8524668435013261,
"eval_loss": 1.1086227893829346,
"eval_runtime": 0.2535,
"eval_samples_per_second": 860.088,
"eval_steps_per_second": 15.781,
"step": 1032
},
{
"epoch": 44.0,
"grad_norm": 0.0016616833163425326,
"learning_rate": 2.8000000000000003e-05,
"loss": 0.0002,
"step": 1056
},
{
"epoch": 44.0,
"eval_f1": 0.8524668435013261,
"eval_loss": 1.1084070205688477,
"eval_runtime": 0.2467,
"eval_samples_per_second": 883.486,
"eval_steps_per_second": 16.211,
"step": 1056
},
{
"epoch": 45.0,
"grad_norm": 0.0017370691057294607,
"learning_rate": 2.7500000000000004e-05,
"loss": 0.0002,
"step": 1080
},
{
"epoch": 45.0,
"eval_f1": 0.8524668435013261,
"eval_loss": 1.1120808124542236,
"eval_runtime": 0.2544,
"eval_samples_per_second": 856.909,
"eval_steps_per_second": 15.723,
"step": 1080
},
{
"epoch": 46.0,
"grad_norm": 0.01709812693297863,
"learning_rate": 2.7000000000000002e-05,
"loss": 0.0018,
"step": 1104
},
{
"epoch": 46.0,
"eval_f1": 0.8230951784833656,
"eval_loss": 1.150782585144043,
"eval_runtime": 0.2507,
"eval_samples_per_second": 869.395,
"eval_steps_per_second": 15.952,
"step": 1104
},
{
"epoch": 47.0,
"grad_norm": 0.0014819600619375706,
"learning_rate": 2.6500000000000004e-05,
"loss": 0.0033,
"step": 1128
},
{
"epoch": 47.0,
"eval_f1": 0.8482902683102467,
"eval_loss": 1.1178399324417114,
"eval_runtime": 0.251,
"eval_samples_per_second": 868.684,
"eval_steps_per_second": 15.939,
"step": 1128
},
{
"epoch": 48.0,
"grad_norm": 0.001657757442444563,
"learning_rate": 2.6000000000000002e-05,
"loss": 0.0012,
"step": 1152
},
{
"epoch": 48.0,
"eval_f1": 0.8485012469416702,
"eval_loss": 1.1701571941375732,
"eval_runtime": 0.2457,
"eval_samples_per_second": 887.403,
"eval_steps_per_second": 16.283,
"step": 1152
},
{
"epoch": 49.0,
"grad_norm": 0.001472759060561657,
"learning_rate": 2.5500000000000003e-05,
"loss": 0.0001,
"step": 1176
},
{
"epoch": 49.0,
"eval_f1": 0.8471931986968837,
"eval_loss": 1.1576160192489624,
"eval_runtime": 0.2492,
"eval_samples_per_second": 874.869,
"eval_steps_per_second": 16.053,
"step": 1176
},
{
"epoch": 50.0,
"grad_norm": 0.0018886495381593704,
"learning_rate": 2.5e-05,
"loss": 0.0001,
"step": 1200
},
{
"epoch": 50.0,
"eval_f1": 0.8533899487562311,
"eval_loss": 1.1580414772033691,
"eval_runtime": 0.2453,
"eval_samples_per_second": 888.766,
"eval_steps_per_second": 16.308,
"step": 1200
},
{
"epoch": 51.0,
"grad_norm": 0.0017091715708374977,
"learning_rate": 2.45e-05,
"loss": 0.0001,
"step": 1224
},
{
"epoch": 51.0,
"eval_f1": 0.8533899487562311,
"eval_loss": 1.157599687576294,
"eval_runtime": 0.2534,
"eval_samples_per_second": 860.384,
"eval_steps_per_second": 15.787,
"step": 1224
},
{
"epoch": 52.0,
"grad_norm": 0.0013613449409604073,
"learning_rate": 2.4e-05,
"loss": 0.0001,
"step": 1248
},
{
"epoch": 52.0,
"eval_f1": 0.8533899487562311,
"eval_loss": 1.155985713005066,
"eval_runtime": 0.2572,
"eval_samples_per_second": 847.53,
"eval_steps_per_second": 15.551,
"step": 1248
},
{
"epoch": 53.0,
"grad_norm": 0.001274469424970448,
"learning_rate": 2.35e-05,
"loss": 0.0001,
"step": 1272
},
{
"epoch": 53.0,
"eval_f1": 0.8533899487562311,
"eval_loss": 1.1571167707443237,
"eval_runtime": 0.2412,
"eval_samples_per_second": 903.632,
"eval_steps_per_second": 16.58,
"step": 1272
},
{
"epoch": 54.0,
"grad_norm": 0.0013310050126165152,
"learning_rate": 2.3000000000000003e-05,
"loss": 0.0001,
"step": 1296
},
{
"epoch": 54.0,
"eval_f1": 0.8533899487562311,
"eval_loss": 1.1600090265274048,
"eval_runtime": 0.2473,
"eval_samples_per_second": 881.514,
"eval_steps_per_second": 16.175,
"step": 1296
},
{
"epoch": 55.0,
"grad_norm": 0.0015337098157033324,
"learning_rate": 2.25e-05,
"loss": 0.0001,
"step": 1320
},
{
"epoch": 55.0,
"eval_f1": 0.8426159574795251,
"eval_loss": 1.1735517978668213,
"eval_runtime": 0.2491,
"eval_samples_per_second": 875.11,
"eval_steps_per_second": 16.057,
"step": 1320
},
{
"epoch": 56.0,
"grad_norm": 0.008407847955822945,
"learning_rate": 2.2000000000000003e-05,
"loss": 0.0001,
"step": 1344
},
{
"epoch": 56.0,
"eval_f1": 0.8426159574795251,
"eval_loss": 1.1661510467529297,
"eval_runtime": 0.2486,
"eval_samples_per_second": 876.938,
"eval_steps_per_second": 16.091,
"step": 1344
},
{
"epoch": 57.0,
"grad_norm": 0.0011946760350838304,
"learning_rate": 2.15e-05,
"loss": 0.0001,
"step": 1368
},
{
"epoch": 57.0,
"eval_f1": 0.8475855910205873,
"eval_loss": 1.1671065092086792,
"eval_runtime": 0.2535,
"eval_samples_per_second": 859.876,
"eval_steps_per_second": 15.778,
"step": 1368
},
{
"epoch": 58.0,
"grad_norm": 0.0011800089851021767,
"learning_rate": 2.1e-05,
"loss": 0.0001,
"step": 1392
},
{
"epoch": 58.0,
"eval_f1": 0.8475855910205873,
"eval_loss": 1.1681970357894897,
"eval_runtime": 0.2494,
"eval_samples_per_second": 874.117,
"eval_steps_per_second": 16.039,
"step": 1392
},
{
"epoch": 59.0,
"grad_norm": 0.0018475407268851995,
"learning_rate": 2.05e-05,
"loss": 0.0001,
"step": 1416
},
{
"epoch": 59.0,
"eval_f1": 0.8475855910205873,
"eval_loss": 1.1666558980941772,
"eval_runtime": 0.2484,
"eval_samples_per_second": 877.516,
"eval_steps_per_second": 16.101,
"step": 1416
},
{
"epoch": 60.0,
"grad_norm": 0.0015070955269038677,
"learning_rate": 2e-05,
"loss": 0.0001,
"step": 1440
},
{
"epoch": 60.0,
"eval_f1": 0.8475855910205873,
"eval_loss": 1.1691235303878784,
"eval_runtime": 0.2503,
"eval_samples_per_second": 870.922,
"eval_steps_per_second": 15.98,
"step": 1440
},
{
"epoch": 61.0,
"grad_norm": 0.001129466574639082,
"learning_rate": 1.9500000000000003e-05,
"loss": 0.0001,
"step": 1464
},
{
"epoch": 61.0,
"eval_f1": 0.8475855910205873,
"eval_loss": 1.1708507537841797,
"eval_runtime": 0.2483,
"eval_samples_per_second": 878.104,
"eval_steps_per_second": 16.112,
"step": 1464
},
{
"epoch": 62.0,
"grad_norm": 0.001501582097262144,
"learning_rate": 1.9e-05,
"loss": 0.0001,
"step": 1488
},
{
"epoch": 62.0,
"eval_f1": 0.8475855910205873,
"eval_loss": 1.1724032163619995,
"eval_runtime": 0.2483,
"eval_samples_per_second": 877.807,
"eval_steps_per_second": 16.107,
"step": 1488
},
{
"epoch": 63.0,
"grad_norm": 0.0010985672706738114,
"learning_rate": 1.85e-05,
"loss": 0.0001,
"step": 1512
},
{
"epoch": 63.0,
"eval_f1": 0.8475855910205873,
"eval_loss": 1.1738693714141846,
"eval_runtime": 0.2494,
"eval_samples_per_second": 874.122,
"eval_steps_per_second": 16.039,
"step": 1512
},
{
"epoch": 64.0,
"grad_norm": 0.08045843243598938,
"learning_rate": 1.8e-05,
"loss": 0.0001,
"step": 1536
},
{
"epoch": 64.0,
"eval_f1": 0.8533899487562311,
"eval_loss": 1.1738698482513428,
"eval_runtime": 0.2562,
"eval_samples_per_second": 850.855,
"eval_steps_per_second": 15.612,
"step": 1536
},
{
"epoch": 65.0,
"grad_norm": 0.002004158915951848,
"learning_rate": 1.75e-05,
"loss": 0.0001,
"step": 1560
},
{
"epoch": 65.0,
"eval_f1": 0.8471931986968837,
"eval_loss": 1.1787022352218628,
"eval_runtime": 0.2428,
"eval_samples_per_second": 897.779,
"eval_steps_per_second": 16.473,
"step": 1560
},
{
"epoch": 66.0,
"grad_norm": 0.001049485057592392,
"learning_rate": 1.7000000000000003e-05,
"loss": 0.0001,
"step": 1584
},
{
"epoch": 66.0,
"eval_f1": 0.8471931986968837,
"eval_loss": 1.180769443511963,
"eval_runtime": 0.2525,
"eval_samples_per_second": 863.258,
"eval_steps_per_second": 15.84,
"step": 1584
},
{
"epoch": 67.0,
"grad_norm": 0.0010408489033579826,
"learning_rate": 1.65e-05,
"loss": 0.0001,
"step": 1608
},
{
"epoch": 67.0,
"eval_f1": 0.8471931986968837,
"eval_loss": 1.1815507411956787,
"eval_runtime": 0.2505,
"eval_samples_per_second": 870.186,
"eval_steps_per_second": 15.967,
"step": 1608
},
{
"epoch": 68.0,
"grad_norm": 0.0012064232723787427,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.0001,
"step": 1632
},
{
"epoch": 68.0,
"eval_f1": 0.8471931986968837,
"eval_loss": 1.182210922241211,
"eval_runtime": 0.2583,
"eval_samples_per_second": 844.028,
"eval_steps_per_second": 15.487,
"step": 1632
},
{
"epoch": 69.0,
"grad_norm": 0.0014143523294478655,
"learning_rate": 1.55e-05,
"loss": 0.0001,
"step": 1656
},
{
"epoch": 69.0,
"eval_f1": 0.8524668435013261,
"eval_loss": 1.1818993091583252,
"eval_runtime": 0.2452,
"eval_samples_per_second": 888.962,
"eval_steps_per_second": 16.311,
"step": 1656
}
],
"logging_steps": 500,
"max_steps": 2400,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2551863638350032.0,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}