roberta-base-sst2 / trainer_state.json
WillHeld's picture
End of training
bd69838
{
"best_metric": 0.19522710144519806,
"best_model_checkpoint": "./results_train/roberta-base/sst2/checkpoint-3500",
"epoch": 10.0,
"global_step": 42100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.12,
"learning_rate": 3.9588281868566905e-06,
"loss": 0.575,
"step": 500
},
{
"epoch": 0.12,
"eval_accuracy": 0.9071100917431193,
"eval_loss": 0.2664913535118103,
"eval_runtime": 2.4433,
"eval_samples_per_second": 356.901,
"eval_steps_per_second": 44.613,
"step": 500
},
{
"epoch": 0.24,
"learning_rate": 7.917656373713381e-06,
"loss": 0.2989,
"step": 1000
},
{
"epoch": 0.24,
"eval_accuracy": 0.9220183486238532,
"eval_loss": 0.20883557200431824,
"eval_runtime": 2.4454,
"eval_samples_per_second": 356.584,
"eval_steps_per_second": 44.573,
"step": 1000
},
{
"epoch": 0.36,
"learning_rate": 1.1876484560570072e-05,
"loss": 0.2725,
"step": 1500
},
{
"epoch": 0.36,
"eval_accuracy": 0.9243119266055045,
"eval_loss": 0.25596883893013,
"eval_runtime": 2.451,
"eval_samples_per_second": 355.775,
"eval_steps_per_second": 44.472,
"step": 1500
},
{
"epoch": 0.48,
"learning_rate": 1.5835312747426762e-05,
"loss": 0.2814,
"step": 2000
},
{
"epoch": 0.48,
"eval_accuracy": 0.926605504587156,
"eval_loss": 0.20158442854881287,
"eval_runtime": 2.462,
"eval_samples_per_second": 354.188,
"eval_steps_per_second": 44.274,
"step": 2000
},
{
"epoch": 0.59,
"learning_rate": 1.9794140934283453e-05,
"loss": 0.2586,
"step": 2500
},
{
"epoch": 0.59,
"eval_accuracy": 0.9174311926605505,
"eval_loss": 0.22930225729942322,
"eval_runtime": 2.4517,
"eval_samples_per_second": 355.671,
"eval_steps_per_second": 44.459,
"step": 2500
},
{
"epoch": 0.71,
"learning_rate": 1.9760448779501697e-05,
"loss": 0.2536,
"step": 3000
},
{
"epoch": 0.71,
"eval_accuracy": 0.9323394495412844,
"eval_loss": 0.23396578431129456,
"eval_runtime": 2.4584,
"eval_samples_per_second": 354.697,
"eval_steps_per_second": 44.337,
"step": 3000
},
{
"epoch": 0.83,
"learning_rate": 1.95077576186385e-05,
"loss": 0.2494,
"step": 3500
},
{
"epoch": 0.83,
"eval_accuracy": 0.9323394495412844,
"eval_loss": 0.19522710144519806,
"eval_runtime": 2.4521,
"eval_samples_per_second": 355.616,
"eval_steps_per_second": 44.452,
"step": 3500
},
{
"epoch": 0.95,
"learning_rate": 1.925506645777531e-05,
"loss": 0.2396,
"step": 4000
},
{
"epoch": 0.95,
"eval_accuracy": 0.9323394495412844,
"eval_loss": 0.24936608970165253,
"eval_runtime": 2.4569,
"eval_samples_per_second": 354.916,
"eval_steps_per_second": 44.365,
"step": 4000
},
{
"epoch": 1.07,
"learning_rate": 1.9002375296912114e-05,
"loss": 0.2123,
"step": 4500
},
{
"epoch": 1.07,
"eval_accuracy": 0.9380733944954128,
"eval_loss": 0.21870844066143036,
"eval_runtime": 2.449,
"eval_samples_per_second": 356.068,
"eval_steps_per_second": 44.509,
"step": 4500
},
{
"epoch": 1.19,
"learning_rate": 1.874968413604892e-05,
"loss": 0.2042,
"step": 5000
},
{
"epoch": 1.19,
"eval_accuracy": 0.9151376146788991,
"eval_loss": 0.2811821401119232,
"eval_runtime": 2.4602,
"eval_samples_per_second": 354.439,
"eval_steps_per_second": 44.305,
"step": 5000
},
{
"epoch": 1.31,
"learning_rate": 1.849699297518573e-05,
"loss": 0.2083,
"step": 5500
},
{
"epoch": 1.31,
"eval_accuracy": 0.9346330275229358,
"eval_loss": 0.27386215329170227,
"eval_runtime": 2.5255,
"eval_samples_per_second": 345.272,
"eval_steps_per_second": 43.159,
"step": 5500
},
{
"epoch": 1.43,
"learning_rate": 1.8244301814322537e-05,
"loss": 0.2041,
"step": 6000
},
{
"epoch": 1.43,
"eval_accuracy": 0.9380733944954128,
"eval_loss": 0.20871196687221527,
"eval_runtime": 2.4547,
"eval_samples_per_second": 355.241,
"eval_steps_per_second": 44.405,
"step": 6000
},
{
"epoch": 1.54,
"learning_rate": 1.7991610653459345e-05,
"loss": 0.1969,
"step": 6500
},
{
"epoch": 1.54,
"eval_accuracy": 0.9254587155963303,
"eval_loss": 0.25904807448387146,
"eval_runtime": 2.4532,
"eval_samples_per_second": 355.448,
"eval_steps_per_second": 44.431,
"step": 6500
},
{
"epoch": 1.66,
"learning_rate": 1.773891949259615e-05,
"loss": 0.1982,
"step": 7000
},
{
"epoch": 1.66,
"eval_accuracy": 0.930045871559633,
"eval_loss": 0.2444588840007782,
"eval_runtime": 2.4545,
"eval_samples_per_second": 355.268,
"eval_steps_per_second": 44.409,
"step": 7000
},
{
"epoch": 1.78,
"learning_rate": 1.7486228331732958e-05,
"loss": 0.1943,
"step": 7500
},
{
"epoch": 1.78,
"eval_accuracy": 0.926605504587156,
"eval_loss": 0.2798321545124054,
"eval_runtime": 2.4455,
"eval_samples_per_second": 356.567,
"eval_steps_per_second": 44.571,
"step": 7500
},
{
"epoch": 1.9,
"learning_rate": 1.7233537170869766e-05,
"loss": 0.1848,
"step": 8000
},
{
"epoch": 1.9,
"eval_accuracy": 0.9311926605504587,
"eval_loss": 0.2844010591506958,
"eval_runtime": 2.4586,
"eval_samples_per_second": 354.679,
"eval_steps_per_second": 44.335,
"step": 8000
},
{
"epoch": 2.02,
"learning_rate": 1.698084601000657e-05,
"loss": 0.1788,
"step": 8500
},
{
"epoch": 2.02,
"eval_accuracy": 0.9254587155963303,
"eval_loss": 0.2998378872871399,
"eval_runtime": 2.446,
"eval_samples_per_second": 356.496,
"eval_steps_per_second": 44.562,
"step": 8500
},
{
"epoch": 2.14,
"learning_rate": 1.672815484914338e-05,
"loss": 0.1623,
"step": 9000
},
{
"epoch": 2.14,
"eval_accuracy": 0.9392201834862385,
"eval_loss": 0.2695905268192291,
"eval_runtime": 2.4607,
"eval_samples_per_second": 354.365,
"eval_steps_per_second": 44.296,
"step": 9000
},
{
"epoch": 2.26,
"learning_rate": 1.6475463688280183e-05,
"loss": 0.1499,
"step": 9500
},
{
"epoch": 2.26,
"eval_accuracy": 0.9277522935779816,
"eval_loss": 0.25331878662109375,
"eval_runtime": 2.4449,
"eval_samples_per_second": 356.659,
"eval_steps_per_second": 44.582,
"step": 9500
},
{
"epoch": 2.38,
"learning_rate": 1.622277252741699e-05,
"loss": 0.1426,
"step": 10000
},
{
"epoch": 2.38,
"eval_accuracy": 0.930045871559633,
"eval_loss": 0.29705262184143066,
"eval_runtime": 2.4651,
"eval_samples_per_second": 353.733,
"eval_steps_per_second": 44.217,
"step": 10000
},
{
"epoch": 2.49,
"learning_rate": 1.59700813665538e-05,
"loss": 0.1479,
"step": 10500
},
{
"epoch": 2.49,
"eval_accuracy": 0.9357798165137615,
"eval_loss": 0.25958266854286194,
"eval_runtime": 2.4502,
"eval_samples_per_second": 355.883,
"eval_steps_per_second": 44.485,
"step": 10500
},
{
"epoch": 2.61,
"learning_rate": 1.5717390205690607e-05,
"loss": 0.1405,
"step": 11000
},
{
"epoch": 2.61,
"eval_accuracy": 0.9254587155963303,
"eval_loss": 0.2944609522819519,
"eval_runtime": 2.4554,
"eval_samples_per_second": 355.141,
"eval_steps_per_second": 44.393,
"step": 11000
},
{
"epoch": 2.73,
"learning_rate": 1.5464699044827415e-05,
"loss": 0.1577,
"step": 11500
},
{
"epoch": 2.73,
"eval_accuracy": 0.9002293577981652,
"eval_loss": 0.40612396597862244,
"eval_runtime": 2.4539,
"eval_samples_per_second": 355.36,
"eval_steps_per_second": 44.42,
"step": 11500
},
{
"epoch": 2.85,
"learning_rate": 1.521200788396422e-05,
"loss": 0.1521,
"step": 12000
},
{
"epoch": 2.85,
"eval_accuracy": 0.9334862385321101,
"eval_loss": 0.2724354565143585,
"eval_runtime": 2.4461,
"eval_samples_per_second": 356.483,
"eval_steps_per_second": 44.56,
"step": 12000
},
{
"epoch": 2.97,
"learning_rate": 1.4959316723101027e-05,
"loss": 0.1426,
"step": 12500
},
{
"epoch": 2.97,
"eval_accuracy": 0.9426605504587156,
"eval_loss": 0.27123740315437317,
"eval_runtime": 2.4449,
"eval_samples_per_second": 356.655,
"eval_steps_per_second": 44.582,
"step": 12500
},
{
"epoch": 3.09,
"learning_rate": 1.4706625562237835e-05,
"loss": 0.1206,
"step": 13000
},
{
"epoch": 3.09,
"eval_accuracy": 0.9357798165137615,
"eval_loss": 0.2954227328300476,
"eval_runtime": 2.467,
"eval_samples_per_second": 353.464,
"eval_steps_per_second": 44.183,
"step": 13000
},
{
"epoch": 3.21,
"learning_rate": 1.4453934401374641e-05,
"loss": 0.1074,
"step": 13500
},
{
"epoch": 3.21,
"eval_accuracy": 0.9392201834862385,
"eval_loss": 0.2653304934501648,
"eval_runtime": 2.4486,
"eval_samples_per_second": 356.118,
"eval_steps_per_second": 44.515,
"step": 13500
},
{
"epoch": 3.33,
"learning_rate": 1.420124324051145e-05,
"loss": 0.112,
"step": 14000
},
{
"epoch": 3.33,
"eval_accuracy": 0.9346330275229358,
"eval_loss": 0.2777578830718994,
"eval_runtime": 2.4566,
"eval_samples_per_second": 354.969,
"eval_steps_per_second": 44.371,
"step": 14000
},
{
"epoch": 3.44,
"learning_rate": 1.3948552079648254e-05,
"loss": 0.1147,
"step": 14500
},
{
"epoch": 3.44,
"eval_accuracy": 0.9311926605504587,
"eval_loss": 0.3704558312892914,
"eval_runtime": 2.4454,
"eval_samples_per_second": 356.589,
"eval_steps_per_second": 44.574,
"step": 14500
},
{
"epoch": 3.56,
"learning_rate": 1.3695860918785062e-05,
"loss": 0.1196,
"step": 15000
},
{
"epoch": 3.56,
"eval_accuracy": 0.9346330275229358,
"eval_loss": 0.2889645993709564,
"eval_runtime": 2.4563,
"eval_samples_per_second": 354.999,
"eval_steps_per_second": 44.375,
"step": 15000
},
{
"epoch": 3.68,
"learning_rate": 1.344316975792187e-05,
"loss": 0.1159,
"step": 15500
},
{
"epoch": 3.68,
"eval_accuracy": 0.926605504587156,
"eval_loss": 0.3448694944381714,
"eval_runtime": 2.4429,
"eval_samples_per_second": 356.949,
"eval_steps_per_second": 44.619,
"step": 15500
},
{
"epoch": 3.8,
"learning_rate": 1.3190478597058676e-05,
"loss": 0.119,
"step": 16000
},
{
"epoch": 3.8,
"eval_accuracy": 0.9334862385321101,
"eval_loss": 0.3207152187824249,
"eval_runtime": 2.461,
"eval_samples_per_second": 354.323,
"eval_steps_per_second": 44.29,
"step": 16000
},
{
"epoch": 3.92,
"learning_rate": 1.2937787436195484e-05,
"loss": 0.1268,
"step": 16500
},
{
"epoch": 3.92,
"eval_accuracy": 0.9311926605504587,
"eval_loss": 0.3234628736972809,
"eval_runtime": 2.4504,
"eval_samples_per_second": 355.858,
"eval_steps_per_second": 44.482,
"step": 16500
},
{
"epoch": 4.04,
"learning_rate": 1.2685096275332289e-05,
"loss": 0.1074,
"step": 17000
},
{
"epoch": 4.04,
"eval_accuracy": 0.9334862385321101,
"eval_loss": 0.3650290369987488,
"eval_runtime": 2.456,
"eval_samples_per_second": 355.052,
"eval_steps_per_second": 44.382,
"step": 17000
},
{
"epoch": 4.16,
"learning_rate": 1.2432405114469096e-05,
"loss": 0.0805,
"step": 17500
},
{
"epoch": 4.16,
"eval_accuracy": 0.9380733944954128,
"eval_loss": 0.33378419280052185,
"eval_runtime": 2.4457,
"eval_samples_per_second": 356.538,
"eval_steps_per_second": 44.567,
"step": 17500
},
{
"epoch": 4.28,
"learning_rate": 1.2179713953605903e-05,
"loss": 0.0838,
"step": 18000
},
{
"epoch": 4.28,
"eval_accuracy": 0.9208715596330275,
"eval_loss": 0.4302394688129425,
"eval_runtime": 2.4587,
"eval_samples_per_second": 354.661,
"eval_steps_per_second": 44.333,
"step": 18000
},
{
"epoch": 4.39,
"learning_rate": 1.192702279274271e-05,
"loss": 0.0848,
"step": 18500
},
{
"epoch": 4.39,
"eval_accuracy": 0.9323394495412844,
"eval_loss": 0.40956971049308777,
"eval_runtime": 2.4483,
"eval_samples_per_second": 356.162,
"eval_steps_per_second": 44.52,
"step": 18500
},
{
"epoch": 4.51,
"learning_rate": 1.1674331631879519e-05,
"loss": 0.0922,
"step": 19000
},
{
"epoch": 4.51,
"eval_accuracy": 0.9369266055045872,
"eval_loss": 0.3332035541534424,
"eval_runtime": 2.4597,
"eval_samples_per_second": 354.511,
"eval_steps_per_second": 44.314,
"step": 19000
},
{
"epoch": 4.63,
"learning_rate": 1.1421640471016325e-05,
"loss": 0.091,
"step": 19500
},
{
"epoch": 4.63,
"eval_accuracy": 0.9438073394495413,
"eval_loss": 0.3024330735206604,
"eval_runtime": 2.4457,
"eval_samples_per_second": 356.542,
"eval_steps_per_second": 44.568,
"step": 19500
},
{
"epoch": 4.75,
"learning_rate": 1.1168949310153133e-05,
"loss": 0.0977,
"step": 20000
},
{
"epoch": 4.75,
"eval_accuracy": 0.9495412844036697,
"eval_loss": 0.2673788070678711,
"eval_runtime": 2.4587,
"eval_samples_per_second": 354.654,
"eval_steps_per_second": 44.332,
"step": 20000
},
{
"epoch": 4.87,
"learning_rate": 1.0916258149289937e-05,
"loss": 0.0897,
"step": 20500
},
{
"epoch": 4.87,
"eval_accuracy": 0.930045871559633,
"eval_loss": 0.39930590987205505,
"eval_runtime": 2.4473,
"eval_samples_per_second": 356.313,
"eval_steps_per_second": 44.539,
"step": 20500
},
{
"epoch": 4.99,
"learning_rate": 1.0663566988426745e-05,
"loss": 0.1013,
"step": 21000
},
{
"epoch": 4.99,
"eval_accuracy": 0.9288990825688074,
"eval_loss": 0.322666198015213,
"eval_runtime": 2.4496,
"eval_samples_per_second": 355.981,
"eval_steps_per_second": 44.498,
"step": 21000
},
{
"epoch": 5.11,
"learning_rate": 1.0410875827563553e-05,
"loss": 0.0671,
"step": 21500
},
{
"epoch": 5.11,
"eval_accuracy": 0.9426605504587156,
"eval_loss": 0.3374435603618622,
"eval_runtime": 2.4457,
"eval_samples_per_second": 356.54,
"eval_steps_per_second": 44.567,
"step": 21500
},
{
"epoch": 5.23,
"learning_rate": 1.015818466670036e-05,
"loss": 0.0671,
"step": 22000
},
{
"epoch": 5.23,
"eval_accuracy": 0.9277522935779816,
"eval_loss": 0.4108366072177887,
"eval_runtime": 2.4551,
"eval_samples_per_second": 355.179,
"eval_steps_per_second": 44.397,
"step": 22000
},
{
"epoch": 5.34,
"learning_rate": 9.905493505837167e-06,
"loss": 0.0652,
"step": 22500
},
{
"epoch": 5.34,
"eval_accuracy": 0.9380733944954128,
"eval_loss": 0.3549734652042389,
"eval_runtime": 2.4475,
"eval_samples_per_second": 356.289,
"eval_steps_per_second": 44.536,
"step": 22500
},
{
"epoch": 5.46,
"learning_rate": 9.652802344973974e-06,
"loss": 0.0664,
"step": 23000
},
{
"epoch": 5.46,
"eval_accuracy": 0.9357798165137615,
"eval_loss": 0.339821994304657,
"eval_runtime": 2.4559,
"eval_samples_per_second": 355.062,
"eval_steps_per_second": 44.383,
"step": 23000
},
{
"epoch": 5.58,
"learning_rate": 9.40011118411078e-06,
"loss": 0.0742,
"step": 23500
},
{
"epoch": 5.58,
"eval_accuracy": 0.9380733944954128,
"eval_loss": 0.3286002278327942,
"eval_runtime": 2.4471,
"eval_samples_per_second": 356.342,
"eval_steps_per_second": 44.543,
"step": 23500
},
{
"epoch": 5.7,
"learning_rate": 9.147420023247588e-06,
"loss": 0.0758,
"step": 24000
},
{
"epoch": 5.7,
"eval_accuracy": 0.9311926605504587,
"eval_loss": 0.32764118909835815,
"eval_runtime": 2.4639,
"eval_samples_per_second": 353.904,
"eval_steps_per_second": 44.238,
"step": 24000
},
{
"epoch": 5.82,
"learning_rate": 8.894728862384394e-06,
"loss": 0.075,
"step": 24500
},
{
"epoch": 5.82,
"eval_accuracy": 0.9369266055045872,
"eval_loss": 0.32022935152053833,
"eval_runtime": 2.4503,
"eval_samples_per_second": 355.874,
"eval_steps_per_second": 44.484,
"step": 24500
},
{
"epoch": 5.94,
"learning_rate": 8.642037701521202e-06,
"loss": 0.0686,
"step": 25000
},
{
"epoch": 5.94,
"eval_accuracy": 0.9415137614678899,
"eval_loss": 0.3481292426586151,
"eval_runtime": 2.4555,
"eval_samples_per_second": 355.12,
"eval_steps_per_second": 44.39,
"step": 25000
},
{
"epoch": 6.06,
"learning_rate": 8.389346540658008e-06,
"loss": 0.0729,
"step": 25500
},
{
"epoch": 6.06,
"eval_accuracy": 0.9334862385321101,
"eval_loss": 0.38161903619766235,
"eval_runtime": 2.4476,
"eval_samples_per_second": 356.27,
"eval_steps_per_second": 44.534,
"step": 25500
},
{
"epoch": 6.18,
"learning_rate": 8.136655379794816e-06,
"loss": 0.0568,
"step": 26000
},
{
"epoch": 6.18,
"eval_accuracy": 0.9380733944954128,
"eval_loss": 0.31324318051338196,
"eval_runtime": 2.4707,
"eval_samples_per_second": 352.935,
"eval_steps_per_second": 44.117,
"step": 26000
},
{
"epoch": 6.29,
"learning_rate": 7.883964218931623e-06,
"loss": 0.0529,
"step": 26500
},
{
"epoch": 6.29,
"eval_accuracy": 0.930045871559633,
"eval_loss": 0.3756808340549469,
"eval_runtime": 2.4544,
"eval_samples_per_second": 355.287,
"eval_steps_per_second": 44.411,
"step": 26500
},
{
"epoch": 6.41,
"learning_rate": 7.631273058068429e-06,
"loss": 0.0506,
"step": 27000
},
{
"epoch": 6.41,
"eval_accuracy": 0.9380733944954128,
"eval_loss": 0.33958113193511963,
"eval_runtime": 2.4531,
"eval_samples_per_second": 355.471,
"eval_steps_per_second": 44.434,
"step": 27000
},
{
"epoch": 6.53,
"learning_rate": 7.378581897205236e-06,
"loss": 0.0476,
"step": 27500
},
{
"epoch": 6.53,
"eval_accuracy": 0.9403669724770642,
"eval_loss": 0.3641544580459595,
"eval_runtime": 2.4417,
"eval_samples_per_second": 357.132,
"eval_steps_per_second": 44.641,
"step": 27500
},
{
"epoch": 6.65,
"learning_rate": 7.125890736342044e-06,
"loss": 0.0555,
"step": 28000
},
{
"epoch": 6.65,
"eval_accuracy": 0.9403669724770642,
"eval_loss": 0.34298017621040344,
"eval_runtime": 2.4463,
"eval_samples_per_second": 356.452,
"eval_steps_per_second": 44.556,
"step": 28000
},
{
"epoch": 6.77,
"learning_rate": 6.87319957547885e-06,
"loss": 0.0574,
"step": 28500
},
{
"epoch": 6.77,
"eval_accuracy": 0.9392201834862385,
"eval_loss": 0.3401435613632202,
"eval_runtime": 2.4439,
"eval_samples_per_second": 356.811,
"eval_steps_per_second": 44.601,
"step": 28500
},
{
"epoch": 6.89,
"learning_rate": 6.620508414615657e-06,
"loss": 0.0524,
"step": 29000
},
{
"epoch": 6.89,
"eval_accuracy": 0.9346330275229358,
"eval_loss": 0.33783158659935,
"eval_runtime": 2.4521,
"eval_samples_per_second": 355.616,
"eval_steps_per_second": 44.452,
"step": 29000
},
{
"epoch": 7.01,
"learning_rate": 6.367817253752464e-06,
"loss": 0.0492,
"step": 29500
},
{
"epoch": 7.01,
"eval_accuracy": 0.9380733944954128,
"eval_loss": 0.3833492398262024,
"eval_runtime": 2.4457,
"eval_samples_per_second": 356.538,
"eval_steps_per_second": 44.567,
"step": 29500
},
{
"epoch": 7.13,
"learning_rate": 6.1151260928892706e-06,
"loss": 0.039,
"step": 30000
},
{
"epoch": 7.13,
"eval_accuracy": 0.9346330275229358,
"eval_loss": 0.3346712589263916,
"eval_runtime": 2.4434,
"eval_samples_per_second": 356.873,
"eval_steps_per_second": 44.609,
"step": 30000
},
{
"epoch": 7.24,
"learning_rate": 5.8624349320260785e-06,
"loss": 0.0411,
"step": 30500
},
{
"epoch": 7.24,
"eval_accuracy": 0.9334862385321101,
"eval_loss": 0.4404141902923584,
"eval_runtime": 2.4419,
"eval_samples_per_second": 357.102,
"eval_steps_per_second": 44.638,
"step": 30500
},
{
"epoch": 7.36,
"learning_rate": 5.609743771162886e-06,
"loss": 0.0412,
"step": 31000
},
{
"epoch": 7.36,
"eval_accuracy": 0.9380733944954128,
"eval_loss": 0.36179476976394653,
"eval_runtime": 2.4414,
"eval_samples_per_second": 357.173,
"eval_steps_per_second": 44.647,
"step": 31000
},
{
"epoch": 7.48,
"learning_rate": 5.357052610299692e-06,
"loss": 0.0477,
"step": 31500
},
{
"epoch": 7.48,
"eval_accuracy": 0.9380733944954128,
"eval_loss": 0.3806387484073639,
"eval_runtime": 2.4471,
"eval_samples_per_second": 356.337,
"eval_steps_per_second": 44.542,
"step": 31500
},
{
"epoch": 7.6,
"learning_rate": 5.104361449436499e-06,
"loss": 0.0435,
"step": 32000
},
{
"epoch": 7.6,
"eval_accuracy": 0.9334862385321101,
"eval_loss": 0.39115917682647705,
"eval_runtime": 2.4665,
"eval_samples_per_second": 353.536,
"eval_steps_per_second": 44.192,
"step": 32000
},
{
"epoch": 7.72,
"learning_rate": 4.851670288573306e-06,
"loss": 0.0443,
"step": 32500
},
{
"epoch": 7.72,
"eval_accuracy": 0.9392201834862385,
"eval_loss": 0.39003145694732666,
"eval_runtime": 2.4534,
"eval_samples_per_second": 355.426,
"eval_steps_per_second": 44.428,
"step": 32500
},
{
"epoch": 7.84,
"learning_rate": 4.598979127710113e-06,
"loss": 0.0421,
"step": 33000
},
{
"epoch": 7.84,
"eval_accuracy": 0.9369266055045872,
"eval_loss": 0.4152164161205292,
"eval_runtime": 2.4525,
"eval_samples_per_second": 355.556,
"eval_steps_per_second": 44.445,
"step": 33000
},
{
"epoch": 7.96,
"learning_rate": 4.34628796684692e-06,
"loss": 0.0495,
"step": 33500
},
{
"epoch": 7.96,
"eval_accuracy": 0.9288990825688074,
"eval_loss": 0.3831779360771179,
"eval_runtime": 2.447,
"eval_samples_per_second": 356.361,
"eval_steps_per_second": 44.545,
"step": 33500
},
{
"epoch": 8.08,
"learning_rate": 4.093596805983727e-06,
"loss": 0.0293,
"step": 34000
},
{
"epoch": 8.08,
"eval_accuracy": 0.9346330275229358,
"eval_loss": 0.44268128275871277,
"eval_runtime": 2.4587,
"eval_samples_per_second": 354.661,
"eval_steps_per_second": 44.333,
"step": 34000
},
{
"epoch": 8.19,
"learning_rate": 3.840905645120534e-06,
"loss": 0.0253,
"step": 34500
},
{
"epoch": 8.19,
"eval_accuracy": 0.9380733944954128,
"eval_loss": 0.44246163964271545,
"eval_runtime": 2.4427,
"eval_samples_per_second": 356.983,
"eval_steps_per_second": 44.623,
"step": 34500
},
{
"epoch": 8.31,
"learning_rate": 3.5882144842573407e-06,
"loss": 0.0407,
"step": 35000
},
{
"epoch": 8.31,
"eval_accuracy": 0.9357798165137615,
"eval_loss": 0.41019341349601746,
"eval_runtime": 2.453,
"eval_samples_per_second": 355.477,
"eval_steps_per_second": 44.435,
"step": 35000
},
{
"epoch": 8.43,
"learning_rate": 3.3355233233941482e-06,
"loss": 0.0311,
"step": 35500
},
{
"epoch": 8.43,
"eval_accuracy": 0.9369266055045872,
"eval_loss": 0.44467687606811523,
"eval_runtime": 2.4425,
"eval_samples_per_second": 357.013,
"eval_steps_per_second": 44.627,
"step": 35500
},
{
"epoch": 8.55,
"learning_rate": 3.082832162530955e-06,
"loss": 0.0291,
"step": 36000
},
{
"epoch": 8.55,
"eval_accuracy": 0.9346330275229358,
"eval_loss": 0.46120545268058777,
"eval_runtime": 2.4514,
"eval_samples_per_second": 355.714,
"eval_steps_per_second": 44.464,
"step": 36000
},
{
"epoch": 8.67,
"learning_rate": 2.8301410016677616e-06,
"loss": 0.035,
"step": 36500
},
{
"epoch": 8.67,
"eval_accuracy": 0.9346330275229358,
"eval_loss": 0.4240852892398834,
"eval_runtime": 2.4477,
"eval_samples_per_second": 356.249,
"eval_steps_per_second": 44.531,
"step": 36500
},
{
"epoch": 8.79,
"learning_rate": 2.577449840804569e-06,
"loss": 0.0381,
"step": 37000
},
{
"epoch": 8.79,
"eval_accuracy": 0.9311926605504587,
"eval_loss": 0.41976186633110046,
"eval_runtime": 2.4523,
"eval_samples_per_second": 355.586,
"eval_steps_per_second": 44.448,
"step": 37000
},
{
"epoch": 8.91,
"learning_rate": 2.3247586799413758e-06,
"loss": 0.0234,
"step": 37500
},
{
"epoch": 8.91,
"eval_accuracy": 0.9369266055045872,
"eval_loss": 0.4344768822193146,
"eval_runtime": 2.4469,
"eval_samples_per_second": 356.366,
"eval_steps_per_second": 44.546,
"step": 37500
},
{
"epoch": 9.03,
"learning_rate": 2.072067519078183e-06,
"loss": 0.0311,
"step": 38000
},
{
"epoch": 9.03,
"eval_accuracy": 0.9311926605504587,
"eval_loss": 0.45580777525901794,
"eval_runtime": 2.4545,
"eval_samples_per_second": 355.27,
"eval_steps_per_second": 44.409,
"step": 38000
},
{
"epoch": 9.14,
"learning_rate": 1.8193763582149898e-06,
"loss": 0.028,
"step": 38500
},
{
"epoch": 9.14,
"eval_accuracy": 0.9380733944954128,
"eval_loss": 0.42450448870658875,
"eval_runtime": 2.4449,
"eval_samples_per_second": 356.658,
"eval_steps_per_second": 44.582,
"step": 38500
},
{
"epoch": 9.26,
"learning_rate": 1.5666851973517969e-06,
"loss": 0.0213,
"step": 39000
},
{
"epoch": 9.26,
"eval_accuracy": 0.9380733944954128,
"eval_loss": 0.446162611246109,
"eval_runtime": 2.4606,
"eval_samples_per_second": 354.384,
"eval_steps_per_second": 44.298,
"step": 39000
},
{
"epoch": 9.38,
"learning_rate": 1.3139940364886035e-06,
"loss": 0.0276,
"step": 39500
},
{
"epoch": 9.38,
"eval_accuracy": 0.9380733944954128,
"eval_loss": 0.42100322246551514,
"eval_runtime": 2.4512,
"eval_samples_per_second": 355.743,
"eval_steps_per_second": 44.468,
"step": 39500
},
{
"epoch": 9.5,
"learning_rate": 1.0613028756254106e-06,
"loss": 0.0183,
"step": 40000
},
{
"epoch": 9.5,
"eval_accuracy": 0.9403669724770642,
"eval_loss": 0.43098002672195435,
"eval_runtime": 2.45,
"eval_samples_per_second": 355.922,
"eval_steps_per_second": 44.49,
"step": 40000
},
{
"epoch": 9.62,
"learning_rate": 8.086117147622177e-07,
"loss": 0.0184,
"step": 40500
},
{
"epoch": 9.62,
"eval_accuracy": 0.9403669724770642,
"eval_loss": 0.4437469244003296,
"eval_runtime": 2.4461,
"eval_samples_per_second": 356.492,
"eval_steps_per_second": 44.561,
"step": 40500
},
{
"epoch": 9.74,
"learning_rate": 5.559205538990246e-07,
"loss": 0.0296,
"step": 41000
},
{
"epoch": 9.74,
"eval_accuracy": 0.9392201834862385,
"eval_loss": 0.43114030361175537,
"eval_runtime": 2.4504,
"eval_samples_per_second": 355.859,
"eval_steps_per_second": 44.482,
"step": 41000
},
{
"epoch": 9.86,
"learning_rate": 3.0322939303583163e-07,
"loss": 0.019,
"step": 41500
},
{
"epoch": 9.86,
"eval_accuracy": 0.9415137614678899,
"eval_loss": 0.42435380816459656,
"eval_runtime": 2.4473,
"eval_samples_per_second": 356.311,
"eval_steps_per_second": 44.539,
"step": 41500
},
{
"epoch": 9.98,
"learning_rate": 5.053823217263861e-08,
"loss": 0.0245,
"step": 42000
},
{
"epoch": 9.98,
"eval_accuracy": 0.9415137614678899,
"eval_loss": 0.42697247862815857,
"eval_runtime": 2.46,
"eval_samples_per_second": 354.474,
"eval_steps_per_second": 44.309,
"step": 42000
},
{
"epoch": 10.0,
"step": 42100,
"total_flos": 4.43006661686016e+16,
"train_loss": 0.10745611605338416,
"train_runtime": 8358.8854,
"train_samples_per_second": 80.572,
"train_steps_per_second": 5.037
}
],
"max_steps": 42100,
"num_train_epochs": 10,
"total_flos": 4.43006661686016e+16,
"trial_name": null,
"trial_params": null
}