aguila-7b / trainer_state.json
joanllop's picture
First model version
f8df299
raw
history blame
243 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"global_step": 3338128,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.9925557078698005e-05,
"loss": 5.3279,
"step": 5000
},
{
"epoch": 0.0,
"eval_accuracy": 0.31327971235572855,
"eval_loss": 3.994140625,
"eval_runtime": 39.5933,
"eval_samples_per_second": 90.116,
"eval_steps_per_second": 11.265,
"step": 5000
},
{
"epoch": 0.0,
"learning_rate": 4.985067978220128e-05,
"loss": 3.5754,
"step": 10000
},
{
"epoch": 0.0,
"eval_accuracy": 0.3823957607318666,
"eval_loss": 3.310546875,
"eval_runtime": 39.5685,
"eval_samples_per_second": 90.173,
"eval_steps_per_second": 11.272,
"step": 10000
},
{
"epoch": 0.0,
"learning_rate": 4.9775862399524525e-05,
"loss": 3.6102,
"step": 15000
},
{
"epoch": 0.0,
"eval_accuracy": 0.39768097216925513,
"eval_loss": 3.166015625,
"eval_runtime": 39.5825,
"eval_samples_per_second": 90.141,
"eval_steps_per_second": 11.268,
"step": 15000
},
{
"epoch": 0.01,
"learning_rate": 4.970101505993779e-05,
"loss": 3.0639,
"step": 20000
},
{
"epoch": 0.01,
"eval_accuracy": 0.41336891627105715,
"eval_loss": 3.021484375,
"eval_runtime": 39.5961,
"eval_samples_per_second": 90.11,
"eval_steps_per_second": 11.264,
"step": 20000
},
{
"epoch": 0.01,
"learning_rate": 4.962618269880604e-05,
"loss": 2.9477,
"step": 25000
},
{
"epoch": 0.01,
"eval_accuracy": 0.425242680676284,
"eval_loss": 2.919921875,
"eval_runtime": 39.6071,
"eval_samples_per_second": 90.085,
"eval_steps_per_second": 11.261,
"step": 25000
},
{
"epoch": 0.01,
"learning_rate": 4.9551335359219304e-05,
"loss": 2.8589,
"step": 30000
},
{
"epoch": 0.01,
"eval_accuracy": 0.4315466797294513,
"eval_loss": 2.8671875,
"eval_runtime": 39.6079,
"eval_samples_per_second": 90.083,
"eval_steps_per_second": 11.26,
"step": 30000
},
{
"epoch": 0.01,
"learning_rate": 4.947647304117757e-05,
"loss": 2.8063,
"step": 35000
},
{
"epoch": 0.01,
"eval_accuracy": 0.43875449855125825,
"eval_loss": 2.802734375,
"eval_runtime": 39.6873,
"eval_samples_per_second": 89.903,
"eval_steps_per_second": 11.238,
"step": 35000
},
{
"epoch": 0.01,
"learning_rate": 4.940162570159084e-05,
"loss": 2.7646,
"step": 40000
},
{
"epoch": 0.01,
"eval_accuracy": 0.44185690990623727,
"eval_loss": 2.771484375,
"eval_runtime": 39.6738,
"eval_samples_per_second": 89.933,
"eval_steps_per_second": 11.242,
"step": 40000
},
{
"epoch": 0.01,
"learning_rate": 4.932679334045909e-05,
"loss": 2.7306,
"step": 45000
},
{
"epoch": 0.01,
"eval_accuracy": 0.4467244957896629,
"eval_loss": 2.736328125,
"eval_runtime": 39.7305,
"eval_samples_per_second": 89.805,
"eval_steps_per_second": 11.226,
"step": 45000
},
{
"epoch": 0.01,
"learning_rate": 4.9251960979327336e-05,
"loss": 2.7106,
"step": 50000
},
{
"epoch": 0.01,
"eval_accuracy": 0.4492548623804952,
"eval_loss": 2.712890625,
"eval_runtime": 39.6605,
"eval_samples_per_second": 89.964,
"eval_steps_per_second": 11.245,
"step": 50000
},
{
"epoch": 0.02,
"learning_rate": 4.917712861819559e-05,
"loss": 2.6829,
"step": 55000
},
{
"epoch": 0.02,
"eval_accuracy": 0.45224302916332426,
"eval_loss": 2.689453125,
"eval_runtime": 39.6345,
"eval_samples_per_second": 90.023,
"eval_steps_per_second": 11.253,
"step": 55000
},
{
"epoch": 0.02,
"learning_rate": 4.9102311235518835e-05,
"loss": 2.6703,
"step": 60000
},
{
"epoch": 0.02,
"eval_accuracy": 0.45370464737056665,
"eval_loss": 2.67578125,
"eval_runtime": 39.6085,
"eval_samples_per_second": 90.082,
"eval_steps_per_second": 11.26,
"step": 60000
},
{
"epoch": 0.02,
"learning_rate": 4.90274489174771e-05,
"loss": 2.6522,
"step": 65000
},
{
"epoch": 0.02,
"eval_accuracy": 0.4559714569388372,
"eval_loss": 2.66015625,
"eval_runtime": 39.7158,
"eval_samples_per_second": 89.838,
"eval_steps_per_second": 11.23,
"step": 65000
},
{
"epoch": 0.02,
"learning_rate": 4.895260157789037e-05,
"loss": 2.6377,
"step": 70000
},
{
"epoch": 0.02,
"eval_accuracy": 0.4573840349100728,
"eval_loss": 2.6484375,
"eval_runtime": 40.0228,
"eval_samples_per_second": 89.149,
"eval_steps_per_second": 11.144,
"step": 70000
},
{
"epoch": 0.02,
"learning_rate": 4.8877769216758615e-05,
"loss": 2.6241,
"step": 75000
},
{
"epoch": 0.02,
"eval_accuracy": 0.4586505880444836,
"eval_loss": 2.634765625,
"eval_runtime": 39.747,
"eval_samples_per_second": 89.768,
"eval_steps_per_second": 11.221,
"step": 75000
},
{
"epoch": 0.02,
"learning_rate": 4.880295183408186e-05,
"loss": 2.6159,
"step": 80000
},
{
"epoch": 0.02,
"eval_accuracy": 0.46040042037619067,
"eval_loss": 2.625,
"eval_runtime": 39.8297,
"eval_samples_per_second": 89.581,
"eval_steps_per_second": 11.198,
"step": 80000
},
{
"epoch": 0.03,
"learning_rate": 4.872807453758514e-05,
"loss": 2.5959,
"step": 85000
},
{
"epoch": 0.03,
"eval_accuracy": 0.4612864322379005,
"eval_loss": 2.61328125,
"eval_runtime": 39.842,
"eval_samples_per_second": 89.554,
"eval_steps_per_second": 11.194,
"step": 85000
},
{
"epoch": 0.03,
"learning_rate": 4.86532271979984e-05,
"loss": 2.5877,
"step": 90000
},
{
"epoch": 0.03,
"eval_accuracy": 0.4624083303744811,
"eval_loss": 2.603515625,
"eval_runtime": 39.7194,
"eval_samples_per_second": 89.83,
"eval_steps_per_second": 11.229,
"step": 90000
},
{
"epoch": 0.03,
"learning_rate": 4.857840981532165e-05,
"loss": 2.5832,
"step": 95000
},
{
"epoch": 0.03,
"eval_accuracy": 0.46323187757803697,
"eval_loss": 2.599609375,
"eval_runtime": 40.0969,
"eval_samples_per_second": 88.984,
"eval_steps_per_second": 11.123,
"step": 95000
},
{
"epoch": 0.03,
"learning_rate": 4.8503562475734907e-05,
"loss": 2.5726,
"step": 100000
},
{
"epoch": 0.03,
"eval_accuracy": 0.46476143979941176,
"eval_loss": 2.5859375,
"eval_runtime": 39.6873,
"eval_samples_per_second": 89.903,
"eval_steps_per_second": 11.238,
"step": 100000
},
{
"epoch": 0.03,
"learning_rate": 4.8428775049968125e-05,
"loss": 2.5723,
"step": 105000
},
{
"epoch": 0.03,
"eval_accuracy": 0.46553211121777593,
"eval_loss": 2.580078125,
"eval_runtime": 39.7946,
"eval_samples_per_second": 89.66,
"eval_steps_per_second": 11.208,
"step": 105000
},
{
"epoch": 0.03,
"learning_rate": 4.83539127319264e-05,
"loss": 2.5584,
"step": 110000
},
{
"epoch": 0.03,
"eval_accuracy": 0.46414501225183996,
"eval_loss": 2.59375,
"eval_runtime": 39.8172,
"eval_samples_per_second": 89.61,
"eval_steps_per_second": 11.201,
"step": 110000
},
{
"epoch": 0.03,
"learning_rate": 4.827905041388467e-05,
"loss": 2.5541,
"step": 115000
},
{
"epoch": 0.03,
"eval_accuracy": 0.4673400247228542,
"eval_loss": 2.56640625,
"eval_runtime": 39.875,
"eval_samples_per_second": 89.48,
"eval_steps_per_second": 11.185,
"step": 115000
},
{
"epoch": 0.04,
"learning_rate": 4.820423303120791e-05,
"loss": 2.541,
"step": 120000
},
{
"epoch": 0.04,
"eval_accuracy": 0.46835206177206756,
"eval_loss": 2.55859375,
"eval_runtime": 39.7895,
"eval_samples_per_second": 89.672,
"eval_steps_per_second": 11.209,
"step": 120000
},
{
"epoch": 0.04,
"learning_rate": 4.812941564853116e-05,
"loss": 2.5359,
"step": 125000
},
{
"epoch": 0.04,
"eval_accuracy": 0.4673909827334534,
"eval_loss": 2.564453125,
"eval_runtime": 39.8856,
"eval_samples_per_second": 89.456,
"eval_steps_per_second": 11.182,
"step": 125000
},
{
"epoch": 0.04,
"learning_rate": 4.805458328739941e-05,
"loss": 2.5298,
"step": 130000
},
{
"epoch": 0.04,
"eval_accuracy": 0.4699054591919484,
"eval_loss": 2.544921875,
"eval_runtime": 39.8462,
"eval_samples_per_second": 89.544,
"eval_steps_per_second": 11.193,
"step": 130000
},
{
"epoch": 0.04,
"learning_rate": 4.797972096935768e-05,
"loss": 2.5258,
"step": 135000
},
{
"epoch": 0.04,
"eval_accuracy": 0.47030819185636197,
"eval_loss": 2.541015625,
"eval_runtime": 39.8831,
"eval_samples_per_second": 89.461,
"eval_steps_per_second": 11.183,
"step": 135000
},
{
"epoch": 0.04,
"learning_rate": 4.790488860822593e-05,
"loss": 2.5207,
"step": 140000
},
{
"epoch": 0.04,
"eval_accuracy": 0.47090352388341683,
"eval_loss": 2.537109375,
"eval_runtime": 40.0239,
"eval_samples_per_second": 89.147,
"eval_steps_per_second": 11.143,
"step": 140000
},
{
"epoch": 0.04,
"learning_rate": 4.783005624709418e-05,
"loss": 2.5167,
"step": 145000
},
{
"epoch": 0.04,
"eval_accuracy": 0.47193419074295684,
"eval_loss": 2.53125,
"eval_runtime": 39.9144,
"eval_samples_per_second": 89.391,
"eval_steps_per_second": 11.174,
"step": 145000
},
{
"epoch": 0.04,
"learning_rate": 4.7755223885962435e-05,
"loss": 2.5101,
"step": 150000
},
{
"epoch": 0.04,
"eval_accuracy": 0.4701947691876088,
"eval_loss": 2.544921875,
"eval_runtime": 40.0269,
"eval_samples_per_second": 89.14,
"eval_steps_per_second": 11.143,
"step": 150000
},
{
"epoch": 0.05,
"learning_rate": 4.768039152483069e-05,
"loss": 2.5058,
"step": 155000
},
{
"epoch": 0.05,
"eval_accuracy": 0.4730330755844281,
"eval_loss": 2.521484375,
"eval_runtime": 39.9333,
"eval_samples_per_second": 89.349,
"eval_steps_per_second": 11.169,
"step": 155000
},
{
"epoch": 0.05,
"learning_rate": 4.760554418524395e-05,
"loss": 2.5021,
"step": 160000
},
{
"epoch": 0.05,
"eval_accuracy": 0.473403480048569,
"eval_loss": 2.51953125,
"eval_runtime": 40.0331,
"eval_samples_per_second": 89.126,
"eval_steps_per_second": 11.141,
"step": 160000
},
{
"epoch": 0.05,
"learning_rate": 4.753084663020711e-05,
"loss": 2.8135,
"step": 165000
},
{
"epoch": 0.05,
"eval_accuracy": 0.4317466762226635,
"eval_loss": 2.83203125,
"eval_runtime": 40.0127,
"eval_samples_per_second": 89.172,
"eval_steps_per_second": 11.146,
"step": 165000
},
{
"epoch": 0.05,
"learning_rate": 4.7455954355255405e-05,
"loss": 2.7932,
"step": 170000
},
{
"epoch": 0.05,
"eval_accuracy": 0.4729974597705684,
"eval_loss": 2.521484375,
"eval_runtime": 39.9612,
"eval_samples_per_second": 89.287,
"eval_steps_per_second": 11.161,
"step": 170000
},
{
"epoch": 0.05,
"learning_rate": 4.7381077058758686e-05,
"loss": 2.4914,
"step": 175000
},
{
"epoch": 0.05,
"eval_accuracy": 0.4751818050313638,
"eval_loss": 2.505859375,
"eval_runtime": 40.3867,
"eval_samples_per_second": 88.346,
"eval_steps_per_second": 11.043,
"step": 175000
},
{
"epoch": 0.05,
"learning_rate": 4.730621474071695e-05,
"loss": 2.487,
"step": 180000
},
{
"epoch": 0.05,
"eval_accuracy": 0.4753875548483533,
"eval_loss": 2.50390625,
"eval_runtime": 40.0145,
"eval_samples_per_second": 89.168,
"eval_steps_per_second": 11.146,
"step": 180000
},
{
"epoch": 0.06,
"learning_rate": 4.723138237958521e-05,
"loss": 2.4829,
"step": 185000
},
{
"epoch": 0.06,
"eval_accuracy": 0.47510947753244875,
"eval_loss": 2.50390625,
"eval_runtime": 40.0338,
"eval_samples_per_second": 89.125,
"eval_steps_per_second": 11.141,
"step": 185000
},
{
"epoch": 0.06,
"learning_rate": 4.7156505083088486e-05,
"loss": 2.4778,
"step": 190000
},
{
"epoch": 0.06,
"eval_accuracy": 0.47625877244892145,
"eval_loss": 2.49609375,
"eval_runtime": 40.0473,
"eval_samples_per_second": 89.095,
"eval_steps_per_second": 11.137,
"step": 190000
},
{
"epoch": 0.06,
"learning_rate": 4.708170267886672e-05,
"loss": 2.4779,
"step": 195000
},
{
"epoch": 0.06,
"eval_accuracy": 0.4770064305721763,
"eval_loss": 2.4921875,
"eval_runtime": 40.1061,
"eval_samples_per_second": 88.964,
"eval_steps_per_second": 11.121,
"step": 195000
},
{
"epoch": 0.06,
"learning_rate": 4.700687031773498e-05,
"loss": 2.4685,
"step": 200000
},
{
"epoch": 0.06,
"eval_accuracy": 0.4765894515822188,
"eval_loss": 2.494140625,
"eval_runtime": 40.1514,
"eval_samples_per_second": 88.864,
"eval_steps_per_second": 11.108,
"step": 200000
},
{
"epoch": 0.06,
"learning_rate": 4.693202297814823e-05,
"loss": 2.4661,
"step": 205000
},
{
"epoch": 0.06,
"eval_accuracy": 0.47763491270290054,
"eval_loss": 2.484375,
"eval_runtime": 40.1854,
"eval_samples_per_second": 88.788,
"eval_steps_per_second": 11.099,
"step": 205000
},
{
"epoch": 0.06,
"learning_rate": 4.6857190617016483e-05,
"loss": 2.4579,
"step": 210000
},
{
"epoch": 0.06,
"eval_accuracy": 0.47826366880142374,
"eval_loss": 2.48046875,
"eval_runtime": 40.1794,
"eval_samples_per_second": 88.802,
"eval_steps_per_second": 11.1,
"step": 210000
},
{
"epoch": 0.06,
"learning_rate": 4.6782358255884736e-05,
"loss": 2.4589,
"step": 215000
},
{
"epoch": 0.06,
"eval_accuracy": 0.4787937964923355,
"eval_loss": 2.478515625,
"eval_runtime": 40.1613,
"eval_samples_per_second": 88.842,
"eval_steps_per_second": 11.105,
"step": 215000
},
{
"epoch": 0.07,
"learning_rate": 4.6707540873207975e-05,
"loss": 2.4571,
"step": 220000
},
{
"epoch": 0.07,
"eval_accuracy": 0.47927871949642525,
"eval_loss": 2.474609375,
"eval_runtime": 40.1097,
"eval_samples_per_second": 88.956,
"eval_steps_per_second": 11.119,
"step": 220000
},
{
"epoch": 0.07,
"learning_rate": 4.663269353362124e-05,
"loss": 2.4504,
"step": 225000
},
{
"epoch": 0.07,
"eval_accuracy": 0.47965542521994137,
"eval_loss": 2.47265625,
"eval_runtime": 40.1284,
"eval_samples_per_second": 88.915,
"eval_steps_per_second": 11.114,
"step": 225000
},
{
"epoch": 0.07,
"learning_rate": 4.65578461940345e-05,
"loss": 2.4538,
"step": 230000
},
{
"epoch": 0.07,
"eval_accuracy": 0.47995733773435206,
"eval_loss": 2.46875,
"eval_runtime": 40.2242,
"eval_samples_per_second": 88.703,
"eval_steps_per_second": 11.088,
"step": 230000
},
{
"epoch": 0.07,
"learning_rate": 4.6483013832902755e-05,
"loss": 2.4481,
"step": 235000
},
{
"epoch": 0.07,
"eval_accuracy": 0.4806011620618159,
"eval_loss": 2.466796875,
"eval_runtime": 40.1909,
"eval_samples_per_second": 88.776,
"eval_steps_per_second": 11.097,
"step": 235000
},
{
"epoch": 0.07,
"learning_rate": 4.640815151486103e-05,
"loss": 2.4454,
"step": 240000
},
{
"epoch": 0.07,
"eval_accuracy": 0.4809529367156302,
"eval_loss": 2.4609375,
"eval_runtime": 40.319,
"eval_samples_per_second": 88.494,
"eval_steps_per_second": 11.062,
"step": 240000
},
{
"epoch": 0.07,
"learning_rate": 4.6333319153729274e-05,
"loss": 2.44,
"step": 245000
},
{
"epoch": 0.07,
"eval_accuracy": 0.4811408786256898,
"eval_loss": 2.458984375,
"eval_runtime": 40.235,
"eval_samples_per_second": 88.679,
"eval_steps_per_second": 11.085,
"step": 245000
},
{
"epoch": 0.07,
"learning_rate": 4.625850177105252e-05,
"loss": 2.4392,
"step": 250000
},
{
"epoch": 0.07,
"eval_accuracy": 0.4810606060606061,
"eval_loss": 2.458984375,
"eval_runtime": 40.2635,
"eval_samples_per_second": 88.616,
"eval_steps_per_second": 11.077,
"step": 250000
},
{
"epoch": 0.08,
"learning_rate": 4.618366940992077e-05,
"loss": 2.431,
"step": 255000
},
{
"epoch": 0.08,
"eval_accuracy": 0.48131512214580346,
"eval_loss": 2.45703125,
"eval_runtime": 40.2108,
"eval_samples_per_second": 88.732,
"eval_steps_per_second": 11.092,
"step": 255000
},
{
"epoch": 0.08,
"learning_rate": 4.610885202724401e-05,
"loss": 2.4377,
"step": 260000
},
{
"epoch": 0.08,
"eval_accuracy": 0.482264420569064,
"eval_loss": 2.451171875,
"eval_runtime": 40.1835,
"eval_samples_per_second": 88.793,
"eval_steps_per_second": 11.099,
"step": 260000
},
{
"epoch": 0.08,
"learning_rate": 4.6033959752292307e-05,
"loss": 2.4299,
"step": 265000
},
{
"epoch": 0.08,
"eval_accuracy": 0.4825972914447528,
"eval_loss": 2.447265625,
"eval_runtime": 40.2876,
"eval_samples_per_second": 88.563,
"eval_steps_per_second": 11.07,
"step": 265000
},
{
"epoch": 0.08,
"learning_rate": 4.5959142369615546e-05,
"loss": 2.4283,
"step": 270000
},
{
"epoch": 0.08,
"eval_accuracy": 0.482810712360112,
"eval_loss": 2.447265625,
"eval_runtime": 40.3402,
"eval_samples_per_second": 88.448,
"eval_steps_per_second": 11.056,
"step": 270000
},
{
"epoch": 0.08,
"learning_rate": 4.5884295030028805e-05,
"loss": 2.4256,
"step": 275000
},
{
"epoch": 0.08,
"eval_accuracy": 0.48325371829096697,
"eval_loss": 2.443359375,
"eval_runtime": 40.3191,
"eval_samples_per_second": 88.494,
"eval_steps_per_second": 11.062,
"step": 275000
},
{
"epoch": 0.08,
"learning_rate": 4.5809462668897065e-05,
"loss": 2.4198,
"step": 280000
},
{
"epoch": 0.08,
"eval_accuracy": 0.48383589986367365,
"eval_loss": 2.44140625,
"eval_runtime": 40.3148,
"eval_samples_per_second": 88.503,
"eval_steps_per_second": 11.063,
"step": 280000
},
{
"epoch": 0.09,
"learning_rate": 4.573461532931032e-05,
"loss": 2.4174,
"step": 285000
},
{
"epoch": 0.09,
"eval_accuracy": 0.4840175405143581,
"eval_loss": 2.44140625,
"eval_runtime": 40.703,
"eval_samples_per_second": 87.659,
"eval_steps_per_second": 10.957,
"step": 285000
},
{
"epoch": 0.09,
"learning_rate": 4.565978296817857e-05,
"loss": 2.4151,
"step": 290000
},
{
"epoch": 0.09,
"eval_accuracy": 0.4844402728280929,
"eval_loss": 2.435546875,
"eval_runtime": 41.2392,
"eval_samples_per_second": 86.52,
"eval_steps_per_second": 10.815,
"step": 290000
},
{
"epoch": 0.09,
"learning_rate": 4.558493562859184e-05,
"loss": 2.4191,
"step": 295000
},
{
"epoch": 0.09,
"eval_accuracy": 0.4847421853425036,
"eval_loss": 2.43359375,
"eval_runtime": 40.6977,
"eval_samples_per_second": 87.671,
"eval_steps_per_second": 10.959,
"step": 295000
},
{
"epoch": 0.09,
"learning_rate": 4.55100882890051e-05,
"loss": 2.4071,
"step": 300000
},
{
"epoch": 0.09,
"eval_accuracy": 0.4848361562975334,
"eval_loss": 2.431640625,
"eval_runtime": 40.2058,
"eval_samples_per_second": 88.744,
"eval_steps_per_second": 11.093,
"step": 300000
},
{
"epoch": 0.09,
"learning_rate": 4.543524094941836e-05,
"loss": 2.4126,
"step": 305000
},
{
"epoch": 0.09,
"eval_accuracy": 0.48549011743355736,
"eval_loss": 2.427734375,
"eval_runtime": 40.4122,
"eval_samples_per_second": 88.29,
"eval_steps_per_second": 11.036,
"step": 305000
},
{
"epoch": 0.09,
"learning_rate": 4.536045352365158e-05,
"loss": 2.4053,
"step": 310000
},
{
"epoch": 0.09,
"eval_accuracy": 0.48513834277974305,
"eval_loss": 2.4296875,
"eval_runtime": 40.2632,
"eval_samples_per_second": 88.617,
"eval_steps_per_second": 11.077,
"step": 310000
},
{
"epoch": 0.09,
"learning_rate": 4.528559120560985e-05,
"loss": 2.4071,
"step": 315000
},
{
"epoch": 0.09,
"eval_accuracy": 0.4857840848817993,
"eval_loss": 2.42578125,
"eval_runtime": 40.3587,
"eval_samples_per_second": 88.407,
"eval_steps_per_second": 11.051,
"step": 315000
},
{
"epoch": 0.1,
"learning_rate": 4.5210743866023116e-05,
"loss": 2.4027,
"step": 320000
},
{
"epoch": 0.1,
"eval_accuracy": 0.486615303183725,
"eval_loss": 2.421875,
"eval_runtime": 40.2594,
"eval_samples_per_second": 88.625,
"eval_steps_per_second": 11.078,
"step": 320000
},
{
"epoch": 0.1,
"learning_rate": 4.513591150489136e-05,
"loss": 2.4013,
"step": 325000
},
{
"epoch": 0.1,
"eval_accuracy": 0.4867040687505753,
"eval_loss": 2.41796875,
"eval_runtime": 40.2109,
"eval_samples_per_second": 88.732,
"eval_steps_per_second": 11.092,
"step": 325000
},
{
"epoch": 0.1,
"learning_rate": 4.5061049186849636e-05,
"loss": 2.4032,
"step": 330000
},
{
"epoch": 0.1,
"eval_accuracy": 0.4866416040924214,
"eval_loss": 2.41796875,
"eval_runtime": 40.2756,
"eval_samples_per_second": 88.59,
"eval_steps_per_second": 11.074,
"step": 330000
},
{
"epoch": 0.1,
"learning_rate": 4.498621682571789e-05,
"loss": 2.3919,
"step": 335000
},
{
"epoch": 0.1,
"eval_accuracy": 0.4870514599196069,
"eval_loss": 2.416015625,
"eval_runtime": 40.3049,
"eval_samples_per_second": 88.525,
"eval_steps_per_second": 11.066,
"step": 335000
},
{
"epoch": 0.1,
"learning_rate": 4.491136948613115e-05,
"loss": 2.3936,
"step": 340000
},
{
"epoch": 0.1,
"eval_accuracy": 0.4872848804842874,
"eval_loss": 2.4140625,
"eval_runtime": 40.3719,
"eval_samples_per_second": 88.378,
"eval_steps_per_second": 11.047,
"step": 340000
},
{
"epoch": 0.1,
"learning_rate": 4.48365371249994e-05,
"loss": 2.3905,
"step": 345000
},
{
"epoch": 0.1,
"eval_accuracy": 0.48784870621446635,
"eval_loss": 2.41015625,
"eval_runtime": 40.4162,
"eval_samples_per_second": 88.281,
"eval_steps_per_second": 11.035,
"step": 345000
},
{
"epoch": 0.1,
"learning_rate": 4.4761704763867654e-05,
"loss": 2.3889,
"step": 350000
},
{
"epoch": 0.1,
"eval_accuracy": 0.4881240438523818,
"eval_loss": 2.41015625,
"eval_runtime": 40.2942,
"eval_samples_per_second": 88.549,
"eval_steps_per_second": 11.069,
"step": 350000
},
{
"epoch": 0.11,
"learning_rate": 4.468688738119089e-05,
"loss": 2.3866,
"step": 355000
},
{
"epoch": 0.11,
"eval_accuracy": 0.48837801200198133,
"eval_loss": 2.408203125,
"eval_runtime": 40.2476,
"eval_samples_per_second": 88.651,
"eval_steps_per_second": 11.081,
"step": 355000
},
{
"epoch": 0.11,
"learning_rate": 4.4612025063149173e-05,
"loss": 2.3823,
"step": 360000
},
{
"epoch": 0.11,
"eval_accuracy": 0.48875663550008985,
"eval_loss": 2.40625,
"eval_runtime": 40.3148,
"eval_samples_per_second": 88.504,
"eval_steps_per_second": 11.063,
"step": 360000
},
{
"epoch": 0.11,
"learning_rate": 4.4537177723562427e-05,
"loss": 2.3828,
"step": 365000
},
{
"epoch": 0.11,
"eval_accuracy": 0.48881882619044487,
"eval_loss": 2.40234375,
"eval_runtime": 40.2734,
"eval_samples_per_second": 88.594,
"eval_steps_per_second": 11.074,
"step": 365000
},
{
"epoch": 0.11,
"learning_rate": 4.446233038397569e-05,
"loss": 2.3795,
"step": 370000
},
{
"epoch": 0.11,
"eval_accuracy": 0.48893718027957866,
"eval_loss": 2.400390625,
"eval_runtime": 40.2791,
"eval_samples_per_second": 88.582,
"eval_steps_per_second": 11.073,
"step": 370000
},
{
"epoch": 0.11,
"learning_rate": 4.43875729151189e-05,
"loss": 2.3812,
"step": 375000
},
{
"epoch": 0.11,
"eval_accuracy": 0.48680735461076846,
"eval_loss": 2.416015625,
"eval_runtime": 40.6108,
"eval_samples_per_second": 87.858,
"eval_steps_per_second": 10.982,
"step": 375000
},
{
"epoch": 0.11,
"learning_rate": 4.431269561862217e-05,
"loss": 2.3789,
"step": 380000
},
{
"epoch": 0.11,
"eval_accuracy": 0.4895744293798684,
"eval_loss": 2.396484375,
"eval_runtime": 40.2591,
"eval_samples_per_second": 88.626,
"eval_steps_per_second": 11.078,
"step": 380000
},
{
"epoch": 0.12,
"learning_rate": 4.423786325749043e-05,
"loss": 2.372,
"step": 385000
},
{
"epoch": 0.12,
"eval_accuracy": 0.48950730726913283,
"eval_loss": 2.396484375,
"eval_runtime": 40.2108,
"eval_samples_per_second": 88.732,
"eval_steps_per_second": 11.092,
"step": 385000
},
{
"epoch": 0.12,
"learning_rate": 4.41630009394487e-05,
"loss": 2.3732,
"step": 390000
},
{
"epoch": 0.12,
"eval_accuracy": 0.4898645612789255,
"eval_loss": 2.396484375,
"eval_runtime": 40.4903,
"eval_samples_per_second": 88.12,
"eval_steps_per_second": 11.015,
"step": 390000
},
{
"epoch": 0.12,
"learning_rate": 4.408815359986196e-05,
"loss": 2.3725,
"step": 395000
},
{
"epoch": 0.12,
"eval_accuracy": 0.4903272928913027,
"eval_loss": 2.392578125,
"eval_runtime": 40.3547,
"eval_samples_per_second": 88.416,
"eval_steps_per_second": 11.052,
"step": 395000
},
{
"epoch": 0.12,
"learning_rate": 4.401332123873022e-05,
"loss": 2.3716,
"step": 400000
},
{
"epoch": 0.12,
"eval_accuracy": 0.49036948393233654,
"eval_loss": 2.390625,
"eval_runtime": 40.3654,
"eval_samples_per_second": 88.392,
"eval_steps_per_second": 11.049,
"step": 400000
},
{
"epoch": 0.12,
"learning_rate": 4.393848887759846e-05,
"loss": 2.3709,
"step": 405000
},
{
"epoch": 0.12,
"eval_accuracy": 0.49040482577839734,
"eval_loss": 2.390625,
"eval_runtime": 40.3034,
"eval_samples_per_second": 88.529,
"eval_steps_per_second": 11.066,
"step": 405000
},
{
"epoch": 0.12,
"learning_rate": 4.3863656516466716e-05,
"loss": 2.3619,
"step": 410000
},
{
"epoch": 0.12,
"eval_accuracy": 0.4906423558600616,
"eval_loss": 2.388671875,
"eval_runtime": 40.362,
"eval_samples_per_second": 88.4,
"eval_steps_per_second": 11.05,
"step": 410000
},
{
"epoch": 0.12,
"learning_rate": 4.378877921997e-05,
"loss": 2.367,
"step": 415000
},
{
"epoch": 0.12,
"eval_accuracy": 0.49115138803045644,
"eval_loss": 2.38671875,
"eval_runtime": 40.2804,
"eval_samples_per_second": 88.579,
"eval_steps_per_second": 11.072,
"step": 415000
},
{
"epoch": 0.13,
"learning_rate": 4.3713961837293236e-05,
"loss": 2.3639,
"step": 420000
},
{
"epoch": 0.13,
"eval_accuracy": 0.49116152483901654,
"eval_loss": 2.384765625,
"eval_runtime": 40.3366,
"eval_samples_per_second": 88.456,
"eval_steps_per_second": 11.057,
"step": 420000
},
{
"epoch": 0.13,
"learning_rate": 4.363914445461648e-05,
"loss": 2.3621,
"step": 425000
},
{
"epoch": 0.13,
"eval_accuracy": 0.4918730192128138,
"eval_loss": 2.3828125,
"eval_runtime": 40.2687,
"eval_samples_per_second": 88.605,
"eval_steps_per_second": 11.076,
"step": 425000
},
{
"epoch": 0.13,
"learning_rate": 4.3564282136574755e-05,
"loss": 2.3578,
"step": 430000
},
{
"epoch": 0.13,
"eval_accuracy": 0.4919609628762674,
"eval_loss": 2.380859375,
"eval_runtime": 40.2478,
"eval_samples_per_second": 88.651,
"eval_steps_per_second": 11.081,
"step": 430000
},
{
"epoch": 0.13,
"learning_rate": 4.3489449775443e-05,
"loss": 2.3608,
"step": 435000
},
{
"epoch": 0.13,
"eval_accuracy": 0.4921738358560288,
"eval_loss": 2.37890625,
"eval_runtime": 40.4074,
"eval_samples_per_second": 88.301,
"eval_steps_per_second": 11.038,
"step": 435000
},
{
"epoch": 0.13,
"learning_rate": 4.341461741431126e-05,
"loss": 2.3541,
"step": 440000
},
{
"epoch": 0.13,
"eval_accuracy": 0.4923423260523651,
"eval_loss": 2.376953125,
"eval_runtime": 40.2757,
"eval_samples_per_second": 88.589,
"eval_steps_per_second": 11.074,
"step": 440000
},
{
"epoch": 0.13,
"learning_rate": 4.333978505317951e-05,
"loss": 2.3556,
"step": 445000
},
{
"epoch": 0.13,
"eval_accuracy": 0.4925938284917744,
"eval_loss": 2.376953125,
"eval_runtime": 40.292,
"eval_samples_per_second": 88.553,
"eval_steps_per_second": 11.069,
"step": 445000
},
{
"epoch": 0.13,
"learning_rate": 4.326493771359277e-05,
"loss": 2.3562,
"step": 450000
},
{
"epoch": 0.13,
"eval_accuracy": 0.49278067453063834,
"eval_loss": 2.376953125,
"eval_runtime": 40.1882,
"eval_samples_per_second": 88.782,
"eval_steps_per_second": 11.098,
"step": 450000
},
{
"epoch": 0.14,
"learning_rate": 4.9925197595778234e-05,
"loss": 2.3641,
"step": 455000
},
{
"epoch": 0.14,
"eval_accuracy": 0.4910004317732511,
"eval_loss": 2.38671875,
"eval_runtime": 39.6555,
"eval_samples_per_second": 89.975,
"eval_steps_per_second": 11.247,
"step": 455000
},
{
"epoch": 0.14,
"learning_rate": 4.985036523464649e-05,
"loss": 2.3641,
"step": 460000
},
{
"epoch": 0.14,
"eval_accuracy": 0.4911015258910529,
"eval_loss": 2.38671875,
"eval_runtime": 39.5455,
"eval_samples_per_second": 90.225,
"eval_steps_per_second": 11.278,
"step": 460000
},
{
"epoch": 0.14,
"learning_rate": 4.977551789505975e-05,
"loss": 2.3646,
"step": 465000
},
{
"epoch": 0.14,
"eval_accuracy": 0.4910639923026007,
"eval_loss": 2.38671875,
"eval_runtime": 39.4635,
"eval_samples_per_second": 90.413,
"eval_steps_per_second": 11.302,
"step": 465000
},
{
"epoch": 0.14,
"learning_rate": 4.970071549083798e-05,
"loss": 2.3629,
"step": 470000
},
{
"epoch": 0.14,
"eval_accuracy": 0.4911439908998856,
"eval_loss": 2.384765625,
"eval_runtime": 39.5626,
"eval_samples_per_second": 90.186,
"eval_steps_per_second": 11.273,
"step": 470000
},
{
"epoch": 0.14,
"learning_rate": 4.9625868151251246e-05,
"loss": 2.3659,
"step": 475000
},
{
"epoch": 0.14,
"eval_accuracy": 0.4913645349780168,
"eval_loss": 2.3828125,
"eval_runtime": 39.4888,
"eval_samples_per_second": 90.355,
"eval_steps_per_second": 11.294,
"step": 475000
},
{
"epoch": 0.14,
"learning_rate": 4.9551020811664506e-05,
"loss": 2.3651,
"step": 480000
},
{
"epoch": 0.14,
"eval_accuracy": 0.4916360370667473,
"eval_loss": 2.3828125,
"eval_runtime": 38.5403,
"eval_samples_per_second": 92.578,
"eval_steps_per_second": 11.572,
"step": 480000
},
{
"epoch": 0.15,
"learning_rate": 4.9476173472077765e-05,
"loss": 2.3608,
"step": 485000
},
{
"epoch": 0.15,
"eval_accuracy": 0.4917579527372671,
"eval_loss": 2.380859375,
"eval_runtime": 39.5737,
"eval_samples_per_second": 90.161,
"eval_steps_per_second": 11.27,
"step": 485000
},
{
"epoch": 0.15,
"learning_rate": 4.9401356089401005e-05,
"loss": 2.3612,
"step": 490000
},
{
"epoch": 0.15,
"eval_accuracy": 0.49203685195656843,
"eval_loss": 2.380859375,
"eval_runtime": 38.5594,
"eval_samples_per_second": 92.533,
"eval_steps_per_second": 11.567,
"step": 490000
},
{
"epoch": 0.15,
"learning_rate": 4.932649377135928e-05,
"loss": 2.3569,
"step": 495000
},
{
"epoch": 0.15,
"eval_accuracy": 0.49215328827110977,
"eval_loss": 2.37890625,
"eval_runtime": 39.5649,
"eval_samples_per_second": 90.181,
"eval_steps_per_second": 11.273,
"step": 495000
},
{
"epoch": 0.15,
"learning_rate": 4.9251676388682524e-05,
"loss": 2.3557,
"step": 500000
},
{
"epoch": 0.15,
"eval_accuracy": 0.4923439698591586,
"eval_loss": 2.37890625,
"eval_runtime": 39.6114,
"eval_samples_per_second": 90.075,
"eval_steps_per_second": 11.259,
"step": 500000
},
{
"epoch": 0.15,
"learning_rate": 4.9176829049095784e-05,
"loss": 2.3541,
"step": 505000
},
{
"epoch": 0.15,
"eval_accuracy": 0.49218726027817594,
"eval_loss": 2.376953125,
"eval_runtime": 39.5989,
"eval_samples_per_second": 90.104,
"eval_steps_per_second": 11.263,
"step": 505000
},
{
"epoch": 0.15,
"learning_rate": 4.910196673105406e-05,
"loss": 2.351,
"step": 510000
},
{
"epoch": 0.15,
"eval_accuracy": 0.49274588062017544,
"eval_loss": 2.375,
"eval_runtime": 38.6221,
"eval_samples_per_second": 92.382,
"eval_steps_per_second": 11.548,
"step": 510000
},
{
"epoch": 0.15,
"learning_rate": 4.9027134369922304e-05,
"loss": 2.3504,
"step": 515000
},
{
"epoch": 0.15,
"eval_accuracy": 0.49260917068851395,
"eval_loss": 2.375,
"eval_runtime": 39.6516,
"eval_samples_per_second": 89.984,
"eval_steps_per_second": 11.248,
"step": 515000
},
{
"epoch": 0.16,
"learning_rate": 4.895231698724555e-05,
"loss": 2.3479,
"step": 520000
},
{
"epoch": 0.16,
"eval_accuracy": 0.492896014973984,
"eval_loss": 2.373046875,
"eval_runtime": 39.6699,
"eval_samples_per_second": 89.942,
"eval_steps_per_second": 11.243,
"step": 520000
},
{
"epoch": 0.16,
"learning_rate": 4.887745466920382e-05,
"loss": 2.3451,
"step": 525000
},
{
"epoch": 0.16,
"eval_accuracy": 0.4929382060150178,
"eval_loss": 2.37109375,
"eval_runtime": 39.6216,
"eval_samples_per_second": 90.052,
"eval_steps_per_second": 11.256,
"step": 525000
},
{
"epoch": 0.16,
"learning_rate": 4.880262230807207e-05,
"loss": 2.3505,
"step": 530000
},
{
"epoch": 0.16,
"eval_accuracy": 0.49343929311924395,
"eval_loss": 2.369140625,
"eval_runtime": 39.6785,
"eval_samples_per_second": 89.923,
"eval_steps_per_second": 11.24,
"step": 530000
},
{
"epoch": 0.16,
"learning_rate": 4.8727804925395315e-05,
"loss": 2.3457,
"step": 535000
},
{
"epoch": 0.16,
"eval_accuracy": 0.4933929925612263,
"eval_loss": 2.369140625,
"eval_runtime": 39.6502,
"eval_samples_per_second": 89.987,
"eval_steps_per_second": 11.248,
"step": 535000
},
{
"epoch": 0.16,
"learning_rate": 4.865297256426357e-05,
"loss": 2.3479,
"step": 540000
},
{
"epoch": 0.16,
"eval_accuracy": 0.4937496986354212,
"eval_loss": 2.369140625,
"eval_runtime": 39.6648,
"eval_samples_per_second": 89.954,
"eval_steps_per_second": 11.244,
"step": 540000
},
{
"epoch": 0.16,
"learning_rate": 4.8578110246221835e-05,
"loss": 2.3421,
"step": 545000
},
{
"epoch": 0.16,
"eval_accuracy": 0.4935861398594655,
"eval_loss": 2.3671875,
"eval_runtime": 39.7026,
"eval_samples_per_second": 89.868,
"eval_steps_per_second": 11.234,
"step": 545000
},
{
"epoch": 0.16,
"learning_rate": 4.850327788509009e-05,
"loss": 2.3433,
"step": 550000
},
{
"epoch": 0.16,
"eval_accuracy": 0.4937406576980568,
"eval_loss": 2.3671875,
"eval_runtime": 39.7646,
"eval_samples_per_second": 89.728,
"eval_steps_per_second": 11.216,
"step": 550000
},
{
"epoch": 0.17,
"learning_rate": 4.842846050241333e-05,
"loss": 2.3425,
"step": 555000
},
{
"epoch": 0.17,
"eval_accuracy": 0.4939384624488776,
"eval_loss": 2.365234375,
"eval_runtime": 39.7934,
"eval_samples_per_second": 89.663,
"eval_steps_per_second": 11.208,
"step": 555000
},
{
"epoch": 0.17,
"learning_rate": 4.835361316282659e-05,
"loss": 2.3403,
"step": 560000
},
{
"epoch": 0.17,
"eval_accuracy": 0.49420284137483617,
"eval_loss": 2.36328125,
"eval_runtime": 39.7702,
"eval_samples_per_second": 89.715,
"eval_steps_per_second": 11.214,
"step": 560000
},
{
"epoch": 0.17,
"learning_rate": 4.827876582323985e-05,
"loss": 2.3417,
"step": 565000
},
{
"epoch": 0.17,
"eval_accuracy": 0.49440420770704296,
"eval_loss": 2.361328125,
"eval_runtime": 40.0918,
"eval_samples_per_second": 88.996,
"eval_steps_per_second": 11.124,
"step": 565000
},
{
"epoch": 0.17,
"learning_rate": 4.82039484405631e-05,
"loss": 2.3382,
"step": 570000
},
{
"epoch": 0.17,
"eval_accuracy": 0.4947474893590907,
"eval_loss": 2.361328125,
"eval_runtime": 39.7167,
"eval_samples_per_second": 89.836,
"eval_steps_per_second": 11.23,
"step": 570000
},
{
"epoch": 0.17,
"learning_rate": 4.812913105788634e-05,
"loss": 2.3354,
"step": 575000
},
{
"epoch": 0.17,
"eval_accuracy": 0.4949266642995849,
"eval_loss": 2.359375,
"eval_runtime": 39.8562,
"eval_samples_per_second": 89.522,
"eval_steps_per_second": 11.19,
"step": 575000
},
{
"epoch": 0.17,
"learning_rate": 4.805425376138962e-05,
"loss": 2.3366,
"step": 580000
},
{
"epoch": 0.17,
"eval_accuracy": 0.4946513266616695,
"eval_loss": 2.359375,
"eval_runtime": 38.7841,
"eval_samples_per_second": 91.997,
"eval_steps_per_second": 11.5,
"step": 580000
},
{
"epoch": 0.18,
"learning_rate": 4.797942140025787e-05,
"loss": 2.3373,
"step": 585000
},
{
"epoch": 0.18,
"eval_accuracy": 0.49454119160650334,
"eval_loss": 2.359375,
"eval_runtime": 38.7732,
"eval_samples_per_second": 92.022,
"eval_steps_per_second": 11.503,
"step": 585000
},
{
"epoch": 0.18,
"learning_rate": 4.790460401758111e-05,
"loss": 2.3365,
"step": 590000
},
{
"epoch": 0.18,
"eval_accuracy": 0.49488255548395865,
"eval_loss": 2.359375,
"eval_runtime": 39.8158,
"eval_samples_per_second": 89.613,
"eval_steps_per_second": 11.202,
"step": 590000
},
{
"epoch": 0.18,
"learning_rate": 4.782975667799438e-05,
"loss": 2.3318,
"step": 595000
},
{
"epoch": 0.18,
"eval_accuracy": 0.49525295994809954,
"eval_loss": 2.35546875,
"eval_runtime": 39.8567,
"eval_samples_per_second": 89.521,
"eval_steps_per_second": 11.19,
"step": 595000
},
{
"epoch": 0.18,
"learning_rate": 4.7754894359952644e-05,
"loss": 2.3278,
"step": 600000
},
{
"epoch": 0.18,
"eval_accuracy": 0.4957521292777332,
"eval_loss": 2.353515625,
"eval_runtime": 39.8687,
"eval_samples_per_second": 89.494,
"eval_steps_per_second": 11.187,
"step": 600000
},
{
"epoch": 0.18,
"learning_rate": 4.768004702036591e-05,
"loss": 2.3277,
"step": 605000
},
{
"epoch": 0.18,
"eval_accuracy": 0.4959060991807267,
"eval_loss": 2.3515625,
"eval_runtime": 40.0704,
"eval_samples_per_second": 89.043,
"eval_steps_per_second": 11.13,
"step": 605000
},
{
"epoch": 0.18,
"learning_rate": 4.7605214659234157e-05,
"loss": 2.326,
"step": 610000
},
{
"epoch": 0.18,
"eval_accuracy": 0.49614417719798887,
"eval_loss": 2.3515625,
"eval_runtime": 39.9671,
"eval_samples_per_second": 89.273,
"eval_steps_per_second": 11.159,
"step": 610000
},
{
"epoch": 0.18,
"learning_rate": 4.753036731964742e-05,
"loss": 2.3273,
"step": 615000
},
{
"epoch": 0.18,
"eval_accuracy": 0.49605705543793205,
"eval_loss": 2.3515625,
"eval_runtime": 39.9201,
"eval_samples_per_second": 89.378,
"eval_steps_per_second": 11.172,
"step": 615000
},
{
"epoch": 0.19,
"learning_rate": 4.745551998006068e-05,
"loss": 2.3284,
"step": 620000
},
{
"epoch": 0.19,
"eval_accuracy": 0.49654992350819055,
"eval_loss": 2.349609375,
"eval_runtime": 39.956,
"eval_samples_per_second": 89.298,
"eval_steps_per_second": 11.162,
"step": 620000
},
{
"epoch": 0.19,
"learning_rate": 4.7380687618928936e-05,
"loss": 2.3276,
"step": 625000
},
{
"epoch": 0.19,
"eval_accuracy": 0.49658115583726753,
"eval_loss": 2.34765625,
"eval_runtime": 39.9741,
"eval_samples_per_second": 89.258,
"eval_steps_per_second": 11.157,
"step": 625000
},
{
"epoch": 0.19,
"learning_rate": 4.7305840279342196e-05,
"loss": 2.3228,
"step": 630000
},
{
"epoch": 0.19,
"eval_accuracy": 0.4966945785060207,
"eval_loss": 2.345703125,
"eval_runtime": 39.9089,
"eval_samples_per_second": 89.404,
"eval_steps_per_second": 11.175,
"step": 630000
},
{
"epoch": 0.19,
"learning_rate": 4.723103787512043e-05,
"loss": 2.3219,
"step": 635000
},
{
"epoch": 0.19,
"eval_accuracy": 0.49684580873102496,
"eval_loss": 2.345703125,
"eval_runtime": 39.9108,
"eval_samples_per_second": 89.399,
"eval_steps_per_second": 11.175,
"step": 635000
},
{
"epoch": 0.19,
"learning_rate": 4.715619053553369e-05,
"loss": 2.326,
"step": 640000
},
{
"epoch": 0.19,
"eval_accuracy": 0.49703155889869327,
"eval_loss": 2.34375,
"eval_runtime": 40.0352,
"eval_samples_per_second": 89.122,
"eval_steps_per_second": 11.14,
"step": 640000
},
{
"epoch": 0.19,
"learning_rate": 4.708135817440194e-05,
"loss": 2.3191,
"step": 645000
},
{
"epoch": 0.19,
"eval_accuracy": 0.4972345690376936,
"eval_loss": 2.341796875,
"eval_runtime": 40.0269,
"eval_samples_per_second": 89.14,
"eval_steps_per_second": 11.143,
"step": 645000
},
{
"epoch": 0.19,
"learning_rate": 4.70065258132702e-05,
"loss": 2.3167,
"step": 650000
},
{
"epoch": 0.19,
"eval_accuracy": 0.4972822394347058,
"eval_loss": 2.34375,
"eval_runtime": 40.0234,
"eval_samples_per_second": 89.148,
"eval_steps_per_second": 11.143,
"step": 650000
},
{
"epoch": 0.2,
"learning_rate": 4.693166349522847e-05,
"loss": 2.3172,
"step": 655000
},
{
"epoch": 0.2,
"eval_accuracy": 0.4974205931731608,
"eval_loss": 2.341796875,
"eval_runtime": 40.0416,
"eval_samples_per_second": 89.107,
"eval_steps_per_second": 11.138,
"step": 655000
},
{
"epoch": 0.2,
"learning_rate": 4.685683113409672e-05,
"loss": 2.3194,
"step": 660000
},
{
"epoch": 0.2,
"eval_accuracy": 0.4977205879129791,
"eval_loss": 2.337890625,
"eval_runtime": 40.5115,
"eval_samples_per_second": 88.074,
"eval_steps_per_second": 11.009,
"step": 660000
},
{
"epoch": 0.2,
"learning_rate": 4.678198379450998e-05,
"loss": 2.3204,
"step": 665000
},
{
"epoch": 0.2,
"eval_accuracy": 0.49760332969504095,
"eval_loss": 2.33984375,
"eval_runtime": 40.059,
"eval_samples_per_second": 89.069,
"eval_steps_per_second": 11.134,
"step": 665000
},
{
"epoch": 0.2,
"learning_rate": 4.670716641183322e-05,
"loss": 2.309,
"step": 670000
},
{
"epoch": 0.2,
"eval_accuracy": 0.49802085662059625,
"eval_loss": 2.3359375,
"eval_runtime": 40.172,
"eval_samples_per_second": 88.818,
"eval_steps_per_second": 11.102,
"step": 670000
},
{
"epoch": 0.2,
"learning_rate": 4.663233405070147e-05,
"loss": 2.3147,
"step": 675000
},
{
"epoch": 0.2,
"eval_accuracy": 0.49805729433785273,
"eval_loss": 2.337890625,
"eval_runtime": 40.0906,
"eval_samples_per_second": 88.999,
"eval_steps_per_second": 11.125,
"step": 675000
},
{
"epoch": 0.2,
"learning_rate": 4.655745675420475e-05,
"loss": 2.3122,
"step": 680000
},
{
"epoch": 0.2,
"eval_accuracy": 0.4980255140731779,
"eval_loss": 2.3359375,
"eval_runtime": 40.0778,
"eval_samples_per_second": 89.027,
"eval_steps_per_second": 11.128,
"step": 680000
},
{
"epoch": 0.21,
"learning_rate": 4.6482624393073005e-05,
"loss": 2.3096,
"step": 685000
},
{
"epoch": 0.21,
"eval_accuracy": 0.4984096169272648,
"eval_loss": 2.333984375,
"eval_runtime": 40.3028,
"eval_samples_per_second": 88.53,
"eval_steps_per_second": 11.066,
"step": 685000
},
{
"epoch": 0.21,
"learning_rate": 4.640780701039625e-05,
"loss": 2.3093,
"step": 690000
},
{
"epoch": 0.21,
"eval_accuracy": 0.49861701055104785,
"eval_loss": 2.333984375,
"eval_runtime": 40.1409,
"eval_samples_per_second": 88.887,
"eval_steps_per_second": 11.111,
"step": 690000
},
{
"epoch": 0.21,
"learning_rate": 4.633295967080951e-05,
"loss": 2.3048,
"step": 695000
},
{
"epoch": 0.21,
"eval_accuracy": 0.498526601177404,
"eval_loss": 2.33203125,
"eval_runtime": 40.2295,
"eval_samples_per_second": 88.691,
"eval_steps_per_second": 11.086,
"step": 695000
},
{
"epoch": 0.21,
"learning_rate": 4.6258127309677764e-05,
"loss": 2.3111,
"step": 700000
},
{
"epoch": 0.21,
"eval_accuracy": 0.4988186508510536,
"eval_loss": 2.330078125,
"eval_runtime": 40.2382,
"eval_samples_per_second": 88.672,
"eval_steps_per_second": 11.084,
"step": 700000
},
{
"epoch": 0.21,
"learning_rate": 4.6183279970091023e-05,
"loss": 2.3074,
"step": 705000
},
{
"epoch": 0.21,
"eval_accuracy": 0.4989449500063561,
"eval_loss": 2.330078125,
"eval_runtime": 40.2221,
"eval_samples_per_second": 88.707,
"eval_steps_per_second": 11.088,
"step": 705000
},
{
"epoch": 0.21,
"learning_rate": 4.610843263050428e-05,
"loss": 2.3082,
"step": 710000
},
{
"epoch": 0.21,
"eval_accuracy": 0.49918768547619985,
"eval_loss": 2.330078125,
"eval_runtime": 40.2424,
"eval_samples_per_second": 88.663,
"eval_steps_per_second": 11.083,
"step": 710000
},
{
"epoch": 0.21,
"learning_rate": 4.603357031246256e-05,
"loss": 2.3093,
"step": 715000
},
{
"epoch": 0.21,
"eval_accuracy": 0.4993685042234876,
"eval_loss": 2.328125,
"eval_runtime": 39.2194,
"eval_samples_per_second": 90.975,
"eval_steps_per_second": 11.372,
"step": 715000
},
{
"epoch": 0.22,
"learning_rate": 4.595873795133081e-05,
"loss": 2.3011,
"step": 720000
},
{
"epoch": 0.22,
"eval_accuracy": 0.4995095976399318,
"eval_loss": 2.328125,
"eval_runtime": 40.3274,
"eval_samples_per_second": 88.476,
"eval_steps_per_second": 11.059,
"step": 720000
},
{
"epoch": 0.22,
"learning_rate": 4.588390559019906e-05,
"loss": 2.2998,
"step": 725000
},
{
"epoch": 0.22,
"eval_accuracy": 0.4994558999513433,
"eval_loss": 2.326171875,
"eval_runtime": 40.2634,
"eval_samples_per_second": 88.616,
"eval_steps_per_second": 11.077,
"step": 725000
},
{
"epoch": 0.22,
"learning_rate": 4.580907322906731e-05,
"loss": 2.3012,
"step": 730000
},
{
"epoch": 0.22,
"eval_accuracy": 0.49959206194740696,
"eval_loss": 2.326171875,
"eval_runtime": 40.2894,
"eval_samples_per_second": 88.559,
"eval_steps_per_second": 11.07,
"step": 730000
},
{
"epoch": 0.22,
"learning_rate": 4.573421091102558e-05,
"loss": 2.3002,
"step": 735000
},
{
"epoch": 0.22,
"eval_accuracy": 0.4997052106483612,
"eval_loss": 2.32421875,
"eval_runtime": 40.3059,
"eval_samples_per_second": 88.523,
"eval_steps_per_second": 11.065,
"step": 735000
},
{
"epoch": 0.22,
"learning_rate": 4.5659378549893835e-05,
"loss": 2.2994,
"step": 740000
},
{
"epoch": 0.22,
"eval_accuracy": 0.5000197256815223,
"eval_loss": 2.32421875,
"eval_runtime": 40.7124,
"eval_samples_per_second": 87.639,
"eval_steps_per_second": 10.955,
"step": 740000
},
{
"epoch": 0.22,
"learning_rate": 4.558454618876209e-05,
"loss": 2.299,
"step": 745000
},
{
"epoch": 0.22,
"eval_accuracy": 0.5000953407940244,
"eval_loss": 2.322265625,
"eval_runtime": 40.3194,
"eval_samples_per_second": 88.493,
"eval_steps_per_second": 11.062,
"step": 745000
},
{
"epoch": 0.22,
"learning_rate": 4.550971382763034e-05,
"loss": 2.2969,
"step": 750000
},
{
"epoch": 0.22,
"eval_accuracy": 0.5002605433767736,
"eval_loss": 2.322265625,
"eval_runtime": 40.3665,
"eval_samples_per_second": 88.39,
"eval_steps_per_second": 11.049,
"step": 750000
},
{
"epoch": 0.23,
"learning_rate": 4.543489644495358e-05,
"loss": 2.2934,
"step": 755000
},
{
"epoch": 0.23,
"eval_accuracy": 0.5003739660455269,
"eval_loss": 2.3203125,
"eval_runtime": 40.3284,
"eval_samples_per_second": 88.474,
"eval_steps_per_second": 11.059,
"step": 755000
},
{
"epoch": 0.23,
"learning_rate": 4.536004910536684e-05,
"loss": 2.2988,
"step": 760000
},
{
"epoch": 0.23,
"eval_accuracy": 0.5004895804566715,
"eval_loss": 2.318359375,
"eval_runtime": 40.3305,
"eval_samples_per_second": 88.469,
"eval_steps_per_second": 11.059,
"step": 760000
},
{
"epoch": 0.23,
"learning_rate": 4.5285186787325113e-05,
"loss": 2.2911,
"step": 765000
},
{
"epoch": 0.23,
"eval_accuracy": 0.5007449184452656,
"eval_loss": 2.318359375,
"eval_runtime": 39.3805,
"eval_samples_per_second": 90.603,
"eval_steps_per_second": 11.325,
"step": 765000
},
{
"epoch": 0.23,
"learning_rate": 4.5210354426193366e-05,
"loss": 2.2929,
"step": 770000
},
{
"epoch": 0.23,
"eval_accuracy": 0.5008427249494803,
"eval_loss": 2.318359375,
"eval_runtime": 40.4207,
"eval_samples_per_second": 88.272,
"eval_steps_per_second": 11.034,
"step": 770000
},
{
"epoch": 0.23,
"learning_rate": 4.5135567000426584e-05,
"loss": 2.2926,
"step": 775000
},
{
"epoch": 0.23,
"eval_accuracy": 0.5008994362838569,
"eval_loss": 2.31640625,
"eval_runtime": 40.4103,
"eval_samples_per_second": 88.294,
"eval_steps_per_second": 11.037,
"step": 775000
},
{
"epoch": 0.23,
"learning_rate": 4.506070468238486e-05,
"loss": 2.292,
"step": 780000
},
{
"epoch": 0.23,
"eval_accuracy": 0.5011701164691906,
"eval_loss": 2.31640625,
"eval_runtime": 40.3936,
"eval_samples_per_second": 88.331,
"eval_steps_per_second": 11.041,
"step": 780000
},
{
"epoch": 0.24,
"learning_rate": 4.498587232125311e-05,
"loss": 2.2932,
"step": 785000
},
{
"epoch": 0.24,
"eval_accuracy": 0.5014183312950129,
"eval_loss": 2.314453125,
"eval_runtime": 40.4142,
"eval_samples_per_second": 88.286,
"eval_steps_per_second": 11.036,
"step": 785000
},
{
"epoch": 0.24,
"learning_rate": 4.4911039960121364e-05,
"loss": 2.2903,
"step": 790000
},
{
"epoch": 0.24,
"eval_accuracy": 0.5013958659355015,
"eval_loss": 2.314453125,
"eval_runtime": 40.3913,
"eval_samples_per_second": 88.336,
"eval_steps_per_second": 11.042,
"step": 790000
},
{
"epoch": 0.24,
"learning_rate": 4.483620759898962e-05,
"loss": 2.2886,
"step": 795000
},
{
"epoch": 0.24,
"eval_accuracy": 0.5015205212840104,
"eval_loss": 2.3125,
"eval_runtime": 40.3641,
"eval_samples_per_second": 88.395,
"eval_steps_per_second": 11.049,
"step": 795000
},
{
"epoch": 0.24,
"learning_rate": 4.476137523785787e-05,
"loss": 2.2924,
"step": 800000
},
{
"epoch": 0.24,
"eval_accuracy": 0.5014750426293895,
"eval_loss": 2.3125,
"eval_runtime": 40.359,
"eval_samples_per_second": 88.407,
"eval_steps_per_second": 11.051,
"step": 800000
},
{
"epoch": 0.24,
"learning_rate": 4.468652789827113e-05,
"loss": 2.2891,
"step": 805000
},
{
"epoch": 0.24,
"eval_accuracy": 0.5018673645174441,
"eval_loss": 2.310546875,
"eval_runtime": 40.4253,
"eval_samples_per_second": 88.261,
"eval_steps_per_second": 11.033,
"step": 805000
},
{
"epoch": 0.24,
"learning_rate": 4.461168055868439e-05,
"loss": 2.2862,
"step": 810000
},
{
"epoch": 0.24,
"eval_accuracy": 0.5019873624133714,
"eval_loss": 2.30859375,
"eval_runtime": 40.3854,
"eval_samples_per_second": 88.349,
"eval_steps_per_second": 11.044,
"step": 810000
},
{
"epoch": 0.24,
"learning_rate": 4.453686317600763e-05,
"loss": 2.2858,
"step": 815000
},
{
"epoch": 0.24,
"eval_accuracy": 0.5021665373538656,
"eval_loss": 2.30859375,
"eval_runtime": 40.4075,
"eval_samples_per_second": 88.301,
"eval_steps_per_second": 11.038,
"step": 815000
},
{
"epoch": 0.25,
"learning_rate": 4.44620008579659e-05,
"loss": 2.2841,
"step": 820000
},
{
"epoch": 0.25,
"eval_accuracy": 0.502265439729276,
"eval_loss": 2.306640625,
"eval_runtime": 40.4403,
"eval_samples_per_second": 88.229,
"eval_steps_per_second": 11.029,
"step": 820000
},
{
"epoch": 0.25,
"learning_rate": 4.438718347528915e-05,
"loss": 2.2843,
"step": 825000
},
{
"epoch": 0.25,
"eval_accuracy": 0.5022361251747914,
"eval_loss": 2.30859375,
"eval_runtime": 40.4536,
"eval_samples_per_second": 88.2,
"eval_steps_per_second": 11.025,
"step": 825000
},
{
"epoch": 0.25,
"learning_rate": 4.431233613570241e-05,
"loss": 2.2832,
"step": 830000
},
{
"epoch": 0.25,
"eval_accuracy": 0.5024813263548256,
"eval_loss": 2.306640625,
"eval_runtime": 40.4096,
"eval_samples_per_second": 88.296,
"eval_steps_per_second": 11.037,
"step": 830000
},
{
"epoch": 0.25,
"learning_rate": 4.423756368839062e-05,
"loss": 2.2846,
"step": 835000
},
{
"epoch": 0.25,
"eval_accuracy": 0.502600776315155,
"eval_loss": 2.306640625,
"eval_runtime": 39.3247,
"eval_samples_per_second": 90.732,
"eval_steps_per_second": 11.341,
"step": 835000
},
{
"epoch": 0.25,
"learning_rate": 4.416267141343891e-05,
"loss": 2.2784,
"step": 840000
},
{
"epoch": 0.25,
"eval_accuracy": 0.5026766653954561,
"eval_loss": 2.3046875,
"eval_runtime": 40.3768,
"eval_samples_per_second": 88.367,
"eval_steps_per_second": 11.046,
"step": 840000
},
{
"epoch": 0.25,
"learning_rate": 4.408782407385217e-05,
"loss": 2.277,
"step": 845000
},
{
"epoch": 0.25,
"eval_accuracy": 0.5028424159138032,
"eval_loss": 2.302734375,
"eval_runtime": 40.4265,
"eval_samples_per_second": 88.259,
"eval_steps_per_second": 11.032,
"step": 845000
},
{
"epoch": 0.25,
"learning_rate": 4.4013021669630405e-05,
"loss": 2.276,
"step": 850000
},
{
"epoch": 0.25,
"eval_accuracy": 0.5025583113063223,
"eval_loss": 2.306640625,
"eval_runtime": 40.7923,
"eval_samples_per_second": 87.468,
"eval_steps_per_second": 10.933,
"step": 850000
},
{
"epoch": 0.26,
"learning_rate": 4.393818930849866e-05,
"loss": 2.2802,
"step": 855000
},
{
"epoch": 0.26,
"eval_accuracy": 0.5031032332583757,
"eval_loss": 2.302734375,
"eval_runtime": 40.3592,
"eval_samples_per_second": 88.406,
"eval_steps_per_second": 11.051,
"step": 855000
},
{
"epoch": 0.26,
"learning_rate": 4.3863356947366904e-05,
"loss": 2.2781,
"step": 860000
},
{
"epoch": 0.26,
"eval_accuracy": 0.5032018616659872,
"eval_loss": 2.30078125,
"eval_runtime": 40.4036,
"eval_samples_per_second": 88.309,
"eval_steps_per_second": 11.039,
"step": 860000
},
{
"epoch": 0.26,
"learning_rate": 4.378850960778017e-05,
"loss": 2.2749,
"step": 865000
},
{
"epoch": 0.26,
"eval_accuracy": 0.5038193850847547,
"eval_loss": 2.298828125,
"eval_runtime": 40.3881,
"eval_samples_per_second": 88.343,
"eval_steps_per_second": 11.043,
"step": 865000
},
{
"epoch": 0.26,
"learning_rate": 4.371366226819343e-05,
"loss": 2.2729,
"step": 870000
},
{
"epoch": 0.26,
"eval_accuracy": 0.5037152773211648,
"eval_loss": 2.296875,
"eval_runtime": 40.4456,
"eval_samples_per_second": 88.217,
"eval_steps_per_second": 11.027,
"step": 870000
},
{
"epoch": 0.26,
"learning_rate": 4.363882990706168e-05,
"loss": 2.2708,
"step": 875000
},
{
"epoch": 0.26,
"eval_accuracy": 0.5038993836820396,
"eval_loss": 2.296875,
"eval_runtime": 40.4095,
"eval_samples_per_second": 88.296,
"eval_steps_per_second": 11.037,
"step": 875000
},
{
"epoch": 0.26,
"learning_rate": 4.3563997545929936e-05,
"loss": 2.2754,
"step": 880000
},
{
"epoch": 0.26,
"eval_accuracy": 0.5038591104155982,
"eval_loss": 2.296875,
"eval_runtime": 40.397,
"eval_samples_per_second": 88.323,
"eval_steps_per_second": 11.04,
"step": 880000
},
{
"epoch": 0.27,
"learning_rate": 4.3489150206343196e-05,
"loss": 2.2761,
"step": 885000
},
{
"epoch": 0.27,
"eval_accuracy": 0.5040640383291909,
"eval_loss": 2.294921875,
"eval_runtime": 40.3166,
"eval_samples_per_second": 88.499,
"eval_steps_per_second": 11.062,
"step": 885000
},
{
"epoch": 0.27,
"learning_rate": 4.341434780212143e-05,
"loss": 2.2742,
"step": 890000
},
{
"epoch": 0.27,
"eval_accuracy": 0.5041032157244366,
"eval_loss": 2.294921875,
"eval_runtime": 40.3381,
"eval_samples_per_second": 88.452,
"eval_steps_per_second": 11.057,
"step": 890000
},
{
"epoch": 0.27,
"learning_rate": 4.3339470505624715e-05,
"loss": 2.2734,
"step": 895000
},
{
"epoch": 0.27,
"eval_accuracy": 0.5040837640107132,
"eval_loss": 2.294921875,
"eval_runtime": 40.3833,
"eval_samples_per_second": 88.353,
"eval_steps_per_second": 11.044,
"step": 895000
},
{
"epoch": 0.27,
"learning_rate": 4.326463814449296e-05,
"loss": 2.2682,
"step": 900000
},
{
"epoch": 0.27,
"eval_accuracy": 0.5043944434946894,
"eval_loss": 2.29296875,
"eval_runtime": 40.4818,
"eval_samples_per_second": 88.138,
"eval_steps_per_second": 11.017,
"step": 900000
},
{
"epoch": 0.27,
"learning_rate": 4.318982076181621e-05,
"loss": 2.2667,
"step": 905000
},
{
"epoch": 0.27,
"eval_accuracy": 0.5045489613332808,
"eval_loss": 2.29296875,
"eval_runtime": 40.3614,
"eval_samples_per_second": 88.401,
"eval_steps_per_second": 11.05,
"step": 905000
},
{
"epoch": 0.27,
"learning_rate": 4.311498840068446e-05,
"loss": 2.2676,
"step": 910000
},
{
"epoch": 0.27,
"eval_accuracy": 0.5045801936623577,
"eval_loss": 2.29296875,
"eval_runtime": 40.4147,
"eval_samples_per_second": 88.285,
"eval_steps_per_second": 11.036,
"step": 910000
},
{
"epoch": 0.27,
"learning_rate": 4.304012608264273e-05,
"loss": 2.2707,
"step": 915000
},
{
"epoch": 0.27,
"eval_accuracy": 0.5046547129036641,
"eval_loss": 2.291015625,
"eval_runtime": 40.4009,
"eval_samples_per_second": 88.315,
"eval_steps_per_second": 11.039,
"step": 915000
},
{
"epoch": 0.28,
"learning_rate": 4.296529372151098e-05,
"loss": 2.265,
"step": 920000
},
{
"epoch": 0.28,
"eval_accuracy": 0.5047700533470098,
"eval_loss": 2.291015625,
"eval_runtime": 40.3704,
"eval_samples_per_second": 88.382,
"eval_steps_per_second": 11.048,
"step": 920000
},
{
"epoch": 0.28,
"learning_rate": 4.289046136037923e-05,
"loss": 2.2676,
"step": 925000
},
{
"epoch": 0.28,
"eval_accuracy": 0.5046149875728206,
"eval_loss": 2.291015625,
"eval_runtime": 40.3019,
"eval_samples_per_second": 88.532,
"eval_steps_per_second": 11.066,
"step": 925000
},
{
"epoch": 0.28,
"learning_rate": 4.281564397770247e-05,
"loss": 2.2662,
"step": 930000
},
{
"epoch": 0.28,
"eval_accuracy": 0.5051503206519119,
"eval_loss": 2.2890625,
"eval_runtime": 40.3203,
"eval_samples_per_second": 88.491,
"eval_steps_per_second": 11.061,
"step": 930000
},
{
"epoch": 0.28,
"learning_rate": 4.274079663811574e-05,
"loss": 2.2706,
"step": 935000
},
{
"epoch": 0.28,
"eval_accuracy": 0.5050968969311223,
"eval_loss": 2.2890625,
"eval_runtime": 40.3497,
"eval_samples_per_second": 88.427,
"eval_steps_per_second": 11.053,
"step": 935000
},
{
"epoch": 0.28,
"learning_rate": 4.2665949298529e-05,
"loss": 2.2657,
"step": 940000
},
{
"epoch": 0.28,
"eval_accuracy": 0.5048788185631814,
"eval_loss": 2.2890625,
"eval_runtime": 40.7581,
"eval_samples_per_second": 87.541,
"eval_steps_per_second": 10.943,
"step": 940000
},
{
"epoch": 0.28,
"learning_rate": 4.259110195894226e-05,
"loss": 2.2672,
"step": 945000
},
{
"epoch": 0.28,
"eval_accuracy": 0.5050453909849252,
"eval_loss": 2.287109375,
"eval_runtime": 40.3332,
"eval_samples_per_second": 88.463,
"eval_steps_per_second": 11.058,
"step": 945000
},
{
"epoch": 0.28,
"learning_rate": 4.9925167638868255e-05,
"loss": 2.2716,
"step": 950000
},
{
"epoch": 0.28,
"eval_accuracy": 0.5037065103515993,
"eval_loss": 2.296875,
"eval_runtime": 38.5412,
"eval_samples_per_second": 92.576,
"eval_steps_per_second": 11.572,
"step": 950000
},
{
"epoch": 0.29,
"learning_rate": 4.9850290342371536e-05,
"loss": 2.2702,
"step": 955000
},
{
"epoch": 0.29,
"eval_accuracy": 0.5036908941870608,
"eval_loss": 2.298828125,
"eval_runtime": 39.4253,
"eval_samples_per_second": 90.5,
"eval_steps_per_second": 11.313,
"step": 955000
},
{
"epoch": 0.29,
"learning_rate": 4.977548793814977e-05,
"loss": 2.2708,
"step": 960000
},
{
"epoch": 0.29,
"eval_accuracy": 0.5035029522770011,
"eval_loss": 2.298828125,
"eval_runtime": 39.5074,
"eval_samples_per_second": 90.312,
"eval_steps_per_second": 11.289,
"step": 960000
},
{
"epoch": 0.29,
"learning_rate": 4.970064059856303e-05,
"loss": 2.2738,
"step": 965000
},
{
"epoch": 0.29,
"eval_accuracy": 0.5035369242840674,
"eval_loss": 2.298828125,
"eval_runtime": 39.5917,
"eval_samples_per_second": 90.12,
"eval_steps_per_second": 11.265,
"step": 965000
},
{
"epoch": 0.29,
"learning_rate": 4.962582321588627e-05,
"loss": 2.2737,
"step": 970000
},
{
"epoch": 0.29,
"eval_accuracy": 0.5035577458367854,
"eval_loss": 2.298828125,
"eval_runtime": 39.4627,
"eval_samples_per_second": 90.415,
"eval_steps_per_second": 11.302,
"step": 970000
},
{
"epoch": 0.29,
"learning_rate": 4.9550990854754526e-05,
"loss": 2.2763,
"step": 975000
},
{
"epoch": 0.29,
"eval_accuracy": 0.49873289892999134,
"eval_loss": 2.330078125,
"eval_runtime": 39.5211,
"eval_samples_per_second": 90.281,
"eval_steps_per_second": 11.285,
"step": 975000
},
{
"epoch": 0.29,
"learning_rate": 4.9476143515167786e-05,
"loss": 2.2738,
"step": 980000
},
{
"epoch": 0.29,
"eval_accuracy": 0.5034662405919458,
"eval_loss": 2.296875,
"eval_runtime": 39.4696,
"eval_samples_per_second": 90.399,
"eval_steps_per_second": 11.3,
"step": 980000
},
{
"epoch": 0.3,
"learning_rate": 4.9401296175581046e-05,
"loss": 2.2737,
"step": 985000
},
{
"epoch": 0.3,
"eval_accuracy": 0.5036182927203469,
"eval_loss": 2.296875,
"eval_runtime": 39.6371,
"eval_samples_per_second": 90.017,
"eval_steps_per_second": 11.252,
"step": 985000
},
{
"epoch": 0.3,
"learning_rate": 4.932644883599431e-05,
"loss": 2.2748,
"step": 990000
},
{
"epoch": 0.3,
"eval_accuracy": 0.5036056902015965,
"eval_loss": 2.296875,
"eval_runtime": 39.6139,
"eval_samples_per_second": 90.069,
"eval_steps_per_second": 11.259,
"step": 990000
},
{
"epoch": 0.3,
"learning_rate": 4.9251631453317545e-05,
"loss": 2.2724,
"step": 995000
},
{
"epoch": 0.3,
"eval_accuracy": 0.5038232206339396,
"eval_loss": 2.296875,
"eval_runtime": 39.4746,
"eval_samples_per_second": 90.387,
"eval_steps_per_second": 11.298,
"step": 995000
},
{
"epoch": 0.3,
"learning_rate": 4.917678411373081e-05,
"loss": 2.2744,
"step": 1000000
},
{
"epoch": 0.3,
"eval_accuracy": 0.5032999421380009,
"eval_loss": 2.298828125,
"eval_runtime": 39.6576,
"eval_samples_per_second": 89.97,
"eval_steps_per_second": 11.246,
"step": 1000000
},
{
"epoch": 0.3,
"learning_rate": 4.9101951752599065e-05,
"loss": 2.2694,
"step": 1005000
},
{
"epoch": 0.3,
"eval_accuracy": 0.5033465166638174,
"eval_loss": 2.298828125,
"eval_runtime": 39.6623,
"eval_samples_per_second": 89.959,
"eval_steps_per_second": 11.245,
"step": 1005000
},
{
"epoch": 0.3,
"learning_rate": 4.9027104413012324e-05,
"loss": 2.2684,
"step": 1010000
},
{
"epoch": 0.3,
"eval_accuracy": 0.5039421226586712,
"eval_loss": 2.294921875,
"eval_runtime": 39.6275,
"eval_samples_per_second": 90.039,
"eval_steps_per_second": 11.255,
"step": 1010000
},
{
"epoch": 0.3,
"learning_rate": 4.895231698724555e-05,
"loss": 2.2731,
"step": 1015000
},
{
"epoch": 0.3,
"eval_accuracy": 0.5039547251774216,
"eval_loss": 2.294921875,
"eval_runtime": 39.6334,
"eval_samples_per_second": 90.025,
"eval_steps_per_second": 11.253,
"step": 1015000
},
{
"epoch": 0.31,
"learning_rate": 4.887746964765881e-05,
"loss": 2.2714,
"step": 1020000
},
{
"epoch": 0.31,
"eval_accuracy": 0.5042445831086797,
"eval_loss": 2.294921875,
"eval_runtime": 39.6384,
"eval_samples_per_second": 90.014,
"eval_steps_per_second": 11.252,
"step": 1020000
},
{
"epoch": 0.31,
"learning_rate": 4.880263728652706e-05,
"loss": 2.2687,
"step": 1025000
},
{
"epoch": 0.31,
"eval_accuracy": 0.5045218385211876,
"eval_loss": 2.29296875,
"eval_runtime": 39.6654,
"eval_samples_per_second": 89.952,
"eval_steps_per_second": 11.244,
"step": 1025000
},
{
"epoch": 0.31,
"learning_rate": 4.872778994694032e-05,
"loss": 2.2673,
"step": 1030000
},
{
"epoch": 0.31,
"eval_accuracy": 0.5046215627999947,
"eval_loss": 2.29296875,
"eval_runtime": 39.7655,
"eval_samples_per_second": 89.726,
"eval_steps_per_second": 11.216,
"step": 1030000
},
{
"epoch": 0.31,
"learning_rate": 4.8652957585808575e-05,
"loss": 2.2677,
"step": 1035000
},
{
"epoch": 0.31,
"eval_accuracy": 0.5044056761744452,
"eval_loss": 2.29296875,
"eval_runtime": 39.7885,
"eval_samples_per_second": 89.674,
"eval_steps_per_second": 11.209,
"step": 1035000
},
{
"epoch": 0.31,
"learning_rate": 4.8578110246221835e-05,
"loss": 2.265,
"step": 1040000
},
{
"epoch": 0.31,
"eval_accuracy": 0.5046538910002674,
"eval_loss": 2.291015625,
"eval_runtime": 39.7963,
"eval_samples_per_second": 89.657,
"eval_steps_per_second": 11.207,
"step": 1040000
},
{
"epoch": 0.31,
"learning_rate": 4.850327788509009e-05,
"loss": 2.2659,
"step": 1045000
},
{
"epoch": 0.31,
"eval_accuracy": 0.504468688768197,
"eval_loss": 2.291015625,
"eval_runtime": 40.1905,
"eval_samples_per_second": 88.777,
"eval_steps_per_second": 11.097,
"step": 1045000
},
{
"epoch": 0.31,
"learning_rate": 4.842849045932331e-05,
"loss": 2.2633,
"step": 1050000
},
{
"epoch": 0.31,
"eval_accuracy": 0.5042100631660157,
"eval_loss": 2.294921875,
"eval_runtime": 39.7629,
"eval_samples_per_second": 89.732,
"eval_steps_per_second": 11.216,
"step": 1050000
},
{
"epoch": 0.32,
"learning_rate": 4.835361316282659e-05,
"loss": 2.2689,
"step": 1055000
},
{
"epoch": 0.32,
"eval_accuracy": 0.5049952548777227,
"eval_loss": 2.2890625,
"eval_runtime": 39.8901,
"eval_samples_per_second": 89.446,
"eval_steps_per_second": 11.181,
"step": 1055000
},
{
"epoch": 0.32,
"learning_rate": 4.827876582323985e-05,
"loss": 2.2617,
"step": 1060000
},
{
"epoch": 0.32,
"eval_accuracy": 0.5049081331176659,
"eval_loss": 2.2890625,
"eval_runtime": 39.8913,
"eval_samples_per_second": 89.443,
"eval_steps_per_second": 11.18,
"step": 1060000
},
{
"epoch": 0.32,
"learning_rate": 4.820390350519813e-05,
"loss": 2.2613,
"step": 1065000
},
{
"epoch": 0.32,
"eval_accuracy": 0.5052440176391427,
"eval_loss": 2.287109375,
"eval_runtime": 39.9185,
"eval_samples_per_second": 89.382,
"eval_steps_per_second": 11.173,
"step": 1065000
},
{
"epoch": 0.32,
"learning_rate": 4.8129086122521366e-05,
"loss": 2.2649,
"step": 1070000
},
{
"epoch": 0.32,
"eval_accuracy": 0.5047081366244537,
"eval_loss": 2.2890625,
"eval_runtime": 39.8452,
"eval_samples_per_second": 89.547,
"eval_steps_per_second": 11.193,
"step": 1070000
},
{
"epoch": 0.32,
"learning_rate": 4.805422380447964e-05,
"loss": 2.2587,
"step": 1075000
},
{
"epoch": 0.32,
"eval_accuracy": 0.505284564873383,
"eval_loss": 2.287109375,
"eval_runtime": 39.8746,
"eval_samples_per_second": 89.481,
"eval_steps_per_second": 11.185,
"step": 1075000
},
{
"epoch": 0.32,
"learning_rate": 4.797939144334789e-05,
"loss": 2.2641,
"step": 1080000
},
{
"epoch": 0.32,
"eval_accuracy": 0.5054223706762402,
"eval_loss": 2.28515625,
"eval_runtime": 39.914,
"eval_samples_per_second": 89.392,
"eval_steps_per_second": 11.174,
"step": 1080000
},
{
"epoch": 0.33,
"learning_rate": 4.7904559082216145e-05,
"loss": 2.2634,
"step": 1085000
},
{
"epoch": 0.33,
"eval_accuracy": 0.5056552433053229,
"eval_loss": 2.28515625,
"eval_runtime": 39.8319,
"eval_samples_per_second": 89.576,
"eval_steps_per_second": 11.197,
"step": 1085000
},
{
"epoch": 0.33,
"learning_rate": 4.7829741699539384e-05,
"loss": 2.2597,
"step": 1090000
},
{
"epoch": 0.33,
"eval_accuracy": 0.5057451047433689,
"eval_loss": 2.283203125,
"eval_runtime": 39.976,
"eval_samples_per_second": 89.254,
"eval_steps_per_second": 11.157,
"step": 1090000
},
{
"epoch": 0.33,
"learning_rate": 4.775487938149766e-05,
"loss": 2.2572,
"step": 1095000
},
{
"epoch": 0.33,
"eval_accuracy": 0.5059667446926958,
"eval_loss": 2.283203125,
"eval_runtime": 40.0116,
"eval_samples_per_second": 89.174,
"eval_steps_per_second": 11.147,
"step": 1095000
},
{
"epoch": 0.33,
"learning_rate": 4.768003204191092e-05,
"loss": 2.2566,
"step": 1100000
},
{
"epoch": 0.33,
"eval_accuracy": 0.5055771624826304,
"eval_loss": 2.283203125,
"eval_runtime": 39.963,
"eval_samples_per_second": 89.283,
"eval_steps_per_second": 11.16,
"step": 1100000
},
{
"epoch": 0.33,
"learning_rate": 4.760524461614414e-05,
"loss": 2.2576,
"step": 1105000
},
{
"epoch": 0.33,
"eval_accuracy": 0.5055905869047775,
"eval_loss": 2.283203125,
"eval_runtime": 39.8683,
"eval_samples_per_second": 89.495,
"eval_steps_per_second": 11.187,
"step": 1105000
},
{
"epoch": 0.33,
"learning_rate": 4.753038229810241e-05,
"loss": 2.2612,
"step": 1110000
},
{
"epoch": 0.33,
"eval_accuracy": 0.505675790890242,
"eval_loss": 2.283203125,
"eval_runtime": 39.9091,
"eval_samples_per_second": 89.403,
"eval_steps_per_second": 11.175,
"step": 1110000
},
{
"epoch": 0.33,
"learning_rate": 4.7455534958515676e-05,
"loss": 2.2585,
"step": 1115000
},
{
"epoch": 0.33,
"eval_accuracy": 0.505924553651662,
"eval_loss": 2.28125,
"eval_runtime": 39.8428,
"eval_samples_per_second": 89.552,
"eval_steps_per_second": 11.194,
"step": 1115000
},
{
"epoch": 0.34,
"learning_rate": 4.738073255429391e-05,
"loss": 2.2528,
"step": 1120000
},
{
"epoch": 0.34,
"eval_accuracy": 0.5059566078841358,
"eval_loss": 2.28125,
"eval_runtime": 40.0018,
"eval_samples_per_second": 89.196,
"eval_steps_per_second": 11.149,
"step": 1120000
},
{
"epoch": 0.34,
"learning_rate": 4.730588521470717e-05,
"loss": 2.2599,
"step": 1125000
},
{
"epoch": 0.34,
"eval_accuracy": 0.5059996208285663,
"eval_loss": 2.28125,
"eval_runtime": 39.9769,
"eval_samples_per_second": 89.251,
"eval_steps_per_second": 11.156,
"step": 1125000
},
{
"epoch": 0.34,
"learning_rate": 4.723105285357542e-05,
"loss": 2.2556,
"step": 1130000
},
{
"epoch": 0.34,
"eval_accuracy": 0.5065730354317075,
"eval_loss": 2.27734375,
"eval_runtime": 39.975,
"eval_samples_per_second": 89.256,
"eval_steps_per_second": 11.157,
"step": 1130000
},
{
"epoch": 0.34,
"learning_rate": 4.715619053553369e-05,
"loss": 2.2519,
"step": 1135000
},
{
"epoch": 0.34,
"eval_accuracy": 0.5063790662300716,
"eval_loss": 2.279296875,
"eval_runtime": 40.0596,
"eval_samples_per_second": 89.067,
"eval_steps_per_second": 11.133,
"step": 1135000
},
{
"epoch": 0.34,
"learning_rate": 4.708135817440194e-05,
"loss": 2.2567,
"step": 1140000
},
{
"epoch": 0.34,
"eval_accuracy": 0.5067524843400006,
"eval_loss": 2.27734375,
"eval_runtime": 40.0409,
"eval_samples_per_second": 89.109,
"eval_steps_per_second": 11.139,
"step": 1140000
},
{
"epoch": 0.34,
"learning_rate": 4.700651083481521e-05,
"loss": 2.2516,
"step": 1145000
},
{
"epoch": 0.34,
"eval_accuracy": 0.506862071459569,
"eval_loss": 2.275390625,
"eval_runtime": 39.8652,
"eval_samples_per_second": 89.502,
"eval_steps_per_second": 11.188,
"step": 1145000
},
{
"epoch": 0.34,
"learning_rate": 4.693166349522847e-05,
"loss": 2.2533,
"step": 1150000
},
{
"epoch": 0.34,
"eval_accuracy": 0.5067752236673111,
"eval_loss": 2.275390625,
"eval_runtime": 39.9361,
"eval_samples_per_second": 89.343,
"eval_steps_per_second": 11.168,
"step": 1150000
},
{
"epoch": 0.35,
"learning_rate": 4.6856846112551706e-05,
"loss": 2.2532,
"step": 1155000
},
{
"epoch": 0.35,
"eval_accuracy": 0.5069639874807674,
"eval_loss": 2.275390625,
"eval_runtime": 40.0505,
"eval_samples_per_second": 89.088,
"eval_steps_per_second": 11.136,
"step": 1155000
},
{
"epoch": 0.35,
"learning_rate": 4.6781998772964966e-05,
"loss": 2.2572,
"step": 1160000
},
{
"epoch": 0.35,
"eval_accuracy": 0.5063629021299353,
"eval_loss": 2.279296875,
"eval_runtime": 40.0931,
"eval_samples_per_second": 88.993,
"eval_steps_per_second": 11.124,
"step": 1160000
},
{
"epoch": 0.35,
"learning_rate": 4.670712147646825e-05,
"loss": 2.2514,
"step": 1165000
},
{
"epoch": 0.35,
"eval_accuracy": 0.5071686414265613,
"eval_loss": 2.2734375,
"eval_runtime": 40.4455,
"eval_samples_per_second": 88.217,
"eval_steps_per_second": 11.027,
"step": 1165000
},
{
"epoch": 0.35,
"learning_rate": 4.66322891153365e-05,
"loss": 2.2471,
"step": 1170000
},
{
"epoch": 0.35,
"eval_accuracy": 0.5073409671720824,
"eval_loss": 2.2734375,
"eval_runtime": 40.0562,
"eval_samples_per_second": 89.075,
"eval_steps_per_second": 11.134,
"step": 1170000
},
{
"epoch": 0.35,
"learning_rate": 4.655745675420475e-05,
"loss": 2.2524,
"step": 1175000
},
{
"epoch": 0.35,
"eval_accuracy": 0.50760562006584,
"eval_loss": 2.271484375,
"eval_runtime": 40.09,
"eval_samples_per_second": 89.0,
"eval_steps_per_second": 11.125,
"step": 1175000
},
{
"epoch": 0.35,
"learning_rate": 4.648260941461802e-05,
"loss": 2.247,
"step": 1180000
},
{
"epoch": 0.35,
"eval_accuracy": 0.5072979542276519,
"eval_loss": 2.271484375,
"eval_runtime": 40.3075,
"eval_samples_per_second": 88.52,
"eval_steps_per_second": 11.065,
"step": 1180000
},
{
"epoch": 0.35,
"learning_rate": 4.640780701039625e-05,
"loss": 2.2491,
"step": 1185000
},
{
"epoch": 0.35,
"eval_accuracy": 0.5076653450460047,
"eval_loss": 2.271484375,
"eval_runtime": 40.1522,
"eval_samples_per_second": 88.862,
"eval_steps_per_second": 11.108,
"step": 1185000
},
{
"epoch": 0.36,
"learning_rate": 4.63329746492645e-05,
"loss": 2.2481,
"step": 1190000
},
{
"epoch": 0.36,
"eval_accuracy": 0.5078031508488619,
"eval_loss": 2.26953125,
"eval_runtime": 40.1488,
"eval_samples_per_second": 88.869,
"eval_steps_per_second": 11.109,
"step": 1190000
},
{
"epoch": 0.36,
"learning_rate": 4.992515266041327e-05,
"loss": 2.2465,
"step": 1195000
},
{
"epoch": 0.36,
"eval_accuracy": 0.5069333030872883,
"eval_loss": 2.2734375,
"eval_runtime": 39.5233,
"eval_samples_per_second": 90.276,
"eval_steps_per_second": 11.284,
"step": 1195000
},
{
"epoch": 0.36,
"learning_rate": 4.985030532082652e-05,
"loss": 2.2494,
"step": 1200000
},
{
"epoch": 0.36,
"eval_accuracy": 0.5067053618785863,
"eval_loss": 2.279296875,
"eval_runtime": 38.5856,
"eval_samples_per_second": 92.47,
"eval_steps_per_second": 11.559,
"step": 1200000
},
{
"epoch": 0.36,
"learning_rate": 4.977545798123979e-05,
"loss": 2.2541,
"step": 1205000
},
{
"epoch": 0.36,
"eval_accuracy": 0.5068631673307646,
"eval_loss": 2.275390625,
"eval_runtime": 39.5375,
"eval_samples_per_second": 90.244,
"eval_steps_per_second": 11.28,
"step": 1205000
},
{
"epoch": 0.36,
"learning_rate": 4.9700625620108035e-05,
"loss": 2.25,
"step": 1210000
},
{
"epoch": 0.36,
"eval_accuracy": 0.5067459091128265,
"eval_loss": 2.275390625,
"eval_runtime": 39.4824,
"eval_samples_per_second": 90.369,
"eval_steps_per_second": 11.296,
"step": 1210000
},
{
"epoch": 0.36,
"learning_rate": 4.962582321588627e-05,
"loss": 2.25,
"step": 1215000
},
{
"epoch": 0.36,
"eval_accuracy": 0.5064313940796654,
"eval_loss": 2.279296875,
"eval_runtime": 39.3855,
"eval_samples_per_second": 90.592,
"eval_steps_per_second": 11.324,
"step": 1215000
},
{
"epoch": 0.37,
"learning_rate": 4.955096089784455e-05,
"loss": 2.2508,
"step": 1220000
},
{
"epoch": 0.37,
"eval_accuracy": 0.507028095945715,
"eval_loss": 2.2734375,
"eval_runtime": 39.5116,
"eval_samples_per_second": 90.303,
"eval_steps_per_second": 11.288,
"step": 1220000
},
{
"epoch": 0.37,
"learning_rate": 4.947612853671279e-05,
"loss": 2.2496,
"step": 1225000
},
{
"epoch": 0.37,
"eval_accuracy": 0.507010562006584,
"eval_loss": 2.2734375,
"eval_runtime": 39.5165,
"eval_samples_per_second": 90.291,
"eval_steps_per_second": 11.286,
"step": 1225000
},
{
"epoch": 0.37,
"learning_rate": 4.940131115403604e-05,
"loss": 2.2499,
"step": 1230000
},
{
"epoch": 0.37,
"eval_accuracy": 0.5073475423992566,
"eval_loss": 2.2734375,
"eval_runtime": 39.4273,
"eval_samples_per_second": 90.496,
"eval_steps_per_second": 11.312,
"step": 1230000
},
{
"epoch": 0.37,
"learning_rate": 4.932647879290429e-05,
"loss": 2.2467,
"step": 1235000
},
{
"epoch": 0.37,
"eval_accuracy": 0.5075538401518439,
"eval_loss": 2.271484375,
"eval_runtime": 39.5247,
"eval_samples_per_second": 90.273,
"eval_steps_per_second": 11.284,
"step": 1235000
},
{
"epoch": 0.37,
"learning_rate": 4.925164643177254e-05,
"loss": 2.2497,
"step": 1240000
},
{
"epoch": 0.37,
"eval_accuracy": 0.5073163100701796,
"eval_loss": 2.271484375,
"eval_runtime": 38.6276,
"eval_samples_per_second": 92.369,
"eval_steps_per_second": 11.546,
"step": 1240000
},
{
"epoch": 0.37,
"learning_rate": 4.917678411373081e-05,
"loss": 2.2463,
"step": 1245000
},
{
"epoch": 0.37,
"eval_accuracy": 0.5073409671720824,
"eval_loss": 2.271484375,
"eval_runtime": 39.5689,
"eval_samples_per_second": 90.172,
"eval_steps_per_second": 11.271,
"step": 1245000
},
{
"epoch": 0.37,
"learning_rate": 4.910196673105406e-05,
"loss": 2.2479,
"step": 1250000
},
{
"epoch": 0.37,
"eval_accuracy": 0.5077573982264421,
"eval_loss": 2.26953125,
"eval_runtime": 39.6608,
"eval_samples_per_second": 89.963,
"eval_steps_per_second": 11.245,
"step": 1250000
},
{
"epoch": 0.38,
"learning_rate": 4.902711939146731e-05,
"loss": 2.2445,
"step": 1255000
},
{
"epoch": 0.38,
"eval_accuracy": 0.5078749304121791,
"eval_loss": 2.26953125,
"eval_runtime": 39.6577,
"eval_samples_per_second": 89.97,
"eval_steps_per_second": 11.246,
"step": 1255000
},
{
"epoch": 0.38,
"learning_rate": 4.89522420949706e-05,
"loss": 2.247,
"step": 1260000
},
{
"epoch": 0.38,
"eval_accuracy": 0.5078483555356837,
"eval_loss": 2.26953125,
"eval_runtime": 39.6203,
"eval_samples_per_second": 90.055,
"eval_steps_per_second": 11.257,
"step": 1260000
},
{
"epoch": 0.38,
"learning_rate": 4.887743969074883e-05,
"loss": 2.2443,
"step": 1265000
},
{
"epoch": 0.38,
"eval_accuracy": 0.5078826015105489,
"eval_loss": 2.267578125,
"eval_runtime": 39.6508,
"eval_samples_per_second": 89.986,
"eval_steps_per_second": 11.248,
"step": 1265000
},
{
"epoch": 0.38,
"learning_rate": 4.880262230807207e-05,
"loss": 2.243,
"step": 1270000
},
{
"epoch": 0.38,
"eval_accuracy": 0.5080672558070215,
"eval_loss": 2.267578125,
"eval_runtime": 39.6639,
"eval_samples_per_second": 89.956,
"eval_steps_per_second": 11.244,
"step": 1270000
},
{
"epoch": 0.38,
"learning_rate": 4.8727804925395315e-05,
"loss": 2.2454,
"step": 1275000
},
{
"epoch": 0.38,
"eval_accuracy": 0.5076889062767118,
"eval_loss": 2.271484375,
"eval_runtime": 39.6373,
"eval_samples_per_second": 90.016,
"eval_steps_per_second": 11.252,
"step": 1275000
},
{
"epoch": 0.38,
"learning_rate": 4.86529126504436e-05,
"loss": 2.2451,
"step": 1280000
},
{
"epoch": 0.38,
"eval_accuracy": 0.5080541053526733,
"eval_loss": 2.26953125,
"eval_runtime": 39.7457,
"eval_samples_per_second": 89.771,
"eval_steps_per_second": 11.221,
"step": 1280000
},
{
"epoch": 0.38,
"learning_rate": 4.8578080289311855e-05,
"loss": 2.2455,
"step": 1285000
},
{
"epoch": 0.38,
"eval_accuracy": 0.5083853324215685,
"eval_loss": 2.265625,
"eval_runtime": 39.6403,
"eval_samples_per_second": 90.009,
"eval_steps_per_second": 11.251,
"step": 1285000
},
{
"epoch": 0.39,
"learning_rate": 4.850324792818011e-05,
"loss": 2.241,
"step": 1290000
},
{
"epoch": 0.39,
"eval_accuracy": 0.508259307234065,
"eval_loss": 2.267578125,
"eval_runtime": 40.1652,
"eval_samples_per_second": 88.833,
"eval_steps_per_second": 11.104,
"step": 1290000
},
{
"epoch": 0.39,
"learning_rate": 4.8428415567048354e-05,
"loss": 2.243,
"step": 1295000
},
{
"epoch": 0.39,
"eval_accuracy": 0.5085801235266012,
"eval_loss": 2.263671875,
"eval_runtime": 39.68,
"eval_samples_per_second": 89.919,
"eval_steps_per_second": 11.24,
"step": 1295000
},
{
"epoch": 0.39,
"learning_rate": 4.8353553249006635e-05,
"loss": 2.2408,
"step": 1300000
},
{
"epoch": 0.39,
"eval_accuracy": 0.5084371123355645,
"eval_loss": 2.263671875,
"eval_runtime": 39.9223,
"eval_samples_per_second": 89.374,
"eval_steps_per_second": 11.172,
"step": 1300000
},
{
"epoch": 0.39,
"learning_rate": 4.827876582323985e-05,
"loss": 2.2508,
"step": 1305000
},
{
"epoch": 0.39,
"eval_accuracy": 0.5063459161264021,
"eval_loss": 2.279296875,
"eval_runtime": 39.7588,
"eval_samples_per_second": 89.741,
"eval_steps_per_second": 11.218,
"step": 1305000
},
{
"epoch": 0.39,
"learning_rate": 4.820396341901809e-05,
"loss": 2.252,
"step": 1310000
},
{
"epoch": 0.39,
"eval_accuracy": 0.504651699257876,
"eval_loss": 2.291015625,
"eval_runtime": 39.8037,
"eval_samples_per_second": 89.64,
"eval_steps_per_second": 11.205,
"step": 1310000
},
{
"epoch": 0.39,
"learning_rate": 4.8129146036341324e-05,
"loss": 2.7482,
"step": 1315000
},
{
"epoch": 0.39,
"eval_accuracy": 0.4505951128528157,
"eval_loss": 2.646484375,
"eval_runtime": 39.8831,
"eval_samples_per_second": 89.462,
"eval_steps_per_second": 11.183,
"step": 1315000
},
{
"epoch": 0.4,
"learning_rate": 4.805426873984461e-05,
"loss": 2.4189,
"step": 1320000
},
{
"epoch": 0.4,
"eval_accuracy": 0.5070354930762858,
"eval_loss": 2.275390625,
"eval_runtime": 39.9021,
"eval_samples_per_second": 89.419,
"eval_steps_per_second": 11.177,
"step": 1320000
},
{
"epoch": 0.4,
"learning_rate": 4.797939144334789e-05,
"loss": 2.2446,
"step": 1325000
},
{
"epoch": 0.4,
"eval_accuracy": 0.5081412271127301,
"eval_loss": 2.267578125,
"eval_runtime": 39.8679,
"eval_samples_per_second": 89.495,
"eval_steps_per_second": 11.187,
"step": 1325000
},
{
"epoch": 0.4,
"learning_rate": 4.790457406067113e-05,
"loss": 2.2416,
"step": 1330000
},
{
"epoch": 0.4,
"eval_accuracy": 0.5086858750969846,
"eval_loss": 2.263671875,
"eval_runtime": 39.8423,
"eval_samples_per_second": 89.553,
"eval_steps_per_second": 11.194,
"step": 1330000
},
{
"epoch": 0.4,
"learning_rate": 4.78297267210844e-05,
"loss": 2.2421,
"step": 1335000
},
{
"epoch": 0.4,
"eval_accuracy": 0.5087765584384274,
"eval_loss": 2.26171875,
"eval_runtime": 39.9585,
"eval_samples_per_second": 89.293,
"eval_steps_per_second": 11.162,
"step": 1335000
},
{
"epoch": 0.4,
"learning_rate": 4.775487938149766e-05,
"loss": 2.2367,
"step": 1340000
},
{
"epoch": 0.4,
"eval_accuracy": 0.509168606358683,
"eval_loss": 2.26171875,
"eval_runtime": 39.8472,
"eval_samples_per_second": 89.542,
"eval_steps_per_second": 11.193,
"step": 1340000
},
{
"epoch": 0.4,
"learning_rate": 4.768001706345593e-05,
"loss": 2.2355,
"step": 1345000
},
{
"epoch": 0.4,
"eval_accuracy": 0.5090864160190068,
"eval_loss": 2.259765625,
"eval_runtime": 39.8651,
"eval_samples_per_second": 89.502,
"eval_steps_per_second": 11.188,
"step": 1345000
},
{
"epoch": 0.4,
"learning_rate": 4.760518470232418e-05,
"loss": 2.2379,
"step": 1350000
},
{
"epoch": 0.4,
"eval_accuracy": 0.5093981913741786,
"eval_loss": 2.259765625,
"eval_runtime": 39.8769,
"eval_samples_per_second": 89.475,
"eval_steps_per_second": 11.184,
"step": 1350000
},
{
"epoch": 0.41,
"learning_rate": 4.7530337362737444e-05,
"loss": 2.2365,
"step": 1355000
},
{
"epoch": 0.41,
"eval_accuracy": 0.509393259953798,
"eval_loss": 2.259765625,
"eval_runtime": 39.8496,
"eval_samples_per_second": 89.537,
"eval_steps_per_second": 11.192,
"step": 1355000
},
{
"epoch": 0.41,
"learning_rate": 4.745550500160569e-05,
"loss": 2.2379,
"step": 1360000
},
{
"epoch": 0.41,
"eval_accuracy": 0.5091491546449597,
"eval_loss": 2.2578125,
"eval_runtime": 39.8235,
"eval_samples_per_second": 89.595,
"eval_steps_per_second": 11.199,
"step": 1360000
},
{
"epoch": 0.41,
"learning_rate": 4.738067264047394e-05,
"loss": 2.235,
"step": 1365000
},
{
"epoch": 0.41,
"eval_accuracy": 0.5094527109661638,
"eval_loss": 2.2578125,
"eval_runtime": 38.8651,
"eval_samples_per_second": 91.805,
"eval_steps_per_second": 11.476,
"step": 1365000
},
{
"epoch": 0.41,
"learning_rate": 4.730582530088721e-05,
"loss": 2.236,
"step": 1370000
},
{
"epoch": 0.41,
"eval_accuracy": 0.5093398362330085,
"eval_loss": 2.2578125,
"eval_runtime": 40.9049,
"eval_samples_per_second": 87.227,
"eval_steps_per_second": 10.903,
"step": 1370000
},
{
"epoch": 0.41,
"learning_rate": 4.7230992939755456e-05,
"loss": 2.2344,
"step": 1375000
},
{
"epoch": 0.41,
"eval_accuracy": 0.5095472298567916,
"eval_loss": 2.2578125,
"eval_runtime": 39.9487,
"eval_samples_per_second": 89.314,
"eval_steps_per_second": 11.164,
"step": 1375000
},
{
"epoch": 0.41,
"learning_rate": 4.715614560016872e-05,
"loss": 2.2348,
"step": 1380000
},
{
"epoch": 0.41,
"eval_accuracy": 0.5095688733129063,
"eval_loss": 2.255859375,
"eval_runtime": 39.951,
"eval_samples_per_second": 89.309,
"eval_steps_per_second": 11.164,
"step": 1380000
},
{
"epoch": 0.41,
"learning_rate": 4.7081313239036975e-05,
"loss": 2.2306,
"step": 1385000
},
{
"epoch": 0.41,
"eval_accuracy": 0.5097368155736447,
"eval_loss": 2.255859375,
"eval_runtime": 40.0156,
"eval_samples_per_second": 89.165,
"eval_steps_per_second": 11.146,
"step": 1385000
},
{
"epoch": 0.42,
"learning_rate": 4.7006495856360214e-05,
"loss": 2.2293,
"step": 1390000
},
{
"epoch": 0.42,
"eval_accuracy": 0.5097521577703843,
"eval_loss": 2.255859375,
"eval_runtime": 40.0042,
"eval_samples_per_second": 89.191,
"eval_steps_per_second": 11.149,
"step": 1390000
},
{
"epoch": 0.42,
"learning_rate": 4.693166349522847e-05,
"loss": 2.2311,
"step": 1395000
},
{
"epoch": 0.42,
"eval_accuracy": 0.510102562585204,
"eval_loss": 2.25390625,
"eval_runtime": 39.9554,
"eval_samples_per_second": 89.3,
"eval_steps_per_second": 11.162,
"step": 1395000
},
{
"epoch": 0.42,
"learning_rate": 4.685683113409672e-05,
"loss": 2.231,
"step": 1400000
},
{
"epoch": 0.42,
"eval_accuracy": 0.5101116035225683,
"eval_loss": 2.25390625,
"eval_runtime": 39.9882,
"eval_samples_per_second": 89.226,
"eval_steps_per_second": 11.153,
"step": 1400000
},
{
"epoch": 0.42,
"learning_rate": 4.678201375141996e-05,
"loss": 2.2272,
"step": 1405000
},
{
"epoch": 0.42,
"eval_accuracy": 0.5102126976403701,
"eval_loss": 2.251953125,
"eval_runtime": 40.3186,
"eval_samples_per_second": 88.495,
"eval_steps_per_second": 11.062,
"step": 1405000
},
{
"epoch": 0.42,
"learning_rate": 4.670718139028821e-05,
"loss": 2.2264,
"step": 1410000
},
{
"epoch": 0.42,
"eval_accuracy": 0.5102436560016482,
"eval_loss": 2.25390625,
"eval_runtime": 40.0007,
"eval_samples_per_second": 89.198,
"eval_steps_per_second": 11.15,
"step": 1410000
},
{
"epoch": 0.42,
"learning_rate": 4.6632349029156465e-05,
"loss": 2.2295,
"step": 1415000
},
{
"epoch": 0.42,
"eval_accuracy": 0.5104469401084474,
"eval_loss": 2.251953125,
"eval_runtime": 40.1353,
"eval_samples_per_second": 88.899,
"eval_steps_per_second": 11.112,
"step": 1415000
},
{
"epoch": 0.43,
"learning_rate": 4.655753164647971e-05,
"loss": 2.2281,
"step": 1420000
},
{
"epoch": 0.43,
"eval_accuracy": 0.5103937903554567,
"eval_loss": 2.251953125,
"eval_runtime": 40.0088,
"eval_samples_per_second": 89.18,
"eval_steps_per_second": 11.148,
"step": 1420000
},
{
"epoch": 0.43,
"learning_rate": 4.6482699285347956e-05,
"loss": 2.2234,
"step": 1425000
},
{
"epoch": 0.43,
"eval_accuracy": 0.5106672102187797,
"eval_loss": 2.25,
"eval_runtime": 40.1918,
"eval_samples_per_second": 88.774,
"eval_steps_per_second": 11.097,
"step": 1425000
},
{
"epoch": 0.43,
"learning_rate": 4.64078819026712e-05,
"loss": 2.2293,
"step": 1430000
},
{
"epoch": 0.43,
"eval_accuracy": 0.5107220037785639,
"eval_loss": 2.25,
"eval_runtime": 40.1338,
"eval_samples_per_second": 88.903,
"eval_steps_per_second": 11.113,
"step": 1430000
},
{
"epoch": 0.43,
"learning_rate": 4.633306451999444e-05,
"loss": 2.2256,
"step": 1435000
},
{
"epoch": 0.43,
"eval_accuracy": 0.5108652889373995,
"eval_loss": 2.25,
"eval_runtime": 40.1228,
"eval_samples_per_second": 88.927,
"eval_steps_per_second": 11.116,
"step": 1435000
},
{
"epoch": 0.43,
"learning_rate": 4.6258232158862694e-05,
"loss": 2.2247,
"step": 1440000
},
{
"epoch": 0.43,
"eval_accuracy": 0.5107954271486747,
"eval_loss": 2.25,
"eval_runtime": 40.0563,
"eval_samples_per_second": 89.075,
"eval_steps_per_second": 11.134,
"step": 1440000
},
{
"epoch": 0.43,
"learning_rate": 4.6183384819275954e-05,
"loss": 2.222,
"step": 1445000
},
{
"epoch": 0.43,
"eval_accuracy": 0.510766386561989,
"eval_loss": 2.25,
"eval_runtime": 40.2217,
"eval_samples_per_second": 88.708,
"eval_steps_per_second": 11.089,
"step": 1445000
},
{
"epoch": 0.43,
"learning_rate": 4.61085674365992e-05,
"loss": 2.2228,
"step": 1450000
},
{
"epoch": 0.43,
"eval_accuracy": 0.5106184439505719,
"eval_loss": 2.248046875,
"eval_runtime": 40.5305,
"eval_samples_per_second": 88.032,
"eval_steps_per_second": 11.004,
"step": 1450000
},
{
"epoch": 0.44,
"learning_rate": 4.603372009701246e-05,
"loss": 2.2241,
"step": 1455000
},
{
"epoch": 0.44,
"eval_accuracy": 0.5110554225898505,
"eval_loss": 2.248046875,
"eval_runtime": 40.2413,
"eval_samples_per_second": 88.665,
"eval_steps_per_second": 11.083,
"step": 1455000
},
{
"epoch": 0.44,
"learning_rate": 4.595891769279069e-05,
"loss": 2.2219,
"step": 1460000
},
{
"epoch": 0.44,
"eval_accuracy": 0.511077613981563,
"eval_loss": 2.24609375,
"eval_runtime": 40.2762,
"eval_samples_per_second": 88.588,
"eval_steps_per_second": 11.074,
"step": 1460000
},
{
"epoch": 0.44,
"learning_rate": 4.5884085331658944e-05,
"loss": 2.2219,
"step": 1465000
},
{
"epoch": 0.44,
"eval_accuracy": 0.5112833637985525,
"eval_loss": 2.24609375,
"eval_runtime": 40.2334,
"eval_samples_per_second": 88.682,
"eval_steps_per_second": 11.085,
"step": 1465000
},
{
"epoch": 0.44,
"learning_rate": 4.58092529705272e-05,
"loss": 2.2215,
"step": 1470000
},
{
"epoch": 0.44,
"eval_accuracy": 0.5112595286000464,
"eval_loss": 2.24609375,
"eval_runtime": 40.2987,
"eval_samples_per_second": 88.539,
"eval_steps_per_second": 11.067,
"step": 1470000
},
{
"epoch": 0.44,
"learning_rate": 4.573445056630543e-05,
"loss": 2.2193,
"step": 1475000
},
{
"epoch": 0.44,
"eval_accuracy": 0.5116091115114694,
"eval_loss": 2.244140625,
"eval_runtime": 40.1594,
"eval_samples_per_second": 88.846,
"eval_steps_per_second": 11.106,
"step": 1475000
},
{
"epoch": 0.44,
"learning_rate": 4.56595882482637e-05,
"loss": 2.2183,
"step": 1480000
},
{
"epoch": 0.44,
"eval_accuracy": 0.5114707577730144,
"eval_loss": 2.244140625,
"eval_runtime": 40.9626,
"eval_samples_per_second": 87.104,
"eval_steps_per_second": 10.888,
"step": 1480000
},
{
"epoch": 0.44,
"learning_rate": 4.5584755887131956e-05,
"loss": 2.2177,
"step": 1485000
},
{
"epoch": 0.44,
"eval_accuracy": 0.5116211660946219,
"eval_loss": 2.244140625,
"eval_runtime": 40.3714,
"eval_samples_per_second": 88.379,
"eval_steps_per_second": 11.047,
"step": 1485000
},
{
"epoch": 0.45,
"learning_rate": 4.55099235260002e-05,
"loss": 2.2211,
"step": 1490000
},
{
"epoch": 0.45,
"eval_accuracy": 0.5115852763129632,
"eval_loss": 2.2421875,
"eval_runtime": 40.2564,
"eval_samples_per_second": 88.632,
"eval_steps_per_second": 11.079,
"step": 1490000
},
{
"epoch": 0.45,
"learning_rate": 4.5435091164868455e-05,
"loss": 2.2183,
"step": 1495000
},
{
"epoch": 0.45,
"eval_accuracy": 0.5118184229098449,
"eval_loss": 2.2421875,
"eval_runtime": 40.2315,
"eval_samples_per_second": 88.687,
"eval_steps_per_second": 11.086,
"step": 1495000
},
{
"epoch": 0.45,
"learning_rate": 4.5360243825281715e-05,
"loss": 2.2182,
"step": 1500000
},
{
"epoch": 0.45,
"eval_accuracy": 0.5120186933708559,
"eval_loss": 2.240234375,
"eval_runtime": 40.3125,
"eval_samples_per_second": 88.509,
"eval_steps_per_second": 11.064,
"step": 1500000
},
{
"epoch": 0.45,
"learning_rate": 4.528539648569498e-05,
"loss": 2.2148,
"step": 1505000
},
{
"epoch": 0.45,
"eval_accuracy": 0.5121849918248009,
"eval_loss": 2.240234375,
"eval_runtime": 40.3172,
"eval_samples_per_second": 88.498,
"eval_steps_per_second": 11.062,
"step": 1505000
},
{
"epoch": 0.45,
"learning_rate": 4.521059408147321e-05,
"loss": 2.2217,
"step": 1510000
},
{
"epoch": 0.45,
"eval_accuracy": 0.5122685520034718,
"eval_loss": 2.240234375,
"eval_runtime": 40.2766,
"eval_samples_per_second": 88.587,
"eval_steps_per_second": 11.073,
"step": 1510000
},
{
"epoch": 0.45,
"learning_rate": 4.5135761720341466e-05,
"loss": 2.2117,
"step": 1515000
},
{
"epoch": 0.45,
"eval_accuracy": 0.5123701940568713,
"eval_loss": 2.23828125,
"eval_runtime": 40.4763,
"eval_samples_per_second": 88.15,
"eval_steps_per_second": 11.019,
"step": 1515000
},
{
"epoch": 0.46,
"learning_rate": 4.5060944337664705e-05,
"loss": 2.2152,
"step": 1520000
},
{
"epoch": 0.46,
"eval_accuracy": 0.5123003322681465,
"eval_loss": 2.23828125,
"eval_runtime": 40.309,
"eval_samples_per_second": 88.516,
"eval_steps_per_second": 11.065,
"step": 1520000
},
{
"epoch": 0.46,
"learning_rate": 4.4986096998077965e-05,
"loss": 2.2148,
"step": 1525000
},
{
"epoch": 0.46,
"eval_accuracy": 0.5124967671799727,
"eval_loss": 2.23828125,
"eval_runtime": 40.2971,
"eval_samples_per_second": 88.542,
"eval_steps_per_second": 11.068,
"step": 1525000
},
{
"epoch": 0.46,
"learning_rate": 4.491127961540121e-05,
"loss": 2.2151,
"step": 1530000
},
{
"epoch": 0.46,
"eval_accuracy": 0.5127488175549798,
"eval_loss": 2.236328125,
"eval_runtime": 40.3675,
"eval_samples_per_second": 88.388,
"eval_steps_per_second": 11.049,
"step": 1530000
},
{
"epoch": 0.46,
"learning_rate": 4.483646223272445e-05,
"loss": 2.2129,
"step": 1535000
},
{
"epoch": 0.46,
"eval_accuracy": 0.5127022430291633,
"eval_loss": 2.236328125,
"eval_runtime": 40.4605,
"eval_samples_per_second": 88.185,
"eval_steps_per_second": 11.023,
"step": 1535000
},
{
"epoch": 0.46,
"learning_rate": 4.47616298715927e-05,
"loss": 2.2145,
"step": 1540000
},
{
"epoch": 0.46,
"eval_accuracy": 0.5127690911721,
"eval_loss": 2.236328125,
"eval_runtime": 40.376,
"eval_samples_per_second": 88.369,
"eval_steps_per_second": 11.046,
"step": 1540000
},
{
"epoch": 0.46,
"learning_rate": 4.468681248891595e-05,
"loss": 2.2099,
"step": 1545000
},
{
"epoch": 0.46,
"eval_accuracy": 0.5128871712934349,
"eval_loss": 2.236328125,
"eval_runtime": 40.3585,
"eval_samples_per_second": 88.408,
"eval_steps_per_second": 11.051,
"step": 1545000
},
{
"epoch": 0.46,
"learning_rate": 4.46119651493292e-05,
"loss": 2.2125,
"step": 1550000
},
{
"epoch": 0.46,
"eval_accuracy": 0.5131964809384164,
"eval_loss": 2.234375,
"eval_runtime": 40.3163,
"eval_samples_per_second": 88.5,
"eval_steps_per_second": 11.063,
"step": 1550000
},
{
"epoch": 0.47,
"learning_rate": 4.453713278819746e-05,
"loss": 2.2101,
"step": 1555000
},
{
"epoch": 0.47,
"eval_accuracy": 0.5130504561015916,
"eval_loss": 2.234375,
"eval_runtime": 40.2993,
"eval_samples_per_second": 88.537,
"eval_steps_per_second": 11.067,
"step": 1555000
},
{
"epoch": 0.47,
"learning_rate": 4.4462300427065714e-05,
"loss": 2.211,
"step": 1560000
},
{
"epoch": 0.47,
"eval_accuracy": 0.5132159326521398,
"eval_loss": 2.234375,
"eval_runtime": 40.3465,
"eval_samples_per_second": 88.434,
"eval_steps_per_second": 11.054,
"step": 1560000
},
{
"epoch": 0.47,
"learning_rate": 4.438743810902398e-05,
"loss": 2.2086,
"step": 1565000
},
{
"epoch": 0.47,
"eval_accuracy": 0.5131567556075729,
"eval_loss": 2.234375,
"eval_runtime": 40.256,
"eval_samples_per_second": 88.633,
"eval_steps_per_second": 11.079,
"step": 1565000
},
{
"epoch": 0.47,
"learning_rate": 4.4312605747892234e-05,
"loss": 2.2137,
"step": 1570000
},
{
"epoch": 0.47,
"eval_accuracy": 0.5131545638651815,
"eval_loss": 2.232421875,
"eval_runtime": 40.2935,
"eval_samples_per_second": 88.55,
"eval_steps_per_second": 11.069,
"step": 1570000
},
{
"epoch": 0.47,
"learning_rate": 4.423778836521548e-05,
"loss": 2.2122,
"step": 1575000
},
{
"epoch": 0.47,
"eval_accuracy": 0.5134236002437218,
"eval_loss": 2.232421875,
"eval_runtime": 40.3698,
"eval_samples_per_second": 88.383,
"eval_steps_per_second": 11.048,
"step": 1575000
},
{
"epoch": 0.47,
"learning_rate": 4.416297098253872e-05,
"loss": 2.2053,
"step": 1580000
},
{
"epoch": 0.47,
"eval_accuracy": 0.5133767517501063,
"eval_loss": 2.232421875,
"eval_runtime": 40.3058,
"eval_samples_per_second": 88.523,
"eval_steps_per_second": 11.065,
"step": 1580000
},
{
"epoch": 0.47,
"learning_rate": 4.408813862140697e-05,
"loss": 2.208,
"step": 1585000
},
{
"epoch": 0.47,
"eval_accuracy": 0.513388258397661,
"eval_loss": 2.23046875,
"eval_runtime": 40.3539,
"eval_samples_per_second": 88.418,
"eval_steps_per_second": 11.052,
"step": 1585000
},
{
"epoch": 0.48,
"learning_rate": 4.401329128182023e-05,
"loss": 2.2081,
"step": 1590000
},
{
"epoch": 0.48,
"eval_accuracy": 0.5135512692380189,
"eval_loss": 2.23046875,
"eval_runtime": 42.8555,
"eval_samples_per_second": 83.257,
"eval_steps_per_second": 10.407,
"step": 1590000
},
{
"epoch": 0.48,
"learning_rate": 4.3938458920688484e-05,
"loss": 2.2077,
"step": 1595000
},
{
"epoch": 0.48,
"eval_accuracy": 0.5137540054092202,
"eval_loss": 2.23046875,
"eval_runtime": 40.2657,
"eval_samples_per_second": 88.611,
"eval_steps_per_second": 11.076,
"step": 1595000
},
{
"epoch": 0.48,
"learning_rate": 4.3863611581101744e-05,
"loss": 2.2061,
"step": 1600000
},
{
"epoch": 0.48,
"eval_accuracy": 0.5135893507620688,
"eval_loss": 2.23046875,
"eval_runtime": 41.6767,
"eval_samples_per_second": 85.611,
"eval_steps_per_second": 10.701,
"step": 1600000
},
{
"epoch": 0.48,
"learning_rate": 4.378880917687998e-05,
"loss": 2.2055,
"step": 1605000
},
{
"epoch": 0.48,
"eval_accuracy": 0.5138961946968601,
"eval_loss": 2.228515625,
"eval_runtime": 40.3609,
"eval_samples_per_second": 88.402,
"eval_steps_per_second": 11.05,
"step": 1605000
},
{
"epoch": 0.48,
"learning_rate": 4.371397681574823e-05,
"loss": 2.2065,
"step": 1610000
},
{
"epoch": 0.48,
"eval_accuracy": 0.5138923591476752,
"eval_loss": 2.228515625,
"eval_runtime": 40.453,
"eval_samples_per_second": 88.201,
"eval_steps_per_second": 11.025,
"step": 1610000
},
{
"epoch": 0.48,
"learning_rate": 4.363914445461648e-05,
"loss": 2.2054,
"step": 1615000
},
{
"epoch": 0.48,
"eval_accuracy": 0.5138997562782461,
"eval_loss": 2.228515625,
"eval_runtime": 41.6251,
"eval_samples_per_second": 85.718,
"eval_steps_per_second": 10.715,
"step": 1615000
},
{
"epoch": 0.49,
"learning_rate": 4.356432707193973e-05,
"loss": 2.2035,
"step": 1620000
},
{
"epoch": 0.49,
"eval_accuracy": 0.5140274252725432,
"eval_loss": 2.228515625,
"eval_runtime": 40.4365,
"eval_samples_per_second": 88.237,
"eval_steps_per_second": 11.03,
"step": 1620000
},
{
"epoch": 0.49,
"learning_rate": 4.348947973235298e-05,
"loss": 2.2021,
"step": 1625000
},
{
"epoch": 0.49,
"eval_accuracy": 0.5139844123281126,
"eval_loss": 2.228515625,
"eval_runtime": 40.3492,
"eval_samples_per_second": 88.428,
"eval_steps_per_second": 11.054,
"step": 1625000
},
{
"epoch": 0.49,
"learning_rate": 4.3414662349676226e-05,
"loss": 2.2036,
"step": 1630000
},
{
"epoch": 0.49,
"eval_accuracy": 0.5138233192623471,
"eval_loss": 2.228515625,
"eval_runtime": 40.3662,
"eval_samples_per_second": 88.391,
"eval_steps_per_second": 11.049,
"step": 1630000
},
{
"epoch": 0.49,
"learning_rate": 4.333981501008949e-05,
"loss": 2.204,
"step": 1635000
},
{
"epoch": 0.49,
"eval_accuracy": 0.5139803028111288,
"eval_loss": 2.2265625,
"eval_runtime": 40.2896,
"eval_samples_per_second": 88.559,
"eval_steps_per_second": 11.07,
"step": 1635000
},
{
"epoch": 0.49,
"learning_rate": 4.3264967670502746e-05,
"loss": 2.2042,
"step": 1640000
},
{
"epoch": 0.49,
"eval_accuracy": 0.5140860543815122,
"eval_loss": 2.2265625,
"eval_runtime": 42.4068,
"eval_samples_per_second": 84.137,
"eval_steps_per_second": 10.517,
"step": 1640000
},
{
"epoch": 0.49,
"learning_rate": 4.3190135309371006e-05,
"loss": 2.2024,
"step": 1645000
},
{
"epoch": 0.49,
"eval_accuracy": 0.514173450109368,
"eval_loss": 2.2265625,
"eval_runtime": 40.3018,
"eval_samples_per_second": 88.532,
"eval_steps_per_second": 11.067,
"step": 1645000
},
{
"epoch": 0.49,
"learning_rate": 4.311530294823926e-05,
"loss": 2.2023,
"step": 1650000
},
{
"epoch": 0.49,
"eval_accuracy": 0.5144172814504074,
"eval_loss": 2.2265625,
"eval_runtime": 40.2694,
"eval_samples_per_second": 88.603,
"eval_steps_per_second": 11.075,
"step": 1650000
},
{
"epoch": 0.5,
"learning_rate": 4.30404855655625e-05,
"loss": 2.1976,
"step": 1655000
},
{
"epoch": 0.5,
"eval_accuracy": 0.5145805662585642,
"eval_loss": 2.224609375,
"eval_runtime": 40.3729,
"eval_samples_per_second": 88.376,
"eval_steps_per_second": 11.047,
"step": 1655000
},
{
"epoch": 0.5,
"learning_rate": 4.296565320443075e-05,
"loss": 2.2028,
"step": 1660000
},
{
"epoch": 0.5,
"eval_accuracy": 0.5147172761902257,
"eval_loss": 2.224609375,
"eval_runtime": 40.282,
"eval_samples_per_second": 88.576,
"eval_steps_per_second": 11.072,
"step": 1660000
},
{
"epoch": 0.5,
"learning_rate": 4.289080586484401e-05,
"loss": 2.1971,
"step": 1665000
},
{
"epoch": 0.5,
"eval_accuracy": 0.5146457705947074,
"eval_loss": 2.224609375,
"eval_runtime": 40.4909,
"eval_samples_per_second": 88.119,
"eval_steps_per_second": 11.015,
"step": 1665000
},
{
"epoch": 0.5,
"learning_rate": 4.281595852525727e-05,
"loss": 2.1978,
"step": 1670000
},
{
"epoch": 0.5,
"eval_accuracy": 0.5146065931994617,
"eval_loss": 2.224609375,
"eval_runtime": 40.3534,
"eval_samples_per_second": 88.419,
"eval_steps_per_second": 11.052,
"step": 1670000
},
{
"epoch": 0.5,
"learning_rate": 4.27411561210355e-05,
"loss": 2.1955,
"step": 1675000
},
{
"epoch": 0.5,
"eval_accuracy": 0.5148249455352015,
"eval_loss": 2.22265625,
"eval_runtime": 39.3164,
"eval_samples_per_second": 90.751,
"eval_steps_per_second": 11.344,
"step": 1675000
},
{
"epoch": 0.5,
"learning_rate": 4.266630878144877e-05,
"loss": 2.1967,
"step": 1680000
},
{
"epoch": 0.5,
"eval_accuracy": 0.5146874137001434,
"eval_loss": 2.22265625,
"eval_runtime": 40.3287,
"eval_samples_per_second": 88.473,
"eval_steps_per_second": 11.059,
"step": 1680000
},
{
"epoch": 0.5,
"learning_rate": 4.259149139877201e-05,
"loss": 2.1975,
"step": 1685000
},
{
"epoch": 0.5,
"eval_accuracy": 0.5151745284466245,
"eval_loss": 2.22265625,
"eval_runtime": 40.2734,
"eval_samples_per_second": 88.594,
"eval_steps_per_second": 11.074,
"step": 1685000
},
{
"epoch": 0.51,
"learning_rate": 4.251668899455024e-05,
"loss": 2.1972,
"step": 1690000
},
{
"epoch": 0.51,
"eval_accuracy": 0.5148622051558548,
"eval_loss": 2.220703125,
"eval_runtime": 40.2657,
"eval_samples_per_second": 88.611,
"eval_steps_per_second": 11.076,
"step": 1690000
},
{
"epoch": 0.51,
"learning_rate": 4.2441841654963506e-05,
"loss": 2.1967,
"step": 1695000
},
{
"epoch": 0.51,
"eval_accuracy": 0.5150745302000184,
"eval_loss": 2.220703125,
"eval_runtime": 40.2491,
"eval_samples_per_second": 88.648,
"eval_steps_per_second": 11.081,
"step": 1695000
},
{
"epoch": 0.51,
"learning_rate": 4.236699431537676e-05,
"loss": 2.194,
"step": 1700000
},
{
"epoch": 0.51,
"eval_accuracy": 0.5150706946508335,
"eval_loss": 2.220703125,
"eval_runtime": 40.3323,
"eval_samples_per_second": 88.465,
"eval_steps_per_second": 11.058,
"step": 1700000
},
{
"epoch": 0.51,
"learning_rate": 4.9925167638868255e-05,
"loss": 2.2009,
"step": 1705000
},
{
"epoch": 0.51,
"eval_accuracy": 0.5139096191190072,
"eval_loss": 2.228515625,
"eval_runtime": 39.4304,
"eval_samples_per_second": 90.489,
"eval_steps_per_second": 11.311,
"step": 1705000
},
{
"epoch": 0.51,
"learning_rate": 4.9850350256191494e-05,
"loss": 2.2085,
"step": 1710000
},
{
"epoch": 0.51,
"eval_accuracy": 0.5136279802217166,
"eval_loss": 2.23046875,
"eval_runtime": 39.2325,
"eval_samples_per_second": 90.945,
"eval_steps_per_second": 11.368,
"step": 1710000
},
{
"epoch": 0.51,
"learning_rate": 4.977551789505975e-05,
"loss": 2.2077,
"step": 1715000
},
{
"epoch": 0.51,
"eval_accuracy": 0.5136983899460393,
"eval_loss": 2.23046875,
"eval_runtime": 39.2977,
"eval_samples_per_second": 90.794,
"eval_steps_per_second": 11.349,
"step": 1715000
},
{
"epoch": 0.52,
"learning_rate": 4.9700670555473014e-05,
"loss": 2.205,
"step": 1720000
},
{
"epoch": 0.52,
"eval_accuracy": 0.51339866917402,
"eval_loss": 2.23046875,
"eval_runtime": 39.3375,
"eval_samples_per_second": 90.702,
"eval_steps_per_second": 11.338,
"step": 1720000
},
{
"epoch": 0.52,
"learning_rate": 4.962583819434126e-05,
"loss": 2.2063,
"step": 1725000
},
{
"epoch": 0.52,
"eval_accuracy": 0.5134490792490214,
"eval_loss": 2.23046875,
"eval_runtime": 39.3134,
"eval_samples_per_second": 90.758,
"eval_steps_per_second": 11.345,
"step": 1725000
},
{
"epoch": 0.52,
"learning_rate": 4.9551020811664506e-05,
"loss": 2.2076,
"step": 1730000
},
{
"epoch": 0.52,
"eval_accuracy": 0.5134718185763318,
"eval_loss": 2.23046875,
"eval_runtime": 39.2618,
"eval_samples_per_second": 90.877,
"eval_steps_per_second": 11.36,
"step": 1730000
},
{
"epoch": 0.52,
"learning_rate": 4.9476173472077765e-05,
"loss": 2.2036,
"step": 1735000
},
{
"epoch": 0.52,
"eval_accuracy": 0.5133134651885556,
"eval_loss": 2.23046875,
"eval_runtime": 39.3642,
"eval_samples_per_second": 90.641,
"eval_steps_per_second": 11.33,
"step": 1735000
},
{
"epoch": 0.52,
"learning_rate": 4.9401356089401005e-05,
"loss": 2.2064,
"step": 1740000
},
{
"epoch": 0.52,
"eval_accuracy": 0.5138024977096292,
"eval_loss": 2.23046875,
"eval_runtime": 39.4343,
"eval_samples_per_second": 90.48,
"eval_steps_per_second": 11.31,
"step": 1740000
},
{
"epoch": 0.52,
"learning_rate": 4.932650874981427e-05,
"loss": 2.2053,
"step": 1745000
},
{
"epoch": 0.52,
"eval_accuracy": 0.5136904448798706,
"eval_loss": 2.23046875,
"eval_runtime": 39.4148,
"eval_samples_per_second": 90.524,
"eval_steps_per_second": 11.316,
"step": 1745000
},
{
"epoch": 0.52,
"learning_rate": 4.9251676388682524e-05,
"loss": 2.2048,
"step": 1750000
},
{
"epoch": 0.52,
"eval_accuracy": 0.5138953727934633,
"eval_loss": 2.23046875,
"eval_runtime": 39.3715,
"eval_samples_per_second": 90.624,
"eval_steps_per_second": 11.328,
"step": 1750000
},
{
"epoch": 0.53,
"learning_rate": 4.917684402755077e-05,
"loss": 2.2075,
"step": 1755000
},
{
"epoch": 0.53,
"eval_accuracy": 0.5137868815450907,
"eval_loss": 2.23046875,
"eval_runtime": 39.4167,
"eval_samples_per_second": 90.52,
"eval_steps_per_second": 11.315,
"step": 1755000
},
{
"epoch": 0.53,
"learning_rate": 4.910201166641902e-05,
"loss": 2.2041,
"step": 1760000
},
{
"epoch": 0.53,
"eval_accuracy": 0.5136414046438638,
"eval_loss": 2.228515625,
"eval_runtime": 39.46,
"eval_samples_per_second": 90.421,
"eval_steps_per_second": 11.303,
"step": 1760000
},
{
"epoch": 0.53,
"learning_rate": 4.902716432683229e-05,
"loss": 2.2057,
"step": 1765000
},
{
"epoch": 0.53,
"eval_accuracy": 0.5138789347255281,
"eval_loss": 2.228515625,
"eval_runtime": 39.4616,
"eval_samples_per_second": 90.417,
"eval_steps_per_second": 11.302,
"step": 1765000
},
{
"epoch": 0.53,
"learning_rate": 4.8952331965700536e-05,
"loss": 2.2054,
"step": 1770000
},
{
"epoch": 0.53,
"eval_accuracy": 0.5138929070832731,
"eval_loss": 2.228515625,
"eval_runtime": 39.5693,
"eval_samples_per_second": 90.171,
"eval_steps_per_second": 11.271,
"step": 1770000
},
{
"epoch": 0.53,
"learning_rate": 4.887752956147877e-05,
"loss": 2.2085,
"step": 1775000
},
{
"epoch": 0.53,
"eval_accuracy": 0.5139266051225403,
"eval_loss": 2.228515625,
"eval_runtime": 39.4709,
"eval_samples_per_second": 90.396,
"eval_steps_per_second": 11.299,
"step": 1775000
},
{
"epoch": 0.53,
"learning_rate": 4.880269720034703e-05,
"loss": 2.2051,
"step": 1780000
},
{
"epoch": 0.53,
"eval_accuracy": 0.5141471492006715,
"eval_loss": 2.2265625,
"eval_runtime": 39.4552,
"eval_samples_per_second": 90.432,
"eval_steps_per_second": 11.304,
"step": 1780000
},
{
"epoch": 0.53,
"learning_rate": 4.872786483921527e-05,
"loss": 2.2023,
"step": 1785000
},
{
"epoch": 0.53,
"eval_accuracy": 0.5139211257665619,
"eval_loss": 2.2265625,
"eval_runtime": 39.5212,
"eval_samples_per_second": 90.281,
"eval_steps_per_second": 11.285,
"step": 1785000
},
{
"epoch": 0.54,
"learning_rate": 4.8653032478083526e-05,
"loss": 2.205,
"step": 1790000
},
{
"epoch": 0.54,
"eval_accuracy": 0.5140561918914298,
"eval_loss": 2.2265625,
"eval_runtime": 39.4725,
"eval_samples_per_second": 90.392,
"eval_steps_per_second": 11.299,
"step": 1790000
},
{
"epoch": 0.54,
"learning_rate": 4.857818513849679e-05,
"loss": 2.2009,
"step": 1795000
},
{
"epoch": 0.54,
"eval_accuracy": 0.5141463272972748,
"eval_loss": 2.2265625,
"eval_runtime": 39.6114,
"eval_samples_per_second": 90.075,
"eval_steps_per_second": 11.259,
"step": 1795000
},
{
"epoch": 0.54,
"learning_rate": 4.850335277736504e-05,
"loss": 2.1998,
"step": 1800000
},
{
"epoch": 0.54,
"eval_accuracy": 0.5143134476546165,
"eval_loss": 2.2265625,
"eval_runtime": 39.8498,
"eval_samples_per_second": 89.536,
"eval_steps_per_second": 11.192,
"step": 1800000
},
{
"epoch": 0.54,
"learning_rate": 4.8428505437778305e-05,
"loss": 2.2009,
"step": 1805000
},
{
"epoch": 0.54,
"eval_accuracy": 0.5143608440838298,
"eval_loss": 2.224609375,
"eval_runtime": 38.3896,
"eval_samples_per_second": 92.942,
"eval_steps_per_second": 11.618,
"step": 1805000
},
{
"epoch": 0.54,
"learning_rate": 4.8353688055101544e-05,
"loss": 2.2027,
"step": 1810000
},
{
"epoch": 0.54,
"eval_accuracy": 0.5143013930714639,
"eval_loss": 2.2265625,
"eval_runtime": 39.6231,
"eval_samples_per_second": 90.048,
"eval_steps_per_second": 11.256,
"step": 1810000
},
{
"epoch": 0.54,
"learning_rate": 4.8278840715514804e-05,
"loss": 2.2007,
"step": 1815000
},
{
"epoch": 0.54,
"eval_accuracy": 0.5145857716467437,
"eval_loss": 2.224609375,
"eval_runtime": 39.6832,
"eval_samples_per_second": 89.912,
"eval_steps_per_second": 11.239,
"step": 1815000
},
{
"epoch": 0.55,
"learning_rate": 4.820402333283805e-05,
"loss": 2.1978,
"step": 1820000
},
{
"epoch": 0.55,
"eval_accuracy": 0.5144972800476924,
"eval_loss": 2.224609375,
"eval_runtime": 39.721,
"eval_samples_per_second": 89.827,
"eval_steps_per_second": 11.228,
"step": 1820000
},
{
"epoch": 0.55,
"learning_rate": 4.812920595016129e-05,
"loss": 2.1999,
"step": 1825000
},
{
"epoch": 0.55,
"eval_accuracy": 0.5145970043264995,
"eval_loss": 2.22265625,
"eval_runtime": 39.6521,
"eval_samples_per_second": 89.983,
"eval_steps_per_second": 11.248,
"step": 1825000
},
{
"epoch": 0.55,
"learning_rate": 4.8054388567484535e-05,
"loss": 2.1978,
"step": 1830000
},
{
"epoch": 0.55,
"eval_accuracy": 0.5148150826944404,
"eval_loss": 2.22265625,
"eval_runtime": 39.5596,
"eval_samples_per_second": 90.193,
"eval_steps_per_second": 11.274,
"step": 1830000
},
{
"epoch": 0.55,
"learning_rate": 4.79795262494428e-05,
"loss": 2.1989,
"step": 1835000
},
{
"epoch": 0.55,
"eval_accuracy": 0.5147271390309869,
"eval_loss": 2.22265625,
"eval_runtime": 39.6103,
"eval_samples_per_second": 90.077,
"eval_steps_per_second": 11.26,
"step": 1835000
},
{
"epoch": 0.55,
"learning_rate": 4.790467890985607e-05,
"loss": 2.1989,
"step": 1840000
},
{
"epoch": 0.55,
"eval_accuracy": 0.5147980966909074,
"eval_loss": 2.22265625,
"eval_runtime": 39.6511,
"eval_samples_per_second": 89.985,
"eval_steps_per_second": 11.248,
"step": 1840000
},
{
"epoch": 0.55,
"learning_rate": 4.7829876505634294e-05,
"loss": 2.1982,
"step": 1845000
},
{
"epoch": 0.55,
"eval_accuracy": 0.5149764497280047,
"eval_loss": 2.220703125,
"eval_runtime": 39.6684,
"eval_samples_per_second": 89.946,
"eval_steps_per_second": 11.243,
"step": 1845000
},
{
"epoch": 0.55,
"learning_rate": 4.775501418759257e-05,
"loss": 2.1974,
"step": 1850000
},
{
"epoch": 0.55,
"eval_accuracy": 0.515062201649067,
"eval_loss": 2.220703125,
"eval_runtime": 39.5826,
"eval_samples_per_second": 90.141,
"eval_steps_per_second": 11.268,
"step": 1850000
},
{
"epoch": 0.56,
"learning_rate": 4.768016684800583e-05,
"loss": 2.1972,
"step": 1855000
},
{
"epoch": 0.56,
"eval_accuracy": 0.5151389126327648,
"eval_loss": 2.220703125,
"eval_runtime": 39.774,
"eval_samples_per_second": 89.707,
"eval_steps_per_second": 11.213,
"step": 1855000
},
{
"epoch": 0.56,
"learning_rate": 4.760533448687408e-05,
"loss": 2.1966,
"step": 1860000
},
{
"epoch": 0.56,
"eval_accuracy": 0.515106858400291,
"eval_loss": 2.220703125,
"eval_runtime": 39.7218,
"eval_samples_per_second": 89.825,
"eval_steps_per_second": 11.228,
"step": 1860000
},
{
"epoch": 0.56,
"learning_rate": 4.753050212574233e-05,
"loss": 2.198,
"step": 1865000
},
{
"epoch": 0.56,
"eval_accuracy": 0.5150282296420008,
"eval_loss": 2.220703125,
"eval_runtime": 39.6783,
"eval_samples_per_second": 89.923,
"eval_steps_per_second": 11.24,
"step": 1865000
},
{
"epoch": 0.56,
"learning_rate": 4.7455669764610586e-05,
"loss": 2.1978,
"step": 1870000
},
{
"epoch": 0.56,
"eval_accuracy": 0.5151887747721684,
"eval_loss": 2.220703125,
"eval_runtime": 39.753,
"eval_samples_per_second": 89.754,
"eval_steps_per_second": 11.219,
"step": 1870000
},
{
"epoch": 0.56,
"learning_rate": 4.738083740347884e-05,
"loss": 2.1938,
"step": 1875000
},
{
"epoch": 0.56,
"eval_accuracy": 0.5152000074519242,
"eval_loss": 2.220703125,
"eval_runtime": 39.7549,
"eval_samples_per_second": 89.75,
"eval_steps_per_second": 11.219,
"step": 1875000
},
{
"epoch": 0.56,
"learning_rate": 4.730600504234709e-05,
"loss": 2.1908,
"step": 1880000
},
{
"epoch": 0.56,
"eval_accuracy": 0.5152191851978486,
"eval_loss": 2.21875,
"eval_runtime": 39.6445,
"eval_samples_per_second": 90.0,
"eval_steps_per_second": 11.25,
"step": 1880000
},
{
"epoch": 0.56,
"learning_rate": 4.723118765967034e-05,
"loss": 2.1899,
"step": 1885000
},
{
"epoch": 0.56,
"eval_accuracy": 0.5151602821210807,
"eval_loss": 2.21875,
"eval_runtime": 39.7932,
"eval_samples_per_second": 89.664,
"eval_steps_per_second": 11.208,
"step": 1885000
},
{
"epoch": 0.57,
"learning_rate": 4.715634032008359e-05,
"loss": 2.1938,
"step": 1890000
},
{
"epoch": 0.57,
"eval_accuracy": 0.5151682271872493,
"eval_loss": 2.21875,
"eval_runtime": 39.7818,
"eval_samples_per_second": 89.689,
"eval_steps_per_second": 11.211,
"step": 1890000
},
{
"epoch": 0.57,
"learning_rate": 4.708150795895185e-05,
"loss": 2.1909,
"step": 1895000
},
{
"epoch": 0.57,
"eval_accuracy": 0.5153520595803252,
"eval_loss": 2.21875,
"eval_runtime": 39.8181,
"eval_samples_per_second": 89.607,
"eval_steps_per_second": 11.201,
"step": 1895000
},
{
"epoch": 0.57,
"learning_rate": 4.700669057627509e-05,
"loss": 2.1921,
"step": 1900000
},
{
"epoch": 0.57,
"eval_accuracy": 0.5155320564242162,
"eval_loss": 2.21875,
"eval_runtime": 40.2538,
"eval_samples_per_second": 88.638,
"eval_steps_per_second": 11.08,
"step": 1900000
},
{
"epoch": 0.57,
"learning_rate": 4.693187319359833e-05,
"loss": 2.1926,
"step": 1905000
},
{
"epoch": 0.57,
"eval_accuracy": 0.5155928772755766,
"eval_loss": 2.216796875,
"eval_runtime": 39.9117,
"eval_samples_per_second": 89.397,
"eval_steps_per_second": 11.175,
"step": 1905000
},
{
"epoch": 0.57,
"learning_rate": 4.685704083246658e-05,
"loss": 2.194,
"step": 1910000
},
{
"epoch": 0.57,
"eval_accuracy": 0.5154164420130716,
"eval_loss": 2.216796875,
"eval_runtime": 39.9039,
"eval_samples_per_second": 89.415,
"eval_steps_per_second": 11.177,
"step": 1910000
},
{
"epoch": 0.57,
"learning_rate": 4.992518261732325e-05,
"loss": 2.1942,
"step": 1915000
},
{
"epoch": 0.57,
"eval_accuracy": 0.5151652135414612,
"eval_loss": 2.21875,
"eval_runtime": 39.5459,
"eval_samples_per_second": 90.224,
"eval_steps_per_second": 11.278,
"step": 1915000
},
{
"epoch": 0.58,
"learning_rate": 4.985036523464649e-05,
"loss": 2.1947,
"step": 1920000
},
{
"epoch": 0.58,
"eval_accuracy": 0.5150559003896918,
"eval_loss": 2.21875,
"eval_runtime": 39.6198,
"eval_samples_per_second": 90.056,
"eval_steps_per_second": 11.257,
"step": 1920000
},
{
"epoch": 0.58,
"learning_rate": 4.977553287351474e-05,
"loss": 2.1941,
"step": 1925000
},
{
"epoch": 0.58,
"eval_accuracy": 0.5150868587509698,
"eval_loss": 2.220703125,
"eval_runtime": 39.5078,
"eval_samples_per_second": 90.311,
"eval_steps_per_second": 11.289,
"step": 1925000
},
{
"epoch": 0.58,
"learning_rate": 4.970071549083798e-05,
"loss": 2.1984,
"step": 1930000
},
{
"epoch": 0.58,
"eval_accuracy": 0.5151849392229835,
"eval_loss": 2.220703125,
"eval_runtime": 39.6089,
"eval_samples_per_second": 90.081,
"eval_steps_per_second": 11.26,
"step": 1930000
},
{
"epoch": 0.58,
"learning_rate": 4.9625868151251246e-05,
"loss": 2.1929,
"step": 1935000
},
{
"epoch": 0.58,
"eval_accuracy": 0.5150846670085785,
"eval_loss": 2.220703125,
"eval_runtime": 39.574,
"eval_samples_per_second": 90.16,
"eval_steps_per_second": 11.27,
"step": 1935000
},
{
"epoch": 0.58,
"learning_rate": 4.955106574702947e-05,
"loss": 2.1921,
"step": 1940000
},
{
"epoch": 0.58,
"eval_accuracy": 0.515442742921768,
"eval_loss": 2.21875,
"eval_runtime": 39.6485,
"eval_samples_per_second": 89.991,
"eval_steps_per_second": 11.249,
"step": 1940000
},
{
"epoch": 0.58,
"learning_rate": 4.947621840744274e-05,
"loss": 2.1932,
"step": 1945000
},
{
"epoch": 0.58,
"eval_accuracy": 0.5153391830937759,
"eval_loss": 2.21875,
"eval_runtime": 39.6644,
"eval_samples_per_second": 89.955,
"eval_steps_per_second": 11.244,
"step": 1945000
},
{
"epoch": 0.58,
"learning_rate": 4.940138604631099e-05,
"loss": 2.1959,
"step": 1950000
},
{
"epoch": 0.58,
"eval_accuracy": 0.5154117845604899,
"eval_loss": 2.21875,
"eval_runtime": 39.6108,
"eval_samples_per_second": 90.077,
"eval_steps_per_second": 11.26,
"step": 1950000
},
{
"epoch": 0.59,
"learning_rate": 4.932656866363423e-05,
"loss": 2.1927,
"step": 1955000
},
{
"epoch": 0.59,
"eval_accuracy": 0.5153761687466302,
"eval_loss": 2.21875,
"eval_runtime": 39.6613,
"eval_samples_per_second": 89.962,
"eval_steps_per_second": 11.245,
"step": 1955000
},
{
"epoch": 0.59,
"learning_rate": 4.9251751280957475e-05,
"loss": 2.1949,
"step": 1960000
},
{
"epoch": 0.59,
"eval_accuracy": 0.5154917831577748,
"eval_loss": 2.21875,
"eval_runtime": 39.8742,
"eval_samples_per_second": 89.481,
"eval_steps_per_second": 11.185,
"step": 1960000
},
{
"epoch": 0.59,
"learning_rate": 4.9176933898280714e-05,
"loss": 2.1918,
"step": 1965000
},
{
"epoch": 0.59,
"eval_accuracy": 0.5153786344568205,
"eval_loss": 2.216796875,
"eval_runtime": 39.9956,
"eval_samples_per_second": 89.21,
"eval_steps_per_second": 11.151,
"step": 1965000
},
{
"epoch": 0.59,
"learning_rate": 4.910210153714897e-05,
"loss": 2.1957,
"step": 1970000
},
{
"epoch": 0.59,
"eval_accuracy": 0.5154745231864427,
"eval_loss": 2.216796875,
"eval_runtime": 39.6981,
"eval_samples_per_second": 89.878,
"eval_steps_per_second": 11.235,
"step": 1970000
},
{
"epoch": 0.59,
"learning_rate": 4.902726917601722e-05,
"loss": 2.1884,
"step": 1975000
},
{
"epoch": 0.59,
"eval_accuracy": 0.51571890246308,
"eval_loss": 2.216796875,
"eval_runtime": 39.9761,
"eval_samples_per_second": 89.253,
"eval_steps_per_second": 11.157,
"step": 1975000
},
{
"epoch": 0.59,
"learning_rate": 4.895245179334046e-05,
"loss": 2.1942,
"step": 1980000
},
{
"epoch": 0.59,
"eval_accuracy": 0.5156073975689194,
"eval_loss": 2.21484375,
"eval_runtime": 39.9647,
"eval_samples_per_second": 89.279,
"eval_steps_per_second": 11.16,
"step": 1980000
},
{
"epoch": 0.59,
"learning_rate": 4.8877604453753726e-05,
"loss": 2.1938,
"step": 1985000
},
{
"epoch": 0.59,
"eval_accuracy": 0.5155802747568262,
"eval_loss": 2.216796875,
"eval_runtime": 39.7886,
"eval_samples_per_second": 89.674,
"eval_steps_per_second": 11.209,
"step": 1985000
},
{
"epoch": 0.6,
"learning_rate": 4.8802787071076965e-05,
"loss": 2.1935,
"step": 1990000
},
{
"epoch": 0.6,
"eval_accuracy": 0.5160071165875447,
"eval_loss": 2.21484375,
"eval_runtime": 40.1621,
"eval_samples_per_second": 88.84,
"eval_steps_per_second": 11.105,
"step": 1990000
},
{
"epoch": 0.6,
"learning_rate": 4.872795470994522e-05,
"loss": 2.1902,
"step": 1995000
},
{
"epoch": 0.6,
"eval_accuracy": 0.5157413678225916,
"eval_loss": 2.21484375,
"eval_runtime": 39.8406,
"eval_samples_per_second": 89.557,
"eval_steps_per_second": 11.195,
"step": 1995000
},
{
"epoch": 0.6,
"learning_rate": 4.865310737035848e-05,
"loss": 2.188,
"step": 2000000
},
{
"epoch": 0.6,
"eval_accuracy": 0.5158473933607739,
"eval_loss": 2.21484375,
"eval_runtime": 39.7424,
"eval_samples_per_second": 89.778,
"eval_steps_per_second": 11.222,
"step": 2000000
},
{
"epoch": 0.6,
"learning_rate": 4.8578289987681716e-05,
"loss": 2.1862,
"step": 2005000
},
{
"epoch": 0.6,
"eval_accuracy": 0.5159235564088739,
"eval_loss": 2.212890625,
"eval_runtime": 39.7499,
"eval_samples_per_second": 89.761,
"eval_steps_per_second": 11.22,
"step": 2005000
},
{
"epoch": 0.6,
"learning_rate": 4.850347260500496e-05,
"loss": 2.1886,
"step": 2010000
},
{
"epoch": 0.6,
"eval_accuracy": 0.5160695812456987,
"eval_loss": 2.212890625,
"eval_runtime": 40.1441,
"eval_samples_per_second": 88.88,
"eval_steps_per_second": 11.11,
"step": 2010000
},
{
"epoch": 0.6,
"learning_rate": 4.8428610286963236e-05,
"loss": 2.1811,
"step": 2015000
},
{
"epoch": 0.6,
"eval_accuracy": 0.516141360809016,
"eval_loss": 2.212890625,
"eval_runtime": 40.0164,
"eval_samples_per_second": 89.163,
"eval_steps_per_second": 11.145,
"step": 2015000
},
{
"epoch": 0.61,
"learning_rate": 4.835377792583148e-05,
"loss": 2.19,
"step": 2020000
},
{
"epoch": 0.61,
"eval_accuracy": 0.5160435543048012,
"eval_loss": 2.212890625,
"eval_runtime": 39.7821,
"eval_samples_per_second": 89.689,
"eval_steps_per_second": 11.211,
"step": 2020000
},
{
"epoch": 0.61,
"learning_rate": 4.827893058624475e-05,
"loss": 2.1895,
"step": 2025000
},
{
"epoch": 0.61,
"eval_accuracy": 0.5164950532374227,
"eval_loss": 2.212890625,
"eval_runtime": 39.7945,
"eval_samples_per_second": 89.661,
"eval_steps_per_second": 11.208,
"step": 2025000
},
{
"epoch": 0.61,
"learning_rate": 4.820411320356799e-05,
"loss": 2.1904,
"step": 2030000
},
{
"epoch": 0.61,
"eval_accuracy": 0.516082457732248,
"eval_loss": 2.212890625,
"eval_runtime": 39.8538,
"eval_samples_per_second": 89.527,
"eval_steps_per_second": 11.191,
"step": 2030000
},
{
"epoch": 0.61,
"learning_rate": 4.812928084243624e-05,
"loss": 2.1854,
"step": 2035000
},
{
"epoch": 0.61,
"eval_accuracy": 0.516504368142586,
"eval_loss": 2.212890625,
"eval_runtime": 39.7999,
"eval_samples_per_second": 89.649,
"eval_steps_per_second": 11.206,
"step": 2035000
},
{
"epoch": 0.61,
"learning_rate": 4.805447843821447e-05,
"loss": 2.1883,
"step": 2040000
},
{
"epoch": 0.61,
"eval_accuracy": 0.51652875127669,
"eval_loss": 2.2109375,
"eval_runtime": 39.8414,
"eval_samples_per_second": 89.555,
"eval_steps_per_second": 11.194,
"step": 2040000
},
{
"epoch": 0.61,
"learning_rate": 4.7979646077082725e-05,
"loss": 2.1859,
"step": 2045000
},
{
"epoch": 0.61,
"eval_accuracy": 0.5165314909546791,
"eval_loss": 2.2109375,
"eval_runtime": 39.9091,
"eval_samples_per_second": 89.403,
"eval_steps_per_second": 11.175,
"step": 2045000
},
{
"epoch": 0.61,
"learning_rate": 4.790481371595098e-05,
"loss": 2.1849,
"step": 2050000
},
{
"epoch": 0.61,
"eval_accuracy": 0.5167750483279198,
"eval_loss": 2.208984375,
"eval_runtime": 39.8945,
"eval_samples_per_second": 89.436,
"eval_steps_per_second": 11.179,
"step": 2050000
},
{
"epoch": 0.62,
"learning_rate": 4.782996637636424e-05,
"loss": 2.1844,
"step": 2055000
},
{
"epoch": 0.62,
"eval_accuracy": 0.5167309395122934,
"eval_loss": 2.2109375,
"eval_runtime": 39.9417,
"eval_samples_per_second": 89.33,
"eval_steps_per_second": 11.166,
"step": 2055000
},
{
"epoch": 0.62,
"learning_rate": 4.7755148993687484e-05,
"loss": 2.1866,
"step": 2060000
},
{
"epoch": 0.62,
"eval_accuracy": 0.5166953236984337,
"eval_loss": 2.208984375,
"eval_runtime": 39.9499,
"eval_samples_per_second": 89.312,
"eval_steps_per_second": 11.164,
"step": 2060000
},
{
"epoch": 0.62,
"learning_rate": 4.768031663255573e-05,
"loss": 2.1865,
"step": 2065000
},
{
"epoch": 0.62,
"eval_accuracy": 0.5167821714906917,
"eval_loss": 2.208984375,
"eval_runtime": 39.8531,
"eval_samples_per_second": 89.529,
"eval_steps_per_second": 11.191,
"step": 2065000
},
{
"epoch": 0.62,
"learning_rate": 4.7605499249878976e-05,
"loss": 2.1846,
"step": 2070000
},
{
"epoch": 0.62,
"eval_accuracy": 0.5171333982089081,
"eval_loss": 2.20703125,
"eval_runtime": 39.9613,
"eval_samples_per_second": 89.286,
"eval_steps_per_second": 11.161,
"step": 2070000
},
{
"epoch": 0.62,
"learning_rate": 4.753066688874723e-05,
"loss": 2.1821,
"step": 2075000
},
{
"epoch": 0.62,
"eval_accuracy": 0.5169824419517027,
"eval_loss": 2.20703125,
"eval_runtime": 39.9564,
"eval_samples_per_second": 89.297,
"eval_steps_per_second": 11.162,
"step": 2075000
},
{
"epoch": 0.62,
"learning_rate": 4.7455849506070474e-05,
"loss": 2.184,
"step": 2080000
},
{
"epoch": 0.62,
"eval_accuracy": 0.5170257288639323,
"eval_loss": 2.20703125,
"eval_runtime": 41.0867,
"eval_samples_per_second": 86.841,
"eval_steps_per_second": 10.855,
"step": 2080000
},
{
"epoch": 0.62,
"learning_rate": 4.738101714493872e-05,
"loss": 2.1847,
"step": 2085000
},
{
"epoch": 0.62,
"eval_accuracy": 0.5173227099579624,
"eval_loss": 2.205078125,
"eval_runtime": 39.9314,
"eval_samples_per_second": 89.353,
"eval_steps_per_second": 11.169,
"step": 2085000
},
{
"epoch": 0.63,
"learning_rate": 4.730618478380697e-05,
"loss": 2.1836,
"step": 2090000
},
{
"epoch": 0.63,
"eval_accuracy": 0.5173509286412512,
"eval_loss": 2.205078125,
"eval_runtime": 40.0844,
"eval_samples_per_second": 89.012,
"eval_steps_per_second": 11.127,
"step": 2090000
},
{
"epoch": 0.63,
"learning_rate": 4.723136740113022e-05,
"loss": 2.1791,
"step": 2095000
},
{
"epoch": 0.63,
"eval_accuracy": 0.5174243520113619,
"eval_loss": 2.205078125,
"eval_runtime": 39.9803,
"eval_samples_per_second": 89.244,
"eval_steps_per_second": 11.156,
"step": 2095000
},
{
"epoch": 0.63,
"learning_rate": 4.715652006154348e-05,
"loss": 2.1812,
"step": 2100000
},
{
"epoch": 0.63,
"eval_accuracy": 0.5173136690205979,
"eval_loss": 2.205078125,
"eval_runtime": 40.0609,
"eval_samples_per_second": 89.064,
"eval_steps_per_second": 11.133,
"step": 2100000
},
{
"epoch": 0.63,
"learning_rate": 4.708168770041173e-05,
"loss": 2.1835,
"step": 2105000
},
{
"epoch": 0.63,
"eval_accuracy": 0.5175572263938386,
"eval_loss": 2.205078125,
"eval_runtime": 40.0398,
"eval_samples_per_second": 89.111,
"eval_steps_per_second": 11.139,
"step": 2105000
},
{
"epoch": 0.63,
"learning_rate": 4.700687031773498e-05,
"loss": 2.1806,
"step": 2110000
},
{
"epoch": 0.63,
"eval_accuracy": 0.517552294973458,
"eval_loss": 2.205078125,
"eval_runtime": 40.3766,
"eval_samples_per_second": 88.368,
"eval_steps_per_second": 11.046,
"step": 2110000
},
{
"epoch": 0.63,
"learning_rate": 4.6932037956603224e-05,
"loss": 2.1832,
"step": 2115000
},
{
"epoch": 0.63,
"eval_accuracy": 0.5174777757321516,
"eval_loss": 2.205078125,
"eval_runtime": 41.019,
"eval_samples_per_second": 86.984,
"eval_steps_per_second": 10.873,
"step": 2115000
},
{
"epoch": 0.64,
"learning_rate": 4.6857205595471476e-05,
"loss": 2.1766,
"step": 2120000
},
{
"epoch": 0.64,
"eval_accuracy": 0.5177577708226486,
"eval_loss": 2.203125,
"eval_runtime": 40.432,
"eval_samples_per_second": 88.247,
"eval_steps_per_second": 11.031,
"step": 2120000
},
{
"epoch": 0.64,
"learning_rate": 4.678237323433973e-05,
"loss": 2.1775,
"step": 2125000
},
{
"epoch": 0.64,
"eval_accuracy": 0.5178353037097432,
"eval_loss": 2.203125,
"eval_runtime": 41.1107,
"eval_samples_per_second": 86.79,
"eval_steps_per_second": 10.849,
"step": 2125000
},
{
"epoch": 0.64,
"learning_rate": 4.6707540873207975e-05,
"loss": 2.1801,
"step": 2130000
},
{
"epoch": 0.64,
"eval_accuracy": 0.5176514713166673,
"eval_loss": 2.203125,
"eval_runtime": 41.0472,
"eval_samples_per_second": 86.924,
"eval_steps_per_second": 10.866,
"step": 2130000
},
{
"epoch": 0.64,
"learning_rate": 4.6632708512076235e-05,
"loss": 2.1789,
"step": 2135000
},
{
"epoch": 0.64,
"eval_accuracy": 0.5177799622143612,
"eval_loss": 2.203125,
"eval_runtime": 41.4469,
"eval_samples_per_second": 86.086,
"eval_steps_per_second": 10.761,
"step": 2135000
},
{
"epoch": 0.64,
"learning_rate": 4.655787615094449e-05,
"loss": 2.1794,
"step": 2140000
},
{
"epoch": 0.64,
"eval_accuracy": 0.5178131123180306,
"eval_loss": 2.203125,
"eval_runtime": 40.4764,
"eval_samples_per_second": 88.15,
"eval_steps_per_second": 11.019,
"step": 2140000
},
{
"epoch": 0.64,
"learning_rate": 4.6483043789812734e-05,
"loss": 2.1799,
"step": 2145000
},
{
"epoch": 0.64,
"eval_accuracy": 0.5178917410763209,
"eval_loss": 2.201171875,
"eval_runtime": 40.376,
"eval_samples_per_second": 88.369,
"eval_steps_per_second": 11.046,
"step": 2145000
},
{
"epoch": 0.64,
"learning_rate": 4.6408196450226e-05,
"loss": 2.1746,
"step": 2150000
},
{
"epoch": 0.64,
"eval_accuracy": 0.5180391357521402,
"eval_loss": 2.201171875,
"eval_runtime": 40.5387,
"eval_samples_per_second": 88.015,
"eval_steps_per_second": 11.002,
"step": 2150000
},
{
"epoch": 0.65,
"learning_rate": 4.633336408909425e-05,
"loss": 2.1766,
"step": 2155000
},
{
"epoch": 0.65,
"eval_accuracy": 0.5178873575915381,
"eval_loss": 2.201171875,
"eval_runtime": 40.514,
"eval_samples_per_second": 88.068,
"eval_steps_per_second": 11.009,
"step": 2155000
},
{
"epoch": 0.65,
"learning_rate": 4.62585317279625e-05,
"loss": 2.1754,
"step": 2160000
},
{
"epoch": 0.65,
"eval_accuracy": 0.5177369492699306,
"eval_loss": 2.201171875,
"eval_runtime": 42.6971,
"eval_samples_per_second": 83.565,
"eval_steps_per_second": 10.446,
"step": 2160000
},
{
"epoch": 0.65,
"learning_rate": 4.618369936683075e-05,
"loss": 2.1764,
"step": 2165000
},
{
"epoch": 0.65,
"eval_accuracy": 0.5177405108513166,
"eval_loss": 2.201171875,
"eval_runtime": 40.5808,
"eval_samples_per_second": 87.923,
"eval_steps_per_second": 10.99,
"step": 2165000
},
{
"epoch": 0.65,
"learning_rate": 4.6108881984154e-05,
"loss": 2.1745,
"step": 2170000
},
{
"epoch": 0.65,
"eval_accuracy": 0.51831337751886,
"eval_loss": 2.19921875,
"eval_runtime": 42.1403,
"eval_samples_per_second": 84.67,
"eval_steps_per_second": 10.584,
"step": 2170000
},
{
"epoch": 0.65,
"learning_rate": 4.6034049623022244e-05,
"loss": 2.1735,
"step": 2175000
},
{
"epoch": 0.65,
"eval_accuracy": 0.5180128348434438,
"eval_loss": 2.19921875,
"eval_runtime": 40.2519,
"eval_samples_per_second": 88.642,
"eval_steps_per_second": 11.08,
"step": 2175000
},
{
"epoch": 0.65,
"learning_rate": 4.59592172618905e-05,
"loss": 2.1778,
"step": 2180000
},
{
"epoch": 0.65,
"eval_accuracy": 0.5180588614336625,
"eval_loss": 2.19921875,
"eval_runtime": 42.4462,
"eval_samples_per_second": 84.059,
"eval_steps_per_second": 10.507,
"step": 2180000
},
{
"epoch": 0.65,
"learning_rate": 4.5884384900758757e-05,
"loss": 2.1717,
"step": 2185000
},
{
"epoch": 0.65,
"eval_accuracy": 0.5183484453971218,
"eval_loss": 2.19921875,
"eval_runtime": 41.2843,
"eval_samples_per_second": 86.425,
"eval_steps_per_second": 10.803,
"step": 2185000
},
{
"epoch": 0.66,
"learning_rate": 4.580953756117201e-05,
"loss": 2.1752,
"step": 2190000
},
{
"epoch": 0.66,
"eval_accuracy": 0.518520497174844,
"eval_loss": 2.197265625,
"eval_runtime": 40.4537,
"eval_samples_per_second": 88.2,
"eval_steps_per_second": 11.025,
"step": 2190000
},
{
"epoch": 0.66,
"learning_rate": 4.5734720178495255e-05,
"loss": 2.1747,
"step": 2195000
},
{
"epoch": 0.66,
"eval_accuracy": 0.5184799499406038,
"eval_loss": 2.197265625,
"eval_runtime": 43.6458,
"eval_samples_per_second": 81.749,
"eval_steps_per_second": 10.219,
"step": 2195000
},
{
"epoch": 0.66,
"learning_rate": 4.56599027958185e-05,
"loss": 2.1754,
"step": 2200000
},
{
"epoch": 0.66,
"eval_accuracy": 0.5186040573535149,
"eval_loss": 2.197265625,
"eval_runtime": 41.0029,
"eval_samples_per_second": 87.018,
"eval_steps_per_second": 10.877,
"step": 2200000
},
{
"epoch": 0.66,
"learning_rate": 4.558508541314174e-05,
"loss": 2.1728,
"step": 2205000
},
{
"epoch": 0.66,
"eval_accuracy": 0.5187728215176501,
"eval_loss": 2.197265625,
"eval_runtime": 40.4446,
"eval_samples_per_second": 88.22,
"eval_steps_per_second": 11.027,
"step": 2205000
},
{
"epoch": 0.66,
"learning_rate": 4.551025305200999e-05,
"loss": 2.1684,
"step": 2210000
},
{
"epoch": 0.66,
"eval_accuracy": 0.5185736469278347,
"eval_loss": 2.197265625,
"eval_runtime": 40.6605,
"eval_samples_per_second": 87.751,
"eval_steps_per_second": 10.969,
"step": 2210000
},
{
"epoch": 0.66,
"learning_rate": 4.543540571242325e-05,
"loss": 2.1722,
"step": 2215000
},
{
"epoch": 0.66,
"eval_accuracy": 0.5188199439790645,
"eval_loss": 2.1953125,
"eval_runtime": 40.5747,
"eval_samples_per_second": 87.937,
"eval_steps_per_second": 10.992,
"step": 2215000
},
{
"epoch": 0.67,
"learning_rate": 4.536058832974649e-05,
"loss": 2.1692,
"step": 2220000
},
{
"epoch": 0.67,
"eval_accuracy": 0.519004872243336,
"eval_loss": 2.1953125,
"eval_runtime": 41.1424,
"eval_samples_per_second": 86.723,
"eval_steps_per_second": 10.84,
"step": 2220000
},
{
"epoch": 0.67,
"learning_rate": 4.528577094706974e-05,
"loss": 2.176,
"step": 2225000
},
{
"epoch": 0.67,
"eval_accuracy": 0.519122404429073,
"eval_loss": 2.1953125,
"eval_runtime": 42.7268,
"eval_samples_per_second": 83.507,
"eval_steps_per_second": 10.438,
"step": 2225000
},
{
"epoch": 0.67,
"learning_rate": 4.5210923607483e-05,
"loss": 2.1697,
"step": 2230000
},
{
"epoch": 0.67,
"eval_accuracy": 0.5190202144400756,
"eval_loss": 2.1953125,
"eval_runtime": 40.4249,
"eval_samples_per_second": 88.262,
"eval_steps_per_second": 11.033,
"step": 2230000
},
{
"epoch": 0.67,
"learning_rate": 4.5136106224806244e-05,
"loss": 2.1731,
"step": 2235000
},
{
"epoch": 0.67,
"eval_accuracy": 0.5190837749694251,
"eval_loss": 2.1953125,
"eval_runtime": 42.0959,
"eval_samples_per_second": 84.759,
"eval_steps_per_second": 10.595,
"step": 2235000
},
{
"epoch": 0.67,
"learning_rate": 4.506124390676451e-05,
"loss": 2.173,
"step": 2240000
},
{
"epoch": 0.67,
"eval_accuracy": 0.5191125415883119,
"eval_loss": 2.193359375,
"eval_runtime": 40.9494,
"eval_samples_per_second": 87.132,
"eval_steps_per_second": 10.892,
"step": 2240000
},
{
"epoch": 0.67,
"learning_rate": 4.498644150254274e-05,
"loss": 2.1714,
"step": 2245000
},
{
"epoch": 0.67,
"eval_accuracy": 0.5192900727220126,
"eval_loss": 2.193359375,
"eval_runtime": 40.4515,
"eval_samples_per_second": 88.204,
"eval_steps_per_second": 11.026,
"step": 2245000
},
{
"epoch": 0.67,
"learning_rate": 4.4911594162956e-05,
"loss": 2.1719,
"step": 2250000
},
{
"epoch": 0.67,
"eval_accuracy": 0.5192147315773094,
"eval_loss": 2.193359375,
"eval_runtime": 40.7422,
"eval_samples_per_second": 87.575,
"eval_steps_per_second": 10.947,
"step": 2250000
},
{
"epoch": 0.68,
"learning_rate": 4.483674682336927e-05,
"loss": 2.1667,
"step": 2255000
},
{
"epoch": 0.68,
"eval_accuracy": 0.5189793932380363,
"eval_loss": 2.193359375,
"eval_runtime": 40.4467,
"eval_samples_per_second": 88.215,
"eval_steps_per_second": 11.027,
"step": 2255000
},
{
"epoch": 0.68,
"learning_rate": 4.476191446223752e-05,
"loss": 2.1653,
"step": 2260000
},
{
"epoch": 0.68,
"eval_accuracy": 0.5191834992482324,
"eval_loss": 2.193359375,
"eval_runtime": 40.3493,
"eval_samples_per_second": 88.428,
"eval_steps_per_second": 11.053,
"step": 2260000
},
{
"epoch": 0.68,
"learning_rate": 4.468709707956076e-05,
"loss": 2.1656,
"step": 2265000
},
{
"epoch": 0.68,
"eval_accuracy": 0.5193434964428021,
"eval_loss": 2.19140625,
"eval_runtime": 44.129,
"eval_samples_per_second": 80.854,
"eval_steps_per_second": 10.107,
"step": 2265000
},
{
"epoch": 0.68,
"learning_rate": 4.4612264718429014e-05,
"loss": 2.1695,
"step": 2270000
},
{
"epoch": 0.68,
"eval_accuracy": 0.5194328099452503,
"eval_loss": 2.19140625,
"eval_runtime": 42.07,
"eval_samples_per_second": 84.811,
"eval_steps_per_second": 10.601,
"step": 2270000
},
{
"epoch": 0.68,
"learning_rate": 4.453744733575226e-05,
"loss": 2.17,
"step": 2275000
},
{
"epoch": 0.68,
"eval_accuracy": 0.5195643144887323,
"eval_loss": 2.19140625,
"eval_runtime": 40.4092,
"eval_samples_per_second": 88.297,
"eval_steps_per_second": 11.037,
"step": 2275000
},
{
"epoch": 0.68,
"learning_rate": 4.44626299530755e-05,
"loss": 2.1628,
"step": 2280000
},
{
"epoch": 0.68,
"eval_accuracy": 0.5197062298085732,
"eval_loss": 2.19140625,
"eval_runtime": 40.3663,
"eval_samples_per_second": 88.391,
"eval_steps_per_second": 11.049,
"step": 2280000
},
{
"epoch": 0.68,
"learning_rate": 4.438779759194375e-05,
"loss": 2.1648,
"step": 2285000
},
{
"epoch": 0.68,
"eval_accuracy": 0.5196210258231089,
"eval_loss": 2.189453125,
"eval_runtime": 41.4265,
"eval_samples_per_second": 86.128,
"eval_steps_per_second": 10.766,
"step": 2285000
},
{
"epoch": 0.69,
"learning_rate": 4.431298020926699e-05,
"loss": 2.1647,
"step": 2290000
},
{
"epoch": 0.69,
"eval_accuracy": 0.5199106097865681,
"eval_loss": 2.189453125,
"eval_runtime": 43.7368,
"eval_samples_per_second": 81.579,
"eval_steps_per_second": 10.197,
"step": 2290000
},
{
"epoch": 0.69,
"learning_rate": 4.423813286968026e-05,
"loss": 2.1648,
"step": 2295000
},
{
"epoch": 0.69,
"eval_accuracy": 0.5198015706025977,
"eval_loss": 2.189453125,
"eval_runtime": 40.3741,
"eval_samples_per_second": 88.374,
"eval_steps_per_second": 11.047,
"step": 2295000
},
{
"epoch": 0.69,
"learning_rate": 4.4163315487003496e-05,
"loss": 2.168,
"step": 2300000
},
{
"epoch": 0.69,
"eval_accuracy": 0.5196733536727027,
"eval_loss": 2.189453125,
"eval_runtime": 40.4104,
"eval_samples_per_second": 88.294,
"eval_steps_per_second": 11.037,
"step": 2300000
},
{
"epoch": 0.69,
"learning_rate": 4.4088498104326735e-05,
"loss": 2.1607,
"step": 2305000
},
{
"epoch": 0.69,
"eval_accuracy": 0.5197840366634667,
"eval_loss": 2.189453125,
"eval_runtime": 40.7993,
"eval_samples_per_second": 87.452,
"eval_steps_per_second": 10.932,
"step": 2305000
},
{
"epoch": 0.69,
"learning_rate": 4.401365076474e-05,
"loss": 2.1674,
"step": 2310000
},
{
"epoch": 0.69,
"eval_accuracy": 0.5199684169921404,
"eval_loss": 2.1875,
"eval_runtime": 40.3824,
"eval_samples_per_second": 88.355,
"eval_steps_per_second": 11.044,
"step": 2310000
},
{
"epoch": 0.69,
"learning_rate": 4.393881840360825e-05,
"loss": 2.1656,
"step": 2315000
},
{
"epoch": 0.69,
"eval_accuracy": 0.5199689649277383,
"eval_loss": 2.1875,
"eval_runtime": 40.7535,
"eval_samples_per_second": 87.551,
"eval_steps_per_second": 10.944,
"step": 2315000
},
{
"epoch": 0.7,
"learning_rate": 4.38639860424765e-05,
"loss": 2.1637,
"step": 2320000
},
{
"epoch": 0.7,
"eval_accuracy": 0.5201547150954066,
"eval_loss": 2.1875,
"eval_runtime": 40.6083,
"eval_samples_per_second": 87.864,
"eval_steps_per_second": 10.983,
"step": 2320000
},
{
"epoch": 0.7,
"learning_rate": 4.3789168659799746e-05,
"loss": 2.1649,
"step": 2325000
},
{
"epoch": 0.7,
"eval_accuracy": 0.5201152637323619,
"eval_loss": 2.1875,
"eval_runtime": 41.4342,
"eval_samples_per_second": 86.112,
"eval_steps_per_second": 10.764,
"step": 2325000
},
{
"epoch": 0.7,
"learning_rate": 4.371436625557798e-05,
"loss": 2.1625,
"step": 2330000
},
{
"epoch": 0.7,
"eval_accuracy": 0.5200544428810016,
"eval_loss": 2.1875,
"eval_runtime": 44.1316,
"eval_samples_per_second": 80.849,
"eval_steps_per_second": 10.106,
"step": 2330000
},
{
"epoch": 0.7,
"learning_rate": 4.363950393753625e-05,
"loss": 2.1627,
"step": 2335000
},
{
"epoch": 0.7,
"eval_accuracy": 0.5202593707945943,
"eval_loss": 2.1875,
"eval_runtime": 42.6548,
"eval_samples_per_second": 83.648,
"eval_steps_per_second": 10.456,
"step": 2335000
},
{
"epoch": 0.7,
"learning_rate": 4.356468655485949e-05,
"loss": 2.1598,
"step": 2340000
},
{
"epoch": 0.7,
"eval_accuracy": 0.5203048494492152,
"eval_loss": 2.185546875,
"eval_runtime": 43.9859,
"eval_samples_per_second": 81.117,
"eval_steps_per_second": 10.14,
"step": 2340000
},
{
"epoch": 0.7,
"learning_rate": 4.348982423681776e-05,
"loss": 2.1638,
"step": 2345000
},
{
"epoch": 0.7,
"eval_accuracy": 0.5201473179648357,
"eval_loss": 2.1875,
"eval_runtime": 44.5632,
"eval_samples_per_second": 80.066,
"eval_steps_per_second": 10.008,
"step": 2345000
},
{
"epoch": 0.7,
"learning_rate": 4.341499187568602e-05,
"loss": 2.1588,
"step": 2350000
},
{
"epoch": 0.7,
"eval_accuracy": 0.5204785450337309,
"eval_loss": 2.185546875,
"eval_runtime": 41.6749,
"eval_samples_per_second": 85.615,
"eval_steps_per_second": 10.702,
"step": 2350000
},
{
"epoch": 0.71,
"learning_rate": 4.334015951455427e-05,
"loss": 2.1633,
"step": 2355000
},
{
"epoch": 0.71,
"eval_accuracy": 0.5204648466437849,
"eval_loss": 2.185546875,
"eval_runtime": 40.4241,
"eval_samples_per_second": 88.264,
"eval_steps_per_second": 11.033,
"step": 2355000
},
{
"epoch": 0.71,
"learning_rate": 4.326534213187751e-05,
"loss": 2.1621,
"step": 2360000
},
{
"epoch": 0.71,
"eval_accuracy": 0.5205470369834612,
"eval_loss": 2.185546875,
"eval_runtime": 40.3566,
"eval_samples_per_second": 88.412,
"eval_steps_per_second": 11.051,
"step": 2360000
},
{
"epoch": 0.71,
"learning_rate": 4.319049479229077e-05,
"loss": 2.165,
"step": 2365000
},
{
"epoch": 0.71,
"eval_accuracy": 0.5207188147933844,
"eval_loss": 2.18359375,
"eval_runtime": 40.8467,
"eval_samples_per_second": 87.351,
"eval_steps_per_second": 10.919,
"step": 2365000
},
{
"epoch": 0.71,
"learning_rate": 4.311566243115902e-05,
"loss": 2.159,
"step": 2370000
},
{
"epoch": 0.71,
"eval_accuracy": 0.52062347399936,
"eval_loss": 2.18359375,
"eval_runtime": 43.216,
"eval_samples_per_second": 82.562,
"eval_steps_per_second": 10.32,
"step": 2370000
},
{
"epoch": 0.71,
"learning_rate": 4.3040830070027275e-05,
"loss": 2.1573,
"step": 2375000
},
{
"epoch": 0.71,
"eval_accuracy": 0.5207314173121348,
"eval_loss": 2.18359375,
"eval_runtime": 44.6428,
"eval_samples_per_second": 79.923,
"eval_steps_per_second": 9.99,
"step": 2375000
},
{
"epoch": 0.71,
"learning_rate": 4.2966012687350514e-05,
"loss": 2.1556,
"step": 2380000
},
{
"epoch": 0.71,
"eval_accuracy": 0.520848401562274,
"eval_loss": 2.18359375,
"eval_runtime": 40.6164,
"eval_samples_per_second": 87.846,
"eval_steps_per_second": 10.981,
"step": 2380000
},
{
"epoch": 0.71,
"learning_rate": 4.289119530467376e-05,
"loss": 2.1562,
"step": 2385000
},
{
"epoch": 0.71,
"eval_accuracy": 0.5209563448750487,
"eval_loss": 2.18359375,
"eval_runtime": 40.356,
"eval_samples_per_second": 88.413,
"eval_steps_per_second": 11.052,
"step": 2385000
},
{
"epoch": 0.72,
"learning_rate": 4.281636294354201e-05,
"loss": 2.1572,
"step": 2390000
},
{
"epoch": 0.72,
"eval_accuracy": 0.5209188112865967,
"eval_loss": 2.18359375,
"eval_runtime": 44.3639,
"eval_samples_per_second": 80.426,
"eval_steps_per_second": 10.053,
"step": 2390000
},
{
"epoch": 0.72,
"learning_rate": 4.274154556086525e-05,
"loss": 2.1577,
"step": 2395000
},
{
"epoch": 0.72,
"eval_accuracy": 0.5208686751793942,
"eval_loss": 2.181640625,
"eval_runtime": 42.2402,
"eval_samples_per_second": 84.469,
"eval_steps_per_second": 10.559,
"step": 2395000
},
{
"epoch": 0.72,
"learning_rate": 4.2666713199733505e-05,
"loss": 2.1529,
"step": 2400000
},
{
"epoch": 0.72,
"eval_accuracy": 0.5209933305279031,
"eval_loss": 2.181640625,
"eval_runtime": 42.0012,
"eval_samples_per_second": 84.95,
"eval_steps_per_second": 10.619,
"step": 2400000
},
{
"epoch": 0.72,
"learning_rate": 4.259188083860176e-05,
"loss": 2.1636,
"step": 2405000
},
{
"epoch": 0.72,
"eval_accuracy": 0.5210516856690732,
"eval_loss": 2.181640625,
"eval_runtime": 40.8766,
"eval_samples_per_second": 87.287,
"eval_steps_per_second": 10.911,
"step": 2405000
},
{
"epoch": 0.72,
"learning_rate": 4.2517063455924996e-05,
"loss": 2.1521,
"step": 2410000
},
{
"epoch": 0.72,
"eval_accuracy": 0.5212941471711181,
"eval_loss": 2.181640625,
"eval_runtime": 44.8987,
"eval_samples_per_second": 79.468,
"eval_steps_per_second": 9.933,
"step": 2410000
},
{
"epoch": 0.72,
"learning_rate": 4.244221611633826e-05,
"loss": 2.1574,
"step": 2415000
},
{
"epoch": 0.72,
"eval_accuracy": 0.5213697622836202,
"eval_loss": 2.181640625,
"eval_runtime": 40.8755,
"eval_samples_per_second": 87.289,
"eval_steps_per_second": 10.911,
"step": 2415000
},
{
"epoch": 0.72,
"learning_rate": 4.236736877675152e-05,
"loss": 2.1546,
"step": 2420000
},
{
"epoch": 0.72,
"eval_accuracy": 0.5213067496898685,
"eval_loss": 2.1796875,
"eval_runtime": 43.2862,
"eval_samples_per_second": 82.428,
"eval_steps_per_second": 10.304,
"step": 2420000
},
{
"epoch": 0.73,
"learning_rate": 4.2292536415619776e-05,
"loss": 2.1572,
"step": 2425000
},
{
"epoch": 0.73,
"eval_accuracy": 0.521194148924512,
"eval_loss": 2.1796875,
"eval_runtime": 42.5113,
"eval_samples_per_second": 83.931,
"eval_steps_per_second": 10.491,
"step": 2425000
},
{
"epoch": 0.73,
"learning_rate": 4.2217719032943015e-05,
"loss": 2.1544,
"step": 2430000
},
{
"epoch": 0.73,
"eval_accuracy": 0.5212160663484257,
"eval_loss": 2.1796875,
"eval_runtime": 42.4347,
"eval_samples_per_second": 84.082,
"eval_steps_per_second": 10.51,
"step": 2430000
},
{
"epoch": 0.73,
"learning_rate": 4.214290165026626e-05,
"loss": 2.15,
"step": 2435000
},
{
"epoch": 0.73,
"eval_accuracy": 0.5213122290458468,
"eval_loss": 2.1796875,
"eval_runtime": 41.657,
"eval_samples_per_second": 85.652,
"eval_steps_per_second": 10.706,
"step": 2435000
},
{
"epoch": 0.73,
"learning_rate": 4.206805431067952e-05,
"loss": 2.1537,
"step": 2440000
},
{
"epoch": 0.73,
"eval_accuracy": 0.5217242766154238,
"eval_loss": 2.177734375,
"eval_runtime": 45.9893,
"eval_samples_per_second": 77.583,
"eval_steps_per_second": 9.698,
"step": 2440000
},
{
"epoch": 0.73,
"learning_rate": 4.199322194954777e-05,
"loss": 2.1552,
"step": 2445000
},
{
"epoch": 0.73,
"eval_accuracy": 0.5215689368734356,
"eval_loss": 2.177734375,
"eval_runtime": 43.8458,
"eval_samples_per_second": 81.376,
"eval_steps_per_second": 10.172,
"step": 2445000
},
{
"epoch": 0.73,
"learning_rate": 4.1918389588416026e-05,
"loss": 2.1522,
"step": 2450000
},
{
"epoch": 0.73,
"eval_accuracy": 0.5215245540900105,
"eval_loss": 2.177734375,
"eval_runtime": 45.304,
"eval_samples_per_second": 78.757,
"eval_steps_per_second": 9.845,
"step": 2450000
},
{
"epoch": 0.74,
"learning_rate": 4.1843542248829286e-05,
"loss": 2.1487,
"step": 2455000
},
{
"epoch": 0.74,
"eval_accuracy": 0.5214875684371562,
"eval_loss": 2.177734375,
"eval_runtime": 41.9232,
"eval_samples_per_second": 85.108,
"eval_steps_per_second": 10.639,
"step": 2455000
},
{
"epoch": 0.74,
"learning_rate": 4.176870988769754e-05,
"loss": 2.1582,
"step": 2460000
},
{
"epoch": 0.74,
"eval_accuracy": 0.52146373323865,
"eval_loss": 2.177734375,
"eval_runtime": 44.2104,
"eval_samples_per_second": 80.705,
"eval_steps_per_second": 10.088,
"step": 2460000
},
{
"epoch": 0.74,
"learning_rate": 4.169390748347577e-05,
"loss": 2.1582,
"step": 2465000
},
{
"epoch": 0.74,
"eval_accuracy": 0.5218020834703172,
"eval_loss": 2.177734375,
"eval_runtime": 43.1549,
"eval_samples_per_second": 82.679,
"eval_steps_per_second": 10.335,
"step": 2465000
},
{
"epoch": 0.74,
"learning_rate": 4.161907512234402e-05,
"loss": 2.1529,
"step": 2470000
},
{
"epoch": 0.74,
"eval_accuracy": 0.5217976999855345,
"eval_loss": 2.177734375,
"eval_runtime": 45.1081,
"eval_samples_per_second": 79.099,
"eval_steps_per_second": 9.887,
"step": 2470000
},
{
"epoch": 0.74,
"learning_rate": 4.1544242761212276e-05,
"loss": 2.1549,
"step": 2475000
},
{
"epoch": 0.74,
"eval_accuracy": 0.5219418070477668,
"eval_loss": 2.17578125,
"eval_runtime": 40.8292,
"eval_samples_per_second": 87.388,
"eval_steps_per_second": 10.924,
"step": 2475000
},
{
"epoch": 0.74,
"learning_rate": 4.1469425378535516e-05,
"loss": 2.1525,
"step": 2480000
},
{
"epoch": 0.74,
"eval_accuracy": 0.521930574368011,
"eval_loss": 2.17578125,
"eval_runtime": 43.1177,
"eval_samples_per_second": 82.75,
"eval_steps_per_second": 10.344,
"step": 2480000
},
{
"epoch": 0.74,
"learning_rate": 4.139459301740377e-05,
"loss": 2.1478,
"step": 2485000
},
{
"epoch": 0.74,
"eval_accuracy": 0.5221272832476362,
"eval_loss": 2.17578125,
"eval_runtime": 44.4519,
"eval_samples_per_second": 80.266,
"eval_steps_per_second": 10.033,
"step": 2485000
},
{
"epoch": 0.75,
"learning_rate": 4.131977563472701e-05,
"loss": 2.1524,
"step": 2490000
},
{
"epoch": 0.75,
"eval_accuracy": 0.5219949568007575,
"eval_loss": 2.17578125,
"eval_runtime": 43.8567,
"eval_samples_per_second": 81.356,
"eval_steps_per_second": 10.169,
"step": 2490000
},
{
"epoch": 0.75,
"learning_rate": 4.124494327359526e-05,
"loss": 2.1477,
"step": 2495000
},
{
"epoch": 0.75,
"eval_accuracy": 0.5220256411942366,
"eval_loss": 2.173828125,
"eval_runtime": 40.3311,
"eval_samples_per_second": 88.468,
"eval_steps_per_second": 11.058,
"step": 2495000
},
{
"epoch": 0.75,
"learning_rate": 4.117011091246352e-05,
"loss": 2.1524,
"step": 2500000
},
{
"epoch": 0.75,
"eval_accuracy": 0.5221511184461423,
"eval_loss": 2.173828125,
"eval_runtime": 42.2823,
"eval_samples_per_second": 84.385,
"eval_steps_per_second": 10.548,
"step": 2500000
},
{
"epoch": 0.75,
"learning_rate": 4.1095278551331766e-05,
"loss": 2.147,
"step": 2505000
},
{
"epoch": 0.75,
"eval_accuracy": 0.5221815288718226,
"eval_loss": 2.173828125,
"eval_runtime": 40.7144,
"eval_samples_per_second": 87.635,
"eval_steps_per_second": 10.954,
"step": 2505000
},
{
"epoch": 0.75,
"learning_rate": 4.102046116865501e-05,
"loss": 2.1481,
"step": 2510000
},
{
"epoch": 0.75,
"eval_accuracy": 0.5222954994761736,
"eval_loss": 2.173828125,
"eval_runtime": 42.7002,
"eval_samples_per_second": 83.559,
"eval_steps_per_second": 10.445,
"step": 2510000
},
{
"epoch": 0.75,
"learning_rate": 4.094559885061328e-05,
"loss": 2.1494,
"step": 2515000
},
{
"epoch": 0.75,
"eval_accuracy": 0.5222659109538901,
"eval_loss": 2.173828125,
"eval_runtime": 42.0725,
"eval_samples_per_second": 84.806,
"eval_steps_per_second": 10.601,
"step": 2515000
},
{
"epoch": 0.75,
"learning_rate": 4.087078146793652e-05,
"loss": 2.1484,
"step": 2520000
},
{
"epoch": 0.75,
"eval_accuracy": 0.5222675547606836,
"eval_loss": 2.173828125,
"eval_runtime": 44.1957,
"eval_samples_per_second": 80.732,
"eval_steps_per_second": 10.091,
"step": 2520000
},
{
"epoch": 0.76,
"learning_rate": 4.079594910680478e-05,
"loss": 2.1474,
"step": 2525000
},
{
"epoch": 0.76,
"eval_accuracy": 0.5223297454510387,
"eval_loss": 2.173828125,
"eval_runtime": 43.3427,
"eval_samples_per_second": 82.321,
"eval_steps_per_second": 10.29,
"step": 2525000
},
{
"epoch": 0.76,
"learning_rate": 4.072110176721804e-05,
"loss": 2.1487,
"step": 2530000
},
{
"epoch": 0.76,
"eval_accuracy": 0.5222724861810643,
"eval_loss": 2.173828125,
"eval_runtime": 44.0468,
"eval_samples_per_second": 81.005,
"eval_steps_per_second": 10.126,
"step": 2530000
},
{
"epoch": 0.76,
"learning_rate": 4.064626940608628e-05,
"loss": 2.1465,
"step": 2535000
},
{
"epoch": 0.76,
"eval_accuracy": 0.5224768661590591,
"eval_loss": 2.171875,
"eval_runtime": 40.4028,
"eval_samples_per_second": 88.311,
"eval_steps_per_second": 11.039,
"step": 2535000
},
{
"epoch": 0.76,
"learning_rate": 4.057145202340953e-05,
"loss": 2.1456,
"step": 2540000
},
{
"epoch": 0.76,
"eval_accuracy": 0.5226201513178947,
"eval_loss": 2.171875,
"eval_runtime": 42.5959,
"eval_samples_per_second": 83.764,
"eval_steps_per_second": 10.47,
"step": 2540000
},
{
"epoch": 0.76,
"learning_rate": 4.049661966227778e-05,
"loss": 2.1482,
"step": 2545000
},
{
"epoch": 0.76,
"eval_accuracy": 0.5223516628749524,
"eval_loss": 2.171875,
"eval_runtime": 43.342,
"eval_samples_per_second": 82.322,
"eval_steps_per_second": 10.29,
"step": 2545000
},
{
"epoch": 0.76,
"learning_rate": 4.042180227960102e-05,
"loss": 2.1451,
"step": 2550000
},
{
"epoch": 0.76,
"eval_accuracy": 0.5226286443196613,
"eval_loss": 2.171875,
"eval_runtime": 42.1401,
"eval_samples_per_second": 84.67,
"eval_steps_per_second": 10.584,
"step": 2550000
},
{
"epoch": 0.77,
"learning_rate": 4.0346969918469274e-05,
"loss": 2.143,
"step": 2555000
},
{
"epoch": 0.77,
"eval_accuracy": 0.5225609742733278,
"eval_loss": 2.171875,
"eval_runtime": 44.8351,
"eval_samples_per_second": 79.581,
"eval_steps_per_second": 9.948,
"step": 2555000
},
{
"epoch": 0.77,
"learning_rate": 4.027210760042755e-05,
"loss": 2.1463,
"step": 2560000
},
{
"epoch": 0.77,
"eval_accuracy": 0.5225061807135436,
"eval_loss": 2.171875,
"eval_runtime": 42.3643,
"eval_samples_per_second": 84.222,
"eval_steps_per_second": 10.528,
"step": 2560000
},
{
"epoch": 0.77,
"learning_rate": 4.0197290217750786e-05,
"loss": 2.1466,
"step": 2565000
},
{
"epoch": 0.77,
"eval_accuracy": 0.5227820662870569,
"eval_loss": 2.169921875,
"eval_runtime": 44.9199,
"eval_samples_per_second": 79.43,
"eval_steps_per_second": 9.929,
"step": 2565000
},
{
"epoch": 0.77,
"learning_rate": 4.012247283507403e-05,
"loss": 2.1423,
"step": 2570000
},
{
"epoch": 0.77,
"eval_accuracy": 0.5229272692204849,
"eval_loss": 2.169921875,
"eval_runtime": 43.6123,
"eval_samples_per_second": 81.812,
"eval_steps_per_second": 10.226,
"step": 2570000
},
{
"epoch": 0.77,
"learning_rate": 4.004765545239728e-05,
"loss": 2.1423,
"step": 2575000
},
{
"epoch": 0.77,
"eval_accuracy": 0.5230557601181788,
"eval_loss": 2.169921875,
"eval_runtime": 44.819,
"eval_samples_per_second": 79.609,
"eval_steps_per_second": 9.951,
"step": 2575000
},
{
"epoch": 0.77,
"learning_rate": 3.997283806972052e-05,
"loss": 2.1444,
"step": 2580000
},
{
"epoch": 0.77,
"eval_accuracy": 0.5230245277891018,
"eval_loss": 2.169921875,
"eval_runtime": 42.2873,
"eval_samples_per_second": 84.375,
"eval_steps_per_second": 10.547,
"step": 2580000
},
{
"epoch": 0.77,
"learning_rate": 3.9898020687043756e-05,
"loss": 2.1402,
"step": 2585000
},
{
"epoch": 0.77,
"eval_accuracy": 0.5230417877604338,
"eval_loss": 2.16796875,
"eval_runtime": 44.6974,
"eval_samples_per_second": 79.826,
"eval_steps_per_second": 9.978,
"step": 2585000
},
{
"epoch": 0.78,
"learning_rate": 3.9823203304367e-05,
"loss": 2.1376,
"step": 2590000
},
{
"epoch": 0.78,
"eval_accuracy": 0.5230924718032341,
"eval_loss": 2.16796875,
"eval_runtime": 43.4573,
"eval_samples_per_second": 82.104,
"eval_steps_per_second": 10.263,
"step": 2590000
},
{
"epoch": 0.78,
"learning_rate": 3.974838592169024e-05,
"loss": 2.1395,
"step": 2595000
},
{
"epoch": 0.78,
"eval_accuracy": 0.5231894564040521,
"eval_loss": 2.16796875,
"eval_runtime": 43.8137,
"eval_samples_per_second": 81.436,
"eval_steps_per_second": 10.179,
"step": 2595000
},
{
"epoch": 0.78,
"learning_rate": 3.9673553560558494e-05,
"loss": 2.1399,
"step": 2600000
},
{
"epoch": 0.78,
"eval_accuracy": 0.5232801397454949,
"eval_loss": 2.16796875,
"eval_runtime": 41.9685,
"eval_samples_per_second": 85.016,
"eval_steps_per_second": 10.627,
"step": 2600000
},
{
"epoch": 0.78,
"learning_rate": 3.9598721199426747e-05,
"loss": 2.1379,
"step": 2605000
},
{
"epoch": 0.78,
"eval_accuracy": 0.5230908279964406,
"eval_loss": 2.16796875,
"eval_runtime": 42.9652,
"eval_samples_per_second": 83.044,
"eval_steps_per_second": 10.38,
"step": 2605000
},
{
"epoch": 0.78,
"learning_rate": 3.952390381674999e-05,
"loss": 2.1411,
"step": 2610000
},
{
"epoch": 0.78,
"eval_accuracy": 0.5233708230869376,
"eval_loss": 2.166015625,
"eval_runtime": 41.4989,
"eval_samples_per_second": 85.978,
"eval_steps_per_second": 10.747,
"step": 2610000
},
{
"epoch": 0.78,
"learning_rate": 3.944908643407323e-05,
"loss": 2.1421,
"step": 2615000
},
{
"epoch": 0.78,
"eval_accuracy": 0.5232478115452223,
"eval_loss": 2.166015625,
"eval_runtime": 41.1264,
"eval_samples_per_second": 86.757,
"eval_steps_per_second": 10.845,
"step": 2615000
},
{
"epoch": 0.78,
"learning_rate": 3.9374254072941484e-05,
"loss": 2.1412,
"step": 2620000
},
{
"epoch": 0.78,
"eval_accuracy": 0.5236650645029786,
"eval_loss": 2.166015625,
"eval_runtime": 43.7159,
"eval_samples_per_second": 81.618,
"eval_steps_per_second": 10.202,
"step": 2620000
},
{
"epoch": 0.79,
"learning_rate": 3.929942171180974e-05,
"loss": 2.1381,
"step": 2625000
},
{
"epoch": 0.79,
"eval_accuracy": 0.5235886274870797,
"eval_loss": 2.166015625,
"eval_runtime": 43.6678,
"eval_samples_per_second": 81.708,
"eval_steps_per_second": 10.213,
"step": 2625000
},
{
"epoch": 0.79,
"learning_rate": 3.922458935067799e-05,
"loss": 2.142,
"step": 2630000
},
{
"epoch": 0.79,
"eval_accuracy": 0.523625339172135,
"eval_loss": 2.166015625,
"eval_runtime": 42.3354,
"eval_samples_per_second": 84.279,
"eval_steps_per_second": 10.535,
"step": 2630000
},
{
"epoch": 0.79,
"learning_rate": 3.9149756989546236e-05,
"loss": 2.1394,
"step": 2635000
},
{
"epoch": 0.79,
"eval_accuracy": 0.5236212296551512,
"eval_loss": 2.1640625,
"eval_runtime": 42.9131,
"eval_samples_per_second": 83.145,
"eval_steps_per_second": 10.393,
"step": 2635000
},
{
"epoch": 0.79,
"learning_rate": 3.907493960686948e-05,
"loss": 2.1384,
"step": 2640000
},
{
"epoch": 0.79,
"eval_accuracy": 0.5233793160887042,
"eval_loss": 2.1640625,
"eval_runtime": 40.2525,
"eval_samples_per_second": 88.64,
"eval_steps_per_second": 11.08,
"step": 2640000
},
{
"epoch": 0.79,
"learning_rate": 3.900012222419273e-05,
"loss": 2.138,
"step": 2645000
},
{
"epoch": 0.79,
"eval_accuracy": 0.5235653402241714,
"eval_loss": 2.1640625,
"eval_runtime": 43.9803,
"eval_samples_per_second": 81.127,
"eval_steps_per_second": 10.141,
"step": 2645000
},
{
"epoch": 0.79,
"learning_rate": 3.892530484151597e-05,
"loss": 2.1346,
"step": 2650000
},
{
"epoch": 0.79,
"eval_accuracy": 0.523867526706381,
"eval_loss": 2.1640625,
"eval_runtime": 44.5226,
"eval_samples_per_second": 80.139,
"eval_steps_per_second": 10.017,
"step": 2650000
},
{
"epoch": 0.8,
"learning_rate": 3.885047248038421e-05,
"loss": 2.1376,
"step": 2655000
},
{
"epoch": 0.8,
"eval_accuracy": 0.5239204024915728,
"eval_loss": 2.1640625,
"eval_runtime": 42.646,
"eval_samples_per_second": 83.666,
"eval_steps_per_second": 10.458,
"step": 2655000
},
{
"epoch": 0.8,
"learning_rate": 3.877564011925247e-05,
"loss": 2.1409,
"step": 2660000
},
{
"epoch": 0.8,
"eval_accuracy": 0.5239705385987753,
"eval_loss": 2.1640625,
"eval_runtime": 40.3234,
"eval_samples_per_second": 88.485,
"eval_steps_per_second": 11.061,
"step": 2660000
},
{
"epoch": 0.8,
"learning_rate": 3.8700807758120725e-05,
"loss": 2.1343,
"step": 2665000
},
{
"epoch": 0.8,
"eval_accuracy": 0.5239877985701072,
"eval_loss": 2.1640625,
"eval_runtime": 42.9773,
"eval_samples_per_second": 83.021,
"eval_steps_per_second": 10.378,
"step": 2665000
},
{
"epoch": 0.8,
"learning_rate": 3.862597539698897e-05,
"loss": 2.1363,
"step": 2670000
},
{
"epoch": 0.8,
"eval_accuracy": 0.5240447838722828,
"eval_loss": 2.162109375,
"eval_runtime": 44.9261,
"eval_samples_per_second": 79.419,
"eval_steps_per_second": 9.927,
"step": 2670000
},
{
"epoch": 0.8,
"learning_rate": 3.855112805740224e-05,
"loss": 2.1343,
"step": 2675000
},
{
"epoch": 0.8,
"eval_accuracy": 0.5241869731599227,
"eval_loss": 2.162109375,
"eval_runtime": 43.4218,
"eval_samples_per_second": 82.171,
"eval_steps_per_second": 10.271,
"step": 2675000
},
{
"epoch": 0.8,
"learning_rate": 3.847631067472548e-05,
"loss": 2.1381,
"step": 2680000
},
{
"epoch": 0.8,
"eval_accuracy": 0.5243439567087043,
"eval_loss": 2.162109375,
"eval_runtime": 43.0405,
"eval_samples_per_second": 82.899,
"eval_steps_per_second": 10.362,
"step": 2680000
},
{
"epoch": 0.8,
"learning_rate": 3.840147831359373e-05,
"loss": 2.1355,
"step": 2685000
},
{
"epoch": 0.8,
"eval_accuracy": 0.5241456040222856,
"eval_loss": 2.162109375,
"eval_runtime": 43.7247,
"eval_samples_per_second": 81.601,
"eval_steps_per_second": 10.2,
"step": 2685000
},
{
"epoch": 0.81,
"learning_rate": 3.8326615995552e-05,
"loss": 2.1394,
"step": 2690000
},
{
"epoch": 0.81,
"eval_accuracy": 0.5242305340399511,
"eval_loss": 2.16015625,
"eval_runtime": 40.8207,
"eval_samples_per_second": 87.407,
"eval_steps_per_second": 10.926,
"step": 2690000
},
{
"epoch": 0.81,
"learning_rate": 3.825179861287524e-05,
"loss": 2.1359,
"step": 2695000
},
{
"epoch": 0.81,
"eval_accuracy": 0.5244875158353388,
"eval_loss": 2.16015625,
"eval_runtime": 44.3125,
"eval_samples_per_second": 80.519,
"eval_steps_per_second": 10.065,
"step": 2695000
},
{
"epoch": 0.81,
"learning_rate": 3.817698123019848e-05,
"loss": 2.1365,
"step": 2700000
},
{
"epoch": 0.81,
"eval_accuracy": 0.524362312551232,
"eval_loss": 2.16015625,
"eval_runtime": 42.9757,
"eval_samples_per_second": 83.024,
"eval_steps_per_second": 10.378,
"step": 2700000
},
{
"epoch": 0.81,
"learning_rate": 3.810216384752173e-05,
"loss": 2.131,
"step": 2705000
},
{
"epoch": 0.81,
"eval_accuracy": 0.5244225854669946,
"eval_loss": 2.16015625,
"eval_runtime": 44.5469,
"eval_samples_per_second": 80.095,
"eval_steps_per_second": 10.012,
"step": 2705000
},
{
"epoch": 0.81,
"learning_rate": 3.802731650793499e-05,
"loss": 2.1337,
"step": 2710000
},
{
"epoch": 0.81,
"eval_accuracy": 0.5244025858176733,
"eval_loss": 2.16015625,
"eval_runtime": 43.6203,
"eval_samples_per_second": 81.797,
"eval_steps_per_second": 10.225,
"step": 2710000
},
{
"epoch": 0.81,
"learning_rate": 3.7952499125258226e-05,
"loss": 2.1307,
"step": 2715000
},
{
"epoch": 0.81,
"eval_accuracy": 0.5245954591481136,
"eval_loss": 2.158203125,
"eval_runtime": 43.7473,
"eval_samples_per_second": 81.559,
"eval_steps_per_second": 10.195,
"step": 2715000
},
{
"epoch": 0.81,
"learning_rate": 3.787765178567149e-05,
"loss": 2.1333,
"step": 2720000
},
{
"epoch": 0.81,
"eval_accuracy": 0.524749429051107,
"eval_loss": 2.158203125,
"eval_runtime": 43.6989,
"eval_samples_per_second": 81.65,
"eval_steps_per_second": 10.206,
"step": 2720000
},
{
"epoch": 0.82,
"learning_rate": 3.780283440299474e-05,
"loss": 2.1354,
"step": 2725000
},
{
"epoch": 0.82,
"eval_accuracy": 0.5246316228975711,
"eval_loss": 2.158203125,
"eval_runtime": 43.001,
"eval_samples_per_second": 82.975,
"eval_steps_per_second": 10.372,
"step": 2725000
},
{
"epoch": 0.82,
"learning_rate": 3.7728002041862985e-05,
"loss": 2.1372,
"step": 2730000
},
{
"epoch": 0.82,
"eval_accuracy": 0.5248077841922771,
"eval_loss": 2.158203125,
"eval_runtime": 41.555,
"eval_samples_per_second": 85.862,
"eval_steps_per_second": 10.733,
"step": 2730000
},
{
"epoch": 0.82,
"learning_rate": 3.765316968073124e-05,
"loss": 2.1323,
"step": 2735000
},
{
"epoch": 0.82,
"eval_accuracy": 0.5248480574587185,
"eval_loss": 2.158203125,
"eval_runtime": 41.1337,
"eval_samples_per_second": 86.742,
"eval_steps_per_second": 10.843,
"step": 2735000
},
{
"epoch": 0.82,
"learning_rate": 3.757835229805448e-05,
"loss": 2.1315,
"step": 2740000
},
{
"epoch": 0.82,
"eval_accuracy": 0.5249064125998887,
"eval_loss": 2.15625,
"eval_runtime": 43.6977,
"eval_samples_per_second": 81.652,
"eval_steps_per_second": 10.206,
"step": 2740000
},
{
"epoch": 0.82,
"learning_rate": 3.750351993692273e-05,
"loss": 2.1341,
"step": 2745000
},
{
"epoch": 0.82,
"eval_accuracy": 0.5249143576660573,
"eval_loss": 2.15625,
"eval_runtime": 44.5031,
"eval_samples_per_second": 80.174,
"eval_steps_per_second": 10.022,
"step": 2745000
},
{
"epoch": 0.82,
"learning_rate": 3.7428702554245975e-05,
"loss": 2.132,
"step": 2750000
},
{
"epoch": 0.82,
"eval_accuracy": 0.5249768223242113,
"eval_loss": 2.15625,
"eval_runtime": 40.671,
"eval_samples_per_second": 87.728,
"eval_steps_per_second": 10.966,
"step": 2750000
},
{
"epoch": 0.83,
"learning_rate": 3.7353885171569214e-05,
"loss": 2.1322,
"step": 2755000
},
{
"epoch": 0.83,
"eval_accuracy": 0.5251954486277501,
"eval_loss": 2.15625,
"eval_runtime": 43.2998,
"eval_samples_per_second": 82.402,
"eval_steps_per_second": 10.3,
"step": 2755000
},
{
"epoch": 0.83,
"learning_rate": 3.727906778889246e-05,
"loss": 2.1298,
"step": 2760000
},
{
"epoch": 0.83,
"eval_accuracy": 0.5252044895651145,
"eval_loss": 2.15625,
"eval_runtime": 44.6651,
"eval_samples_per_second": 79.883,
"eval_steps_per_second": 9.985,
"step": 2760000
},
{
"epoch": 0.83,
"learning_rate": 3.720422044930572e-05,
"loss": 2.1285,
"step": 2765000
},
{
"epoch": 0.83,
"eval_accuracy": 0.5252362698297893,
"eval_loss": 2.154296875,
"eval_runtime": 43.7277,
"eval_samples_per_second": 81.596,
"eval_steps_per_second": 10.199,
"step": 2765000
},
{
"epoch": 0.83,
"learning_rate": 3.7129403066628966e-05,
"loss": 2.1299,
"step": 2770000
},
{
"epoch": 0.83,
"eval_accuracy": 0.5251601067816893,
"eval_loss": 2.15625,
"eval_runtime": 43.8556,
"eval_samples_per_second": 81.358,
"eval_steps_per_second": 10.17,
"step": 2770000
},
{
"epoch": 0.83,
"learning_rate": 3.7054585683952205e-05,
"loss": 2.1304,
"step": 2775000
},
{
"epoch": 0.83,
"eval_accuracy": 0.5252631186740835,
"eval_loss": 2.154296875,
"eval_runtime": 44.0024,
"eval_samples_per_second": 81.086,
"eval_steps_per_second": 10.136,
"step": 2775000
},
{
"epoch": 0.83,
"learning_rate": 3.6979768301275444e-05,
"loss": 2.1288,
"step": 2780000
},
{
"epoch": 0.83,
"eval_accuracy": 0.5254154447702835,
"eval_loss": 2.154296875,
"eval_runtime": 43.1187,
"eval_samples_per_second": 82.748,
"eval_steps_per_second": 10.344,
"step": 2780000
},
{
"epoch": 0.83,
"learning_rate": 3.6904935940143697e-05,
"loss": 2.1295,
"step": 2785000
},
{
"epoch": 0.83,
"eval_accuracy": 0.5253255833322374,
"eval_loss": 2.154296875,
"eval_runtime": 44.6338,
"eval_samples_per_second": 79.939,
"eval_steps_per_second": 9.992,
"step": 2785000
},
{
"epoch": 0.84,
"learning_rate": 3.683010357901195e-05,
"loss": 2.129,
"step": 2790000
},
{
"epoch": 0.84,
"eval_accuracy": 0.5255368125052053,
"eval_loss": 2.154296875,
"eval_runtime": 44.4544,
"eval_samples_per_second": 80.262,
"eval_steps_per_second": 10.033,
"step": 2790000
},
{
"epoch": 0.84,
"learning_rate": 3.6755286196335195e-05,
"loss": 2.1285,
"step": 2795000
},
{
"epoch": 0.84,
"eval_accuracy": 0.5253598293071026,
"eval_loss": 2.154296875,
"eval_runtime": 44.2524,
"eval_samples_per_second": 80.628,
"eval_steps_per_second": 10.079,
"step": 2795000
},
{
"epoch": 0.84,
"learning_rate": 3.668045383520344e-05,
"loss": 2.1292,
"step": 2800000
},
{
"epoch": 0.84,
"eval_accuracy": 0.5252880497437853,
"eval_loss": 2.154296875,
"eval_runtime": 40.6928,
"eval_samples_per_second": 87.681,
"eval_steps_per_second": 10.96,
"step": 2800000
},
{
"epoch": 0.84,
"learning_rate": 3.660563645252669e-05,
"loss": 2.1278,
"step": 2805000
},
{
"epoch": 0.84,
"eval_accuracy": 0.5256472215281704,
"eval_loss": 2.15234375,
"eval_runtime": 43.3931,
"eval_samples_per_second": 82.225,
"eval_steps_per_second": 10.278,
"step": 2805000
},
{
"epoch": 0.84,
"learning_rate": 3.653080409139494e-05,
"loss": 2.1239,
"step": 2810000
},
{
"epoch": 0.84,
"eval_accuracy": 0.5255110595321069,
"eval_loss": 2.15234375,
"eval_runtime": 42.2966,
"eval_samples_per_second": 84.357,
"eval_steps_per_second": 10.545,
"step": 2810000
},
{
"epoch": 0.84,
"learning_rate": 3.645598670871818e-05,
"loss": 2.1241,
"step": 2815000
},
{
"epoch": 0.84,
"eval_accuracy": 0.5259063950659495,
"eval_loss": 2.15234375,
"eval_runtime": 42.6516,
"eval_samples_per_second": 83.655,
"eval_steps_per_second": 10.457,
"step": 2815000
},
{
"epoch": 0.84,
"learning_rate": 3.638115434758643e-05,
"loss": 2.1232,
"step": 2820000
},
{
"epoch": 0.84,
"eval_accuracy": 0.5256781798894485,
"eval_loss": 2.15234375,
"eval_runtime": 46.2947,
"eval_samples_per_second": 77.071,
"eval_steps_per_second": 9.634,
"step": 2820000
},
{
"epoch": 0.85,
"learning_rate": 3.6306321986454685e-05,
"loss": 2.1241,
"step": 2825000
},
{
"epoch": 0.85,
"eval_accuracy": 0.525697357635373,
"eval_loss": 2.150390625,
"eval_runtime": 43.287,
"eval_samples_per_second": 82.427,
"eval_steps_per_second": 10.303,
"step": 2825000
},
{
"epoch": 0.85,
"learning_rate": 3.623148962532294e-05,
"loss": 2.1236,
"step": 2830000
},
{
"epoch": 0.85,
"eval_accuracy": 0.5259439286544016,
"eval_loss": 2.150390625,
"eval_runtime": 43.0484,
"eval_samples_per_second": 82.884,
"eval_steps_per_second": 10.36,
"step": 2830000
},
{
"epoch": 0.85,
"learning_rate": 3.6156672242646177e-05,
"loss": 2.1272,
"step": 2835000
},
{
"epoch": 0.85,
"eval_accuracy": 0.5259442026222007,
"eval_loss": 2.150390625,
"eval_runtime": 42.9436,
"eval_samples_per_second": 83.086,
"eval_steps_per_second": 10.386,
"step": 2835000
},
{
"epoch": 0.85,
"learning_rate": 3.608183988151443e-05,
"loss": 2.1271,
"step": 2840000
},
{
"epoch": 0.85,
"eval_accuracy": 0.5260592690977474,
"eval_loss": 2.150390625,
"eval_runtime": 40.596,
"eval_samples_per_second": 87.89,
"eval_steps_per_second": 10.986,
"step": 2840000
},
{
"epoch": 0.85,
"learning_rate": 3.6007022498837675e-05,
"loss": 2.1249,
"step": 2845000
},
{
"epoch": 0.85,
"eval_accuracy": 0.52616776034612,
"eval_loss": 2.1484375,
"eval_runtime": 43.4159,
"eval_samples_per_second": 82.182,
"eval_steps_per_second": 10.273,
"step": 2845000
},
{
"epoch": 0.85,
"learning_rate": 3.5932175159250935e-05,
"loss": 2.1245,
"step": 2850000
},
{
"epoch": 0.85,
"eval_accuracy": 0.5260307764466595,
"eval_loss": 2.1484375,
"eval_runtime": 43.8356,
"eval_samples_per_second": 81.395,
"eval_steps_per_second": 10.174,
"step": 2850000
},
{
"epoch": 0.86,
"learning_rate": 3.585735777657418e-05,
"loss": 2.1222,
"step": 2855000
},
{
"epoch": 0.86,
"eval_accuracy": 0.5261137886897326,
"eval_loss": 2.1484375,
"eval_runtime": 43.0232,
"eval_samples_per_second": 82.932,
"eval_steps_per_second": 10.367,
"step": 2855000
},
{
"epoch": 0.86,
"learning_rate": 3.5782525415442434e-05,
"loss": 2.125,
"step": 2860000
},
{
"epoch": 0.86,
"eval_accuracy": 0.5263050182133793,
"eval_loss": 2.1484375,
"eval_runtime": 43.0096,
"eval_samples_per_second": 82.958,
"eval_steps_per_second": 10.37,
"step": 2860000
},
{
"epoch": 0.86,
"learning_rate": 3.570770803276567e-05,
"loss": 2.1261,
"step": 2865000
},
{
"epoch": 0.86,
"eval_accuracy": 0.5260803646182642,
"eval_loss": 2.1484375,
"eval_runtime": 40.5259,
"eval_samples_per_second": 88.042,
"eval_steps_per_second": 11.005,
"step": 2865000
},
{
"epoch": 0.86,
"learning_rate": 3.563289065008891e-05,
"loss": 2.1247,
"step": 2870000
},
{
"epoch": 0.86,
"eval_accuracy": 0.5262392659416383,
"eval_loss": 2.1484375,
"eval_runtime": 46.7614,
"eval_samples_per_second": 76.302,
"eval_steps_per_second": 9.538,
"step": 2870000
},
{
"epoch": 0.86,
"learning_rate": 3.5558028332047185e-05,
"loss": 2.1225,
"step": 2875000
},
{
"epoch": 0.86,
"eval_accuracy": 0.5263167988287328,
"eval_loss": 2.1484375,
"eval_runtime": 43.7319,
"eval_samples_per_second": 81.588,
"eval_steps_per_second": 10.199,
"step": 2875000
},
{
"epoch": 0.86,
"learning_rate": 3.5483210949370424e-05,
"loss": 2.122,
"step": 2880000
},
{
"epoch": 0.86,
"eval_accuracy": 0.5261085833015531,
"eval_loss": 2.1484375,
"eval_runtime": 46.8934,
"eval_samples_per_second": 76.087,
"eval_steps_per_second": 9.511,
"step": 2880000
},
{
"epoch": 0.86,
"learning_rate": 3.540839356669367e-05,
"loss": 2.1237,
"step": 2885000
},
{
"epoch": 0.86,
"eval_accuracy": 0.5261107750439444,
"eval_loss": 2.146484375,
"eval_runtime": 42.8485,
"eval_samples_per_second": 83.27,
"eval_steps_per_second": 10.409,
"step": 2885000
},
{
"epoch": 0.87,
"learning_rate": 3.533356120556192e-05,
"loss": 2.1219,
"step": 2890000
},
{
"epoch": 0.87,
"eval_accuracy": 0.5261825546072617,
"eval_loss": 2.146484375,
"eval_runtime": 44.3826,
"eval_samples_per_second": 80.392,
"eval_steps_per_second": 10.049,
"step": 2890000
},
{
"epoch": 0.87,
"learning_rate": 3.525874382288516e-05,
"loss": 2.1248,
"step": 2895000
},
{
"epoch": 0.87,
"eval_accuracy": 0.526191595544626,
"eval_loss": 2.146484375,
"eval_runtime": 40.9038,
"eval_samples_per_second": 87.229,
"eval_steps_per_second": 10.904,
"step": 2895000
},
{
"epoch": 0.87,
"learning_rate": 3.51839264402084e-05,
"loss": 2.1191,
"step": 2900000
},
{
"epoch": 0.87,
"eval_accuracy": 0.526354332417185,
"eval_loss": 2.146484375,
"eval_runtime": 44.4026,
"eval_samples_per_second": 80.356,
"eval_steps_per_second": 10.044,
"step": 2900000
},
{
"epoch": 0.87,
"learning_rate": 3.510909407907666e-05,
"loss": 2.1181,
"step": 2905000
},
{
"epoch": 0.87,
"eval_accuracy": 0.5264343310144699,
"eval_loss": 2.146484375,
"eval_runtime": 43.5492,
"eval_samples_per_second": 81.93,
"eval_steps_per_second": 10.241,
"step": 2905000
},
{
"epoch": 0.87,
"learning_rate": 3.50342766963999e-05,
"loss": 2.1176,
"step": 2910000
},
{
"epoch": 0.87,
"eval_accuracy": 0.5263020045675911,
"eval_loss": 2.146484375,
"eval_runtime": 46.4423,
"eval_samples_per_second": 76.827,
"eval_steps_per_second": 9.603,
"step": 2910000
},
{
"epoch": 0.87,
"learning_rate": 3.495944433526815e-05,
"loss": 2.1191,
"step": 2915000
},
{
"epoch": 0.87,
"eval_accuracy": 0.5266524093824109,
"eval_loss": 2.146484375,
"eval_runtime": 42.6675,
"eval_samples_per_second": 83.623,
"eval_steps_per_second": 10.453,
"step": 2915000
},
{
"epoch": 0.87,
"learning_rate": 3.488462695259139e-05,
"loss": 2.1206,
"step": 2920000
},
{
"epoch": 0.87,
"eval_accuracy": 0.5267954205734475,
"eval_loss": 2.14453125,
"eval_runtime": 42.9737,
"eval_samples_per_second": 83.028,
"eval_steps_per_second": 10.378,
"step": 2920000
},
{
"epoch": 0.88,
"learning_rate": 3.4809794591459645e-05,
"loss": 2.1148,
"step": 2925000
},
{
"epoch": 0.88,
"eval_accuracy": 0.5267219972033367,
"eval_loss": 2.14453125,
"eval_runtime": 44.1462,
"eval_samples_per_second": 80.822,
"eval_steps_per_second": 10.103,
"step": 2925000
},
{
"epoch": 0.88,
"learning_rate": 3.473497720878289e-05,
"loss": 2.1188,
"step": 2930000
},
{
"epoch": 0.88,
"eval_accuracy": 0.5270244576533453,
"eval_loss": 2.14453125,
"eval_runtime": 44.0298,
"eval_samples_per_second": 81.036,
"eval_steps_per_second": 10.129,
"step": 2930000
},
{
"epoch": 0.88,
"learning_rate": 3.466015982610613e-05,
"loss": 2.1118,
"step": 2935000
},
{
"epoch": 0.88,
"eval_accuracy": 0.5270036361006273,
"eval_loss": 2.14453125,
"eval_runtime": 41.5147,
"eval_samples_per_second": 85.945,
"eval_steps_per_second": 10.743,
"step": 2935000
},
{
"epoch": 0.88,
"learning_rate": 4.992518261732325e-05,
"loss": 2.1283,
"step": 2940000
},
{
"epoch": 0.88,
"eval_accuracy": 0.5243740931665856,
"eval_loss": 2.158203125,
"eval_runtime": 39.6741,
"eval_samples_per_second": 89.933,
"eval_steps_per_second": 11.242,
"step": 2940000
},
{
"epoch": 0.88,
"learning_rate": 4.9850350256191494e-05,
"loss": 2.1336,
"step": 2945000
},
{
"epoch": 0.88,
"eval_accuracy": 0.5240346470637227,
"eval_loss": 2.162109375,
"eval_runtime": 39.6588,
"eval_samples_per_second": 89.967,
"eval_steps_per_second": 11.246,
"step": 2945000
},
{
"epoch": 0.88,
"learning_rate": 4.977551789505975e-05,
"loss": 2.1311,
"step": 2950000
},
{
"epoch": 0.88,
"eval_accuracy": 0.5237494465850462,
"eval_loss": 2.162109375,
"eval_runtime": 39.7632,
"eval_samples_per_second": 89.731,
"eval_steps_per_second": 11.216,
"step": 2950000
},
{
"epoch": 0.89,
"learning_rate": 4.970070051238299e-05,
"loss": 2.1377,
"step": 2955000
},
{
"epoch": 0.89,
"eval_accuracy": 0.523618763944961,
"eval_loss": 2.1640625,
"eval_runtime": 39.7355,
"eval_samples_per_second": 89.794,
"eval_steps_per_second": 11.224,
"step": 2955000
},
{
"epoch": 0.89,
"learning_rate": 4.962588312970624e-05,
"loss": 2.136,
"step": 2960000
},
{
"epoch": 0.89,
"eval_accuracy": 0.523584244002297,
"eval_loss": 2.1640625,
"eval_runtime": 39.6172,
"eval_samples_per_second": 90.062,
"eval_steps_per_second": 11.258,
"step": 2960000
},
{
"epoch": 0.89,
"learning_rate": 4.955103579011949e-05,
"loss": 2.1394,
"step": 2965000
},
{
"epoch": 0.89,
"eval_accuracy": 0.5233458920172359,
"eval_loss": 2.1640625,
"eval_runtime": 39.7138,
"eval_samples_per_second": 89.843,
"eval_steps_per_second": 11.23,
"step": 2965000
},
{
"epoch": 0.89,
"learning_rate": 4.947621840744274e-05,
"loss": 2.1405,
"step": 2970000
},
{
"epoch": 0.89,
"eval_accuracy": 0.5233305498204963,
"eval_loss": 2.166015625,
"eval_runtime": 39.7403,
"eval_samples_per_second": 89.783,
"eval_steps_per_second": 11.223,
"step": 2970000
},
{
"epoch": 0.89,
"learning_rate": 4.9401401024765983e-05,
"loss": 2.1391,
"step": 2975000
},
{
"epoch": 0.89,
"eval_accuracy": 0.5235795865497153,
"eval_loss": 2.166015625,
"eval_runtime": 39.791,
"eval_samples_per_second": 89.668,
"eval_steps_per_second": 11.209,
"step": 2975000
},
{
"epoch": 0.89,
"learning_rate": 4.9326553685179237e-05,
"loss": 2.1353,
"step": 2980000
},
{
"epoch": 0.89,
"eval_accuracy": 0.5233823297344923,
"eval_loss": 2.166015625,
"eval_runtime": 39.6943,
"eval_samples_per_second": 89.887,
"eval_steps_per_second": 11.236,
"step": 2980000
},
{
"epoch": 0.89,
"learning_rate": 4.925173630250248e-05,
"loss": 2.1392,
"step": 2985000
},
{
"epoch": 0.89,
"eval_accuracy": 0.5233889049616665,
"eval_loss": 2.166015625,
"eval_runtime": 39.7244,
"eval_samples_per_second": 89.819,
"eval_steps_per_second": 11.227,
"step": 2985000
},
{
"epoch": 0.9,
"learning_rate": 4.917691891982573e-05,
"loss": 2.1384,
"step": 2990000
},
{
"epoch": 0.9,
"eval_accuracy": 0.5235058892118056,
"eval_loss": 2.166015625,
"eval_runtime": 39.7383,
"eval_samples_per_second": 89.787,
"eval_steps_per_second": 11.223,
"step": 2990000
},
{
"epoch": 0.9,
"learning_rate": 4.910210153714897e-05,
"loss": 2.1373,
"step": 2995000
},
{
"epoch": 0.9,
"eval_accuracy": 0.523321234915333,
"eval_loss": 2.166015625,
"eval_runtime": 39.8394,
"eval_samples_per_second": 89.56,
"eval_steps_per_second": 11.195,
"step": 2995000
},
{
"epoch": 0.9,
"learning_rate": 4.9027284154472206e-05,
"loss": 2.1346,
"step": 3000000
},
{
"epoch": 0.9,
"eval_accuracy": 0.523394110349846,
"eval_loss": 2.166015625,
"eval_runtime": 39.7248,
"eval_samples_per_second": 89.818,
"eval_steps_per_second": 11.227,
"step": 3000000
},
{
"epoch": 0.9,
"learning_rate": 4.895246677179545e-05,
"loss": 2.1368,
"step": 3005000
},
{
"epoch": 0.9,
"eval_accuracy": 0.523494108596452,
"eval_loss": 2.166015625,
"eval_runtime": 39.8116,
"eval_samples_per_second": 89.622,
"eval_steps_per_second": 11.203,
"step": 3005000
},
{
"epoch": 0.9,
"learning_rate": 4.8877634410663705e-05,
"loss": 2.1383,
"step": 3010000
},
{
"epoch": 0.9,
"eval_accuracy": 0.5232738384861197,
"eval_loss": 2.166015625,
"eval_runtime": 39.8277,
"eval_samples_per_second": 89.586,
"eval_steps_per_second": 11.198,
"step": 3010000
},
{
"epoch": 0.9,
"learning_rate": 4.880280204953195e-05,
"loss": 2.1447,
"step": 3015000
},
{
"epoch": 0.9,
"eval_accuracy": 0.5233116460423708,
"eval_loss": 2.166015625,
"eval_runtime": 39.9037,
"eval_samples_per_second": 89.415,
"eval_steps_per_second": 11.177,
"step": 3015000
},
{
"epoch": 0.9,
"learning_rate": 4.87279846668552e-05,
"loss": 2.1392,
"step": 3020000
},
{
"epoch": 0.9,
"eval_accuracy": 0.5234119182567758,
"eval_loss": 2.166015625,
"eval_runtime": 39.8551,
"eval_samples_per_second": 89.524,
"eval_steps_per_second": 11.191,
"step": 3020000
},
{
"epoch": 0.91,
"learning_rate": 4.865315230572345e-05,
"loss": 2.1359,
"step": 3025000
},
{
"epoch": 0.91,
"eval_accuracy": 0.5233072625575881,
"eval_loss": 2.166015625,
"eval_runtime": 39.8861,
"eval_samples_per_second": 89.455,
"eval_steps_per_second": 11.182,
"step": 3025000
},
{
"epoch": 0.91,
"learning_rate": 4.8578334923046695e-05,
"loss": 2.1408,
"step": 3030000
},
{
"epoch": 0.91,
"eval_accuracy": 0.5233184952373438,
"eval_loss": 2.166015625,
"eval_runtime": 39.8536,
"eval_samples_per_second": 89.528,
"eval_steps_per_second": 11.191,
"step": 3030000
},
{
"epoch": 0.91,
"learning_rate": 4.850350256191494e-05,
"loss": 2.1437,
"step": 3035000
},
{
"epoch": 0.91,
"eval_accuracy": 0.5232642496131574,
"eval_loss": 2.166015625,
"eval_runtime": 39.8602,
"eval_samples_per_second": 89.513,
"eval_steps_per_second": 11.189,
"step": 3035000
},
{
"epoch": 0.91,
"learning_rate": 4.8428715136148166e-05,
"loss": 2.1354,
"step": 3040000
},
{
"epoch": 0.91,
"eval_accuracy": 0.5233253444323168,
"eval_loss": 2.166015625,
"eval_runtime": 39.8447,
"eval_samples_per_second": 89.548,
"eval_steps_per_second": 11.193,
"step": 3040000
},
{
"epoch": 0.91,
"learning_rate": 4.835383783965145e-05,
"loss": 2.1371,
"step": 3045000
},
{
"epoch": 0.91,
"eval_accuracy": 0.5234626022995761,
"eval_loss": 2.166015625,
"eval_runtime": 40.0007,
"eval_samples_per_second": 89.199,
"eval_steps_per_second": 11.15,
"step": 3045000
},
{
"epoch": 0.91,
"learning_rate": 4.82790054785197e-05,
"loss": 2.1399,
"step": 3050000
},
{
"epoch": 0.91,
"eval_accuracy": 0.5233957541566394,
"eval_loss": 2.166015625,
"eval_runtime": 39.8876,
"eval_samples_per_second": 89.451,
"eval_steps_per_second": 11.181,
"step": 3050000
},
{
"epoch": 0.92,
"learning_rate": 4.820420307429793e-05,
"loss": 2.1387,
"step": 3055000
},
{
"epoch": 0.92,
"eval_accuracy": 0.5234242468077273,
"eval_loss": 2.166015625,
"eval_runtime": 39.9685,
"eval_samples_per_second": 89.27,
"eval_steps_per_second": 11.159,
"step": 3055000
},
{
"epoch": 0.92,
"learning_rate": 4.8129340756256206e-05,
"loss": 2.1406,
"step": 3060000
},
{
"epoch": 0.92,
"eval_accuracy": 0.5232119217635636,
"eval_loss": 2.166015625,
"eval_runtime": 39.9746,
"eval_samples_per_second": 89.257,
"eval_steps_per_second": 11.157,
"step": 3060000
},
{
"epoch": 0.92,
"learning_rate": 4.805450839512445e-05,
"loss": 2.1387,
"step": 3065000
},
{
"epoch": 0.92,
"eval_accuracy": 0.5234650680097664,
"eval_loss": 2.166015625,
"eval_runtime": 40.0463,
"eval_samples_per_second": 89.097,
"eval_steps_per_second": 11.137,
"step": 3065000
},
{
"epoch": 0.92,
"learning_rate": 4.797966105553772e-05,
"loss": 2.1413,
"step": 3070000
},
{
"epoch": 0.92,
"eval_accuracy": 0.5234842457556909,
"eval_loss": 2.166015625,
"eval_runtime": 39.9277,
"eval_samples_per_second": 89.362,
"eval_steps_per_second": 11.17,
"step": 3070000
},
{
"epoch": 0.92,
"learning_rate": 4.790484367286096e-05,
"loss": 2.1371,
"step": 3075000
},
{
"epoch": 0.92,
"eval_accuracy": 0.523473561011533,
"eval_loss": 2.1640625,
"eval_runtime": 39.9812,
"eval_samples_per_second": 89.242,
"eval_steps_per_second": 11.155,
"step": 3075000
},
{
"epoch": 0.92,
"learning_rate": 4.782999633327422e-05,
"loss": 2.138,
"step": 3080000
},
{
"epoch": 0.92,
"eval_accuracy": 0.5234982181134358,
"eval_loss": 2.1640625,
"eval_runtime": 39.9703,
"eval_samples_per_second": 89.266,
"eval_steps_per_second": 11.158,
"step": 3080000
},
{
"epoch": 0.92,
"learning_rate": 4.7755148993687484e-05,
"loss": 2.1385,
"step": 3085000
},
{
"epoch": 0.92,
"eval_accuracy": 0.5236360239162929,
"eval_loss": 2.1640625,
"eval_runtime": 40.1115,
"eval_samples_per_second": 88.952,
"eval_steps_per_second": 11.119,
"step": 3085000
},
{
"epoch": 0.93,
"learning_rate": 4.768031663255573e-05,
"loss": 2.135,
"step": 3090000
},
{
"epoch": 0.93,
"eval_accuracy": 0.5233746586361225,
"eval_loss": 2.166015625,
"eval_runtime": 40.1304,
"eval_samples_per_second": 88.91,
"eval_steps_per_second": 11.114,
"step": 3090000
},
{
"epoch": 0.93,
"learning_rate": 4.760548427142398e-05,
"loss": 2.1401,
"step": 3095000
},
{
"epoch": 0.93,
"eval_accuracy": 0.5235689018055574,
"eval_loss": 2.1640625,
"eval_runtime": 40.1365,
"eval_samples_per_second": 88.897,
"eval_steps_per_second": 11.112,
"step": 3095000
},
{
"epoch": 0.93,
"learning_rate": 4.753066688874723e-05,
"loss": 2.1374,
"step": 3100000
},
{
"epoch": 0.93,
"eval_accuracy": 0.5235552034156113,
"eval_loss": 2.1640625,
"eval_runtime": 40.133,
"eval_samples_per_second": 88.904,
"eval_steps_per_second": 11.113,
"step": 3100000
},
{
"epoch": 0.93,
"learning_rate": 4.745586448452546e-05,
"loss": 2.1358,
"step": 3105000
},
{
"epoch": 0.93,
"eval_accuracy": 0.5237198580627628,
"eval_loss": 2.1640625,
"eval_runtime": 40.0267,
"eval_samples_per_second": 89.141,
"eval_steps_per_second": 11.143,
"step": 3105000
},
{
"epoch": 0.93,
"learning_rate": 4.7381032123393713e-05,
"loss": 2.1344,
"step": 3110000
},
{
"epoch": 0.93,
"eval_accuracy": 0.5239288954933393,
"eval_loss": 2.162109375,
"eval_runtime": 40.1156,
"eval_samples_per_second": 88.943,
"eval_steps_per_second": 11.118,
"step": 3110000
},
{
"epoch": 0.93,
"learning_rate": 4.730621474071695e-05,
"loss": 2.1368,
"step": 3115000
},
{
"epoch": 0.93,
"eval_accuracy": 0.5238793073217346,
"eval_loss": 2.162109375,
"eval_runtime": 40.1212,
"eval_samples_per_second": 88.93,
"eval_steps_per_second": 11.116,
"step": 3115000
},
{
"epoch": 0.93,
"learning_rate": 4.723136740113022e-05,
"loss": 2.1345,
"step": 3120000
},
{
"epoch": 0.93,
"eval_accuracy": 0.5236836943133052,
"eval_loss": 2.162109375,
"eval_runtime": 40.203,
"eval_samples_per_second": 88.75,
"eval_steps_per_second": 11.094,
"step": 3120000
},
{
"epoch": 0.94,
"learning_rate": 4.7156535039998465e-05,
"loss": 2.1358,
"step": 3125000
},
{
"epoch": 0.94,
"eval_accuracy": 0.5238979371320612,
"eval_loss": 2.162109375,
"eval_runtime": 40.2344,
"eval_samples_per_second": 88.68,
"eval_steps_per_second": 11.085,
"step": 3125000
},
{
"epoch": 0.94,
"learning_rate": 4.708171765732171e-05,
"loss": 2.1395,
"step": 3130000
},
{
"epoch": 0.94,
"eval_accuracy": 0.5239330050103231,
"eval_loss": 2.162109375,
"eval_runtime": 40.4542,
"eval_samples_per_second": 88.199,
"eval_steps_per_second": 11.025,
"step": 3130000
},
{
"epoch": 0.94,
"learning_rate": 4.700690027464496e-05,
"loss": 2.1359,
"step": 3135000
},
{
"epoch": 0.94,
"eval_accuracy": 0.5242872453743277,
"eval_loss": 2.162109375,
"eval_runtime": 40.3355,
"eval_samples_per_second": 88.458,
"eval_steps_per_second": 11.057,
"step": 3135000
},
{
"epoch": 0.94,
"learning_rate": 4.693205293505821e-05,
"loss": 2.1373,
"step": 3140000
},
{
"epoch": 0.94,
"eval_accuracy": 0.5241631379614166,
"eval_loss": 2.16015625,
"eval_runtime": 40.2137,
"eval_samples_per_second": 88.726,
"eval_steps_per_second": 11.091,
"step": 3140000
},
{
"epoch": 0.94,
"learning_rate": 4.6857235552381456e-05,
"loss": 2.1357,
"step": 3145000
},
{
"epoch": 0.94,
"eval_accuracy": 0.5243412170307151,
"eval_loss": 2.16015625,
"eval_runtime": 40.3184,
"eval_samples_per_second": 88.496,
"eval_steps_per_second": 11.062,
"step": 3145000
},
{
"epoch": 0.94,
"learning_rate": 4.67824181697047e-05,
"loss": 2.1354,
"step": 3150000
},
{
"epoch": 0.94,
"eval_accuracy": 0.5243636823902266,
"eval_loss": 2.16015625,
"eval_runtime": 40.1892,
"eval_samples_per_second": 88.78,
"eval_steps_per_second": 11.098,
"step": 3150000
},
{
"epoch": 0.95,
"learning_rate": 4.670757083011796e-05,
"loss": 2.1323,
"step": 3155000
},
{
"epoch": 0.95,
"eval_accuracy": 0.5243839560073468,
"eval_loss": 2.16015625,
"eval_runtime": 40.3635,
"eval_samples_per_second": 88.397,
"eval_steps_per_second": 11.05,
"step": 3155000
},
{
"epoch": 0.95,
"learning_rate": 4.66327534474412e-05,
"loss": 2.133,
"step": 3160000
},
{
"epoch": 0.95,
"eval_accuracy": 0.5242327257823425,
"eval_loss": 2.16015625,
"eval_runtime": 41.1497,
"eval_samples_per_second": 86.708,
"eval_steps_per_second": 10.838,
"step": 3160000
},
{
"epoch": 0.95,
"learning_rate": 4.655790610785447e-05,
"loss": 2.1315,
"step": 3165000
},
{
"epoch": 0.95,
"eval_accuracy": 0.524407243270255,
"eval_loss": 2.16015625,
"eval_runtime": 41.4927,
"eval_samples_per_second": 85.991,
"eval_steps_per_second": 10.749,
"step": 3165000
},
{
"epoch": 0.95,
"learning_rate": 4.6483088725177706e-05,
"loss": 2.1363,
"step": 3170000
},
{
"epoch": 0.95,
"eval_accuracy": 0.5242845056963384,
"eval_loss": 2.16015625,
"eval_runtime": 43.5552,
"eval_samples_per_second": 81.919,
"eval_steps_per_second": 10.24,
"step": 3170000
},
{
"epoch": 0.95,
"learning_rate": 4.640825636404596e-05,
"loss": 2.1349,
"step": 3175000
},
{
"epoch": 0.95,
"eval_accuracy": 0.5245045018388719,
"eval_loss": 2.16015625,
"eval_runtime": 41.5498,
"eval_samples_per_second": 85.873,
"eval_steps_per_second": 10.734,
"step": 3175000
},
{
"epoch": 0.95,
"learning_rate": 4.633342400291421e-05,
"loss": 2.1336,
"step": 3180000
},
{
"epoch": 0.95,
"eval_accuracy": 0.524365600164819,
"eval_loss": 2.16015625,
"eval_runtime": 45.093,
"eval_samples_per_second": 79.125,
"eval_steps_per_second": 9.891,
"step": 3180000
},
{
"epoch": 0.95,
"learning_rate": 4.625860662023746e-05,
"loss": 2.1364,
"step": 3185000
},
{
"epoch": 0.95,
"eval_accuracy": 0.5243612166800363,
"eval_loss": 2.158203125,
"eval_runtime": 42.4027,
"eval_samples_per_second": 84.146,
"eval_steps_per_second": 10.518,
"step": 3185000
},
{
"epoch": 0.96,
"learning_rate": 4.618375928065071e-05,
"loss": 2.133,
"step": 3190000
},
{
"epoch": 0.96,
"eval_accuracy": 0.5243256008661766,
"eval_loss": 2.158203125,
"eval_runtime": 40.6817,
"eval_samples_per_second": 87.705,
"eval_steps_per_second": 10.963,
"step": 3190000
},
{
"epoch": 0.96,
"learning_rate": 4.6108941897973956e-05,
"loss": 2.1349,
"step": 3195000
},
{
"epoch": 0.96,
"eval_accuracy": 0.5245045018388719,
"eval_loss": 2.158203125,
"eval_runtime": 42.2213,
"eval_samples_per_second": 84.507,
"eval_steps_per_second": 10.563,
"step": 3195000
},
{
"epoch": 0.96,
"learning_rate": 4.60341245152972e-05,
"loss": 2.134,
"step": 3200000
},
{
"epoch": 0.96,
"eval_accuracy": 0.5245702541106129,
"eval_loss": 2.158203125,
"eval_runtime": 41.8771,
"eval_samples_per_second": 85.202,
"eval_steps_per_second": 10.65,
"step": 3200000
},
{
"epoch": 0.96,
"learning_rate": 4.5959277175710455e-05,
"loss": 2.1308,
"step": 3205000
},
{
"epoch": 0.96,
"eval_accuracy": 0.5249423023815473,
"eval_loss": 2.15625,
"eval_runtime": 42.7291,
"eval_samples_per_second": 83.503,
"eval_steps_per_second": 10.438,
"step": 3205000
},
{
"epoch": 0.96,
"learning_rate": 4.5884444814578715e-05,
"loss": 2.1302,
"step": 3210000
},
{
"epoch": 0.96,
"eval_accuracy": 0.5246628552266481,
"eval_loss": 2.15625,
"eval_runtime": 41.7473,
"eval_samples_per_second": 85.467,
"eval_steps_per_second": 10.683,
"step": 3210000
},
{
"epoch": 0.96,
"learning_rate": 4.580964241035695e-05,
"loss": 2.1302,
"step": 3215000
},
{
"epoch": 0.96,
"eval_accuracy": 0.5246708002928168,
"eval_loss": 2.15625,
"eval_runtime": 43.5638,
"eval_samples_per_second": 81.903,
"eval_steps_per_second": 10.238,
"step": 3215000
},
{
"epoch": 0.96,
"learning_rate": 4.5734825027680186e-05,
"loss": 2.1331,
"step": 3220000
},
{
"epoch": 0.96,
"eval_accuracy": 0.5247650452156455,
"eval_loss": 2.15625,
"eval_runtime": 42.8533,
"eval_samples_per_second": 83.261,
"eval_steps_per_second": 10.408,
"step": 3220000
},
{
"epoch": 0.97,
"learning_rate": 4.565999266654844e-05,
"loss": 2.1273,
"step": 3225000
},
{
"epoch": 0.97,
"eval_accuracy": 0.5247250459170031,
"eval_loss": 2.15625,
"eval_runtime": 42.4534,
"eval_samples_per_second": 84.045,
"eval_steps_per_second": 10.506,
"step": 3225000
},
{
"epoch": 0.97,
"learning_rate": 4.5585175283871685e-05,
"loss": 2.1286,
"step": 3230000
},
{
"epoch": 0.97,
"eval_accuracy": 0.5249765483564124,
"eval_loss": 2.15625,
"eval_runtime": 43.0626,
"eval_samples_per_second": 82.856,
"eval_steps_per_second": 10.357,
"step": 3230000
},
{
"epoch": 0.97,
"learning_rate": 4.551034292273994e-05,
"loss": 2.1282,
"step": 3235000
},
{
"epoch": 0.97,
"eval_accuracy": 0.525047232048534,
"eval_loss": 2.154296875,
"eval_runtime": 42.4424,
"eval_samples_per_second": 84.067,
"eval_steps_per_second": 10.508,
"step": 3235000
},
{
"epoch": 0.97,
"learning_rate": 4.543552554006317e-05,
"loss": 2.1309,
"step": 3240000
},
{
"epoch": 0.97,
"eval_accuracy": 0.5250951764133451,
"eval_loss": 2.154296875,
"eval_runtime": 44.2507,
"eval_samples_per_second": 80.631,
"eval_steps_per_second": 10.079,
"step": 3240000
},
{
"epoch": 0.97,
"learning_rate": 4.5360708157386415e-05,
"loss": 2.1295,
"step": 3245000
},
{
"epoch": 0.97,
"eval_accuracy": 0.5253592813715047,
"eval_loss": 2.154296875,
"eval_runtime": 44.2616,
"eval_samples_per_second": 80.612,
"eval_steps_per_second": 10.076,
"step": 3245000
},
{
"epoch": 0.97,
"learning_rate": 4.528589077470966e-05,
"loss": 2.1275,
"step": 3250000
},
{
"epoch": 0.97,
"eval_accuracy": 0.5253707880190593,
"eval_loss": 2.154296875,
"eval_runtime": 43.0103,
"eval_samples_per_second": 82.957,
"eval_steps_per_second": 10.37,
"step": 3250000
},
{
"epoch": 0.98,
"learning_rate": 4.5211058413577914e-05,
"loss": 2.133,
"step": 3255000
},
{
"epoch": 0.98,
"eval_accuracy": 0.5253924314751741,
"eval_loss": 2.154296875,
"eval_runtime": 41.4169,
"eval_samples_per_second": 86.148,
"eval_steps_per_second": 10.769,
"step": 3255000
},
{
"epoch": 0.98,
"learning_rate": 4.513622605244616e-05,
"loss": 2.1301,
"step": 3260000
},
{
"epoch": 0.98,
"eval_accuracy": 0.5251132582880739,
"eval_loss": 2.154296875,
"eval_runtime": 42.1862,
"eval_samples_per_second": 84.577,
"eval_steps_per_second": 10.572,
"step": 3260000
},
{
"epoch": 0.98,
"learning_rate": 4.5061408669769406e-05,
"loss": 2.1314,
"step": 3265000
},
{
"epoch": 0.98,
"eval_accuracy": 0.5253096931999001,
"eval_loss": 2.15234375,
"eval_runtime": 43.9727,
"eval_samples_per_second": 81.141,
"eval_steps_per_second": 10.143,
"step": 3265000
},
{
"epoch": 0.98,
"learning_rate": 4.498656133018267e-05,
"loss": 2.1258,
"step": 3270000
},
{
"epoch": 0.98,
"eval_accuracy": 0.5254751697504482,
"eval_loss": 2.15234375,
"eval_runtime": 42.0779,
"eval_samples_per_second": 84.795,
"eval_steps_per_second": 10.599,
"step": 3270000
},
{
"epoch": 0.98,
"learning_rate": 4.491172896905092e-05,
"loss": 2.1286,
"step": 3275000
},
{
"epoch": 0.98,
"eval_accuracy": 0.525354897886722,
"eval_loss": 2.15234375,
"eval_runtime": 41.8893,
"eval_samples_per_second": 85.177,
"eval_steps_per_second": 10.647,
"step": 3275000
},
{
"epoch": 0.98,
"learning_rate": 4.4836911586374165e-05,
"loss": 2.1267,
"step": 3280000
},
{
"epoch": 0.98,
"eval_accuracy": 0.5253622950172928,
"eval_loss": 2.15234375,
"eval_runtime": 42.0241,
"eval_samples_per_second": 84.904,
"eval_steps_per_second": 10.613,
"step": 3280000
},
{
"epoch": 0.98,
"learning_rate": 4.476207922524242e-05,
"loss": 2.13,
"step": 3285000
},
{
"epoch": 0.98,
"eval_accuracy": 0.5254332526772133,
"eval_loss": 2.15234375,
"eval_runtime": 43.4043,
"eval_samples_per_second": 82.204,
"eval_steps_per_second": 10.275,
"step": 3285000
},
{
"epoch": 0.99,
"learning_rate": 4.4687246864110663e-05,
"loss": 2.1284,
"step": 3290000
},
{
"epoch": 0.99,
"eval_accuracy": 0.5254976351099597,
"eval_loss": 2.15234375,
"eval_runtime": 43.4747,
"eval_samples_per_second": 82.071,
"eval_steps_per_second": 10.259,
"step": 3290000
},
{
"epoch": 0.99,
"learning_rate": 4.461242948143391e-05,
"loss": 2.1295,
"step": 3295000
},
{
"epoch": 0.99,
"eval_accuracy": 0.5254190063516695,
"eval_loss": 2.15234375,
"eval_runtime": 44.0831,
"eval_samples_per_second": 80.938,
"eval_steps_per_second": 10.117,
"step": 3295000
},
{
"epoch": 0.99,
"learning_rate": 4.453759712030216e-05,
"loss": 2.1241,
"step": 3300000
},
{
"epoch": 0.99,
"eval_accuracy": 0.5255573600901244,
"eval_loss": 2.15234375,
"eval_runtime": 43.4229,
"eval_samples_per_second": 82.169,
"eval_steps_per_second": 10.271,
"step": 3300000
},
{
"epoch": 0.99,
"learning_rate": 4.446276475917041e-05,
"loss": 2.1297,
"step": 3305000
},
{
"epoch": 0.99,
"eval_accuracy": 0.5257825616208374,
"eval_loss": 2.15234375,
"eval_runtime": 41.6675,
"eval_samples_per_second": 85.63,
"eval_steps_per_second": 10.704,
"step": 3305000
},
{
"epoch": 0.99,
"learning_rate": 4.4387947376493654e-05,
"loss": 2.126,
"step": 3310000
},
{
"epoch": 0.99,
"eval_accuracy": 0.5256425640755888,
"eval_loss": 2.150390625,
"eval_runtime": 43.5829,
"eval_samples_per_second": 81.867,
"eval_steps_per_second": 10.233,
"step": 3310000
},
{
"epoch": 0.99,
"learning_rate": 4.43131299938169e-05,
"loss": 2.1263,
"step": 3315000
},
{
"epoch": 0.99,
"eval_accuracy": 0.5255672229308856,
"eval_loss": 2.150390625,
"eval_runtime": 40.6138,
"eval_samples_per_second": 87.852,
"eval_steps_per_second": 10.981,
"step": 3315000
},
{
"epoch": 0.99,
"learning_rate": 4.423828265423016e-05,
"loss": 2.1273,
"step": 3320000
},
{
"epoch": 0.99,
"eval_accuracy": 0.525589962258196,
"eval_loss": 2.150390625,
"eval_runtime": 41.1643,
"eval_samples_per_second": 86.677,
"eval_steps_per_second": 10.835,
"step": 3320000
},
{
"epoch": 1.0,
"learning_rate": 4.4163450293098406e-05,
"loss": 2.1214,
"step": 3325000
},
{
"epoch": 1.0,
"eval_accuracy": 0.5255433877323795,
"eval_loss": 2.150390625,
"eval_runtime": 40.8274,
"eval_samples_per_second": 87.392,
"eval_steps_per_second": 10.924,
"step": 3325000
},
{
"epoch": 1.0,
"learning_rate": 4.408863291042165e-05,
"loss": 2.1275,
"step": 3330000
},
{
"epoch": 1.0,
"eval_accuracy": 0.5255872225802068,
"eval_loss": 2.150390625,
"eval_runtime": 42.63,
"eval_samples_per_second": 83.697,
"eval_steps_per_second": 10.462,
"step": 3330000
},
{
"epoch": 1.0,
"learning_rate": 4.40138155277449e-05,
"loss": 2.1227,
"step": 3335000
},
{
"epoch": 1.0,
"eval_accuracy": 0.5258348894704312,
"eval_loss": 2.150390625,
"eval_runtime": 42.4569,
"eval_samples_per_second": 84.038,
"eval_steps_per_second": 10.505,
"step": 3335000
},
{
"epoch": 1.0,
"step": 3338128,
"total_flos": 1.0872066371139498e+21,
"train_loss": 0.2576859601399347,
"train_runtime": 150388.7494,
"train_samples_per_second": 177.573,
"train_steps_per_second": 22.197
}
],
"max_steps": 3338128,
"num_train_epochs": 1,
"total_flos": 1.0872066371139498e+21,
"trial_name": null,
"trial_params": null
}