ocr30000 / trainer_state.json
anaghasavit's picture
End of training
05bf82b
raw
history blame contribute delete
No virus
202 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"global_step": 3320,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 10.8622,
"step": 2
},
{
"epoch": 0.0,
"learning_rate": 4.996987951807229e-05,
"loss": 10.2046,
"step": 4
},
{
"epoch": 0.0,
"learning_rate": 4.993975903614458e-05,
"loss": 4.8675,
"step": 6
},
{
"epoch": 0.0,
"learning_rate": 4.990963855421687e-05,
"loss": 3.3569,
"step": 8
},
{
"epoch": 0.0,
"learning_rate": 4.987951807228916e-05,
"loss": 2.1285,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 4.984939759036145e-05,
"loss": 1.9893,
"step": 12
},
{
"epoch": 0.0,
"learning_rate": 4.981927710843374e-05,
"loss": 2.2913,
"step": 14
},
{
"epoch": 0.0,
"learning_rate": 4.978915662650603e-05,
"loss": 1.6324,
"step": 16
},
{
"epoch": 0.01,
"learning_rate": 4.975903614457831e-05,
"loss": 2.1257,
"step": 18
},
{
"epoch": 0.01,
"learning_rate": 4.9728915662650604e-05,
"loss": 2.9242,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 4.9698795180722894e-05,
"loss": 2.0562,
"step": 22
},
{
"epoch": 0.01,
"learning_rate": 4.966867469879518e-05,
"loss": 1.5064,
"step": 24
},
{
"epoch": 0.01,
"learning_rate": 4.9638554216867475e-05,
"loss": 1.8353,
"step": 26
},
{
"epoch": 0.01,
"learning_rate": 4.9608433734939766e-05,
"loss": 3.0208,
"step": 28
},
{
"epoch": 0.01,
"learning_rate": 4.957831325301205e-05,
"loss": 1.4392,
"step": 30
},
{
"epoch": 0.01,
"learning_rate": 4.954819277108434e-05,
"loss": 1.3436,
"step": 32
},
{
"epoch": 0.01,
"learning_rate": 4.951807228915663e-05,
"loss": 1.4729,
"step": 34
},
{
"epoch": 0.01,
"learning_rate": 4.9487951807228915e-05,
"loss": 1.412,
"step": 36
},
{
"epoch": 0.01,
"learning_rate": 4.9457831325301205e-05,
"loss": 1.3831,
"step": 38
},
{
"epoch": 0.01,
"learning_rate": 4.9427710843373496e-05,
"loss": 1.8816,
"step": 40
},
{
"epoch": 0.01,
"learning_rate": 4.9397590361445786e-05,
"loss": 1.6135,
"step": 42
},
{
"epoch": 0.01,
"learning_rate": 4.936746987951808e-05,
"loss": 2.0372,
"step": 44
},
{
"epoch": 0.01,
"learning_rate": 4.933734939759037e-05,
"loss": 2.0392,
"step": 46
},
{
"epoch": 0.01,
"learning_rate": 4.930722891566265e-05,
"loss": 1.7321,
"step": 48
},
{
"epoch": 0.02,
"learning_rate": 4.927710843373494e-05,
"loss": 1.9968,
"step": 50
},
{
"epoch": 0.02,
"learning_rate": 4.924698795180723e-05,
"loss": 1.891,
"step": 52
},
{
"epoch": 0.02,
"learning_rate": 4.9216867469879516e-05,
"loss": 1.7418,
"step": 54
},
{
"epoch": 0.02,
"learning_rate": 4.9186746987951807e-05,
"loss": 2.4759,
"step": 56
},
{
"epoch": 0.02,
"learning_rate": 4.917168674698795e-05,
"loss": 2.9591,
"step": 58
},
{
"epoch": 0.02,
"learning_rate": 4.914156626506025e-05,
"loss": 1.5799,
"step": 60
},
{
"epoch": 0.02,
"learning_rate": 4.911144578313253e-05,
"loss": 1.9662,
"step": 62
},
{
"epoch": 0.02,
"learning_rate": 4.9081325301204823e-05,
"loss": 1.8398,
"step": 64
},
{
"epoch": 0.02,
"learning_rate": 4.9051204819277114e-05,
"loss": 1.383,
"step": 66
},
{
"epoch": 0.02,
"learning_rate": 4.90210843373494e-05,
"loss": 1.2026,
"step": 68
},
{
"epoch": 0.02,
"learning_rate": 4.899096385542169e-05,
"loss": 2.0303,
"step": 70
},
{
"epoch": 0.02,
"learning_rate": 4.896084337349398e-05,
"loss": 2.1549,
"step": 72
},
{
"epoch": 0.02,
"learning_rate": 4.893072289156627e-05,
"loss": 1.6887,
"step": 74
},
{
"epoch": 0.02,
"learning_rate": 4.890060240963856e-05,
"loss": 2.2655,
"step": 76
},
{
"epoch": 0.02,
"learning_rate": 4.887048192771085e-05,
"loss": 1.4416,
"step": 78
},
{
"epoch": 0.02,
"learning_rate": 4.8840361445783134e-05,
"loss": 1.0641,
"step": 80
},
{
"epoch": 0.02,
"learning_rate": 4.8810240963855425e-05,
"loss": 1.9305,
"step": 82
},
{
"epoch": 0.03,
"learning_rate": 4.8780120481927715e-05,
"loss": 1.6745,
"step": 84
},
{
"epoch": 0.03,
"learning_rate": 4.875e-05,
"loss": 1.1574,
"step": 86
},
{
"epoch": 0.03,
"learning_rate": 4.871987951807229e-05,
"loss": 1.0798,
"step": 88
},
{
"epoch": 0.03,
"learning_rate": 4.868975903614458e-05,
"loss": 1.3416,
"step": 90
},
{
"epoch": 0.03,
"learning_rate": 4.865963855421687e-05,
"loss": 1.611,
"step": 92
},
{
"epoch": 0.03,
"learning_rate": 4.862951807228916e-05,
"loss": 1.6071,
"step": 94
},
{
"epoch": 0.03,
"learning_rate": 4.859939759036145e-05,
"loss": 1.1989,
"step": 96
},
{
"epoch": 0.03,
"learning_rate": 4.8569277108433736e-05,
"loss": 1.3987,
"step": 98
},
{
"epoch": 0.03,
"learning_rate": 4.8539156626506026e-05,
"loss": 1.4668,
"step": 100
},
{
"epoch": 0.03,
"learning_rate": 4.850903614457832e-05,
"loss": 1.5671,
"step": 102
},
{
"epoch": 0.03,
"learning_rate": 4.84789156626506e-05,
"loss": 1.0915,
"step": 104
},
{
"epoch": 0.03,
"learning_rate": 4.84487951807229e-05,
"loss": 1.6847,
"step": 106
},
{
"epoch": 0.03,
"learning_rate": 4.841867469879519e-05,
"loss": 1.4342,
"step": 108
},
{
"epoch": 0.03,
"learning_rate": 4.838855421686747e-05,
"loss": 2.3332,
"step": 110
},
{
"epoch": 0.03,
"learning_rate": 4.835843373493976e-05,
"loss": 1.7336,
"step": 112
},
{
"epoch": 0.03,
"learning_rate": 4.832831325301205e-05,
"loss": 1.8291,
"step": 114
},
{
"epoch": 0.03,
"learning_rate": 4.829819277108434e-05,
"loss": 1.1642,
"step": 116
},
{
"epoch": 0.04,
"learning_rate": 4.826807228915663e-05,
"loss": 2.2195,
"step": 118
},
{
"epoch": 0.04,
"learning_rate": 4.823795180722892e-05,
"loss": 1.2966,
"step": 120
},
{
"epoch": 0.04,
"learning_rate": 4.820783132530121e-05,
"loss": 1.4346,
"step": 122
},
{
"epoch": 0.04,
"learning_rate": 4.81777108433735e-05,
"loss": 1.2752,
"step": 124
},
{
"epoch": 0.04,
"learning_rate": 4.814759036144579e-05,
"loss": 1.4786,
"step": 126
},
{
"epoch": 0.04,
"learning_rate": 4.8117469879518074e-05,
"loss": 1.8346,
"step": 128
},
{
"epoch": 0.04,
"learning_rate": 4.8087349397590364e-05,
"loss": 1.7827,
"step": 130
},
{
"epoch": 0.04,
"learning_rate": 4.8057228915662655e-05,
"loss": 1.2065,
"step": 132
},
{
"epoch": 0.04,
"learning_rate": 4.802710843373494e-05,
"loss": 1.4948,
"step": 134
},
{
"epoch": 0.04,
"learning_rate": 4.799698795180723e-05,
"loss": 1.3472,
"step": 136
},
{
"epoch": 0.04,
"learning_rate": 4.796686746987952e-05,
"loss": 1.1187,
"step": 138
},
{
"epoch": 0.04,
"learning_rate": 4.793674698795181e-05,
"loss": 1.2414,
"step": 140
},
{
"epoch": 0.04,
"learning_rate": 4.79066265060241e-05,
"loss": 1.7233,
"step": 142
},
{
"epoch": 0.04,
"learning_rate": 4.787650602409639e-05,
"loss": 1.5377,
"step": 144
},
{
"epoch": 0.04,
"learning_rate": 4.7846385542168675e-05,
"loss": 0.9355,
"step": 146
},
{
"epoch": 0.04,
"learning_rate": 4.7816265060240965e-05,
"loss": 1.8284,
"step": 148
},
{
"epoch": 0.05,
"learning_rate": 4.7786144578313256e-05,
"loss": 1.7861,
"step": 150
},
{
"epoch": 0.05,
"learning_rate": 4.7756024096385547e-05,
"loss": 1.4756,
"step": 152
},
{
"epoch": 0.05,
"learning_rate": 4.772590361445784e-05,
"loss": 1.4631,
"step": 154
},
{
"epoch": 0.05,
"learning_rate": 4.769578313253013e-05,
"loss": 1.9937,
"step": 156
},
{
"epoch": 0.05,
"learning_rate": 4.766566265060241e-05,
"loss": 2.1101,
"step": 158
},
{
"epoch": 0.05,
"learning_rate": 4.76355421686747e-05,
"loss": 1.6746,
"step": 160
},
{
"epoch": 0.05,
"learning_rate": 4.760542168674699e-05,
"loss": 1.0945,
"step": 162
},
{
"epoch": 0.05,
"learning_rate": 4.7575301204819276e-05,
"loss": 1.7699,
"step": 164
},
{
"epoch": 0.05,
"learning_rate": 4.754518072289157e-05,
"loss": 1.3642,
"step": 166
},
{
"epoch": 0.05,
"learning_rate": 4.751506024096386e-05,
"loss": 1.3314,
"step": 168
},
{
"epoch": 0.05,
"learning_rate": 4.748493975903615e-05,
"loss": 1.2199,
"step": 170
},
{
"epoch": 0.05,
"learning_rate": 4.745481927710844e-05,
"loss": 1.4667,
"step": 172
},
{
"epoch": 0.05,
"learning_rate": 4.742469879518073e-05,
"loss": 1.0051,
"step": 174
},
{
"epoch": 0.05,
"learning_rate": 4.739457831325301e-05,
"loss": 1.7375,
"step": 176
},
{
"epoch": 0.05,
"learning_rate": 4.73644578313253e-05,
"loss": 1.1558,
"step": 178
},
{
"epoch": 0.05,
"learning_rate": 4.7334337349397594e-05,
"loss": 1.534,
"step": 180
},
{
"epoch": 0.05,
"learning_rate": 4.730421686746988e-05,
"loss": 1.7,
"step": 182
},
{
"epoch": 0.06,
"learning_rate": 4.727409638554217e-05,
"loss": 1.4734,
"step": 184
},
{
"epoch": 0.06,
"learning_rate": 4.7243975903614465e-05,
"loss": 1.0422,
"step": 186
},
{
"epoch": 0.06,
"learning_rate": 4.721385542168675e-05,
"loss": 1.788,
"step": 188
},
{
"epoch": 0.06,
"learning_rate": 4.718373493975904e-05,
"loss": 1.687,
"step": 190
},
{
"epoch": 0.06,
"learning_rate": 4.715361445783133e-05,
"loss": 1.2375,
"step": 192
},
{
"epoch": 0.06,
"learning_rate": 4.7123493975903614e-05,
"loss": 1.1989,
"step": 194
},
{
"epoch": 0.06,
"learning_rate": 4.7093373493975905e-05,
"loss": 1.3956,
"step": 196
},
{
"epoch": 0.06,
"learning_rate": 4.7063253012048195e-05,
"loss": 2.2677,
"step": 198
},
{
"epoch": 0.06,
"learning_rate": 4.7033132530120486e-05,
"loss": 1.9697,
"step": 200
},
{
"epoch": 0.06,
"learning_rate": 4.7003012048192776e-05,
"loss": 1.0789,
"step": 202
},
{
"epoch": 0.06,
"learning_rate": 4.697289156626507e-05,
"loss": 0.9775,
"step": 204
},
{
"epoch": 0.06,
"learning_rate": 4.694277108433735e-05,
"loss": 1.4274,
"step": 206
},
{
"epoch": 0.06,
"learning_rate": 4.691265060240964e-05,
"loss": 0.8563,
"step": 208
},
{
"epoch": 0.06,
"learning_rate": 4.688253012048193e-05,
"loss": 1.4798,
"step": 210
},
{
"epoch": 0.06,
"learning_rate": 4.6852409638554215e-05,
"loss": 1.9549,
"step": 212
},
{
"epoch": 0.06,
"learning_rate": 4.6822289156626506e-05,
"loss": 1.3424,
"step": 214
},
{
"epoch": 0.07,
"learning_rate": 4.6792168674698797e-05,
"loss": 1.9411,
"step": 216
},
{
"epoch": 0.07,
"learning_rate": 4.676204819277109e-05,
"loss": 1.2211,
"step": 218
},
{
"epoch": 0.07,
"learning_rate": 4.673192771084338e-05,
"loss": 1.2069,
"step": 220
},
{
"epoch": 0.07,
"learning_rate": 4.670180722891567e-05,
"loss": 1.7028,
"step": 222
},
{
"epoch": 0.07,
"learning_rate": 4.667168674698795e-05,
"loss": 1.2742,
"step": 224
},
{
"epoch": 0.07,
"learning_rate": 4.664156626506024e-05,
"loss": 1.2481,
"step": 226
},
{
"epoch": 0.07,
"learning_rate": 4.661144578313253e-05,
"loss": 1.4295,
"step": 228
},
{
"epoch": 0.07,
"learning_rate": 4.658132530120482e-05,
"loss": 1.5196,
"step": 230
},
{
"epoch": 0.07,
"learning_rate": 4.6551204819277114e-05,
"loss": 1.6413,
"step": 232
},
{
"epoch": 0.07,
"learning_rate": 4.6521084337349405e-05,
"loss": 1.6045,
"step": 234
},
{
"epoch": 0.07,
"learning_rate": 4.649096385542169e-05,
"loss": 1.244,
"step": 236
},
{
"epoch": 0.07,
"learning_rate": 4.646084337349398e-05,
"loss": 1.5735,
"step": 238
},
{
"epoch": 0.07,
"learning_rate": 4.643072289156627e-05,
"loss": 2.3285,
"step": 240
},
{
"epoch": 0.07,
"learning_rate": 4.640060240963855e-05,
"loss": 1.3264,
"step": 242
},
{
"epoch": 0.07,
"learning_rate": 4.6370481927710844e-05,
"loss": 2.2258,
"step": 244
},
{
"epoch": 0.07,
"learning_rate": 4.6340361445783134e-05,
"loss": 1.7811,
"step": 246
},
{
"epoch": 0.07,
"learning_rate": 4.6310240963855425e-05,
"loss": 1.2091,
"step": 248
},
{
"epoch": 0.08,
"learning_rate": 4.6280120481927716e-05,
"loss": 2.5214,
"step": 250
},
{
"epoch": 0.08,
"learning_rate": 4.6250000000000006e-05,
"loss": 1.2981,
"step": 252
},
{
"epoch": 0.08,
"learning_rate": 4.621987951807229e-05,
"loss": 1.0844,
"step": 254
},
{
"epoch": 0.08,
"learning_rate": 4.618975903614458e-05,
"loss": 1.2004,
"step": 256
},
{
"epoch": 0.08,
"learning_rate": 4.615963855421687e-05,
"loss": 1.5083,
"step": 258
},
{
"epoch": 0.08,
"learning_rate": 4.6129518072289155e-05,
"loss": 1.3797,
"step": 260
},
{
"epoch": 0.08,
"learning_rate": 4.6099397590361445e-05,
"loss": 1.5979,
"step": 262
},
{
"epoch": 0.08,
"learning_rate": 4.606927710843374e-05,
"loss": 1.3333,
"step": 264
},
{
"epoch": 0.08,
"learning_rate": 4.6039156626506026e-05,
"loss": 0.8672,
"step": 266
},
{
"epoch": 0.08,
"learning_rate": 4.600903614457832e-05,
"loss": 1.4402,
"step": 268
},
{
"epoch": 0.08,
"learning_rate": 4.597891566265061e-05,
"loss": 1.0408,
"step": 270
},
{
"epoch": 0.08,
"learning_rate": 4.594879518072289e-05,
"loss": 0.9593,
"step": 272
},
{
"epoch": 0.08,
"learning_rate": 4.591867469879518e-05,
"loss": 1.0505,
"step": 274
},
{
"epoch": 0.08,
"learning_rate": 4.588855421686747e-05,
"loss": 1.1311,
"step": 276
},
{
"epoch": 0.08,
"learning_rate": 4.585843373493976e-05,
"loss": 1.2046,
"step": 278
},
{
"epoch": 0.08,
"learning_rate": 4.5828313253012053e-05,
"loss": 1.7794,
"step": 280
},
{
"epoch": 0.08,
"learning_rate": 4.5798192771084344e-05,
"loss": 0.9784,
"step": 282
},
{
"epoch": 0.09,
"learning_rate": 4.576807228915663e-05,
"loss": 1.0675,
"step": 284
},
{
"epoch": 0.09,
"learning_rate": 4.573795180722892e-05,
"loss": 1.297,
"step": 286
},
{
"epoch": 0.09,
"learning_rate": 4.570783132530121e-05,
"loss": 1.6511,
"step": 288
},
{
"epoch": 0.09,
"learning_rate": 4.567771084337349e-05,
"loss": 1.448,
"step": 290
},
{
"epoch": 0.09,
"learning_rate": 4.564759036144578e-05,
"loss": 1.1691,
"step": 292
},
{
"epoch": 0.09,
"learning_rate": 4.5617469879518074e-05,
"loss": 1.0792,
"step": 294
},
{
"epoch": 0.09,
"learning_rate": 4.5587349397590364e-05,
"loss": 1.0954,
"step": 296
},
{
"epoch": 0.09,
"learning_rate": 4.5557228915662655e-05,
"loss": 0.9702,
"step": 298
},
{
"epoch": 0.09,
"learning_rate": 4.5527108433734945e-05,
"loss": 2.2475,
"step": 300
},
{
"epoch": 0.09,
"learning_rate": 4.549698795180723e-05,
"loss": 1.3065,
"step": 302
},
{
"epoch": 0.09,
"learning_rate": 4.546686746987952e-05,
"loss": 1.086,
"step": 304
},
{
"epoch": 0.09,
"learning_rate": 4.543674698795181e-05,
"loss": 1.7654,
"step": 306
},
{
"epoch": 0.09,
"learning_rate": 4.5406626506024094e-05,
"loss": 1.5944,
"step": 308
},
{
"epoch": 0.09,
"learning_rate": 4.537650602409639e-05,
"loss": 0.98,
"step": 310
},
{
"epoch": 0.09,
"learning_rate": 4.534638554216868e-05,
"loss": 1.4231,
"step": 312
},
{
"epoch": 0.09,
"learning_rate": 4.5316265060240966e-05,
"loss": 1.0698,
"step": 314
},
{
"epoch": 0.1,
"learning_rate": 4.5286144578313256e-05,
"loss": 2.256,
"step": 316
},
{
"epoch": 0.1,
"learning_rate": 4.525602409638555e-05,
"loss": 2.0407,
"step": 318
},
{
"epoch": 0.1,
"learning_rate": 4.522590361445783e-05,
"loss": 1.2693,
"step": 320
},
{
"epoch": 0.1,
"learning_rate": 4.519578313253012e-05,
"loss": 0.9351,
"step": 322
},
{
"epoch": 0.1,
"learning_rate": 4.516566265060241e-05,
"loss": 2.1723,
"step": 324
},
{
"epoch": 0.1,
"learning_rate": 4.51355421686747e-05,
"loss": 1.093,
"step": 326
},
{
"epoch": 0.1,
"learning_rate": 4.510542168674699e-05,
"loss": 1.1964,
"step": 328
},
{
"epoch": 0.1,
"learning_rate": 4.507530120481928e-05,
"loss": 0.7608,
"step": 330
},
{
"epoch": 0.1,
"learning_rate": 4.504518072289157e-05,
"loss": 1.5825,
"step": 332
},
{
"epoch": 0.1,
"learning_rate": 4.501506024096386e-05,
"loss": 1.029,
"step": 334
},
{
"epoch": 0.1,
"learning_rate": 4.498493975903615e-05,
"loss": 1.0301,
"step": 336
},
{
"epoch": 0.1,
"learning_rate": 4.495481927710843e-05,
"loss": 1.7574,
"step": 338
},
{
"epoch": 0.1,
"learning_rate": 4.492469879518072e-05,
"loss": 1.4194,
"step": 340
},
{
"epoch": 0.1,
"learning_rate": 4.489457831325301e-05,
"loss": 2.2513,
"step": 342
},
{
"epoch": 0.1,
"learning_rate": 4.4864457831325304e-05,
"loss": 1.3601,
"step": 344
},
{
"epoch": 0.1,
"learning_rate": 4.4834337349397594e-05,
"loss": 1.7566,
"step": 346
},
{
"epoch": 0.1,
"learning_rate": 4.4804216867469885e-05,
"loss": 0.928,
"step": 348
},
{
"epoch": 0.11,
"learning_rate": 4.477409638554217e-05,
"loss": 2.4179,
"step": 350
},
{
"epoch": 0.11,
"learning_rate": 4.474397590361446e-05,
"loss": 1.1645,
"step": 352
},
{
"epoch": 0.11,
"learning_rate": 4.471385542168675e-05,
"loss": 0.9061,
"step": 354
},
{
"epoch": 0.11,
"learning_rate": 4.468373493975903e-05,
"loss": 0.981,
"step": 356
},
{
"epoch": 0.11,
"learning_rate": 4.465361445783133e-05,
"loss": 2.0295,
"step": 358
},
{
"epoch": 0.11,
"learning_rate": 4.462349397590362e-05,
"loss": 1.4614,
"step": 360
},
{
"epoch": 0.11,
"learning_rate": 4.4593373493975905e-05,
"loss": 0.8199,
"step": 362
},
{
"epoch": 0.11,
"learning_rate": 4.4563253012048195e-05,
"loss": 0.6959,
"step": 364
},
{
"epoch": 0.11,
"learning_rate": 4.4533132530120486e-05,
"loss": 1.3373,
"step": 366
},
{
"epoch": 0.11,
"learning_rate": 4.450301204819277e-05,
"loss": 1.1969,
"step": 368
},
{
"epoch": 0.11,
"learning_rate": 4.447289156626506e-05,
"loss": 1.9879,
"step": 370
},
{
"epoch": 0.11,
"learning_rate": 4.444277108433735e-05,
"loss": 1.5001,
"step": 372
},
{
"epoch": 0.11,
"learning_rate": 4.441265060240964e-05,
"loss": 1.3834,
"step": 374
},
{
"epoch": 0.11,
"learning_rate": 4.438253012048193e-05,
"loss": 1.8107,
"step": 376
},
{
"epoch": 0.11,
"learning_rate": 4.435240963855422e-05,
"loss": 1.369,
"step": 378
},
{
"epoch": 0.11,
"learning_rate": 4.4322289156626506e-05,
"loss": 1.0861,
"step": 380
},
{
"epoch": 0.12,
"learning_rate": 4.42921686746988e-05,
"loss": 1.2826,
"step": 382
},
{
"epoch": 0.12,
"learning_rate": 4.426204819277109e-05,
"loss": 1.2292,
"step": 384
},
{
"epoch": 0.12,
"learning_rate": 4.423192771084337e-05,
"loss": 1.1931,
"step": 386
},
{
"epoch": 0.12,
"learning_rate": 4.420180722891566e-05,
"loss": 1.2869,
"step": 388
},
{
"epoch": 0.12,
"learning_rate": 4.417168674698796e-05,
"loss": 1.8496,
"step": 390
},
{
"epoch": 0.12,
"learning_rate": 4.414156626506024e-05,
"loss": 0.9289,
"step": 392
},
{
"epoch": 0.12,
"learning_rate": 4.411144578313253e-05,
"loss": 1.0665,
"step": 394
},
{
"epoch": 0.12,
"learning_rate": 4.4081325301204824e-05,
"loss": 1.147,
"step": 396
},
{
"epoch": 0.12,
"learning_rate": 4.405120481927711e-05,
"loss": 1.1508,
"step": 398
},
{
"epoch": 0.12,
"learning_rate": 4.40210843373494e-05,
"loss": 1.0054,
"step": 400
},
{
"epoch": 0.12,
"learning_rate": 4.399096385542169e-05,
"loss": 1.4884,
"step": 402
},
{
"epoch": 0.12,
"learning_rate": 4.396084337349398e-05,
"loss": 1.3413,
"step": 404
},
{
"epoch": 0.12,
"learning_rate": 4.393072289156627e-05,
"loss": 1.0413,
"step": 406
},
{
"epoch": 0.12,
"learning_rate": 4.390060240963856e-05,
"loss": 1.1186,
"step": 408
},
{
"epoch": 0.12,
"learning_rate": 4.3870481927710844e-05,
"loss": 1.3527,
"step": 410
},
{
"epoch": 0.12,
"learning_rate": 4.3840361445783135e-05,
"loss": 1.653,
"step": 412
},
{
"epoch": 0.12,
"learning_rate": 4.3810240963855425e-05,
"loss": 1.3227,
"step": 414
},
{
"epoch": 0.13,
"learning_rate": 4.378012048192771e-05,
"loss": 0.8169,
"step": 416
},
{
"epoch": 0.13,
"learning_rate": 4.375e-05,
"loss": 1.0423,
"step": 418
},
{
"epoch": 0.13,
"learning_rate": 4.371987951807229e-05,
"loss": 1.2663,
"step": 420
},
{
"epoch": 0.13,
"learning_rate": 4.368975903614458e-05,
"loss": 1.1048,
"step": 422
},
{
"epoch": 0.13,
"learning_rate": 4.365963855421687e-05,
"loss": 1.1126,
"step": 424
},
{
"epoch": 0.13,
"learning_rate": 4.362951807228916e-05,
"loss": 0.6466,
"step": 426
},
{
"epoch": 0.13,
"learning_rate": 4.3599397590361446e-05,
"loss": 1.1414,
"step": 428
},
{
"epoch": 0.13,
"learning_rate": 4.3569277108433736e-05,
"loss": 1.1685,
"step": 430
},
{
"epoch": 0.13,
"learning_rate": 4.3539156626506027e-05,
"loss": 1.3119,
"step": 432
},
{
"epoch": 0.13,
"learning_rate": 4.350903614457831e-05,
"loss": 0.9607,
"step": 434
},
{
"epoch": 0.13,
"learning_rate": 4.347891566265061e-05,
"loss": 1.3348,
"step": 436
},
{
"epoch": 0.13,
"learning_rate": 4.34487951807229e-05,
"loss": 0.8338,
"step": 438
},
{
"epoch": 0.13,
"learning_rate": 4.341867469879518e-05,
"loss": 1.042,
"step": 440
},
{
"epoch": 0.13,
"learning_rate": 4.338855421686747e-05,
"loss": 1.6587,
"step": 442
},
{
"epoch": 0.13,
"learning_rate": 4.335843373493976e-05,
"loss": 0.9719,
"step": 444
},
{
"epoch": 0.13,
"learning_rate": 4.332831325301205e-05,
"loss": 0.9681,
"step": 446
},
{
"epoch": 0.13,
"learning_rate": 4.329819277108434e-05,
"loss": 1.1285,
"step": 448
},
{
"epoch": 0.14,
"learning_rate": 4.326807228915663e-05,
"loss": 0.9191,
"step": 450
},
{
"epoch": 0.14,
"learning_rate": 4.323795180722892e-05,
"loss": 0.9356,
"step": 452
},
{
"epoch": 0.14,
"learning_rate": 4.320783132530121e-05,
"loss": 0.9351,
"step": 454
},
{
"epoch": 0.14,
"learning_rate": 4.31777108433735e-05,
"loss": 0.9589,
"step": 456
},
{
"epoch": 0.14,
"learning_rate": 4.3147590361445783e-05,
"loss": 0.9536,
"step": 458
},
{
"epoch": 0.14,
"learning_rate": 4.3117469879518074e-05,
"loss": 1.0213,
"step": 460
},
{
"epoch": 0.14,
"learning_rate": 4.3087349397590364e-05,
"loss": 1.07,
"step": 462
},
{
"epoch": 0.14,
"learning_rate": 4.305722891566265e-05,
"loss": 1.257,
"step": 464
},
{
"epoch": 0.14,
"learning_rate": 4.302710843373494e-05,
"loss": 1.3276,
"step": 466
},
{
"epoch": 0.14,
"learning_rate": 4.299698795180723e-05,
"loss": 0.8055,
"step": 468
},
{
"epoch": 0.14,
"learning_rate": 4.296686746987952e-05,
"loss": 1.287,
"step": 470
},
{
"epoch": 0.14,
"learning_rate": 4.293674698795181e-05,
"loss": 1.2929,
"step": 472
},
{
"epoch": 0.14,
"learning_rate": 4.29066265060241e-05,
"loss": 0.8388,
"step": 474
},
{
"epoch": 0.14,
"learning_rate": 4.2876506024096385e-05,
"loss": 1.1582,
"step": 476
},
{
"epoch": 0.14,
"learning_rate": 4.2846385542168675e-05,
"loss": 1.2938,
"step": 478
},
{
"epoch": 0.14,
"learning_rate": 4.2816265060240966e-05,
"loss": 0.904,
"step": 480
},
{
"epoch": 0.15,
"learning_rate": 4.2786144578313256e-05,
"loss": 0.8177,
"step": 482
},
{
"epoch": 0.15,
"learning_rate": 4.275602409638555e-05,
"loss": 1.2093,
"step": 484
},
{
"epoch": 0.15,
"learning_rate": 4.272590361445784e-05,
"loss": 0.878,
"step": 486
},
{
"epoch": 0.15,
"learning_rate": 4.269578313253012e-05,
"loss": 1.0923,
"step": 488
},
{
"epoch": 0.15,
"learning_rate": 4.266566265060241e-05,
"loss": 1.8217,
"step": 490
},
{
"epoch": 0.15,
"learning_rate": 4.26355421686747e-05,
"loss": 0.6914,
"step": 492
},
{
"epoch": 0.15,
"learning_rate": 4.2605421686746986e-05,
"loss": 0.9326,
"step": 494
},
{
"epoch": 0.15,
"learning_rate": 4.257530120481928e-05,
"loss": 1.0016,
"step": 496
},
{
"epoch": 0.15,
"learning_rate": 4.254518072289157e-05,
"loss": 1.1888,
"step": 498
},
{
"epoch": 0.15,
"learning_rate": 4.251506024096386e-05,
"loss": 0.7089,
"step": 500
},
{
"epoch": 0.15,
"learning_rate": 4.248493975903615e-05,
"loss": 1.0932,
"step": 502
},
{
"epoch": 0.15,
"learning_rate": 4.245481927710844e-05,
"loss": 1.499,
"step": 504
},
{
"epoch": 0.15,
"learning_rate": 4.242469879518072e-05,
"loss": 0.9242,
"step": 506
},
{
"epoch": 0.15,
"learning_rate": 4.239457831325301e-05,
"loss": 1.035,
"step": 508
},
{
"epoch": 0.15,
"learning_rate": 4.2364457831325304e-05,
"loss": 1.1617,
"step": 510
},
{
"epoch": 0.15,
"learning_rate": 4.233433734939759e-05,
"loss": 1.3718,
"step": 512
},
{
"epoch": 0.15,
"learning_rate": 4.230421686746988e-05,
"loss": 1.0238,
"step": 514
},
{
"epoch": 0.16,
"learning_rate": 4.2274096385542175e-05,
"loss": 1.2305,
"step": 516
},
{
"epoch": 0.16,
"learning_rate": 4.224397590361446e-05,
"loss": 1.5014,
"step": 518
},
{
"epoch": 0.16,
"learning_rate": 4.221385542168675e-05,
"loss": 1.2938,
"step": 520
},
{
"epoch": 0.16,
"learning_rate": 4.218373493975904e-05,
"loss": 1.4441,
"step": 522
},
{
"epoch": 0.16,
"learning_rate": 4.2153614457831324e-05,
"loss": 1.053,
"step": 524
},
{
"epoch": 0.16,
"learning_rate": 4.2123493975903615e-05,
"loss": 1.1499,
"step": 526
},
{
"epoch": 0.16,
"learning_rate": 4.2093373493975905e-05,
"loss": 2.301,
"step": 528
},
{
"epoch": 0.16,
"learning_rate": 4.2063253012048196e-05,
"loss": 1.104,
"step": 530
},
{
"epoch": 0.16,
"learning_rate": 4.2033132530120486e-05,
"loss": 2.0423,
"step": 532
},
{
"epoch": 0.16,
"learning_rate": 4.200301204819278e-05,
"loss": 1.6999,
"step": 534
},
{
"epoch": 0.16,
"learning_rate": 4.197289156626506e-05,
"loss": 1.2298,
"step": 536
},
{
"epoch": 0.16,
"learning_rate": 4.194277108433735e-05,
"loss": 1.1053,
"step": 538
},
{
"epoch": 0.16,
"learning_rate": 4.191265060240964e-05,
"loss": 1.376,
"step": 540
},
{
"epoch": 0.16,
"learning_rate": 4.1882530120481925e-05,
"loss": 0.9462,
"step": 542
},
{
"epoch": 0.16,
"learning_rate": 4.1852409638554216e-05,
"loss": 0.9247,
"step": 544
},
{
"epoch": 0.16,
"learning_rate": 4.1822289156626506e-05,
"loss": 1.494,
"step": 546
},
{
"epoch": 0.17,
"learning_rate": 4.17921686746988e-05,
"loss": 1.0042,
"step": 548
},
{
"epoch": 0.17,
"learning_rate": 4.176204819277109e-05,
"loss": 1.1377,
"step": 550
},
{
"epoch": 0.17,
"learning_rate": 4.173192771084338e-05,
"loss": 0.84,
"step": 552
},
{
"epoch": 0.17,
"learning_rate": 4.170180722891566e-05,
"loss": 0.9536,
"step": 554
},
{
"epoch": 0.17,
"learning_rate": 4.167168674698795e-05,
"loss": 0.9771,
"step": 556
},
{
"epoch": 0.17,
"learning_rate": 4.164156626506024e-05,
"loss": 1.0907,
"step": 558
},
{
"epoch": 0.17,
"learning_rate": 4.161144578313253e-05,
"loss": 1.4817,
"step": 560
},
{
"epoch": 0.17,
"learning_rate": 4.1581325301204824e-05,
"loss": 0.8953,
"step": 562
},
{
"epoch": 0.17,
"learning_rate": 4.1551204819277115e-05,
"loss": 0.6903,
"step": 564
},
{
"epoch": 0.17,
"learning_rate": 4.15210843373494e-05,
"loss": 1.527,
"step": 566
},
{
"epoch": 0.17,
"learning_rate": 4.149096385542169e-05,
"loss": 1.026,
"step": 568
},
{
"epoch": 0.17,
"learning_rate": 4.146084337349398e-05,
"loss": 1.0497,
"step": 570
},
{
"epoch": 0.17,
"learning_rate": 4.143072289156626e-05,
"loss": 1.7871,
"step": 572
},
{
"epoch": 0.17,
"learning_rate": 4.1400602409638554e-05,
"loss": 1.2404,
"step": 574
},
{
"epoch": 0.17,
"learning_rate": 4.1370481927710844e-05,
"loss": 1.2325,
"step": 576
},
{
"epoch": 0.17,
"learning_rate": 4.1340361445783135e-05,
"loss": 0.884,
"step": 578
},
{
"epoch": 0.17,
"learning_rate": 4.1310240963855425e-05,
"loss": 1.4528,
"step": 580
},
{
"epoch": 0.18,
"learning_rate": 4.1280120481927716e-05,
"loss": 1.5012,
"step": 582
},
{
"epoch": 0.18,
"learning_rate": 4.125e-05,
"loss": 1.8482,
"step": 584
},
{
"epoch": 0.18,
"learning_rate": 4.121987951807229e-05,
"loss": 1.0148,
"step": 586
},
{
"epoch": 0.18,
"learning_rate": 4.118975903614458e-05,
"loss": 0.5776,
"step": 588
},
{
"epoch": 0.18,
"learning_rate": 4.1159638554216865e-05,
"loss": 1.4766,
"step": 590
},
{
"epoch": 0.18,
"learning_rate": 4.1129518072289155e-05,
"loss": 1.1728,
"step": 592
},
{
"epoch": 0.18,
"learning_rate": 4.109939759036145e-05,
"loss": 1.2823,
"step": 594
},
{
"epoch": 0.18,
"learning_rate": 4.1069277108433736e-05,
"loss": 0.877,
"step": 596
},
{
"epoch": 0.18,
"learning_rate": 4.103915662650603e-05,
"loss": 1.0789,
"step": 598
},
{
"epoch": 0.18,
"learning_rate": 4.100903614457832e-05,
"loss": 0.9109,
"step": 600
},
{
"epoch": 0.18,
"learning_rate": 4.09789156626506e-05,
"loss": 1.1303,
"step": 602
},
{
"epoch": 0.18,
"learning_rate": 4.094879518072289e-05,
"loss": 1.0624,
"step": 604
},
{
"epoch": 0.18,
"learning_rate": 4.091867469879518e-05,
"loss": 0.717,
"step": 606
},
{
"epoch": 0.18,
"learning_rate": 4.088855421686747e-05,
"loss": 1.0823,
"step": 608
},
{
"epoch": 0.18,
"learning_rate": 4.085843373493976e-05,
"loss": 0.8625,
"step": 610
},
{
"epoch": 0.18,
"learning_rate": 4.0828313253012054e-05,
"loss": 0.7205,
"step": 612
},
{
"epoch": 0.18,
"learning_rate": 4.079819277108434e-05,
"loss": 1.3222,
"step": 614
},
{
"epoch": 0.19,
"learning_rate": 4.076807228915663e-05,
"loss": 1.3553,
"step": 616
},
{
"epoch": 0.19,
"learning_rate": 4.073795180722892e-05,
"loss": 1.7008,
"step": 618
},
{
"epoch": 0.19,
"learning_rate": 4.07078313253012e-05,
"loss": 1.0944,
"step": 620
},
{
"epoch": 0.19,
"learning_rate": 4.067771084337349e-05,
"loss": 0.9108,
"step": 622
},
{
"epoch": 0.19,
"learning_rate": 4.0647590361445784e-05,
"loss": 1.3168,
"step": 624
},
{
"epoch": 0.19,
"learning_rate": 4.0617469879518074e-05,
"loss": 0.9085,
"step": 626
},
{
"epoch": 0.19,
"learning_rate": 4.0587349397590365e-05,
"loss": 0.8207,
"step": 628
},
{
"epoch": 0.19,
"learning_rate": 4.0557228915662655e-05,
"loss": 0.676,
"step": 630
},
{
"epoch": 0.19,
"learning_rate": 4.052710843373494e-05,
"loss": 1.332,
"step": 632
},
{
"epoch": 0.19,
"learning_rate": 4.049698795180723e-05,
"loss": 0.7345,
"step": 634
},
{
"epoch": 0.19,
"learning_rate": 4.046686746987952e-05,
"loss": 0.7401,
"step": 636
},
{
"epoch": 0.19,
"learning_rate": 4.0436746987951804e-05,
"loss": 0.8568,
"step": 638
},
{
"epoch": 0.19,
"learning_rate": 4.04066265060241e-05,
"loss": 0.8516,
"step": 640
},
{
"epoch": 0.19,
"learning_rate": 4.037650602409639e-05,
"loss": 1.628,
"step": 642
},
{
"epoch": 0.19,
"learning_rate": 4.0346385542168676e-05,
"loss": 0.6841,
"step": 644
},
{
"epoch": 0.19,
"learning_rate": 4.0316265060240966e-05,
"loss": 0.9907,
"step": 646
},
{
"epoch": 0.2,
"learning_rate": 4.028614457831326e-05,
"loss": 0.8871,
"step": 648
},
{
"epoch": 0.2,
"learning_rate": 4.025602409638554e-05,
"loss": 0.6898,
"step": 650
},
{
"epoch": 0.2,
"learning_rate": 4.022590361445783e-05,
"loss": 1.3005,
"step": 652
},
{
"epoch": 0.2,
"learning_rate": 4.019578313253012e-05,
"loss": 0.6809,
"step": 654
},
{
"epoch": 0.2,
"learning_rate": 4.016566265060241e-05,
"loss": 1.1411,
"step": 656
},
{
"epoch": 0.2,
"learning_rate": 4.01355421686747e-05,
"loss": 0.8012,
"step": 658
},
{
"epoch": 0.2,
"learning_rate": 4.010542168674699e-05,
"loss": 0.8811,
"step": 660
},
{
"epoch": 0.2,
"learning_rate": 4.007530120481928e-05,
"loss": 1.5788,
"step": 662
},
{
"epoch": 0.2,
"learning_rate": 4.004518072289157e-05,
"loss": 0.8837,
"step": 664
},
{
"epoch": 0.2,
"learning_rate": 4.001506024096386e-05,
"loss": 1.2661,
"step": 666
},
{
"epoch": 0.2,
"learning_rate": 3.998493975903614e-05,
"loss": 0.8332,
"step": 668
},
{
"epoch": 0.2,
"learning_rate": 3.995481927710843e-05,
"loss": 0.6243,
"step": 670
},
{
"epoch": 0.2,
"learning_rate": 3.992469879518072e-05,
"loss": 1.4703,
"step": 672
},
{
"epoch": 0.2,
"learning_rate": 3.9894578313253013e-05,
"loss": 0.8614,
"step": 674
},
{
"epoch": 0.2,
"learning_rate": 3.9864457831325304e-05,
"loss": 1.0004,
"step": 676
},
{
"epoch": 0.2,
"learning_rate": 3.9834337349397595e-05,
"loss": 0.62,
"step": 678
},
{
"epoch": 0.2,
"learning_rate": 3.980421686746988e-05,
"loss": 1.1593,
"step": 680
},
{
"epoch": 0.21,
"learning_rate": 3.977409638554217e-05,
"loss": 0.8887,
"step": 682
},
{
"epoch": 0.21,
"learning_rate": 3.974397590361446e-05,
"loss": 0.8185,
"step": 684
},
{
"epoch": 0.21,
"learning_rate": 3.971385542168674e-05,
"loss": 0.8665,
"step": 686
},
{
"epoch": 0.21,
"learning_rate": 3.968373493975904e-05,
"loss": 0.9553,
"step": 688
},
{
"epoch": 0.21,
"learning_rate": 3.965361445783133e-05,
"loss": 0.8508,
"step": 690
},
{
"epoch": 0.21,
"learning_rate": 3.9623493975903615e-05,
"loss": 0.9259,
"step": 692
},
{
"epoch": 0.21,
"learning_rate": 3.9593373493975905e-05,
"loss": 1.289,
"step": 694
},
{
"epoch": 0.21,
"learning_rate": 3.9563253012048196e-05,
"loss": 0.7376,
"step": 696
},
{
"epoch": 0.21,
"learning_rate": 3.953313253012048e-05,
"loss": 1.2276,
"step": 698
},
{
"epoch": 0.21,
"learning_rate": 3.950301204819277e-05,
"loss": 1.3651,
"step": 700
},
{
"epoch": 0.21,
"learning_rate": 3.947289156626506e-05,
"loss": 0.7123,
"step": 702
},
{
"epoch": 0.21,
"learning_rate": 3.944277108433735e-05,
"loss": 0.7984,
"step": 704
},
{
"epoch": 0.21,
"learning_rate": 3.941265060240964e-05,
"loss": 0.8901,
"step": 706
},
{
"epoch": 0.21,
"learning_rate": 3.938253012048193e-05,
"loss": 1.1609,
"step": 708
},
{
"epoch": 0.21,
"learning_rate": 3.9352409638554216e-05,
"loss": 1.1221,
"step": 710
},
{
"epoch": 0.21,
"learning_rate": 3.932228915662651e-05,
"loss": 0.7465,
"step": 712
},
{
"epoch": 0.22,
"learning_rate": 3.92921686746988e-05,
"loss": 1.4573,
"step": 714
},
{
"epoch": 0.22,
"learning_rate": 3.926204819277108e-05,
"loss": 0.7937,
"step": 716
},
{
"epoch": 0.22,
"learning_rate": 3.923192771084337e-05,
"loss": 0.8791,
"step": 718
},
{
"epoch": 0.22,
"learning_rate": 3.920180722891567e-05,
"loss": 1.0836,
"step": 720
},
{
"epoch": 0.22,
"learning_rate": 3.917168674698795e-05,
"loss": 0.8816,
"step": 722
},
{
"epoch": 0.22,
"learning_rate": 3.914156626506024e-05,
"loss": 0.8005,
"step": 724
},
{
"epoch": 0.22,
"learning_rate": 3.9111445783132534e-05,
"loss": 0.8314,
"step": 726
},
{
"epoch": 0.22,
"learning_rate": 3.908132530120482e-05,
"loss": 0.6854,
"step": 728
},
{
"epoch": 0.22,
"learning_rate": 3.905120481927711e-05,
"loss": 0.5533,
"step": 730
},
{
"epoch": 0.22,
"learning_rate": 3.90210843373494e-05,
"loss": 0.8984,
"step": 732
},
{
"epoch": 0.22,
"learning_rate": 3.899096385542169e-05,
"loss": 1.7432,
"step": 734
},
{
"epoch": 0.22,
"learning_rate": 3.896084337349398e-05,
"loss": 1.6523,
"step": 736
},
{
"epoch": 0.22,
"learning_rate": 3.893072289156627e-05,
"loss": 1.0325,
"step": 738
},
{
"epoch": 0.22,
"learning_rate": 3.8900602409638554e-05,
"loss": 0.9944,
"step": 740
},
{
"epoch": 0.22,
"learning_rate": 3.8870481927710845e-05,
"loss": 1.2062,
"step": 742
},
{
"epoch": 0.22,
"learning_rate": 3.8840361445783135e-05,
"loss": 1.0234,
"step": 744
},
{
"epoch": 0.22,
"learning_rate": 3.881024096385542e-05,
"loss": 1.3087,
"step": 746
},
{
"epoch": 0.23,
"learning_rate": 3.878012048192771e-05,
"loss": 1.2162,
"step": 748
},
{
"epoch": 0.23,
"learning_rate": 3.875e-05,
"loss": 0.8821,
"step": 750
},
{
"epoch": 0.23,
"learning_rate": 3.871987951807229e-05,
"loss": 1.0549,
"step": 752
},
{
"epoch": 0.23,
"learning_rate": 3.868975903614458e-05,
"loss": 1.6813,
"step": 754
},
{
"epoch": 0.23,
"learning_rate": 3.865963855421687e-05,
"loss": 1.221,
"step": 756
},
{
"epoch": 0.23,
"learning_rate": 3.8629518072289155e-05,
"loss": 1.2386,
"step": 758
},
{
"epoch": 0.23,
"learning_rate": 3.8599397590361446e-05,
"loss": 1.0353,
"step": 760
},
{
"epoch": 0.23,
"learning_rate": 3.8569277108433737e-05,
"loss": 0.7784,
"step": 762
},
{
"epoch": 0.23,
"learning_rate": 3.853915662650602e-05,
"loss": 0.6811,
"step": 764
},
{
"epoch": 0.23,
"learning_rate": 3.850903614457832e-05,
"loss": 0.9586,
"step": 766
},
{
"epoch": 0.23,
"learning_rate": 3.847891566265061e-05,
"loss": 1.0714,
"step": 768
},
{
"epoch": 0.23,
"learning_rate": 3.844879518072289e-05,
"loss": 1.1191,
"step": 770
},
{
"epoch": 0.23,
"learning_rate": 3.841867469879518e-05,
"loss": 0.6565,
"step": 772
},
{
"epoch": 0.23,
"learning_rate": 3.838855421686747e-05,
"loss": 0.9363,
"step": 774
},
{
"epoch": 0.23,
"learning_rate": 3.835843373493976e-05,
"loss": 1.3396,
"step": 776
},
{
"epoch": 0.23,
"learning_rate": 3.832831325301205e-05,
"loss": 0.718,
"step": 778
},
{
"epoch": 0.23,
"learning_rate": 3.829819277108434e-05,
"loss": 0.6094,
"step": 780
},
{
"epoch": 0.24,
"learning_rate": 3.826807228915663e-05,
"loss": 0.7958,
"step": 782
},
{
"epoch": 0.24,
"learning_rate": 3.823795180722892e-05,
"loss": 1.5577,
"step": 784
},
{
"epoch": 0.24,
"learning_rate": 3.820783132530121e-05,
"loss": 0.8789,
"step": 786
},
{
"epoch": 0.24,
"learning_rate": 3.817771084337349e-05,
"loss": 2.4467,
"step": 788
},
{
"epoch": 0.24,
"learning_rate": 3.8147590361445784e-05,
"loss": 1.0957,
"step": 790
},
{
"epoch": 0.24,
"learning_rate": 3.8117469879518074e-05,
"loss": 0.9487,
"step": 792
},
{
"epoch": 0.24,
"learning_rate": 3.808734939759036e-05,
"loss": 0.887,
"step": 794
},
{
"epoch": 0.24,
"learning_rate": 3.805722891566265e-05,
"loss": 1.2218,
"step": 796
},
{
"epoch": 0.24,
"learning_rate": 3.802710843373494e-05,
"loss": 0.8126,
"step": 798
},
{
"epoch": 0.24,
"learning_rate": 3.799698795180723e-05,
"loss": 0.8815,
"step": 800
},
{
"epoch": 0.24,
"learning_rate": 3.796686746987952e-05,
"loss": 1.7028,
"step": 802
},
{
"epoch": 0.24,
"learning_rate": 3.793674698795181e-05,
"loss": 0.8722,
"step": 804
},
{
"epoch": 0.24,
"learning_rate": 3.7906626506024095e-05,
"loss": 1.2355,
"step": 806
},
{
"epoch": 0.24,
"learning_rate": 3.7876506024096385e-05,
"loss": 0.8024,
"step": 808
},
{
"epoch": 0.24,
"learning_rate": 3.7846385542168676e-05,
"loss": 0.9466,
"step": 810
},
{
"epoch": 0.24,
"learning_rate": 3.7816265060240966e-05,
"loss": 0.9121,
"step": 812
},
{
"epoch": 0.25,
"learning_rate": 3.778614457831326e-05,
"loss": 1.0029,
"step": 814
},
{
"epoch": 0.25,
"learning_rate": 3.775602409638555e-05,
"loss": 0.8804,
"step": 816
},
{
"epoch": 0.25,
"learning_rate": 3.772590361445783e-05,
"loss": 0.9294,
"step": 818
},
{
"epoch": 0.25,
"learning_rate": 3.769578313253012e-05,
"loss": 1.1569,
"step": 820
},
{
"epoch": 0.25,
"learning_rate": 3.766566265060241e-05,
"loss": 1.4136,
"step": 822
},
{
"epoch": 0.25,
"learning_rate": 3.7635542168674696e-05,
"loss": 1.0494,
"step": 824
},
{
"epoch": 0.25,
"learning_rate": 3.7605421686746987e-05,
"loss": 0.8518,
"step": 826
},
{
"epoch": 0.25,
"learning_rate": 3.757530120481928e-05,
"loss": 1.2651,
"step": 828
},
{
"epoch": 0.25,
"learning_rate": 3.754518072289157e-05,
"loss": 1.055,
"step": 830
},
{
"epoch": 0.25,
"learning_rate": 3.751506024096386e-05,
"loss": 0.8015,
"step": 832
},
{
"epoch": 0.25,
"learning_rate": 3.748493975903615e-05,
"loss": 0.5668,
"step": 834
},
{
"epoch": 0.25,
"learning_rate": 3.745481927710844e-05,
"loss": 0.8717,
"step": 836
},
{
"epoch": 0.25,
"learning_rate": 3.742469879518072e-05,
"loss": 0.8765,
"step": 838
},
{
"epoch": 0.25,
"learning_rate": 3.7394578313253014e-05,
"loss": 0.692,
"step": 840
},
{
"epoch": 0.25,
"learning_rate": 3.7364457831325304e-05,
"loss": 1.4549,
"step": 842
},
{
"epoch": 0.25,
"learning_rate": 3.733433734939759e-05,
"loss": 0.8493,
"step": 844
},
{
"epoch": 0.25,
"learning_rate": 3.7304216867469885e-05,
"loss": 0.5494,
"step": 846
},
{
"epoch": 0.26,
"learning_rate": 3.7274096385542176e-05,
"loss": 0.8156,
"step": 848
},
{
"epoch": 0.26,
"learning_rate": 3.724397590361446e-05,
"loss": 0.9622,
"step": 850
},
{
"epoch": 0.26,
"learning_rate": 3.721385542168675e-05,
"loss": 1.9199,
"step": 852
},
{
"epoch": 0.26,
"learning_rate": 3.718373493975904e-05,
"loss": 1.3171,
"step": 854
},
{
"epoch": 0.26,
"learning_rate": 3.7153614457831324e-05,
"loss": 1.2651,
"step": 856
},
{
"epoch": 0.26,
"learning_rate": 3.7123493975903615e-05,
"loss": 0.8476,
"step": 858
},
{
"epoch": 0.26,
"learning_rate": 3.7093373493975906e-05,
"loss": 1.2613,
"step": 860
},
{
"epoch": 0.26,
"learning_rate": 3.7063253012048196e-05,
"loss": 0.7681,
"step": 862
},
{
"epoch": 0.26,
"learning_rate": 3.703313253012049e-05,
"loss": 1.3414,
"step": 864
},
{
"epoch": 0.26,
"learning_rate": 3.700301204819278e-05,
"loss": 0.8756,
"step": 866
},
{
"epoch": 0.26,
"learning_rate": 3.697289156626506e-05,
"loss": 1.3291,
"step": 868
},
{
"epoch": 0.26,
"learning_rate": 3.694277108433735e-05,
"loss": 0.7763,
"step": 870
},
{
"epoch": 0.26,
"learning_rate": 3.691265060240964e-05,
"loss": 0.843,
"step": 872
},
{
"epoch": 0.26,
"learning_rate": 3.6882530120481926e-05,
"loss": 0.6986,
"step": 874
},
{
"epoch": 0.26,
"learning_rate": 3.6852409638554216e-05,
"loss": 1.0621,
"step": 876
},
{
"epoch": 0.26,
"learning_rate": 3.6822289156626514e-05,
"loss": 0.897,
"step": 878
},
{
"epoch": 0.27,
"learning_rate": 3.67921686746988e-05,
"loss": 1.0801,
"step": 880
},
{
"epoch": 0.27,
"learning_rate": 3.676204819277109e-05,
"loss": 1.0955,
"step": 882
},
{
"epoch": 0.27,
"learning_rate": 3.673192771084338e-05,
"loss": 2.0716,
"step": 884
},
{
"epoch": 0.27,
"learning_rate": 3.670180722891566e-05,
"loss": 1.0817,
"step": 886
},
{
"epoch": 0.27,
"learning_rate": 3.667168674698795e-05,
"loss": 0.8322,
"step": 888
},
{
"epoch": 0.27,
"learning_rate": 3.6641566265060243e-05,
"loss": 0.8457,
"step": 890
},
{
"epoch": 0.27,
"learning_rate": 3.6611445783132534e-05,
"loss": 1.3808,
"step": 892
},
{
"epoch": 0.27,
"learning_rate": 3.6581325301204825e-05,
"loss": 1.1165,
"step": 894
},
{
"epoch": 0.27,
"learning_rate": 3.6551204819277115e-05,
"loss": 0.907,
"step": 896
},
{
"epoch": 0.27,
"learning_rate": 3.65210843373494e-05,
"loss": 1.1849,
"step": 898
},
{
"epoch": 0.27,
"learning_rate": 3.649096385542169e-05,
"loss": 0.9893,
"step": 900
},
{
"epoch": 0.27,
"learning_rate": 3.646084337349398e-05,
"loss": 0.8124,
"step": 902
},
{
"epoch": 0.27,
"learning_rate": 3.6430722891566264e-05,
"loss": 2.3087,
"step": 904
},
{
"epoch": 0.27,
"learning_rate": 3.6400602409638554e-05,
"loss": 0.7271,
"step": 906
},
{
"epoch": 0.27,
"learning_rate": 3.6370481927710845e-05,
"loss": 0.7002,
"step": 908
},
{
"epoch": 0.27,
"learning_rate": 3.6340361445783135e-05,
"loss": 0.7096,
"step": 910
},
{
"epoch": 0.27,
"learning_rate": 3.6310240963855426e-05,
"loss": 0.8614,
"step": 912
},
{
"epoch": 0.28,
"learning_rate": 3.6280120481927716e-05,
"loss": 0.8016,
"step": 914
},
{
"epoch": 0.28,
"learning_rate": 3.625e-05,
"loss": 1.3715,
"step": 916
},
{
"epoch": 0.28,
"learning_rate": 3.621987951807229e-05,
"loss": 1.5691,
"step": 918
},
{
"epoch": 0.28,
"learning_rate": 3.618975903614458e-05,
"loss": 0.7724,
"step": 920
},
{
"epoch": 0.28,
"learning_rate": 3.6159638554216865e-05,
"loss": 0.7225,
"step": 922
},
{
"epoch": 0.28,
"learning_rate": 3.612951807228916e-05,
"loss": 1.0545,
"step": 924
},
{
"epoch": 0.28,
"learning_rate": 3.609939759036145e-05,
"loss": 0.6165,
"step": 926
},
{
"epoch": 0.28,
"learning_rate": 3.606927710843374e-05,
"loss": 0.5942,
"step": 928
},
{
"epoch": 0.28,
"learning_rate": 3.603915662650603e-05,
"loss": 1.0416,
"step": 930
},
{
"epoch": 0.28,
"learning_rate": 3.600903614457832e-05,
"loss": 0.7985,
"step": 932
},
{
"epoch": 0.28,
"learning_rate": 3.59789156626506e-05,
"loss": 0.7326,
"step": 934
},
{
"epoch": 0.28,
"learning_rate": 3.594879518072289e-05,
"loss": 0.7355,
"step": 936
},
{
"epoch": 0.28,
"learning_rate": 3.591867469879518e-05,
"loss": 1.2497,
"step": 938
},
{
"epoch": 0.28,
"learning_rate": 3.588855421686747e-05,
"loss": 0.9994,
"step": 940
},
{
"epoch": 0.28,
"learning_rate": 3.5858433734939764e-05,
"loss": 1.4224,
"step": 942
},
{
"epoch": 0.28,
"learning_rate": 3.5828313253012054e-05,
"loss": 0.7701,
"step": 944
},
{
"epoch": 0.28,
"learning_rate": 3.579819277108434e-05,
"loss": 0.6786,
"step": 946
},
{
"epoch": 0.29,
"learning_rate": 3.576807228915663e-05,
"loss": 1.073,
"step": 948
},
{
"epoch": 0.29,
"learning_rate": 3.573795180722892e-05,
"loss": 1.086,
"step": 950
},
{
"epoch": 0.29,
"learning_rate": 3.57078313253012e-05,
"loss": 0.726,
"step": 952
},
{
"epoch": 0.29,
"learning_rate": 3.5677710843373494e-05,
"loss": 0.68,
"step": 954
},
{
"epoch": 0.29,
"learning_rate": 3.5647590361445784e-05,
"loss": 0.7528,
"step": 956
},
{
"epoch": 0.29,
"learning_rate": 3.5617469879518075e-05,
"loss": 0.6709,
"step": 958
},
{
"epoch": 0.29,
"learning_rate": 3.5587349397590365e-05,
"loss": 0.7401,
"step": 960
},
{
"epoch": 0.29,
"learning_rate": 3.5557228915662656e-05,
"loss": 0.723,
"step": 962
},
{
"epoch": 0.29,
"learning_rate": 3.552710843373494e-05,
"loss": 1.0229,
"step": 964
},
{
"epoch": 0.29,
"learning_rate": 3.549698795180723e-05,
"loss": 0.9485,
"step": 966
},
{
"epoch": 0.29,
"learning_rate": 3.546686746987952e-05,
"loss": 1.25,
"step": 968
},
{
"epoch": 0.29,
"learning_rate": 3.5436746987951804e-05,
"loss": 0.9716,
"step": 970
},
{
"epoch": 0.29,
"learning_rate": 3.54066265060241e-05,
"loss": 0.6677,
"step": 972
},
{
"epoch": 0.29,
"learning_rate": 3.537650602409639e-05,
"loss": 0.501,
"step": 974
},
{
"epoch": 0.29,
"learning_rate": 3.5346385542168676e-05,
"loss": 0.6084,
"step": 976
},
{
"epoch": 0.29,
"learning_rate": 3.5316265060240967e-05,
"loss": 0.7909,
"step": 978
},
{
"epoch": 0.3,
"learning_rate": 3.528614457831326e-05,
"loss": 0.6167,
"step": 980
},
{
"epoch": 0.3,
"learning_rate": 3.525602409638554e-05,
"loss": 0.75,
"step": 982
},
{
"epoch": 0.3,
"learning_rate": 3.522590361445783e-05,
"loss": 0.9251,
"step": 984
},
{
"epoch": 0.3,
"learning_rate": 3.519578313253012e-05,
"loss": 0.996,
"step": 986
},
{
"epoch": 0.3,
"learning_rate": 3.516566265060241e-05,
"loss": 0.8256,
"step": 988
},
{
"epoch": 0.3,
"learning_rate": 3.51355421686747e-05,
"loss": 0.7673,
"step": 990
},
{
"epoch": 0.3,
"learning_rate": 3.5105421686746994e-05,
"loss": 0.5955,
"step": 992
},
{
"epoch": 0.3,
"learning_rate": 3.507530120481928e-05,
"loss": 0.4808,
"step": 994
},
{
"epoch": 0.3,
"learning_rate": 3.504518072289157e-05,
"loss": 0.813,
"step": 996
},
{
"epoch": 0.3,
"learning_rate": 3.501506024096386e-05,
"loss": 0.5784,
"step": 998
},
{
"epoch": 0.3,
"learning_rate": 3.498493975903614e-05,
"loss": 1.3097,
"step": 1000
},
{
"epoch": 0.3,
"eval_cer": 0.184837493777114,
"eval_loss": 1.2013893127441406,
"eval_runtime": 767.591,
"eval_samples_per_second": 3.844,
"eval_steps_per_second": 0.481,
"step": 1000
},
{
"epoch": 0.3,
"learning_rate": 3.495481927710843e-05,
"loss": 0.9779,
"step": 1002
},
{
"epoch": 0.3,
"learning_rate": 3.492469879518073e-05,
"loss": 1.3517,
"step": 1004
},
{
"epoch": 0.3,
"learning_rate": 3.4894578313253014e-05,
"loss": 0.7302,
"step": 1006
},
{
"epoch": 0.3,
"learning_rate": 3.4864457831325304e-05,
"loss": 1.223,
"step": 1008
},
{
"epoch": 0.3,
"learning_rate": 3.4834337349397595e-05,
"loss": 0.7183,
"step": 1010
},
{
"epoch": 0.3,
"learning_rate": 3.480421686746988e-05,
"loss": 0.8343,
"step": 1012
},
{
"epoch": 0.31,
"learning_rate": 3.477409638554217e-05,
"loss": 0.8855,
"step": 1014
},
{
"epoch": 0.31,
"learning_rate": 3.474397590361446e-05,
"loss": 1.0892,
"step": 1016
},
{
"epoch": 0.31,
"learning_rate": 3.471385542168675e-05,
"loss": 1.0468,
"step": 1018
},
{
"epoch": 0.31,
"learning_rate": 3.468373493975904e-05,
"loss": 1.5211,
"step": 1020
},
{
"epoch": 0.31,
"learning_rate": 3.465361445783133e-05,
"loss": 0.8615,
"step": 1022
},
{
"epoch": 0.31,
"learning_rate": 3.4623493975903615e-05,
"loss": 1.15,
"step": 1024
},
{
"epoch": 0.31,
"learning_rate": 3.4593373493975906e-05,
"loss": 0.7891,
"step": 1026
},
{
"epoch": 0.31,
"learning_rate": 3.4563253012048196e-05,
"loss": 0.7431,
"step": 1028
},
{
"epoch": 0.31,
"learning_rate": 3.453313253012048e-05,
"loss": 1.2265,
"step": 1030
},
{
"epoch": 0.31,
"learning_rate": 3.450301204819277e-05,
"loss": 0.7579,
"step": 1032
},
{
"epoch": 0.31,
"learning_rate": 3.447289156626506e-05,
"loss": 0.9619,
"step": 1034
},
{
"epoch": 0.31,
"learning_rate": 3.444277108433735e-05,
"loss": 0.7612,
"step": 1036
},
{
"epoch": 0.31,
"learning_rate": 3.441265060240964e-05,
"loss": 1.3857,
"step": 1038
},
{
"epoch": 0.31,
"learning_rate": 3.438253012048193e-05,
"loss": 0.8202,
"step": 1040
},
{
"epoch": 0.31,
"learning_rate": 3.4352409638554217e-05,
"loss": 0.8826,
"step": 1042
},
{
"epoch": 0.31,
"learning_rate": 3.432228915662651e-05,
"loss": 0.9047,
"step": 1044
},
{
"epoch": 0.32,
"learning_rate": 3.42921686746988e-05,
"loss": 1.1682,
"step": 1046
},
{
"epoch": 0.32,
"learning_rate": 3.426204819277108e-05,
"loss": 0.6807,
"step": 1048
},
{
"epoch": 0.32,
"learning_rate": 3.423192771084338e-05,
"loss": 0.6283,
"step": 1050
},
{
"epoch": 0.32,
"learning_rate": 3.420180722891567e-05,
"loss": 1.0315,
"step": 1052
},
{
"epoch": 0.32,
"learning_rate": 3.417168674698795e-05,
"loss": 0.9544,
"step": 1054
},
{
"epoch": 0.32,
"learning_rate": 3.4141566265060244e-05,
"loss": 0.9978,
"step": 1056
},
{
"epoch": 0.32,
"learning_rate": 3.4111445783132534e-05,
"loss": 0.837,
"step": 1058
},
{
"epoch": 0.32,
"learning_rate": 3.408132530120482e-05,
"loss": 0.8503,
"step": 1060
},
{
"epoch": 0.32,
"learning_rate": 3.405120481927711e-05,
"loss": 1.0282,
"step": 1062
},
{
"epoch": 0.32,
"learning_rate": 3.40210843373494e-05,
"loss": 0.8456,
"step": 1064
},
{
"epoch": 0.32,
"learning_rate": 3.399096385542169e-05,
"loss": 0.6543,
"step": 1066
},
{
"epoch": 0.32,
"learning_rate": 3.396084337349398e-05,
"loss": 0.9168,
"step": 1068
},
{
"epoch": 0.32,
"learning_rate": 3.393072289156627e-05,
"loss": 1.4285,
"step": 1070
},
{
"epoch": 0.32,
"learning_rate": 3.3900602409638554e-05,
"loss": 0.6634,
"step": 1072
},
{
"epoch": 0.32,
"learning_rate": 3.3870481927710845e-05,
"loss": 0.8181,
"step": 1074
},
{
"epoch": 0.32,
"learning_rate": 3.3840361445783136e-05,
"loss": 0.8265,
"step": 1076
},
{
"epoch": 0.32,
"learning_rate": 3.381024096385542e-05,
"loss": 1.0249,
"step": 1078
},
{
"epoch": 0.33,
"learning_rate": 3.378012048192771e-05,
"loss": 0.7728,
"step": 1080
},
{
"epoch": 0.33,
"learning_rate": 3.375000000000001e-05,
"loss": 0.8333,
"step": 1082
},
{
"epoch": 0.33,
"learning_rate": 3.371987951807229e-05,
"loss": 0.7505,
"step": 1084
},
{
"epoch": 0.33,
"learning_rate": 3.368975903614458e-05,
"loss": 0.867,
"step": 1086
},
{
"epoch": 0.33,
"learning_rate": 3.365963855421687e-05,
"loss": 0.5823,
"step": 1088
},
{
"epoch": 0.33,
"learning_rate": 3.3629518072289156e-05,
"loss": 1.246,
"step": 1090
},
{
"epoch": 0.33,
"learning_rate": 3.3599397590361446e-05,
"loss": 1.0733,
"step": 1092
},
{
"epoch": 0.33,
"learning_rate": 3.356927710843374e-05,
"loss": 0.8378,
"step": 1094
},
{
"epoch": 0.33,
"learning_rate": 3.353915662650603e-05,
"loss": 0.6565,
"step": 1096
},
{
"epoch": 0.33,
"learning_rate": 3.350903614457832e-05,
"loss": 0.9191,
"step": 1098
},
{
"epoch": 0.33,
"learning_rate": 3.347891566265061e-05,
"loss": 0.7753,
"step": 1100
},
{
"epoch": 0.33,
"learning_rate": 3.344879518072289e-05,
"loss": 0.5625,
"step": 1102
},
{
"epoch": 0.33,
"learning_rate": 3.341867469879518e-05,
"loss": 0.7715,
"step": 1104
},
{
"epoch": 0.33,
"learning_rate": 3.3388554216867473e-05,
"loss": 1.0855,
"step": 1106
},
{
"epoch": 0.33,
"learning_rate": 3.335843373493976e-05,
"loss": 1.0258,
"step": 1108
},
{
"epoch": 0.33,
"learning_rate": 3.332831325301205e-05,
"loss": 0.9125,
"step": 1110
},
{
"epoch": 0.33,
"learning_rate": 3.329819277108434e-05,
"loss": 0.8995,
"step": 1112
},
{
"epoch": 0.34,
"learning_rate": 3.326807228915663e-05,
"loss": 0.6108,
"step": 1114
},
{
"epoch": 0.34,
"learning_rate": 3.323795180722892e-05,
"loss": 0.788,
"step": 1116
},
{
"epoch": 0.34,
"learning_rate": 3.320783132530121e-05,
"loss": 0.5976,
"step": 1118
},
{
"epoch": 0.34,
"learning_rate": 3.3177710843373494e-05,
"loss": 0.6334,
"step": 1120
},
{
"epoch": 0.34,
"learning_rate": 3.3147590361445784e-05,
"loss": 1.1983,
"step": 1122
},
{
"epoch": 0.34,
"learning_rate": 3.3117469879518075e-05,
"loss": 0.6194,
"step": 1124
},
{
"epoch": 0.34,
"learning_rate": 3.308734939759036e-05,
"loss": 0.7527,
"step": 1126
},
{
"epoch": 0.34,
"learning_rate": 3.305722891566265e-05,
"loss": 0.8491,
"step": 1128
},
{
"epoch": 0.34,
"learning_rate": 3.3027108433734946e-05,
"loss": 1.0095,
"step": 1130
},
{
"epoch": 0.34,
"learning_rate": 3.299698795180723e-05,
"loss": 1.0419,
"step": 1132
},
{
"epoch": 0.34,
"learning_rate": 3.296686746987952e-05,
"loss": 0.7022,
"step": 1134
},
{
"epoch": 0.34,
"learning_rate": 3.293674698795181e-05,
"loss": 0.7099,
"step": 1136
},
{
"epoch": 0.34,
"learning_rate": 3.2906626506024095e-05,
"loss": 1.1393,
"step": 1138
},
{
"epoch": 0.34,
"learning_rate": 3.2876506024096386e-05,
"loss": 0.8637,
"step": 1140
},
{
"epoch": 0.34,
"learning_rate": 3.2846385542168676e-05,
"loss": 0.8791,
"step": 1142
},
{
"epoch": 0.34,
"learning_rate": 3.281626506024097e-05,
"loss": 0.6076,
"step": 1144
},
{
"epoch": 0.35,
"learning_rate": 3.278614457831326e-05,
"loss": 0.6162,
"step": 1146
},
{
"epoch": 0.35,
"learning_rate": 3.275602409638555e-05,
"loss": 1.1756,
"step": 1148
},
{
"epoch": 0.35,
"learning_rate": 3.272590361445783e-05,
"loss": 1.1942,
"step": 1150
},
{
"epoch": 0.35,
"learning_rate": 3.269578313253012e-05,
"loss": 0.9575,
"step": 1152
},
{
"epoch": 0.35,
"learning_rate": 3.266566265060241e-05,
"loss": 0.5854,
"step": 1154
},
{
"epoch": 0.35,
"learning_rate": 3.2635542168674696e-05,
"loss": 0.9442,
"step": 1156
},
{
"epoch": 0.35,
"learning_rate": 3.260542168674699e-05,
"loss": 1.105,
"step": 1158
},
{
"epoch": 0.35,
"learning_rate": 3.257530120481928e-05,
"loss": 0.7347,
"step": 1160
},
{
"epoch": 0.35,
"learning_rate": 3.254518072289157e-05,
"loss": 0.937,
"step": 1162
},
{
"epoch": 0.35,
"learning_rate": 3.251506024096386e-05,
"loss": 0.7417,
"step": 1164
},
{
"epoch": 0.35,
"learning_rate": 3.248493975903615e-05,
"loss": 0.6828,
"step": 1166
},
{
"epoch": 0.35,
"learning_rate": 3.245481927710843e-05,
"loss": 0.7266,
"step": 1168
},
{
"epoch": 0.35,
"learning_rate": 3.2424698795180724e-05,
"loss": 1.3076,
"step": 1170
},
{
"epoch": 0.35,
"learning_rate": 3.2394578313253014e-05,
"loss": 0.6347,
"step": 1172
},
{
"epoch": 0.35,
"learning_rate": 3.23644578313253e-05,
"loss": 0.6863,
"step": 1174
},
{
"epoch": 0.35,
"learning_rate": 3.2334337349397595e-05,
"loss": 1.1168,
"step": 1176
},
{
"epoch": 0.35,
"learning_rate": 3.2304216867469886e-05,
"loss": 0.8243,
"step": 1178
},
{
"epoch": 0.36,
"learning_rate": 3.227409638554217e-05,
"loss": 1.6656,
"step": 1180
},
{
"epoch": 0.36,
"learning_rate": 3.224397590361446e-05,
"loss": 1.0254,
"step": 1182
},
{
"epoch": 0.36,
"learning_rate": 3.221385542168675e-05,
"loss": 0.912,
"step": 1184
},
{
"epoch": 0.36,
"learning_rate": 3.2183734939759034e-05,
"loss": 0.8226,
"step": 1186
},
{
"epoch": 0.36,
"learning_rate": 3.2153614457831325e-05,
"loss": 0.5505,
"step": 1188
},
{
"epoch": 0.36,
"learning_rate": 3.2123493975903615e-05,
"loss": 0.8036,
"step": 1190
},
{
"epoch": 0.36,
"learning_rate": 3.2093373493975906e-05,
"loss": 0.9761,
"step": 1192
},
{
"epoch": 0.36,
"learning_rate": 3.2063253012048197e-05,
"loss": 0.6607,
"step": 1194
},
{
"epoch": 0.36,
"learning_rate": 3.203313253012049e-05,
"loss": 0.6317,
"step": 1196
},
{
"epoch": 0.36,
"learning_rate": 3.200301204819277e-05,
"loss": 0.9332,
"step": 1198
},
{
"epoch": 0.36,
"learning_rate": 3.197289156626506e-05,
"loss": 0.5312,
"step": 1200
},
{
"epoch": 0.36,
"learning_rate": 3.194277108433735e-05,
"loss": 0.6202,
"step": 1202
},
{
"epoch": 0.36,
"learning_rate": 3.1912650602409636e-05,
"loss": 0.799,
"step": 1204
},
{
"epoch": 0.36,
"learning_rate": 3.1882530120481926e-05,
"loss": 0.8783,
"step": 1206
},
{
"epoch": 0.36,
"learning_rate": 3.1852409638554224e-05,
"loss": 0.8679,
"step": 1208
},
{
"epoch": 0.36,
"learning_rate": 3.182228915662651e-05,
"loss": 0.8258,
"step": 1210
},
{
"epoch": 0.37,
"learning_rate": 3.17921686746988e-05,
"loss": 0.774,
"step": 1212
},
{
"epoch": 0.37,
"learning_rate": 3.176204819277109e-05,
"loss": 1.5223,
"step": 1214
},
{
"epoch": 0.37,
"learning_rate": 3.173192771084337e-05,
"loss": 0.6388,
"step": 1216
},
{
"epoch": 0.37,
"learning_rate": 3.170180722891566e-05,
"loss": 0.9953,
"step": 1218
},
{
"epoch": 0.37,
"learning_rate": 3.167168674698795e-05,
"loss": 1.0687,
"step": 1220
},
{
"epoch": 0.37,
"learning_rate": 3.1641566265060244e-05,
"loss": 0.5961,
"step": 1222
},
{
"epoch": 0.37,
"learning_rate": 3.1611445783132534e-05,
"loss": 0.6535,
"step": 1224
},
{
"epoch": 0.37,
"learning_rate": 3.1581325301204825e-05,
"loss": 1.2129,
"step": 1226
},
{
"epoch": 0.37,
"learning_rate": 3.155120481927711e-05,
"loss": 0.6273,
"step": 1228
},
{
"epoch": 0.37,
"learning_rate": 3.15210843373494e-05,
"loss": 0.7879,
"step": 1230
},
{
"epoch": 0.37,
"learning_rate": 3.149096385542169e-05,
"loss": 0.5647,
"step": 1232
},
{
"epoch": 0.37,
"learning_rate": 3.1460843373493974e-05,
"loss": 0.6358,
"step": 1234
},
{
"epoch": 0.37,
"learning_rate": 3.1430722891566264e-05,
"loss": 0.9404,
"step": 1236
},
{
"epoch": 0.37,
"learning_rate": 3.1400602409638555e-05,
"loss": 1.0291,
"step": 1238
},
{
"epoch": 0.37,
"learning_rate": 3.1370481927710845e-05,
"loss": 0.5714,
"step": 1240
},
{
"epoch": 0.37,
"learning_rate": 3.1340361445783136e-05,
"loss": 0.9311,
"step": 1242
},
{
"epoch": 0.37,
"learning_rate": 3.1310240963855426e-05,
"loss": 0.9119,
"step": 1244
},
{
"epoch": 0.38,
"learning_rate": 3.128012048192771e-05,
"loss": 1.4362,
"step": 1246
},
{
"epoch": 0.38,
"learning_rate": 3.125e-05,
"loss": 0.61,
"step": 1248
},
{
"epoch": 0.38,
"learning_rate": 3.121987951807229e-05,
"loss": 1.5383,
"step": 1250
},
{
"epoch": 0.38,
"learning_rate": 3.1189759036144575e-05,
"loss": 1.081,
"step": 1252
},
{
"epoch": 0.38,
"learning_rate": 3.115963855421687e-05,
"loss": 0.634,
"step": 1254
},
{
"epoch": 0.38,
"learning_rate": 3.112951807228916e-05,
"loss": 0.7168,
"step": 1256
},
{
"epoch": 0.38,
"learning_rate": 3.1099397590361447e-05,
"loss": 0.7379,
"step": 1258
},
{
"epoch": 0.38,
"learning_rate": 3.106927710843374e-05,
"loss": 0.7491,
"step": 1260
},
{
"epoch": 0.38,
"learning_rate": 3.103915662650603e-05,
"loss": 0.8309,
"step": 1262
},
{
"epoch": 0.38,
"learning_rate": 3.100903614457831e-05,
"loss": 1.4238,
"step": 1264
},
{
"epoch": 0.38,
"learning_rate": 3.09789156626506e-05,
"loss": 0.8956,
"step": 1266
},
{
"epoch": 0.38,
"learning_rate": 3.094879518072289e-05,
"loss": 0.6621,
"step": 1268
},
{
"epoch": 0.38,
"learning_rate": 3.091867469879518e-05,
"loss": 0.7109,
"step": 1270
},
{
"epoch": 0.38,
"learning_rate": 3.0888554216867474e-05,
"loss": 0.8666,
"step": 1272
},
{
"epoch": 0.38,
"learning_rate": 3.0858433734939764e-05,
"loss": 0.5857,
"step": 1274
},
{
"epoch": 0.38,
"learning_rate": 3.082831325301205e-05,
"loss": 0.8866,
"step": 1276
},
{
"epoch": 0.38,
"learning_rate": 3.079819277108434e-05,
"loss": 0.8554,
"step": 1278
},
{
"epoch": 0.39,
"learning_rate": 3.076807228915663e-05,
"loss": 0.6407,
"step": 1280
},
{
"epoch": 0.39,
"learning_rate": 3.073795180722891e-05,
"loss": 0.6188,
"step": 1282
},
{
"epoch": 0.39,
"learning_rate": 3.0707831325301203e-05,
"loss": 0.6306,
"step": 1284
},
{
"epoch": 0.39,
"learning_rate": 3.0677710843373494e-05,
"loss": 0.9434,
"step": 1286
},
{
"epoch": 0.39,
"learning_rate": 3.0647590361445784e-05,
"loss": 0.998,
"step": 1288
},
{
"epoch": 0.39,
"learning_rate": 3.0617469879518075e-05,
"loss": 0.5408,
"step": 1290
},
{
"epoch": 0.39,
"learning_rate": 3.0587349397590366e-05,
"loss": 0.6366,
"step": 1292
},
{
"epoch": 0.39,
"learning_rate": 3.055722891566265e-05,
"loss": 0.9771,
"step": 1294
},
{
"epoch": 0.39,
"learning_rate": 3.052710843373494e-05,
"loss": 0.5971,
"step": 1296
},
{
"epoch": 0.39,
"learning_rate": 3.049698795180723e-05,
"loss": 0.7049,
"step": 1298
},
{
"epoch": 0.39,
"learning_rate": 3.0466867469879518e-05,
"loss": 0.9464,
"step": 1300
},
{
"epoch": 0.39,
"learning_rate": 3.0436746987951808e-05,
"loss": 0.7754,
"step": 1302
},
{
"epoch": 0.39,
"learning_rate": 3.04066265060241e-05,
"loss": 0.9045,
"step": 1304
},
{
"epoch": 0.39,
"learning_rate": 3.0376506024096386e-05,
"loss": 0.7472,
"step": 1306
},
{
"epoch": 0.39,
"learning_rate": 3.0346385542168676e-05,
"loss": 0.8814,
"step": 1308
},
{
"epoch": 0.39,
"learning_rate": 3.0316265060240967e-05,
"loss": 0.7019,
"step": 1310
},
{
"epoch": 0.4,
"learning_rate": 3.0286144578313254e-05,
"loss": 0.6155,
"step": 1312
},
{
"epoch": 0.4,
"learning_rate": 3.0256024096385545e-05,
"loss": 0.5321,
"step": 1314
},
{
"epoch": 0.4,
"learning_rate": 3.0225903614457835e-05,
"loss": 1.0773,
"step": 1316
},
{
"epoch": 0.4,
"learning_rate": 3.019578313253012e-05,
"loss": 0.7966,
"step": 1318
},
{
"epoch": 0.4,
"learning_rate": 3.016566265060241e-05,
"loss": 0.964,
"step": 1320
},
{
"epoch": 0.4,
"learning_rate": 3.0135542168674703e-05,
"loss": 0.6225,
"step": 1322
},
{
"epoch": 0.4,
"learning_rate": 3.0105421686746987e-05,
"loss": 0.9026,
"step": 1324
},
{
"epoch": 0.4,
"learning_rate": 3.0075301204819278e-05,
"loss": 0.7066,
"step": 1326
},
{
"epoch": 0.4,
"learning_rate": 3.004518072289157e-05,
"loss": 0.8545,
"step": 1328
},
{
"epoch": 0.4,
"learning_rate": 3.0015060240963855e-05,
"loss": 0.9737,
"step": 1330
},
{
"epoch": 0.4,
"learning_rate": 2.9984939759036146e-05,
"loss": 1.0365,
"step": 1332
},
{
"epoch": 0.4,
"learning_rate": 2.9954819277108437e-05,
"loss": 1.0609,
"step": 1334
},
{
"epoch": 0.4,
"learning_rate": 2.9924698795180724e-05,
"loss": 0.9155,
"step": 1336
},
{
"epoch": 0.4,
"learning_rate": 2.9894578313253014e-05,
"loss": 0.5767,
"step": 1338
},
{
"epoch": 0.4,
"learning_rate": 2.9864457831325305e-05,
"loss": 0.8223,
"step": 1340
},
{
"epoch": 0.4,
"learning_rate": 2.983433734939759e-05,
"loss": 0.561,
"step": 1342
},
{
"epoch": 0.4,
"learning_rate": 2.980421686746988e-05,
"loss": 0.5732,
"step": 1344
},
{
"epoch": 0.41,
"learning_rate": 2.9774096385542173e-05,
"loss": 0.6736,
"step": 1346
},
{
"epoch": 0.41,
"learning_rate": 2.9743975903614457e-05,
"loss": 0.5894,
"step": 1348
},
{
"epoch": 0.41,
"learning_rate": 2.9713855421686747e-05,
"loss": 0.8857,
"step": 1350
},
{
"epoch": 0.41,
"learning_rate": 2.9683734939759038e-05,
"loss": 0.6854,
"step": 1352
},
{
"epoch": 0.41,
"learning_rate": 2.9653614457831325e-05,
"loss": 0.712,
"step": 1354
},
{
"epoch": 0.41,
"learning_rate": 2.9623493975903616e-05,
"loss": 0.5451,
"step": 1356
},
{
"epoch": 0.41,
"learning_rate": 2.9593373493975906e-05,
"loss": 0.5558,
"step": 1358
},
{
"epoch": 0.41,
"learning_rate": 2.9563253012048193e-05,
"loss": 0.8609,
"step": 1360
},
{
"epoch": 0.41,
"learning_rate": 2.9533132530120484e-05,
"loss": 1.332,
"step": 1362
},
{
"epoch": 0.41,
"learning_rate": 2.9503012048192774e-05,
"loss": 0.5356,
"step": 1364
},
{
"epoch": 0.41,
"learning_rate": 2.9472891566265058e-05,
"loss": 0.7829,
"step": 1366
},
{
"epoch": 0.41,
"learning_rate": 2.9442771084337352e-05,
"loss": 0.6097,
"step": 1368
},
{
"epoch": 0.41,
"learning_rate": 2.9412650602409643e-05,
"loss": 1.0876,
"step": 1370
},
{
"epoch": 0.41,
"learning_rate": 2.9382530120481926e-05,
"loss": 0.7777,
"step": 1372
},
{
"epoch": 0.41,
"learning_rate": 2.9352409638554217e-05,
"loss": 1.0398,
"step": 1374
},
{
"epoch": 0.41,
"learning_rate": 2.9322289156626508e-05,
"loss": 0.656,
"step": 1376
},
{
"epoch": 0.42,
"learning_rate": 2.9292168674698795e-05,
"loss": 0.7087,
"step": 1378
},
{
"epoch": 0.42,
"learning_rate": 2.9262048192771085e-05,
"loss": 0.926,
"step": 1380
},
{
"epoch": 0.42,
"learning_rate": 2.9231927710843376e-05,
"loss": 0.5572,
"step": 1382
},
{
"epoch": 0.42,
"learning_rate": 2.9201807228915663e-05,
"loss": 0.775,
"step": 1384
},
{
"epoch": 0.42,
"learning_rate": 2.9171686746987954e-05,
"loss": 1.3833,
"step": 1386
},
{
"epoch": 0.42,
"learning_rate": 2.9141566265060244e-05,
"loss": 0.7701,
"step": 1388
},
{
"epoch": 0.42,
"learning_rate": 2.9111445783132528e-05,
"loss": 1.2231,
"step": 1390
},
{
"epoch": 0.42,
"learning_rate": 2.9081325301204822e-05,
"loss": 0.7498,
"step": 1392
},
{
"epoch": 0.42,
"learning_rate": 2.9051204819277112e-05,
"loss": 0.6473,
"step": 1394
},
{
"epoch": 0.42,
"learning_rate": 2.9021084337349396e-05,
"loss": 0.6937,
"step": 1396
},
{
"epoch": 0.42,
"learning_rate": 2.8990963855421687e-05,
"loss": 0.8775,
"step": 1398
},
{
"epoch": 0.42,
"learning_rate": 2.8960843373493977e-05,
"loss": 0.8968,
"step": 1400
},
{
"epoch": 0.42,
"learning_rate": 2.8930722891566264e-05,
"loss": 0.6821,
"step": 1402
},
{
"epoch": 0.42,
"learning_rate": 2.8900602409638555e-05,
"loss": 0.732,
"step": 1404
},
{
"epoch": 0.42,
"learning_rate": 2.8870481927710845e-05,
"loss": 0.57,
"step": 1406
},
{
"epoch": 0.42,
"learning_rate": 2.8840361445783133e-05,
"loss": 0.966,
"step": 1408
},
{
"epoch": 0.42,
"learning_rate": 2.8810240963855423e-05,
"loss": 0.7925,
"step": 1410
},
{
"epoch": 0.43,
"learning_rate": 2.8780120481927714e-05,
"loss": 1.107,
"step": 1412
},
{
"epoch": 0.43,
"learning_rate": 2.8749999999999997e-05,
"loss": 0.5882,
"step": 1414
},
{
"epoch": 0.43,
"learning_rate": 2.871987951807229e-05,
"loss": 0.541,
"step": 1416
},
{
"epoch": 0.43,
"learning_rate": 2.8689759036144582e-05,
"loss": 1.2726,
"step": 1418
},
{
"epoch": 0.43,
"learning_rate": 2.8659638554216866e-05,
"loss": 0.7039,
"step": 1420
},
{
"epoch": 0.43,
"learning_rate": 2.8629518072289156e-05,
"loss": 0.8166,
"step": 1422
},
{
"epoch": 0.43,
"learning_rate": 2.859939759036145e-05,
"loss": 0.5377,
"step": 1424
},
{
"epoch": 0.43,
"learning_rate": 2.8569277108433734e-05,
"loss": 0.4697,
"step": 1426
},
{
"epoch": 0.43,
"learning_rate": 2.8539156626506025e-05,
"loss": 0.813,
"step": 1428
},
{
"epoch": 0.43,
"learning_rate": 2.8509036144578315e-05,
"loss": 0.7204,
"step": 1430
},
{
"epoch": 0.43,
"learning_rate": 2.8478915662650602e-05,
"loss": 0.6438,
"step": 1432
},
{
"epoch": 0.43,
"learning_rate": 2.8448795180722893e-05,
"loss": 0.6934,
"step": 1434
},
{
"epoch": 0.43,
"learning_rate": 2.8418674698795183e-05,
"loss": 0.785,
"step": 1436
},
{
"epoch": 0.43,
"learning_rate": 2.838855421686747e-05,
"loss": 0.5243,
"step": 1438
},
{
"epoch": 0.43,
"learning_rate": 2.835843373493976e-05,
"loss": 0.6698,
"step": 1440
},
{
"epoch": 0.43,
"learning_rate": 2.832831325301205e-05,
"loss": 0.7991,
"step": 1442
},
{
"epoch": 0.43,
"learning_rate": 2.8298192771084335e-05,
"loss": 0.6214,
"step": 1444
},
{
"epoch": 0.44,
"learning_rate": 2.8268072289156626e-05,
"loss": 0.7353,
"step": 1446
},
{
"epoch": 0.44,
"learning_rate": 2.823795180722892e-05,
"loss": 0.8296,
"step": 1448
},
{
"epoch": 0.44,
"learning_rate": 2.8207831325301204e-05,
"loss": 0.5718,
"step": 1450
},
{
"epoch": 0.44,
"learning_rate": 2.8177710843373494e-05,
"loss": 0.4981,
"step": 1452
},
{
"epoch": 0.44,
"learning_rate": 2.8147590361445785e-05,
"loss": 0.7405,
"step": 1454
},
{
"epoch": 0.44,
"learning_rate": 2.8117469879518072e-05,
"loss": 0.8843,
"step": 1456
},
{
"epoch": 0.44,
"learning_rate": 2.8087349397590362e-05,
"loss": 0.5912,
"step": 1458
},
{
"epoch": 0.44,
"learning_rate": 2.8057228915662653e-05,
"loss": 1.0557,
"step": 1460
},
{
"epoch": 0.44,
"learning_rate": 2.802710843373494e-05,
"loss": 0.5145,
"step": 1462
},
{
"epoch": 0.44,
"learning_rate": 2.799698795180723e-05,
"loss": 0.7143,
"step": 1464
},
{
"epoch": 0.44,
"learning_rate": 2.796686746987952e-05,
"loss": 0.8426,
"step": 1466
},
{
"epoch": 0.44,
"learning_rate": 2.7936746987951805e-05,
"loss": 0.6981,
"step": 1468
},
{
"epoch": 0.44,
"learning_rate": 2.79066265060241e-05,
"loss": 0.6302,
"step": 1470
},
{
"epoch": 0.44,
"learning_rate": 2.787650602409639e-05,
"loss": 0.6578,
"step": 1472
},
{
"epoch": 0.44,
"learning_rate": 2.7846385542168673e-05,
"loss": 0.9418,
"step": 1474
},
{
"epoch": 0.44,
"learning_rate": 2.7816265060240964e-05,
"loss": 0.6313,
"step": 1476
},
{
"epoch": 0.45,
"learning_rate": 2.7786144578313254e-05,
"loss": 0.8067,
"step": 1478
},
{
"epoch": 0.45,
"learning_rate": 2.775602409638554e-05,
"loss": 0.6972,
"step": 1480
},
{
"epoch": 0.45,
"learning_rate": 2.7725903614457832e-05,
"loss": 0.7357,
"step": 1482
},
{
"epoch": 0.45,
"learning_rate": 2.7695783132530123e-05,
"loss": 1.0189,
"step": 1484
},
{
"epoch": 0.45,
"learning_rate": 2.766566265060241e-05,
"loss": 0.5531,
"step": 1486
},
{
"epoch": 0.45,
"learning_rate": 2.76355421686747e-05,
"loss": 0.8916,
"step": 1488
},
{
"epoch": 0.45,
"learning_rate": 2.760542168674699e-05,
"loss": 0.7319,
"step": 1490
},
{
"epoch": 0.45,
"learning_rate": 2.7575301204819275e-05,
"loss": 1.1347,
"step": 1492
},
{
"epoch": 0.45,
"learning_rate": 2.754518072289157e-05,
"loss": 0.9381,
"step": 1494
},
{
"epoch": 0.45,
"learning_rate": 2.751506024096386e-05,
"loss": 0.6869,
"step": 1496
},
{
"epoch": 0.45,
"learning_rate": 2.7484939759036143e-05,
"loss": 0.6901,
"step": 1498
},
{
"epoch": 0.45,
"learning_rate": 2.7454819277108433e-05,
"loss": 0.5416,
"step": 1500
},
{
"epoch": 0.45,
"learning_rate": 2.7424698795180724e-05,
"loss": 0.5354,
"step": 1502
},
{
"epoch": 0.45,
"learning_rate": 2.739457831325301e-05,
"loss": 0.701,
"step": 1504
},
{
"epoch": 0.45,
"learning_rate": 2.73644578313253e-05,
"loss": 0.4346,
"step": 1506
},
{
"epoch": 0.45,
"learning_rate": 2.7334337349397592e-05,
"loss": 0.5845,
"step": 1508
},
{
"epoch": 0.45,
"learning_rate": 2.730421686746988e-05,
"loss": 0.6699,
"step": 1510
},
{
"epoch": 0.46,
"learning_rate": 2.727409638554217e-05,
"loss": 0.6376,
"step": 1512
},
{
"epoch": 0.46,
"learning_rate": 2.724397590361446e-05,
"loss": 0.6015,
"step": 1514
},
{
"epoch": 0.46,
"learning_rate": 2.7213855421686744e-05,
"loss": 0.7313,
"step": 1516
},
{
"epoch": 0.46,
"learning_rate": 2.7183734939759038e-05,
"loss": 0.7103,
"step": 1518
},
{
"epoch": 0.46,
"learning_rate": 2.715361445783133e-05,
"loss": 0.8721,
"step": 1520
},
{
"epoch": 0.46,
"learning_rate": 2.7123493975903612e-05,
"loss": 0.6821,
"step": 1522
},
{
"epoch": 0.46,
"learning_rate": 2.7093373493975903e-05,
"loss": 0.6905,
"step": 1524
},
{
"epoch": 0.46,
"learning_rate": 2.7063253012048197e-05,
"loss": 0.6807,
"step": 1526
},
{
"epoch": 0.46,
"learning_rate": 2.703313253012048e-05,
"loss": 0.8692,
"step": 1528
},
{
"epoch": 0.46,
"learning_rate": 2.700301204819277e-05,
"loss": 0.6638,
"step": 1530
},
{
"epoch": 0.46,
"learning_rate": 2.6972891566265062e-05,
"loss": 0.9087,
"step": 1532
},
{
"epoch": 0.46,
"learning_rate": 2.694277108433735e-05,
"loss": 0.7865,
"step": 1534
},
{
"epoch": 0.46,
"learning_rate": 2.691265060240964e-05,
"loss": 0.5438,
"step": 1536
},
{
"epoch": 0.46,
"learning_rate": 2.688253012048193e-05,
"loss": 0.5592,
"step": 1538
},
{
"epoch": 0.46,
"learning_rate": 2.6852409638554217e-05,
"loss": 0.709,
"step": 1540
},
{
"epoch": 0.46,
"learning_rate": 2.6822289156626508e-05,
"loss": 0.7362,
"step": 1542
},
{
"epoch": 0.47,
"learning_rate": 2.67921686746988e-05,
"loss": 0.5545,
"step": 1544
},
{
"epoch": 0.47,
"learning_rate": 2.6762048192771082e-05,
"loss": 0.7602,
"step": 1546
},
{
"epoch": 0.47,
"learning_rate": 2.6731927710843373e-05,
"loss": 1.0619,
"step": 1548
},
{
"epoch": 0.47,
"learning_rate": 2.6701807228915667e-05,
"loss": 0.5519,
"step": 1550
},
{
"epoch": 0.47,
"learning_rate": 2.667168674698795e-05,
"loss": 0.8413,
"step": 1552
},
{
"epoch": 0.47,
"learning_rate": 2.664156626506024e-05,
"loss": 1.2611,
"step": 1554
},
{
"epoch": 0.47,
"learning_rate": 2.661144578313253e-05,
"loss": 0.9425,
"step": 1556
},
{
"epoch": 0.47,
"learning_rate": 2.658132530120482e-05,
"loss": 1.0871,
"step": 1558
},
{
"epoch": 0.47,
"learning_rate": 2.655120481927711e-05,
"loss": 0.5916,
"step": 1560
},
{
"epoch": 0.47,
"learning_rate": 2.65210843373494e-05,
"loss": 0.8853,
"step": 1562
},
{
"epoch": 0.47,
"learning_rate": 2.6490963855421687e-05,
"loss": 1.2742,
"step": 1564
},
{
"epoch": 0.47,
"learning_rate": 2.6460843373493977e-05,
"loss": 0.9733,
"step": 1566
},
{
"epoch": 0.47,
"learning_rate": 2.6430722891566268e-05,
"loss": 0.7031,
"step": 1568
},
{
"epoch": 0.47,
"learning_rate": 2.6400602409638552e-05,
"loss": 0.6563,
"step": 1570
},
{
"epoch": 0.47,
"learning_rate": 2.6370481927710842e-05,
"loss": 0.6535,
"step": 1572
},
{
"epoch": 0.47,
"learning_rate": 2.6340361445783136e-05,
"loss": 0.8213,
"step": 1574
},
{
"epoch": 0.47,
"learning_rate": 2.631024096385542e-05,
"loss": 0.5234,
"step": 1576
},
{
"epoch": 0.48,
"learning_rate": 2.628012048192771e-05,
"loss": 0.9404,
"step": 1578
},
{
"epoch": 0.48,
"learning_rate": 2.625e-05,
"loss": 0.5511,
"step": 1580
},
{
"epoch": 0.48,
"learning_rate": 2.6219879518072288e-05,
"loss": 0.5412,
"step": 1582
},
{
"epoch": 0.48,
"learning_rate": 2.618975903614458e-05,
"loss": 0.4944,
"step": 1584
},
{
"epoch": 0.48,
"learning_rate": 2.615963855421687e-05,
"loss": 0.6675,
"step": 1586
},
{
"epoch": 0.48,
"learning_rate": 2.6129518072289157e-05,
"loss": 0.6809,
"step": 1588
},
{
"epoch": 0.48,
"learning_rate": 2.6099397590361447e-05,
"loss": 0.6405,
"step": 1590
},
{
"epoch": 0.48,
"learning_rate": 2.6069277108433738e-05,
"loss": 0.5876,
"step": 1592
},
{
"epoch": 0.48,
"learning_rate": 2.603915662650602e-05,
"loss": 0.5346,
"step": 1594
},
{
"epoch": 0.48,
"learning_rate": 2.6009036144578315e-05,
"loss": 0.8315,
"step": 1596
},
{
"epoch": 0.48,
"learning_rate": 2.5978915662650606e-05,
"loss": 0.7644,
"step": 1598
},
{
"epoch": 0.48,
"learning_rate": 2.594879518072289e-05,
"loss": 0.9239,
"step": 1600
},
{
"epoch": 0.48,
"learning_rate": 2.591867469879518e-05,
"loss": 1.2676,
"step": 1602
},
{
"epoch": 0.48,
"learning_rate": 2.588855421686747e-05,
"loss": 0.853,
"step": 1604
},
{
"epoch": 0.48,
"learning_rate": 2.5858433734939758e-05,
"loss": 0.5087,
"step": 1606
},
{
"epoch": 0.48,
"learning_rate": 2.582831325301205e-05,
"loss": 0.6988,
"step": 1608
},
{
"epoch": 0.48,
"learning_rate": 2.579819277108434e-05,
"loss": 0.7349,
"step": 1610
},
{
"epoch": 0.49,
"learning_rate": 2.5768072289156626e-05,
"loss": 0.7943,
"step": 1612
},
{
"epoch": 0.49,
"learning_rate": 2.5737951807228917e-05,
"loss": 0.5201,
"step": 1614
},
{
"epoch": 0.49,
"learning_rate": 2.5707831325301207e-05,
"loss": 0.552,
"step": 1616
},
{
"epoch": 0.49,
"learning_rate": 2.567771084337349e-05,
"loss": 0.6508,
"step": 1618
},
{
"epoch": 0.49,
"learning_rate": 2.5647590361445785e-05,
"loss": 0.8105,
"step": 1620
},
{
"epoch": 0.49,
"learning_rate": 2.5617469879518075e-05,
"loss": 0.5445,
"step": 1622
},
{
"epoch": 0.49,
"learning_rate": 2.558734939759036e-05,
"loss": 0.6921,
"step": 1624
},
{
"epoch": 0.49,
"learning_rate": 2.555722891566265e-05,
"loss": 0.6227,
"step": 1626
},
{
"epoch": 0.49,
"learning_rate": 2.552710843373494e-05,
"loss": 0.6072,
"step": 1628
},
{
"epoch": 0.49,
"learning_rate": 2.5496987951807227e-05,
"loss": 1.1964,
"step": 1630
},
{
"epoch": 0.49,
"learning_rate": 2.5466867469879518e-05,
"loss": 0.5045,
"step": 1632
},
{
"epoch": 0.49,
"learning_rate": 2.543674698795181e-05,
"loss": 0.6507,
"step": 1634
},
{
"epoch": 0.49,
"learning_rate": 2.5406626506024096e-05,
"loss": 0.8598,
"step": 1636
},
{
"epoch": 0.49,
"learning_rate": 2.5376506024096386e-05,
"loss": 1.1451,
"step": 1638
},
{
"epoch": 0.49,
"learning_rate": 2.5346385542168677e-05,
"loss": 1.2287,
"step": 1640
},
{
"epoch": 0.49,
"learning_rate": 2.5316265060240964e-05,
"loss": 0.8723,
"step": 1642
},
{
"epoch": 0.5,
"learning_rate": 2.5286144578313255e-05,
"loss": 0.5671,
"step": 1644
},
{
"epoch": 0.5,
"learning_rate": 2.5256024096385545e-05,
"loss": 0.834,
"step": 1646
},
{
"epoch": 0.5,
"learning_rate": 2.522590361445783e-05,
"loss": 0.5107,
"step": 1648
},
{
"epoch": 0.5,
"learning_rate": 2.519578313253012e-05,
"loss": 0.7222,
"step": 1650
},
{
"epoch": 0.5,
"learning_rate": 2.5165662650602413e-05,
"loss": 0.9072,
"step": 1652
},
{
"epoch": 0.5,
"learning_rate": 2.5135542168674697e-05,
"loss": 0.7065,
"step": 1654
},
{
"epoch": 0.5,
"learning_rate": 2.5105421686746988e-05,
"loss": 0.5313,
"step": 1656
},
{
"epoch": 0.5,
"learning_rate": 2.5075301204819278e-05,
"loss": 0.6763,
"step": 1658
},
{
"epoch": 0.5,
"learning_rate": 2.5045180722891565e-05,
"loss": 0.5546,
"step": 1660
},
{
"epoch": 0.5,
"learning_rate": 2.5015060240963856e-05,
"loss": 0.9492,
"step": 1662
},
{
"epoch": 0.5,
"learning_rate": 2.4984939759036146e-05,
"loss": 0.6624,
"step": 1664
},
{
"epoch": 0.5,
"learning_rate": 2.4954819277108434e-05,
"loss": 0.6395,
"step": 1666
},
{
"epoch": 0.5,
"learning_rate": 2.4924698795180724e-05,
"loss": 0.5013,
"step": 1668
},
{
"epoch": 0.5,
"learning_rate": 2.4894578313253015e-05,
"loss": 0.6636,
"step": 1670
},
{
"epoch": 0.5,
"learning_rate": 2.4864457831325302e-05,
"loss": 1.1427,
"step": 1672
},
{
"epoch": 0.5,
"learning_rate": 2.483433734939759e-05,
"loss": 0.5446,
"step": 1674
},
{
"epoch": 0.5,
"learning_rate": 2.4804216867469883e-05,
"loss": 0.9008,
"step": 1676
},
{
"epoch": 0.51,
"learning_rate": 2.477409638554217e-05,
"loss": 0.6663,
"step": 1678
},
{
"epoch": 0.51,
"learning_rate": 2.4743975903614457e-05,
"loss": 0.7125,
"step": 1680
},
{
"epoch": 0.51,
"learning_rate": 2.4713855421686748e-05,
"loss": 0.8794,
"step": 1682
},
{
"epoch": 0.51,
"learning_rate": 2.468373493975904e-05,
"loss": 0.5719,
"step": 1684
},
{
"epoch": 0.51,
"learning_rate": 2.4653614457831326e-05,
"loss": 0.6174,
"step": 1686
},
{
"epoch": 0.51,
"learning_rate": 2.4623493975903616e-05,
"loss": 0.5762,
"step": 1688
},
{
"epoch": 0.51,
"learning_rate": 2.4593373493975903e-05,
"loss": 0.535,
"step": 1690
},
{
"epoch": 0.51,
"learning_rate": 2.4563253012048194e-05,
"loss": 0.7648,
"step": 1692
},
{
"epoch": 0.51,
"learning_rate": 2.4533132530120484e-05,
"loss": 0.5698,
"step": 1694
},
{
"epoch": 0.51,
"learning_rate": 2.450301204819277e-05,
"loss": 0.804,
"step": 1696
},
{
"epoch": 0.51,
"learning_rate": 2.4472891566265062e-05,
"loss": 0.6621,
"step": 1698
},
{
"epoch": 0.51,
"learning_rate": 2.4442771084337353e-05,
"loss": 0.8664,
"step": 1700
},
{
"epoch": 0.51,
"learning_rate": 2.441265060240964e-05,
"loss": 0.656,
"step": 1702
},
{
"epoch": 0.51,
"learning_rate": 2.4382530120481927e-05,
"loss": 0.6028,
"step": 1704
},
{
"epoch": 0.51,
"learning_rate": 2.4352409638554217e-05,
"loss": 0.7309,
"step": 1706
},
{
"epoch": 0.51,
"learning_rate": 2.4322289156626508e-05,
"loss": 0.7996,
"step": 1708
},
{
"epoch": 0.52,
"learning_rate": 2.4292168674698795e-05,
"loss": 0.4599,
"step": 1710
},
{
"epoch": 0.52,
"learning_rate": 2.4262048192771086e-05,
"loss": 0.63,
"step": 1712
},
{
"epoch": 0.52,
"learning_rate": 2.4231927710843373e-05,
"loss": 0.5482,
"step": 1714
},
{
"epoch": 0.52,
"learning_rate": 2.4201807228915663e-05,
"loss": 0.5533,
"step": 1716
},
{
"epoch": 0.52,
"learning_rate": 2.4171686746987954e-05,
"loss": 0.4462,
"step": 1718
},
{
"epoch": 0.52,
"learning_rate": 2.414156626506024e-05,
"loss": 1.1056,
"step": 1720
},
{
"epoch": 0.52,
"learning_rate": 2.411144578313253e-05,
"loss": 0.673,
"step": 1722
},
{
"epoch": 0.52,
"learning_rate": 2.4081325301204822e-05,
"loss": 0.6243,
"step": 1724
},
{
"epoch": 0.52,
"learning_rate": 2.405120481927711e-05,
"loss": 1.0228,
"step": 1726
},
{
"epoch": 0.52,
"learning_rate": 2.4021084337349397e-05,
"loss": 0.6509,
"step": 1728
},
{
"epoch": 0.52,
"learning_rate": 2.3990963855421687e-05,
"loss": 0.5726,
"step": 1730
},
{
"epoch": 0.52,
"learning_rate": 2.3960843373493978e-05,
"loss": 0.552,
"step": 1732
},
{
"epoch": 0.52,
"learning_rate": 2.3930722891566265e-05,
"loss": 0.6378,
"step": 1734
},
{
"epoch": 0.52,
"learning_rate": 2.3900602409638555e-05,
"loss": 0.7342,
"step": 1736
},
{
"epoch": 0.52,
"learning_rate": 2.3870481927710846e-05,
"loss": 0.6698,
"step": 1738
},
{
"epoch": 0.52,
"learning_rate": 2.3840361445783133e-05,
"loss": 0.7551,
"step": 1740
},
{
"epoch": 0.52,
"learning_rate": 2.3810240963855424e-05,
"loss": 0.8448,
"step": 1742
},
{
"epoch": 0.53,
"learning_rate": 2.378012048192771e-05,
"loss": 0.6835,
"step": 1744
},
{
"epoch": 0.53,
"learning_rate": 2.375e-05,
"loss": 0.8092,
"step": 1746
},
{
"epoch": 0.53,
"learning_rate": 2.3719879518072292e-05,
"loss": 0.5386,
"step": 1748
},
{
"epoch": 0.53,
"learning_rate": 2.368975903614458e-05,
"loss": 0.622,
"step": 1750
},
{
"epoch": 0.53,
"learning_rate": 2.3659638554216866e-05,
"loss": 1.2258,
"step": 1752
},
{
"epoch": 0.53,
"learning_rate": 2.362951807228916e-05,
"loss": 0.9337,
"step": 1754
},
{
"epoch": 0.53,
"learning_rate": 2.3599397590361447e-05,
"loss": 0.5494,
"step": 1756
},
{
"epoch": 0.53,
"learning_rate": 2.3569277108433734e-05,
"loss": 0.5795,
"step": 1758
},
{
"epoch": 0.53,
"learning_rate": 2.3539156626506025e-05,
"loss": 1.1796,
"step": 1760
},
{
"epoch": 0.53,
"learning_rate": 2.3509036144578316e-05,
"loss": 0.6265,
"step": 1762
},
{
"epoch": 0.53,
"learning_rate": 2.3478915662650603e-05,
"loss": 0.5777,
"step": 1764
},
{
"epoch": 0.53,
"learning_rate": 2.3448795180722893e-05,
"loss": 0.7741,
"step": 1766
},
{
"epoch": 0.53,
"learning_rate": 2.341867469879518e-05,
"loss": 0.6213,
"step": 1768
},
{
"epoch": 0.53,
"learning_rate": 2.338855421686747e-05,
"loss": 0.4539,
"step": 1770
},
{
"epoch": 0.53,
"learning_rate": 2.335843373493976e-05,
"loss": 1.1912,
"step": 1772
},
{
"epoch": 0.53,
"learning_rate": 2.332831325301205e-05,
"loss": 0.8139,
"step": 1774
},
{
"epoch": 0.53,
"learning_rate": 2.3298192771084336e-05,
"loss": 0.6261,
"step": 1776
},
{
"epoch": 0.54,
"learning_rate": 2.326807228915663e-05,
"loss": 0.8501,
"step": 1778
},
{
"epoch": 0.54,
"learning_rate": 2.3237951807228917e-05,
"loss": 0.5257,
"step": 1780
},
{
"epoch": 0.54,
"learning_rate": 2.3207831325301204e-05,
"loss": 0.6269,
"step": 1782
},
{
"epoch": 0.54,
"learning_rate": 2.3177710843373495e-05,
"loss": 0.8953,
"step": 1784
},
{
"epoch": 0.54,
"learning_rate": 2.3147590361445785e-05,
"loss": 1.1006,
"step": 1786
},
{
"epoch": 0.54,
"learning_rate": 2.3117469879518072e-05,
"loss": 0.61,
"step": 1788
},
{
"epoch": 0.54,
"learning_rate": 2.3087349397590363e-05,
"loss": 0.446,
"step": 1790
},
{
"epoch": 0.54,
"learning_rate": 2.305722891566265e-05,
"loss": 0.5063,
"step": 1792
},
{
"epoch": 0.54,
"learning_rate": 2.302710843373494e-05,
"loss": 0.5042,
"step": 1794
},
{
"epoch": 0.54,
"learning_rate": 2.299698795180723e-05,
"loss": 0.5522,
"step": 1796
},
{
"epoch": 0.54,
"learning_rate": 2.2966867469879518e-05,
"loss": 0.5351,
"step": 1798
},
{
"epoch": 0.54,
"learning_rate": 2.2936746987951805e-05,
"loss": 0.8872,
"step": 1800
},
{
"epoch": 0.54,
"learning_rate": 2.29066265060241e-05,
"loss": 0.4804,
"step": 1802
},
{
"epoch": 0.54,
"learning_rate": 2.2876506024096387e-05,
"loss": 1.6299,
"step": 1804
},
{
"epoch": 0.54,
"learning_rate": 2.2846385542168674e-05,
"loss": 1.2266,
"step": 1806
},
{
"epoch": 0.54,
"learning_rate": 2.2816265060240964e-05,
"loss": 0.414,
"step": 1808
},
{
"epoch": 0.55,
"learning_rate": 2.2786144578313255e-05,
"loss": 0.5788,
"step": 1810
},
{
"epoch": 0.55,
"learning_rate": 2.2756024096385542e-05,
"loss": 0.5271,
"step": 1812
},
{
"epoch": 0.55,
"learning_rate": 2.2725903614457832e-05,
"loss": 0.4207,
"step": 1814
},
{
"epoch": 0.55,
"learning_rate": 2.269578313253012e-05,
"loss": 0.7799,
"step": 1816
},
{
"epoch": 0.55,
"learning_rate": 2.266566265060241e-05,
"loss": 0.9132,
"step": 1818
},
{
"epoch": 0.55,
"learning_rate": 2.26355421686747e-05,
"loss": 0.7844,
"step": 1820
},
{
"epoch": 0.55,
"learning_rate": 2.2605421686746988e-05,
"loss": 0.5042,
"step": 1822
},
{
"epoch": 0.55,
"learning_rate": 2.257530120481928e-05,
"loss": 0.5157,
"step": 1824
},
{
"epoch": 0.55,
"learning_rate": 2.254518072289157e-05,
"loss": 0.9283,
"step": 1826
},
{
"epoch": 0.55,
"learning_rate": 2.2515060240963856e-05,
"loss": 0.7564,
"step": 1828
},
{
"epoch": 0.55,
"learning_rate": 2.2484939759036143e-05,
"loss": 0.6181,
"step": 1830
},
{
"epoch": 0.55,
"learning_rate": 2.2454819277108434e-05,
"loss": 1.1242,
"step": 1832
},
{
"epoch": 0.55,
"learning_rate": 2.2424698795180724e-05,
"loss": 0.752,
"step": 1834
},
{
"epoch": 0.55,
"learning_rate": 2.239457831325301e-05,
"loss": 0.9748,
"step": 1836
},
{
"epoch": 0.55,
"learning_rate": 2.2364457831325302e-05,
"loss": 0.4915,
"step": 1838
},
{
"epoch": 0.55,
"learning_rate": 2.2334337349397593e-05,
"loss": 0.8839,
"step": 1840
},
{
"epoch": 0.55,
"learning_rate": 2.230421686746988e-05,
"loss": 0.5571,
"step": 1842
},
{
"epoch": 0.56,
"learning_rate": 2.227409638554217e-05,
"loss": 0.5591,
"step": 1844
},
{
"epoch": 0.56,
"learning_rate": 2.2243975903614458e-05,
"loss": 0.5488,
"step": 1846
},
{
"epoch": 0.56,
"learning_rate": 2.2213855421686748e-05,
"loss": 0.5326,
"step": 1848
},
{
"epoch": 0.56,
"learning_rate": 2.218373493975904e-05,
"loss": 1.4484,
"step": 1850
},
{
"epoch": 0.56,
"learning_rate": 2.2153614457831326e-05,
"loss": 0.527,
"step": 1852
},
{
"epoch": 0.56,
"learning_rate": 2.2123493975903613e-05,
"loss": 0.593,
"step": 1854
},
{
"epoch": 0.56,
"learning_rate": 2.2093373493975903e-05,
"loss": 0.7,
"step": 1856
},
{
"epoch": 0.56,
"learning_rate": 2.2063253012048194e-05,
"loss": 0.59,
"step": 1858
},
{
"epoch": 0.56,
"learning_rate": 2.203313253012048e-05,
"loss": 0.7181,
"step": 1860
},
{
"epoch": 0.56,
"learning_rate": 2.2003012048192772e-05,
"loss": 0.6512,
"step": 1862
},
{
"epoch": 0.56,
"learning_rate": 2.1972891566265062e-05,
"loss": 0.7383,
"step": 1864
},
{
"epoch": 0.56,
"learning_rate": 2.194277108433735e-05,
"loss": 0.7412,
"step": 1866
},
{
"epoch": 0.56,
"learning_rate": 2.191265060240964e-05,
"loss": 0.5686,
"step": 1868
},
{
"epoch": 0.56,
"learning_rate": 2.1882530120481927e-05,
"loss": 0.6791,
"step": 1870
},
{
"epoch": 0.56,
"learning_rate": 2.1852409638554218e-05,
"loss": 0.6164,
"step": 1872
},
{
"epoch": 0.56,
"learning_rate": 2.1822289156626508e-05,
"loss": 0.7112,
"step": 1874
},
{
"epoch": 0.57,
"learning_rate": 2.1792168674698795e-05,
"loss": 0.8454,
"step": 1876
},
{
"epoch": 0.57,
"learning_rate": 2.1762048192771086e-05,
"loss": 0.5878,
"step": 1878
},
{
"epoch": 0.57,
"learning_rate": 2.1731927710843376e-05,
"loss": 0.6151,
"step": 1880
},
{
"epoch": 0.57,
"learning_rate": 2.1701807228915664e-05,
"loss": 0.4685,
"step": 1882
},
{
"epoch": 0.57,
"learning_rate": 2.1671686746987954e-05,
"loss": 0.666,
"step": 1884
},
{
"epoch": 0.57,
"learning_rate": 2.164156626506024e-05,
"loss": 0.6314,
"step": 1886
},
{
"epoch": 0.57,
"learning_rate": 2.1611445783132532e-05,
"loss": 0.556,
"step": 1888
},
{
"epoch": 0.57,
"learning_rate": 2.1581325301204822e-05,
"loss": 0.8635,
"step": 1890
},
{
"epoch": 0.57,
"learning_rate": 2.155120481927711e-05,
"loss": 0.6287,
"step": 1892
},
{
"epoch": 0.57,
"learning_rate": 2.1521084337349397e-05,
"loss": 0.4866,
"step": 1894
},
{
"epoch": 0.57,
"learning_rate": 2.149096385542169e-05,
"loss": 0.6008,
"step": 1896
},
{
"epoch": 0.57,
"learning_rate": 2.1460843373493978e-05,
"loss": 0.5745,
"step": 1898
},
{
"epoch": 0.57,
"learning_rate": 2.1430722891566265e-05,
"loss": 0.5113,
"step": 1900
},
{
"epoch": 0.57,
"learning_rate": 2.1400602409638556e-05,
"loss": 0.6779,
"step": 1902
},
{
"epoch": 0.57,
"learning_rate": 2.1370481927710846e-05,
"loss": 0.5321,
"step": 1904
},
{
"epoch": 0.57,
"learning_rate": 2.1340361445783133e-05,
"loss": 0.7097,
"step": 1906
},
{
"epoch": 0.57,
"learning_rate": 2.1310240963855424e-05,
"loss": 0.6043,
"step": 1908
},
{
"epoch": 0.58,
"learning_rate": 2.128012048192771e-05,
"loss": 0.4876,
"step": 1910
},
{
"epoch": 0.58,
"learning_rate": 2.125e-05,
"loss": 0.7147,
"step": 1912
},
{
"epoch": 0.58,
"learning_rate": 2.1219879518072292e-05,
"loss": 0.619,
"step": 1914
},
{
"epoch": 0.58,
"learning_rate": 2.118975903614458e-05,
"loss": 0.7508,
"step": 1916
},
{
"epoch": 0.58,
"learning_rate": 2.1159638554216866e-05,
"loss": 0.7298,
"step": 1918
},
{
"epoch": 0.58,
"learning_rate": 2.112951807228916e-05,
"loss": 0.8781,
"step": 1920
},
{
"epoch": 0.58,
"learning_rate": 2.1099397590361447e-05,
"loss": 0.6319,
"step": 1922
},
{
"epoch": 0.58,
"learning_rate": 2.1069277108433735e-05,
"loss": 0.7589,
"step": 1924
},
{
"epoch": 0.58,
"learning_rate": 2.1039156626506025e-05,
"loss": 0.6029,
"step": 1926
},
{
"epoch": 0.58,
"learning_rate": 2.1009036144578316e-05,
"loss": 0.6917,
"step": 1928
},
{
"epoch": 0.58,
"learning_rate": 2.0978915662650603e-05,
"loss": 0.8015,
"step": 1930
},
{
"epoch": 0.58,
"learning_rate": 2.0948795180722893e-05,
"loss": 0.9763,
"step": 1932
},
{
"epoch": 0.58,
"learning_rate": 2.091867469879518e-05,
"loss": 0.6007,
"step": 1934
},
{
"epoch": 0.58,
"learning_rate": 2.088855421686747e-05,
"loss": 0.5496,
"step": 1936
},
{
"epoch": 0.58,
"learning_rate": 2.0858433734939762e-05,
"loss": 1.2008,
"step": 1938
},
{
"epoch": 0.58,
"learning_rate": 2.082831325301205e-05,
"loss": 0.4956,
"step": 1940
},
{
"epoch": 0.58,
"learning_rate": 2.079819277108434e-05,
"loss": 0.8637,
"step": 1942
},
{
"epoch": 0.59,
"learning_rate": 2.076807228915663e-05,
"loss": 0.4804,
"step": 1944
},
{
"epoch": 0.59,
"learning_rate": 2.0737951807228917e-05,
"loss": 0.892,
"step": 1946
},
{
"epoch": 0.59,
"learning_rate": 2.0707831325301204e-05,
"loss": 0.4594,
"step": 1948
},
{
"epoch": 0.59,
"learning_rate": 2.0677710843373495e-05,
"loss": 0.8251,
"step": 1950
},
{
"epoch": 0.59,
"learning_rate": 2.0647590361445785e-05,
"loss": 0.5045,
"step": 1952
},
{
"epoch": 0.59,
"learning_rate": 2.0617469879518073e-05,
"loss": 0.51,
"step": 1954
},
{
"epoch": 0.59,
"learning_rate": 2.0587349397590363e-05,
"loss": 0.5633,
"step": 1956
},
{
"epoch": 0.59,
"learning_rate": 2.055722891566265e-05,
"loss": 0.6801,
"step": 1958
},
{
"epoch": 0.59,
"learning_rate": 2.052710843373494e-05,
"loss": 0.8144,
"step": 1960
},
{
"epoch": 0.59,
"learning_rate": 2.049698795180723e-05,
"loss": 0.4798,
"step": 1962
},
{
"epoch": 0.59,
"learning_rate": 2.046686746987952e-05,
"loss": 0.5803,
"step": 1964
},
{
"epoch": 0.59,
"learning_rate": 2.043674698795181e-05,
"loss": 0.505,
"step": 1966
},
{
"epoch": 0.59,
"learning_rate": 2.04066265060241e-05,
"loss": 0.5162,
"step": 1968
},
{
"epoch": 0.59,
"learning_rate": 2.0376506024096387e-05,
"loss": 1.5769,
"step": 1970
},
{
"epoch": 0.59,
"learning_rate": 2.0346385542168674e-05,
"loss": 0.5835,
"step": 1972
},
{
"epoch": 0.59,
"learning_rate": 2.0316265060240964e-05,
"loss": 0.4594,
"step": 1974
},
{
"epoch": 0.6,
"learning_rate": 2.0286144578313255e-05,
"loss": 0.5226,
"step": 1976
},
{
"epoch": 0.6,
"learning_rate": 2.0256024096385542e-05,
"loss": 0.6326,
"step": 1978
},
{
"epoch": 0.6,
"learning_rate": 2.0225903614457833e-05,
"loss": 0.5894,
"step": 1980
},
{
"epoch": 0.6,
"learning_rate": 2.0195783132530123e-05,
"loss": 0.5279,
"step": 1982
},
{
"epoch": 0.6,
"learning_rate": 2.016566265060241e-05,
"loss": 0.5131,
"step": 1984
},
{
"epoch": 0.6,
"learning_rate": 2.01355421686747e-05,
"loss": 0.6316,
"step": 1986
},
{
"epoch": 0.6,
"learning_rate": 2.0105421686746988e-05,
"loss": 0.5663,
"step": 1988
},
{
"epoch": 0.6,
"learning_rate": 2.007530120481928e-05,
"loss": 0.7816,
"step": 1990
},
{
"epoch": 0.6,
"learning_rate": 2.004518072289157e-05,
"loss": 0.7826,
"step": 1992
},
{
"epoch": 0.6,
"learning_rate": 2.0015060240963856e-05,
"loss": 0.4551,
"step": 1994
},
{
"epoch": 0.6,
"learning_rate": 1.9984939759036144e-05,
"loss": 0.4776,
"step": 1996
},
{
"epoch": 0.6,
"learning_rate": 1.9954819277108437e-05,
"loss": 0.6508,
"step": 1998
},
{
"epoch": 0.6,
"learning_rate": 1.9924698795180725e-05,
"loss": 0.9406,
"step": 2000
},
{
"epoch": 0.6,
"eval_cer": 0.08249768864234407,
"eval_loss": 0.7267863750457764,
"eval_runtime": 753.2697,
"eval_samples_per_second": 3.918,
"eval_steps_per_second": 0.49,
"step": 2000
},
{
"epoch": 0.6,
"learning_rate": 1.9894578313253012e-05,
"loss": 0.5627,
"step": 2002
},
{
"epoch": 0.6,
"learning_rate": 1.9864457831325302e-05,
"loss": 0.7191,
"step": 2004
},
{
"epoch": 0.6,
"learning_rate": 1.9834337349397593e-05,
"loss": 0.781,
"step": 2006
},
{
"epoch": 0.6,
"learning_rate": 1.980421686746988e-05,
"loss": 0.5979,
"step": 2008
},
{
"epoch": 0.61,
"learning_rate": 1.977409638554217e-05,
"loss": 0.9093,
"step": 2010
},
{
"epoch": 0.61,
"learning_rate": 1.9743975903614458e-05,
"loss": 0.6867,
"step": 2012
},
{
"epoch": 0.61,
"learning_rate": 1.9713855421686748e-05,
"loss": 0.4974,
"step": 2014
},
{
"epoch": 0.61,
"learning_rate": 1.968373493975904e-05,
"loss": 0.8073,
"step": 2016
},
{
"epoch": 0.61,
"learning_rate": 1.9653614457831326e-05,
"loss": 0.7962,
"step": 2018
},
{
"epoch": 0.61,
"learning_rate": 1.9623493975903613e-05,
"loss": 0.6281,
"step": 2020
},
{
"epoch": 0.61,
"learning_rate": 1.9593373493975907e-05,
"loss": 0.4626,
"step": 2022
},
{
"epoch": 0.61,
"learning_rate": 1.9563253012048194e-05,
"loss": 0.5287,
"step": 2024
},
{
"epoch": 0.61,
"learning_rate": 1.953313253012048e-05,
"loss": 0.4783,
"step": 2026
},
{
"epoch": 0.61,
"learning_rate": 1.9503012048192772e-05,
"loss": 0.5702,
"step": 2028
},
{
"epoch": 0.61,
"learning_rate": 1.9472891566265062e-05,
"loss": 0.783,
"step": 2030
},
{
"epoch": 0.61,
"learning_rate": 1.944277108433735e-05,
"loss": 0.636,
"step": 2032
},
{
"epoch": 0.61,
"learning_rate": 1.941265060240964e-05,
"loss": 0.6798,
"step": 2034
},
{
"epoch": 0.61,
"learning_rate": 1.9382530120481927e-05,
"loss": 0.6654,
"step": 2036
},
{
"epoch": 0.61,
"learning_rate": 1.9352409638554218e-05,
"loss": 0.4921,
"step": 2038
},
{
"epoch": 0.61,
"learning_rate": 1.932228915662651e-05,
"loss": 0.7768,
"step": 2040
},
{
"epoch": 0.62,
"learning_rate": 1.9292168674698796e-05,
"loss": 0.4803,
"step": 2042
},
{
"epoch": 0.62,
"learning_rate": 1.9262048192771083e-05,
"loss": 0.5692,
"step": 2044
},
{
"epoch": 0.62,
"learning_rate": 1.9231927710843377e-05,
"loss": 1.0903,
"step": 2046
},
{
"epoch": 0.62,
"learning_rate": 1.9201807228915664e-05,
"loss": 0.4279,
"step": 2048
},
{
"epoch": 0.62,
"learning_rate": 1.917168674698795e-05,
"loss": 0.7128,
"step": 2050
},
{
"epoch": 0.62,
"learning_rate": 1.914156626506024e-05,
"loss": 0.4645,
"step": 2052
},
{
"epoch": 0.62,
"learning_rate": 1.9111445783132532e-05,
"loss": 0.5187,
"step": 2054
},
{
"epoch": 0.62,
"learning_rate": 1.908132530120482e-05,
"loss": 0.5729,
"step": 2056
},
{
"epoch": 0.62,
"learning_rate": 1.905120481927711e-05,
"loss": 0.5023,
"step": 2058
},
{
"epoch": 0.62,
"learning_rate": 1.9021084337349397e-05,
"loss": 0.5597,
"step": 2060
},
{
"epoch": 0.62,
"learning_rate": 1.8990963855421688e-05,
"loss": 0.4916,
"step": 2062
},
{
"epoch": 0.62,
"learning_rate": 1.8960843373493978e-05,
"loss": 0.4832,
"step": 2064
},
{
"epoch": 0.62,
"learning_rate": 1.8930722891566265e-05,
"loss": 0.7273,
"step": 2066
},
{
"epoch": 0.62,
"learning_rate": 1.8900602409638556e-05,
"loss": 0.6162,
"step": 2068
},
{
"epoch": 0.62,
"learning_rate": 1.8870481927710846e-05,
"loss": 0.4669,
"step": 2070
},
{
"epoch": 0.62,
"learning_rate": 1.8840361445783133e-05,
"loss": 0.4762,
"step": 2072
},
{
"epoch": 0.62,
"learning_rate": 1.881024096385542e-05,
"loss": 0.802,
"step": 2074
},
{
"epoch": 0.63,
"learning_rate": 1.878012048192771e-05,
"loss": 0.662,
"step": 2076
},
{
"epoch": 0.63,
"learning_rate": 1.8750000000000002e-05,
"loss": 0.4455,
"step": 2078
},
{
"epoch": 0.63,
"learning_rate": 1.871987951807229e-05,
"loss": 0.7274,
"step": 2080
},
{
"epoch": 0.63,
"learning_rate": 1.868975903614458e-05,
"loss": 0.7014,
"step": 2082
},
{
"epoch": 0.63,
"learning_rate": 1.865963855421687e-05,
"loss": 0.7342,
"step": 2084
},
{
"epoch": 0.63,
"learning_rate": 1.8629518072289157e-05,
"loss": 0.5109,
"step": 2086
},
{
"epoch": 0.63,
"learning_rate": 1.8599397590361448e-05,
"loss": 0.8397,
"step": 2088
},
{
"epoch": 0.63,
"learning_rate": 1.8569277108433735e-05,
"loss": 0.4331,
"step": 2090
},
{
"epoch": 0.63,
"learning_rate": 1.8539156626506025e-05,
"loss": 0.5391,
"step": 2092
},
{
"epoch": 0.63,
"learning_rate": 1.8509036144578316e-05,
"loss": 0.46,
"step": 2094
},
{
"epoch": 0.63,
"learning_rate": 1.8478915662650603e-05,
"loss": 0.7349,
"step": 2096
},
{
"epoch": 0.63,
"learning_rate": 1.844879518072289e-05,
"loss": 0.6208,
"step": 2098
},
{
"epoch": 0.63,
"learning_rate": 1.841867469879518e-05,
"loss": 0.6344,
"step": 2100
},
{
"epoch": 0.63,
"learning_rate": 1.838855421686747e-05,
"loss": 1.4614,
"step": 2102
},
{
"epoch": 0.63,
"learning_rate": 1.835843373493976e-05,
"loss": 0.6851,
"step": 2104
},
{
"epoch": 0.63,
"learning_rate": 1.832831325301205e-05,
"loss": 0.6228,
"step": 2106
},
{
"epoch": 0.63,
"learning_rate": 1.829819277108434e-05,
"loss": 0.4769,
"step": 2108
},
{
"epoch": 0.64,
"learning_rate": 1.8268072289156627e-05,
"loss": 0.8725,
"step": 2110
},
{
"epoch": 0.64,
"learning_rate": 1.8237951807228917e-05,
"loss": 0.4725,
"step": 2112
},
{
"epoch": 0.64,
"learning_rate": 1.8207831325301204e-05,
"loss": 0.6864,
"step": 2114
},
{
"epoch": 0.64,
"learning_rate": 1.8177710843373495e-05,
"loss": 0.4778,
"step": 2116
},
{
"epoch": 0.64,
"learning_rate": 1.8147590361445786e-05,
"loss": 0.5117,
"step": 2118
},
{
"epoch": 0.64,
"learning_rate": 1.8117469879518073e-05,
"loss": 0.639,
"step": 2120
},
{
"epoch": 0.64,
"learning_rate": 1.808734939759036e-05,
"loss": 0.632,
"step": 2122
},
{
"epoch": 0.64,
"learning_rate": 1.8057228915662654e-05,
"loss": 0.4581,
"step": 2124
},
{
"epoch": 0.64,
"learning_rate": 1.802710843373494e-05,
"loss": 0.7146,
"step": 2126
},
{
"epoch": 0.64,
"learning_rate": 1.7996987951807228e-05,
"loss": 0.5075,
"step": 2128
},
{
"epoch": 0.64,
"learning_rate": 1.796686746987952e-05,
"loss": 0.5273,
"step": 2130
},
{
"epoch": 0.64,
"learning_rate": 1.793674698795181e-05,
"loss": 0.4607,
"step": 2132
},
{
"epoch": 0.64,
"learning_rate": 1.7906626506024096e-05,
"loss": 0.7484,
"step": 2134
},
{
"epoch": 0.64,
"learning_rate": 1.7876506024096387e-05,
"loss": 0.5197,
"step": 2136
},
{
"epoch": 0.64,
"learning_rate": 1.7846385542168674e-05,
"loss": 0.8774,
"step": 2138
},
{
"epoch": 0.64,
"learning_rate": 1.7816265060240965e-05,
"loss": 0.4721,
"step": 2140
},
{
"epoch": 0.65,
"learning_rate": 1.7786144578313255e-05,
"loss": 0.7284,
"step": 2142
},
{
"epoch": 0.65,
"learning_rate": 1.7756024096385542e-05,
"loss": 0.5294,
"step": 2144
},
{
"epoch": 0.65,
"learning_rate": 1.772590361445783e-05,
"loss": 0.5771,
"step": 2146
},
{
"epoch": 0.65,
"learning_rate": 1.7695783132530123e-05,
"loss": 0.4677,
"step": 2148
},
{
"epoch": 0.65,
"learning_rate": 1.766566265060241e-05,
"loss": 0.6357,
"step": 2150
},
{
"epoch": 0.65,
"learning_rate": 1.7635542168674698e-05,
"loss": 0.5825,
"step": 2152
},
{
"epoch": 0.65,
"learning_rate": 1.760542168674699e-05,
"loss": 0.6084,
"step": 2154
},
{
"epoch": 0.65,
"learning_rate": 1.757530120481928e-05,
"loss": 0.6753,
"step": 2156
},
{
"epoch": 0.65,
"learning_rate": 1.7545180722891566e-05,
"loss": 0.6695,
"step": 2158
},
{
"epoch": 0.65,
"learning_rate": 1.7515060240963857e-05,
"loss": 0.603,
"step": 2160
},
{
"epoch": 0.65,
"learning_rate": 1.7484939759036144e-05,
"loss": 0.6149,
"step": 2162
},
{
"epoch": 0.65,
"learning_rate": 1.7454819277108434e-05,
"loss": 1.0226,
"step": 2164
},
{
"epoch": 0.65,
"learning_rate": 1.7424698795180725e-05,
"loss": 0.6213,
"step": 2166
},
{
"epoch": 0.65,
"learning_rate": 1.7394578313253012e-05,
"loss": 0.4504,
"step": 2168
},
{
"epoch": 0.65,
"learning_rate": 1.7364457831325303e-05,
"loss": 0.6296,
"step": 2170
},
{
"epoch": 0.65,
"learning_rate": 1.7334337349397593e-05,
"loss": 1.0685,
"step": 2172
},
{
"epoch": 0.65,
"learning_rate": 1.730421686746988e-05,
"loss": 0.5243,
"step": 2174
},
{
"epoch": 0.66,
"learning_rate": 1.7274096385542167e-05,
"loss": 0.5115,
"step": 2176
},
{
"epoch": 0.66,
"learning_rate": 1.7243975903614458e-05,
"loss": 0.8434,
"step": 2178
},
{
"epoch": 0.66,
"learning_rate": 1.721385542168675e-05,
"loss": 0.6915,
"step": 2180
},
{
"epoch": 0.66,
"learning_rate": 1.7183734939759036e-05,
"loss": 0.6062,
"step": 2182
},
{
"epoch": 0.66,
"learning_rate": 1.7153614457831326e-05,
"loss": 0.532,
"step": 2184
},
{
"epoch": 0.66,
"learning_rate": 1.7123493975903613e-05,
"loss": 0.7213,
"step": 2186
},
{
"epoch": 0.66,
"learning_rate": 1.7093373493975904e-05,
"loss": 0.7832,
"step": 2188
},
{
"epoch": 0.66,
"learning_rate": 1.7063253012048194e-05,
"loss": 0.7826,
"step": 2190
},
{
"epoch": 0.66,
"learning_rate": 1.703313253012048e-05,
"loss": 0.5975,
"step": 2192
},
{
"epoch": 0.66,
"learning_rate": 1.7003012048192772e-05,
"loss": 0.5429,
"step": 2194
},
{
"epoch": 0.66,
"learning_rate": 1.6972891566265063e-05,
"loss": 0.6882,
"step": 2196
},
{
"epoch": 0.66,
"learning_rate": 1.694277108433735e-05,
"loss": 0.5458,
"step": 2198
},
{
"epoch": 0.66,
"learning_rate": 1.6912650602409637e-05,
"loss": 0.5258,
"step": 2200
},
{
"epoch": 0.66,
"learning_rate": 1.6882530120481928e-05,
"loss": 0.4843,
"step": 2202
},
{
"epoch": 0.66,
"learning_rate": 1.6852409638554218e-05,
"loss": 0.5335,
"step": 2204
},
{
"epoch": 0.66,
"learning_rate": 1.6822289156626505e-05,
"loss": 0.5807,
"step": 2206
},
{
"epoch": 0.67,
"learning_rate": 1.6792168674698796e-05,
"loss": 0.5568,
"step": 2208
},
{
"epoch": 0.67,
"learning_rate": 1.6762048192771086e-05,
"loss": 0.7063,
"step": 2210
},
{
"epoch": 0.67,
"learning_rate": 1.6731927710843374e-05,
"loss": 0.8424,
"step": 2212
},
{
"epoch": 0.67,
"learning_rate": 1.6701807228915664e-05,
"loss": 0.5472,
"step": 2214
},
{
"epoch": 0.67,
"learning_rate": 1.667168674698795e-05,
"loss": 0.4307,
"step": 2216
},
{
"epoch": 0.67,
"learning_rate": 1.6641566265060242e-05,
"loss": 0.6609,
"step": 2218
},
{
"epoch": 0.67,
"learning_rate": 1.6611445783132532e-05,
"loss": 0.5276,
"step": 2220
},
{
"epoch": 0.67,
"learning_rate": 1.658132530120482e-05,
"loss": 0.5024,
"step": 2222
},
{
"epoch": 0.67,
"learning_rate": 1.6551204819277107e-05,
"loss": 0.445,
"step": 2224
},
{
"epoch": 0.67,
"learning_rate": 1.65210843373494e-05,
"loss": 0.4308,
"step": 2226
},
{
"epoch": 0.67,
"learning_rate": 1.6490963855421688e-05,
"loss": 0.6142,
"step": 2228
},
{
"epoch": 0.67,
"learning_rate": 1.6460843373493975e-05,
"loss": 0.7039,
"step": 2230
},
{
"epoch": 0.67,
"learning_rate": 1.6430722891566265e-05,
"loss": 0.5648,
"step": 2232
},
{
"epoch": 0.67,
"learning_rate": 1.6400602409638556e-05,
"loss": 0.4863,
"step": 2234
},
{
"epoch": 0.67,
"learning_rate": 1.6370481927710843e-05,
"loss": 0.6933,
"step": 2236
},
{
"epoch": 0.67,
"learning_rate": 1.6340361445783134e-05,
"loss": 0.3984,
"step": 2238
},
{
"epoch": 0.67,
"learning_rate": 1.631024096385542e-05,
"loss": 0.6069,
"step": 2240
},
{
"epoch": 0.68,
"learning_rate": 1.628012048192771e-05,
"loss": 0.6001,
"step": 2242
},
{
"epoch": 0.68,
"learning_rate": 1.6250000000000002e-05,
"loss": 0.5772,
"step": 2244
},
{
"epoch": 0.68,
"learning_rate": 1.621987951807229e-05,
"loss": 0.5533,
"step": 2246
},
{
"epoch": 0.68,
"learning_rate": 1.6189759036144576e-05,
"loss": 0.6078,
"step": 2248
},
{
"epoch": 0.68,
"learning_rate": 1.615963855421687e-05,
"loss": 0.5173,
"step": 2250
},
{
"epoch": 0.68,
"learning_rate": 1.6129518072289157e-05,
"loss": 0.6483,
"step": 2252
},
{
"epoch": 0.68,
"learning_rate": 1.6099397590361445e-05,
"loss": 0.5477,
"step": 2254
},
{
"epoch": 0.68,
"learning_rate": 1.6069277108433735e-05,
"loss": 0.8127,
"step": 2256
},
{
"epoch": 0.68,
"learning_rate": 1.6039156626506026e-05,
"loss": 0.9926,
"step": 2258
},
{
"epoch": 0.68,
"learning_rate": 1.6009036144578313e-05,
"loss": 0.5507,
"step": 2260
},
{
"epoch": 0.68,
"learning_rate": 1.5978915662650603e-05,
"loss": 0.8586,
"step": 2262
},
{
"epoch": 0.68,
"learning_rate": 1.594879518072289e-05,
"loss": 0.6409,
"step": 2264
},
{
"epoch": 0.68,
"learning_rate": 1.591867469879518e-05,
"loss": 0.5938,
"step": 2266
},
{
"epoch": 0.68,
"learning_rate": 1.588855421686747e-05,
"loss": 0.8117,
"step": 2268
},
{
"epoch": 0.68,
"learning_rate": 1.585843373493976e-05,
"loss": 0.565,
"step": 2270
},
{
"epoch": 0.68,
"learning_rate": 1.5828313253012046e-05,
"loss": 0.6947,
"step": 2272
},
{
"epoch": 0.68,
"learning_rate": 1.579819277108434e-05,
"loss": 0.9155,
"step": 2274
},
{
"epoch": 0.69,
"learning_rate": 1.5768072289156627e-05,
"loss": 0.805,
"step": 2276
},
{
"epoch": 0.69,
"learning_rate": 1.5737951807228914e-05,
"loss": 0.634,
"step": 2278
},
{
"epoch": 0.69,
"learning_rate": 1.5707831325301205e-05,
"loss": 0.5911,
"step": 2280
},
{
"epoch": 0.69,
"learning_rate": 1.5677710843373495e-05,
"loss": 0.4573,
"step": 2282
},
{
"epoch": 0.69,
"learning_rate": 1.5647590361445782e-05,
"loss": 0.8556,
"step": 2284
},
{
"epoch": 0.69,
"learning_rate": 1.5617469879518073e-05,
"loss": 0.4827,
"step": 2286
},
{
"epoch": 0.69,
"learning_rate": 1.558734939759036e-05,
"loss": 0.7562,
"step": 2288
},
{
"epoch": 0.69,
"learning_rate": 1.5557228915662654e-05,
"loss": 0.5059,
"step": 2290
},
{
"epoch": 0.69,
"learning_rate": 1.552710843373494e-05,
"loss": 0.6849,
"step": 2292
},
{
"epoch": 0.69,
"learning_rate": 1.549698795180723e-05,
"loss": 0.5039,
"step": 2294
},
{
"epoch": 0.69,
"learning_rate": 1.546686746987952e-05,
"loss": 0.6326,
"step": 2296
},
{
"epoch": 0.69,
"learning_rate": 1.543674698795181e-05,
"loss": 0.4967,
"step": 2298
},
{
"epoch": 0.69,
"learning_rate": 1.5406626506024097e-05,
"loss": 0.6562,
"step": 2300
},
{
"epoch": 0.69,
"learning_rate": 1.5376506024096387e-05,
"loss": 0.4834,
"step": 2302
},
{
"epoch": 0.69,
"learning_rate": 1.5346385542168674e-05,
"loss": 0.7589,
"step": 2304
},
{
"epoch": 0.69,
"learning_rate": 1.5316265060240965e-05,
"loss": 0.6225,
"step": 2306
},
{
"epoch": 0.7,
"learning_rate": 1.5286144578313255e-05,
"loss": 0.6328,
"step": 2308
},
{
"epoch": 0.7,
"learning_rate": 1.5256024096385543e-05,
"loss": 0.5992,
"step": 2310
},
{
"epoch": 0.7,
"learning_rate": 1.5225903614457831e-05,
"loss": 0.7051,
"step": 2312
},
{
"epoch": 0.7,
"learning_rate": 1.5195783132530122e-05,
"loss": 0.5301,
"step": 2314
},
{
"epoch": 0.7,
"learning_rate": 1.516566265060241e-05,
"loss": 0.9645,
"step": 2316
},
{
"epoch": 0.7,
"learning_rate": 1.51355421686747e-05,
"loss": 0.5321,
"step": 2318
},
{
"epoch": 0.7,
"learning_rate": 1.510542168674699e-05,
"loss": 0.4871,
"step": 2320
},
{
"epoch": 0.7,
"learning_rate": 1.5075301204819277e-05,
"loss": 0.4946,
"step": 2322
},
{
"epoch": 0.7,
"learning_rate": 1.5045180722891566e-05,
"loss": 0.6003,
"step": 2324
},
{
"epoch": 0.7,
"learning_rate": 1.5015060240963857e-05,
"loss": 0.4844,
"step": 2326
},
{
"epoch": 0.7,
"learning_rate": 1.4984939759036146e-05,
"loss": 0.5497,
"step": 2328
},
{
"epoch": 0.7,
"learning_rate": 1.4954819277108435e-05,
"loss": 0.7667,
"step": 2330
},
{
"epoch": 0.7,
"learning_rate": 1.4924698795180725e-05,
"loss": 0.5915,
"step": 2332
},
{
"epoch": 0.7,
"learning_rate": 1.4894578313253014e-05,
"loss": 0.5949,
"step": 2334
},
{
"epoch": 0.7,
"learning_rate": 1.4864457831325301e-05,
"loss": 0.668,
"step": 2336
},
{
"epoch": 0.7,
"learning_rate": 1.4834337349397592e-05,
"loss": 0.4592,
"step": 2338
},
{
"epoch": 0.7,
"learning_rate": 1.480421686746988e-05,
"loss": 0.4466,
"step": 2340
},
{
"epoch": 0.71,
"learning_rate": 1.477409638554217e-05,
"loss": 0.6669,
"step": 2342
},
{
"epoch": 0.71,
"learning_rate": 1.474397590361446e-05,
"loss": 0.5373,
"step": 2344
},
{
"epoch": 0.71,
"learning_rate": 1.4713855421686749e-05,
"loss": 0.4095,
"step": 2346
},
{
"epoch": 0.71,
"learning_rate": 1.4683734939759036e-05,
"loss": 0.5343,
"step": 2348
},
{
"epoch": 0.71,
"learning_rate": 1.4653614457831326e-05,
"loss": 0.4892,
"step": 2350
},
{
"epoch": 0.71,
"learning_rate": 1.4623493975903615e-05,
"loss": 1.0634,
"step": 2352
},
{
"epoch": 0.71,
"learning_rate": 1.4593373493975904e-05,
"loss": 0.5045,
"step": 2354
},
{
"epoch": 0.71,
"learning_rate": 1.4563253012048195e-05,
"loss": 0.4086,
"step": 2356
},
{
"epoch": 0.71,
"learning_rate": 1.4533132530120484e-05,
"loss": 0.5455,
"step": 2358
},
{
"epoch": 0.71,
"learning_rate": 1.450301204819277e-05,
"loss": 0.4604,
"step": 2360
},
{
"epoch": 0.71,
"learning_rate": 1.4472891566265063e-05,
"loss": 0.6921,
"step": 2362
},
{
"epoch": 0.71,
"learning_rate": 1.444277108433735e-05,
"loss": 0.4871,
"step": 2364
},
{
"epoch": 0.71,
"learning_rate": 1.4412650602409639e-05,
"loss": 0.7762,
"step": 2366
},
{
"epoch": 0.71,
"learning_rate": 1.438253012048193e-05,
"loss": 0.5095,
"step": 2368
},
{
"epoch": 0.71,
"learning_rate": 1.4352409638554218e-05,
"loss": 0.6104,
"step": 2370
},
{
"epoch": 0.71,
"learning_rate": 1.4322289156626506e-05,
"loss": 0.7275,
"step": 2372
},
{
"epoch": 0.72,
"learning_rate": 1.4292168674698798e-05,
"loss": 0.5394,
"step": 2374
},
{
"epoch": 0.72,
"learning_rate": 1.4262048192771085e-05,
"loss": 0.7735,
"step": 2376
},
{
"epoch": 0.72,
"learning_rate": 1.4231927710843374e-05,
"loss": 0.4241,
"step": 2378
},
{
"epoch": 0.72,
"learning_rate": 1.4201807228915664e-05,
"loss": 0.488,
"step": 2380
},
{
"epoch": 0.72,
"learning_rate": 1.4171686746987953e-05,
"loss": 0.7028,
"step": 2382
},
{
"epoch": 0.72,
"learning_rate": 1.414156626506024e-05,
"loss": 0.4675,
"step": 2384
},
{
"epoch": 0.72,
"learning_rate": 1.4111445783132533e-05,
"loss": 0.7467,
"step": 2386
},
{
"epoch": 0.72,
"learning_rate": 1.408132530120482e-05,
"loss": 0.4878,
"step": 2388
},
{
"epoch": 0.72,
"learning_rate": 1.4051204819277109e-05,
"loss": 0.589,
"step": 2390
},
{
"epoch": 0.72,
"learning_rate": 1.4021084337349399e-05,
"loss": 0.7037,
"step": 2392
},
{
"epoch": 0.72,
"learning_rate": 1.3990963855421688e-05,
"loss": 0.5681,
"step": 2394
},
{
"epoch": 0.72,
"learning_rate": 1.3960843373493975e-05,
"loss": 0.6902,
"step": 2396
},
{
"epoch": 0.72,
"learning_rate": 1.3930722891566267e-05,
"loss": 0.4513,
"step": 2398
},
{
"epoch": 0.72,
"learning_rate": 1.3900602409638555e-05,
"loss": 0.483,
"step": 2400
},
{
"epoch": 0.72,
"learning_rate": 1.3870481927710843e-05,
"loss": 0.5047,
"step": 2402
},
{
"epoch": 0.72,
"learning_rate": 1.3840361445783134e-05,
"loss": 0.6932,
"step": 2404
},
{
"epoch": 0.72,
"learning_rate": 1.3810240963855423e-05,
"loss": 0.4888,
"step": 2406
},
{
"epoch": 0.73,
"learning_rate": 1.378012048192771e-05,
"loss": 0.4771,
"step": 2408
},
{
"epoch": 0.73,
"learning_rate": 1.3750000000000002e-05,
"loss": 0.4849,
"step": 2410
},
{
"epoch": 0.73,
"learning_rate": 1.371987951807229e-05,
"loss": 0.6832,
"step": 2412
},
{
"epoch": 0.73,
"learning_rate": 1.3689759036144578e-05,
"loss": 0.578,
"step": 2414
},
{
"epoch": 0.73,
"learning_rate": 1.3659638554216869e-05,
"loss": 0.4934,
"step": 2416
},
{
"epoch": 0.73,
"learning_rate": 1.3629518072289158e-05,
"loss": 0.4642,
"step": 2418
},
{
"epoch": 0.73,
"learning_rate": 1.3599397590361446e-05,
"loss": 0.6765,
"step": 2420
},
{
"epoch": 0.73,
"learning_rate": 1.3569277108433737e-05,
"loss": 0.6208,
"step": 2422
},
{
"epoch": 0.73,
"learning_rate": 1.3539156626506024e-05,
"loss": 0.5729,
"step": 2424
},
{
"epoch": 0.73,
"learning_rate": 1.3509036144578313e-05,
"loss": 0.4703,
"step": 2426
},
{
"epoch": 0.73,
"learning_rate": 1.3478915662650604e-05,
"loss": 0.6501,
"step": 2428
},
{
"epoch": 0.73,
"learning_rate": 1.3448795180722892e-05,
"loss": 0.6411,
"step": 2430
},
{
"epoch": 0.73,
"learning_rate": 1.3418674698795181e-05,
"loss": 0.5919,
"step": 2432
},
{
"epoch": 0.73,
"learning_rate": 1.3388554216867472e-05,
"loss": 0.441,
"step": 2434
},
{
"epoch": 0.73,
"learning_rate": 1.3358433734939759e-05,
"loss": 0.5037,
"step": 2436
},
{
"epoch": 0.73,
"learning_rate": 1.3328313253012048e-05,
"loss": 0.7025,
"step": 2438
},
{
"epoch": 0.73,
"learning_rate": 1.3298192771084338e-05,
"loss": 0.4899,
"step": 2440
},
{
"epoch": 0.74,
"learning_rate": 1.3268072289156627e-05,
"loss": 0.5541,
"step": 2442
},
{
"epoch": 0.74,
"learning_rate": 1.3237951807228916e-05,
"loss": 0.8093,
"step": 2444
},
{
"epoch": 0.74,
"learning_rate": 1.3207831325301207e-05,
"loss": 0.5627,
"step": 2446
},
{
"epoch": 0.74,
"learning_rate": 1.3177710843373495e-05,
"loss": 0.4477,
"step": 2448
},
{
"epoch": 0.74,
"learning_rate": 1.3147590361445783e-05,
"loss": 0.4365,
"step": 2450
},
{
"epoch": 0.74,
"learning_rate": 1.3117469879518073e-05,
"loss": 0.4683,
"step": 2452
},
{
"epoch": 0.74,
"learning_rate": 1.3087349397590362e-05,
"loss": 0.8516,
"step": 2454
},
{
"epoch": 0.74,
"learning_rate": 1.3057228915662651e-05,
"loss": 0.3917,
"step": 2456
},
{
"epoch": 0.74,
"learning_rate": 1.3027108433734941e-05,
"loss": 0.4634,
"step": 2458
},
{
"epoch": 0.74,
"learning_rate": 1.299698795180723e-05,
"loss": 0.464,
"step": 2460
},
{
"epoch": 0.74,
"learning_rate": 1.2966867469879517e-05,
"loss": 0.4927,
"step": 2462
},
{
"epoch": 0.74,
"learning_rate": 1.2936746987951808e-05,
"loss": 0.5025,
"step": 2464
},
{
"epoch": 0.74,
"learning_rate": 1.2906626506024097e-05,
"loss": 0.4321,
"step": 2466
},
{
"epoch": 0.74,
"learning_rate": 1.2876506024096386e-05,
"loss": 0.6427,
"step": 2468
},
{
"epoch": 0.74,
"learning_rate": 1.2846385542168676e-05,
"loss": 0.528,
"step": 2470
},
{
"epoch": 0.74,
"learning_rate": 1.2816265060240965e-05,
"loss": 0.7056,
"step": 2472
},
{
"epoch": 0.75,
"learning_rate": 1.2786144578313252e-05,
"loss": 0.8081,
"step": 2474
},
{
"epoch": 0.75,
"learning_rate": 1.2756024096385545e-05,
"loss": 0.6787,
"step": 2476
},
{
"epoch": 0.75,
"learning_rate": 1.2725903614457832e-05,
"loss": 0.4868,
"step": 2478
},
{
"epoch": 0.75,
"learning_rate": 1.269578313253012e-05,
"loss": 0.6668,
"step": 2480
},
{
"epoch": 0.75,
"learning_rate": 1.2665662650602411e-05,
"loss": 0.4883,
"step": 2482
},
{
"epoch": 0.75,
"learning_rate": 1.26355421686747e-05,
"loss": 0.7302,
"step": 2484
},
{
"epoch": 0.75,
"learning_rate": 1.2605421686746987e-05,
"loss": 0.5865,
"step": 2486
},
{
"epoch": 0.75,
"learning_rate": 1.257530120481928e-05,
"loss": 0.7429,
"step": 2488
},
{
"epoch": 0.75,
"learning_rate": 1.2545180722891566e-05,
"loss": 0.4324,
"step": 2490
},
{
"epoch": 0.75,
"learning_rate": 1.2515060240963855e-05,
"loss": 0.5733,
"step": 2492
},
{
"epoch": 0.75,
"learning_rate": 1.2484939759036144e-05,
"loss": 0.6644,
"step": 2494
},
{
"epoch": 0.75,
"learning_rate": 1.2454819277108435e-05,
"loss": 0.497,
"step": 2496
},
{
"epoch": 0.75,
"learning_rate": 1.2424698795180724e-05,
"loss": 0.9599,
"step": 2498
},
{
"epoch": 0.75,
"learning_rate": 1.2394578313253012e-05,
"loss": 0.7193,
"step": 2500
},
{
"epoch": 0.75,
"learning_rate": 1.2364457831325301e-05,
"loss": 0.4735,
"step": 2502
},
{
"epoch": 0.75,
"learning_rate": 1.2334337349397592e-05,
"loss": 0.4499,
"step": 2504
},
{
"epoch": 0.75,
"learning_rate": 1.2304216867469879e-05,
"loss": 0.4513,
"step": 2506
},
{
"epoch": 0.76,
"learning_rate": 1.227409638554217e-05,
"loss": 0.5862,
"step": 2508
},
{
"epoch": 0.76,
"learning_rate": 1.2243975903614458e-05,
"loss": 0.5472,
"step": 2510
},
{
"epoch": 0.76,
"learning_rate": 1.2213855421686747e-05,
"loss": 0.7218,
"step": 2512
},
{
"epoch": 0.76,
"learning_rate": 1.2183734939759036e-05,
"loss": 0.4443,
"step": 2514
},
{
"epoch": 0.76,
"learning_rate": 1.2153614457831327e-05,
"loss": 0.5327,
"step": 2516
},
{
"epoch": 0.76,
"learning_rate": 1.2123493975903614e-05,
"loss": 0.4409,
"step": 2518
},
{
"epoch": 0.76,
"learning_rate": 1.2093373493975904e-05,
"loss": 0.5025,
"step": 2520
},
{
"epoch": 0.76,
"learning_rate": 1.2063253012048193e-05,
"loss": 0.4341,
"step": 2522
},
{
"epoch": 0.76,
"learning_rate": 1.2033132530120482e-05,
"loss": 0.5548,
"step": 2524
},
{
"epoch": 0.76,
"learning_rate": 1.2003012048192771e-05,
"loss": 0.4608,
"step": 2526
},
{
"epoch": 0.76,
"learning_rate": 1.1972891566265061e-05,
"loss": 1.0397,
"step": 2528
},
{
"epoch": 0.76,
"learning_rate": 1.1942771084337349e-05,
"loss": 0.4513,
"step": 2530
},
{
"epoch": 0.76,
"learning_rate": 1.191265060240964e-05,
"loss": 0.4678,
"step": 2532
},
{
"epoch": 0.76,
"learning_rate": 1.1882530120481928e-05,
"loss": 0.4784,
"step": 2534
},
{
"epoch": 0.76,
"learning_rate": 1.1852409638554217e-05,
"loss": 0.4083,
"step": 2536
},
{
"epoch": 0.76,
"learning_rate": 1.1822289156626506e-05,
"loss": 0.4595,
"step": 2538
},
{
"epoch": 0.77,
"learning_rate": 1.1792168674698796e-05,
"loss": 0.428,
"step": 2540
},
{
"epoch": 0.77,
"learning_rate": 1.1762048192771085e-05,
"loss": 0.4462,
"step": 2542
},
{
"epoch": 0.77,
"learning_rate": 1.1731927710843374e-05,
"loss": 0.711,
"step": 2544
},
{
"epoch": 0.77,
"learning_rate": 1.1701807228915663e-05,
"loss": 0.5545,
"step": 2546
},
{
"epoch": 0.77,
"learning_rate": 1.1671686746987952e-05,
"loss": 0.4871,
"step": 2548
},
{
"epoch": 0.77,
"learning_rate": 1.1641566265060242e-05,
"loss": 0.5592,
"step": 2550
},
{
"epoch": 0.77,
"learning_rate": 1.1611445783132531e-05,
"loss": 0.5846,
"step": 2552
},
{
"epoch": 0.77,
"learning_rate": 1.158132530120482e-05,
"loss": 0.4605,
"step": 2554
},
{
"epoch": 0.77,
"learning_rate": 1.1551204819277109e-05,
"loss": 0.5878,
"step": 2556
},
{
"epoch": 0.77,
"learning_rate": 1.1521084337349398e-05,
"loss": 0.7061,
"step": 2558
},
{
"epoch": 0.77,
"learning_rate": 1.1490963855421686e-05,
"loss": 0.5189,
"step": 2560
},
{
"epoch": 0.77,
"learning_rate": 1.1460843373493977e-05,
"loss": 0.4679,
"step": 2562
},
{
"epoch": 0.77,
"learning_rate": 1.1430722891566266e-05,
"loss": 0.7331,
"step": 2564
},
{
"epoch": 0.77,
"learning_rate": 1.1400602409638555e-05,
"loss": 0.4813,
"step": 2566
},
{
"epoch": 0.77,
"learning_rate": 1.1370481927710844e-05,
"loss": 0.4582,
"step": 2568
},
{
"epoch": 0.77,
"learning_rate": 1.1340361445783134e-05,
"loss": 0.5405,
"step": 2570
},
{
"epoch": 0.77,
"learning_rate": 1.1310240963855421e-05,
"loss": 0.498,
"step": 2572
},
{
"epoch": 0.78,
"learning_rate": 1.1280120481927712e-05,
"loss": 0.5061,
"step": 2574
},
{
"epoch": 0.78,
"learning_rate": 1.125e-05,
"loss": 0.5427,
"step": 2576
},
{
"epoch": 0.78,
"learning_rate": 1.121987951807229e-05,
"loss": 0.5183,
"step": 2578
},
{
"epoch": 0.78,
"learning_rate": 1.1189759036144578e-05,
"loss": 1.0019,
"step": 2580
},
{
"epoch": 0.78,
"learning_rate": 1.1159638554216869e-05,
"loss": 0.7032,
"step": 2582
},
{
"epoch": 0.78,
"learning_rate": 1.1129518072289156e-05,
"loss": 0.4584,
"step": 2584
},
{
"epoch": 0.78,
"learning_rate": 1.1099397590361447e-05,
"loss": 0.3645,
"step": 2586
},
{
"epoch": 0.78,
"learning_rate": 1.1069277108433736e-05,
"loss": 0.5525,
"step": 2588
},
{
"epoch": 0.78,
"learning_rate": 1.1039156626506024e-05,
"loss": 0.4598,
"step": 2590
},
{
"epoch": 0.78,
"learning_rate": 1.1009036144578313e-05,
"loss": 0.5013,
"step": 2592
},
{
"epoch": 0.78,
"learning_rate": 1.0978915662650604e-05,
"loss": 0.8018,
"step": 2594
},
{
"epoch": 0.78,
"learning_rate": 1.0948795180722891e-05,
"loss": 0.4653,
"step": 2596
},
{
"epoch": 0.78,
"learning_rate": 1.0918674698795181e-05,
"loss": 0.3996,
"step": 2598
},
{
"epoch": 0.78,
"learning_rate": 1.088855421686747e-05,
"loss": 0.4117,
"step": 2600
},
{
"epoch": 0.78,
"learning_rate": 1.0858433734939761e-05,
"loss": 0.4191,
"step": 2602
},
{
"epoch": 0.78,
"learning_rate": 1.0828313253012048e-05,
"loss": 0.4847,
"step": 2604
},
{
"epoch": 0.78,
"learning_rate": 1.0798192771084339e-05,
"loss": 0.5277,
"step": 2606
},
{
"epoch": 0.79,
"learning_rate": 1.0768072289156627e-05,
"loss": 0.5957,
"step": 2608
},
{
"epoch": 0.79,
"learning_rate": 1.0753012048192771e-05,
"loss": 0.6555,
"step": 2610
},
{
"epoch": 0.79,
"learning_rate": 1.0722891566265062e-05,
"loss": 0.904,
"step": 2612
},
{
"epoch": 0.79,
"learning_rate": 1.069277108433735e-05,
"loss": 0.3492,
"step": 2614
},
{
"epoch": 0.79,
"learning_rate": 1.066265060240964e-05,
"loss": 0.4198,
"step": 2616
},
{
"epoch": 0.79,
"learning_rate": 1.0632530120481928e-05,
"loss": 0.5887,
"step": 2618
},
{
"epoch": 0.79,
"learning_rate": 1.0602409638554217e-05,
"loss": 0.5334,
"step": 2620
},
{
"epoch": 0.79,
"learning_rate": 1.0572289156626508e-05,
"loss": 0.469,
"step": 2622
},
{
"epoch": 0.79,
"learning_rate": 1.0542168674698796e-05,
"loss": 0.4582,
"step": 2624
},
{
"epoch": 0.79,
"learning_rate": 1.0512048192771085e-05,
"loss": 0.63,
"step": 2626
},
{
"epoch": 0.79,
"learning_rate": 1.0481927710843374e-05,
"loss": 0.4454,
"step": 2628
},
{
"epoch": 0.79,
"learning_rate": 1.0451807228915663e-05,
"loss": 0.7188,
"step": 2630
},
{
"epoch": 0.79,
"learning_rate": 1.0421686746987952e-05,
"loss": 0.694,
"step": 2632
},
{
"epoch": 0.79,
"learning_rate": 1.0391566265060242e-05,
"loss": 0.7592,
"step": 2634
},
{
"epoch": 0.79,
"learning_rate": 1.0361445783132531e-05,
"loss": 0.5014,
"step": 2636
},
{
"epoch": 0.79,
"learning_rate": 1.033132530120482e-05,
"loss": 0.4534,
"step": 2638
},
{
"epoch": 0.8,
"learning_rate": 1.0301204819277109e-05,
"loss": 0.5168,
"step": 2640
},
{
"epoch": 0.8,
"learning_rate": 1.02710843373494e-05,
"loss": 0.4338,
"step": 2642
},
{
"epoch": 0.8,
"learning_rate": 1.0240963855421687e-05,
"loss": 0.4694,
"step": 2644
},
{
"epoch": 0.8,
"learning_rate": 1.0210843373493977e-05,
"loss": 0.6526,
"step": 2646
},
{
"epoch": 0.8,
"learning_rate": 1.0180722891566266e-05,
"loss": 0.3806,
"step": 2648
},
{
"epoch": 0.8,
"learning_rate": 1.0150602409638555e-05,
"loss": 0.4851,
"step": 2650
},
{
"epoch": 0.8,
"learning_rate": 1.0120481927710844e-05,
"loss": 0.5416,
"step": 2652
},
{
"epoch": 0.8,
"learning_rate": 1.0090361445783134e-05,
"loss": 0.3623,
"step": 2654
},
{
"epoch": 0.8,
"learning_rate": 1.0060240963855421e-05,
"loss": 0.6046,
"step": 2656
},
{
"epoch": 0.8,
"learning_rate": 1.0030120481927712e-05,
"loss": 0.4175,
"step": 2658
},
{
"epoch": 0.8,
"learning_rate": 1e-05,
"loss": 0.4626,
"step": 2660
},
{
"epoch": 0.8,
"learning_rate": 9.96987951807229e-06,
"loss": 0.4415,
"step": 2662
},
{
"epoch": 0.8,
"learning_rate": 9.939759036144579e-06,
"loss": 0.9373,
"step": 2664
},
{
"epoch": 0.8,
"learning_rate": 9.909638554216869e-06,
"loss": 0.4119,
"step": 2666
},
{
"epoch": 0.8,
"learning_rate": 9.879518072289156e-06,
"loss": 0.7269,
"step": 2668
},
{
"epoch": 0.8,
"learning_rate": 9.849397590361447e-06,
"loss": 0.6727,
"step": 2670
},
{
"epoch": 0.8,
"learning_rate": 9.819277108433736e-06,
"loss": 0.564,
"step": 2672
},
{
"epoch": 0.81,
"learning_rate": 9.789156626506024e-06,
"loss": 0.5225,
"step": 2674
},
{
"epoch": 0.81,
"learning_rate": 9.759036144578313e-06,
"loss": 0.6704,
"step": 2676
},
{
"epoch": 0.81,
"learning_rate": 9.728915662650604e-06,
"loss": 0.5099,
"step": 2678
},
{
"epoch": 0.81,
"learning_rate": 9.698795180722891e-06,
"loss": 0.4684,
"step": 2680
},
{
"epoch": 0.81,
"learning_rate": 9.668674698795182e-06,
"loss": 0.4669,
"step": 2682
},
{
"epoch": 0.81,
"learning_rate": 9.63855421686747e-06,
"loss": 0.3718,
"step": 2684
},
{
"epoch": 0.81,
"learning_rate": 9.60843373493976e-06,
"loss": 0.6579,
"step": 2686
},
{
"epoch": 0.81,
"learning_rate": 9.578313253012048e-06,
"loss": 0.4796,
"step": 2688
},
{
"epoch": 0.81,
"learning_rate": 9.548192771084339e-06,
"loss": 0.5631,
"step": 2690
},
{
"epoch": 0.81,
"learning_rate": 9.518072289156626e-06,
"loss": 0.6615,
"step": 2692
},
{
"epoch": 0.81,
"learning_rate": 9.487951807228916e-06,
"loss": 0.4837,
"step": 2694
},
{
"epoch": 0.81,
"learning_rate": 9.457831325301205e-06,
"loss": 0.7631,
"step": 2696
},
{
"epoch": 0.81,
"learning_rate": 9.427710843373494e-06,
"loss": 0.4102,
"step": 2698
},
{
"epoch": 0.81,
"learning_rate": 9.397590361445783e-06,
"loss": 0.4618,
"step": 2700
},
{
"epoch": 0.81,
"learning_rate": 9.367469879518074e-06,
"loss": 0.5004,
"step": 2702
},
{
"epoch": 0.81,
"learning_rate": 9.33734939759036e-06,
"loss": 0.5531,
"step": 2704
},
{
"epoch": 0.82,
"learning_rate": 9.307228915662651e-06,
"loss": 0.4868,
"step": 2706
},
{
"epoch": 0.82,
"learning_rate": 9.27710843373494e-06,
"loss": 0.46,
"step": 2708
},
{
"epoch": 0.82,
"learning_rate": 9.246987951807229e-06,
"loss": 0.5719,
"step": 2710
},
{
"epoch": 0.82,
"learning_rate": 9.216867469879518e-06,
"loss": 0.9626,
"step": 2712
},
{
"epoch": 0.82,
"learning_rate": 9.186746987951808e-06,
"loss": 0.4797,
"step": 2714
},
{
"epoch": 0.82,
"learning_rate": 9.156626506024097e-06,
"loss": 0.6067,
"step": 2716
},
{
"epoch": 0.82,
"learning_rate": 9.126506024096386e-06,
"loss": 0.7107,
"step": 2718
},
{
"epoch": 0.82,
"learning_rate": 9.096385542168675e-06,
"loss": 0.6742,
"step": 2720
},
{
"epoch": 0.82,
"learning_rate": 9.066265060240964e-06,
"loss": 0.4472,
"step": 2722
},
{
"epoch": 0.82,
"learning_rate": 9.036144578313253e-06,
"loss": 0.4593,
"step": 2724
},
{
"epoch": 0.82,
"learning_rate": 9.006024096385543e-06,
"loss": 0.3883,
"step": 2726
},
{
"epoch": 0.82,
"learning_rate": 8.975903614457832e-06,
"loss": 0.4475,
"step": 2728
},
{
"epoch": 0.82,
"learning_rate": 8.94578313253012e-06,
"loss": 0.4956,
"step": 2730
},
{
"epoch": 0.82,
"learning_rate": 8.91566265060241e-06,
"loss": 0.6554,
"step": 2732
},
{
"epoch": 0.82,
"learning_rate": 8.885542168674699e-06,
"loss": 0.7875,
"step": 2734
},
{
"epoch": 0.82,
"learning_rate": 8.855421686746989e-06,
"loss": 0.3862,
"step": 2736
},
{
"epoch": 0.82,
"learning_rate": 8.825301204819278e-06,
"loss": 0.5653,
"step": 2738
},
{
"epoch": 0.83,
"learning_rate": 8.795180722891567e-06,
"loss": 0.4701,
"step": 2740
},
{
"epoch": 0.83,
"learning_rate": 8.765060240963856e-06,
"loss": 0.5367,
"step": 2742
},
{
"epoch": 0.83,
"learning_rate": 8.734939759036146e-06,
"loss": 0.6778,
"step": 2744
},
{
"epoch": 0.83,
"learning_rate": 8.704819277108433e-06,
"loss": 0.4376,
"step": 2746
},
{
"epoch": 0.83,
"learning_rate": 8.674698795180724e-06,
"loss": 0.4779,
"step": 2748
},
{
"epoch": 0.83,
"learning_rate": 8.644578313253013e-06,
"loss": 0.6355,
"step": 2750
},
{
"epoch": 0.83,
"learning_rate": 8.614457831325302e-06,
"loss": 0.5208,
"step": 2752
},
{
"epoch": 0.83,
"learning_rate": 8.58433734939759e-06,
"loss": 0.4038,
"step": 2754
},
{
"epoch": 0.83,
"learning_rate": 8.554216867469881e-06,
"loss": 0.4913,
"step": 2756
},
{
"epoch": 0.83,
"learning_rate": 8.524096385542168e-06,
"loss": 0.4138,
"step": 2758
},
{
"epoch": 0.83,
"learning_rate": 8.493975903614459e-06,
"loss": 0.448,
"step": 2760
},
{
"epoch": 0.83,
"learning_rate": 8.463855421686748e-06,
"loss": 0.5758,
"step": 2762
},
{
"epoch": 0.83,
"learning_rate": 8.433734939759036e-06,
"loss": 0.5974,
"step": 2764
},
{
"epoch": 0.83,
"learning_rate": 8.403614457831325e-06,
"loss": 0.6558,
"step": 2766
},
{
"epoch": 0.83,
"learning_rate": 8.373493975903616e-06,
"loss": 0.6133,
"step": 2768
},
{
"epoch": 0.83,
"learning_rate": 8.343373493975903e-06,
"loss": 0.5689,
"step": 2770
},
{
"epoch": 0.83,
"learning_rate": 8.313253012048194e-06,
"loss": 0.442,
"step": 2772
},
{
"epoch": 0.84,
"learning_rate": 8.283132530120482e-06,
"loss": 0.504,
"step": 2774
},
{
"epoch": 0.84,
"learning_rate": 8.253012048192771e-06,
"loss": 0.58,
"step": 2776
},
{
"epoch": 0.84,
"learning_rate": 8.22289156626506e-06,
"loss": 0.5023,
"step": 2778
},
{
"epoch": 0.84,
"learning_rate": 8.19277108433735e-06,
"loss": 0.9812,
"step": 2780
},
{
"epoch": 0.84,
"learning_rate": 8.162650602409638e-06,
"loss": 0.5283,
"step": 2782
},
{
"epoch": 0.84,
"learning_rate": 8.132530120481928e-06,
"loss": 0.4918,
"step": 2784
},
{
"epoch": 0.84,
"learning_rate": 8.102409638554217e-06,
"loss": 0.5006,
"step": 2786
},
{
"epoch": 0.84,
"learning_rate": 8.072289156626506e-06,
"loss": 0.4288,
"step": 2788
},
{
"epoch": 0.84,
"learning_rate": 8.042168674698795e-06,
"loss": 0.3868,
"step": 2790
},
{
"epoch": 0.84,
"learning_rate": 8.012048192771085e-06,
"loss": 0.5809,
"step": 2792
},
{
"epoch": 0.84,
"learning_rate": 7.981927710843373e-06,
"loss": 0.404,
"step": 2794
},
{
"epoch": 0.84,
"learning_rate": 7.951807228915663e-06,
"loss": 0.661,
"step": 2796
},
{
"epoch": 0.84,
"learning_rate": 7.921686746987952e-06,
"loss": 0.4066,
"step": 2798
},
{
"epoch": 0.84,
"learning_rate": 7.891566265060241e-06,
"loss": 0.4491,
"step": 2800
},
{
"epoch": 0.84,
"learning_rate": 7.86144578313253e-06,
"loss": 0.559,
"step": 2802
},
{
"epoch": 0.84,
"learning_rate": 7.83132530120482e-06,
"loss": 0.4259,
"step": 2804
},
{
"epoch": 0.85,
"learning_rate": 7.801204819277109e-06,
"loss": 0.5539,
"step": 2806
},
{
"epoch": 0.85,
"learning_rate": 7.771084337349398e-06,
"loss": 0.4682,
"step": 2808
},
{
"epoch": 0.85,
"learning_rate": 7.740963855421687e-06,
"loss": 0.4958,
"step": 2810
},
{
"epoch": 0.85,
"learning_rate": 7.710843373493977e-06,
"loss": 0.4524,
"step": 2812
},
{
"epoch": 0.85,
"learning_rate": 7.680722891566265e-06,
"loss": 0.5345,
"step": 2814
},
{
"epoch": 0.85,
"learning_rate": 7.650602409638555e-06,
"loss": 0.728,
"step": 2816
},
{
"epoch": 0.85,
"learning_rate": 7.620481927710845e-06,
"loss": 0.4461,
"step": 2818
},
{
"epoch": 0.85,
"learning_rate": 7.590361445783133e-06,
"loss": 0.4594,
"step": 2820
},
{
"epoch": 0.85,
"learning_rate": 7.5602409638554225e-06,
"loss": 0.498,
"step": 2822
},
{
"epoch": 0.85,
"learning_rate": 7.530120481927712e-06,
"loss": 0.4793,
"step": 2824
},
{
"epoch": 0.85,
"learning_rate": 7.5e-06,
"loss": 0.5016,
"step": 2826
},
{
"epoch": 0.85,
"learning_rate": 7.46987951807229e-06,
"loss": 0.9535,
"step": 2828
},
{
"epoch": 0.85,
"learning_rate": 7.43975903614458e-06,
"loss": 0.5864,
"step": 2830
},
{
"epoch": 0.85,
"learning_rate": 7.409638554216868e-06,
"loss": 0.4761,
"step": 2832
},
{
"epoch": 0.85,
"learning_rate": 7.379518072289157e-06,
"loss": 0.5504,
"step": 2834
},
{
"epoch": 0.85,
"learning_rate": 7.349397590361447e-06,
"loss": 0.4295,
"step": 2836
},
{
"epoch": 0.85,
"learning_rate": 7.319277108433735e-06,
"loss": 0.6933,
"step": 2838
},
{
"epoch": 0.86,
"learning_rate": 7.289156626506025e-06,
"loss": 0.4124,
"step": 2840
},
{
"epoch": 0.86,
"learning_rate": 7.259036144578314e-06,
"loss": 0.5502,
"step": 2842
},
{
"epoch": 0.86,
"learning_rate": 7.228915662650602e-06,
"loss": 0.4435,
"step": 2844
},
{
"epoch": 0.86,
"learning_rate": 7.198795180722892e-06,
"loss": 0.7318,
"step": 2846
},
{
"epoch": 0.86,
"learning_rate": 7.168674698795182e-06,
"loss": 0.4234,
"step": 2848
},
{
"epoch": 0.86,
"learning_rate": 7.13855421686747e-06,
"loss": 0.404,
"step": 2850
},
{
"epoch": 0.86,
"learning_rate": 7.1084337349397595e-06,
"loss": 0.5217,
"step": 2852
},
{
"epoch": 0.86,
"learning_rate": 7.078313253012049e-06,
"loss": 0.699,
"step": 2854
},
{
"epoch": 0.86,
"learning_rate": 7.048192771084337e-06,
"loss": 0.4672,
"step": 2856
},
{
"epoch": 0.86,
"learning_rate": 7.018072289156627e-06,
"loss": 0.3463,
"step": 2858
},
{
"epoch": 0.86,
"learning_rate": 6.987951807228917e-06,
"loss": 0.4399,
"step": 2860
},
{
"epoch": 0.86,
"learning_rate": 6.957831325301205e-06,
"loss": 0.3913,
"step": 2862
},
{
"epoch": 0.86,
"learning_rate": 6.927710843373494e-06,
"loss": 0.4821,
"step": 2864
},
{
"epoch": 0.86,
"learning_rate": 6.897590361445784e-06,
"loss": 0.5435,
"step": 2866
},
{
"epoch": 0.86,
"learning_rate": 6.867469879518072e-06,
"loss": 0.5281,
"step": 2868
},
{
"epoch": 0.86,
"learning_rate": 6.837349397590362e-06,
"loss": 0.3923,
"step": 2870
},
{
"epoch": 0.87,
"learning_rate": 6.8072289156626514e-06,
"loss": 0.4874,
"step": 2872
},
{
"epoch": 0.87,
"learning_rate": 6.7771084337349394e-06,
"loss": 0.4362,
"step": 2874
},
{
"epoch": 0.87,
"learning_rate": 6.746987951807229e-06,
"loss": 0.3992,
"step": 2876
},
{
"epoch": 0.87,
"learning_rate": 6.716867469879519e-06,
"loss": 0.4971,
"step": 2878
},
{
"epoch": 0.87,
"learning_rate": 6.686746987951807e-06,
"loss": 0.5001,
"step": 2880
},
{
"epoch": 0.87,
"learning_rate": 6.6566265060240965e-06,
"loss": 0.6711,
"step": 2882
},
{
"epoch": 0.87,
"learning_rate": 6.626506024096386e-06,
"loss": 0.3731,
"step": 2884
},
{
"epoch": 0.87,
"learning_rate": 6.596385542168674e-06,
"loss": 0.5063,
"step": 2886
},
{
"epoch": 0.87,
"learning_rate": 6.566265060240964e-06,
"loss": 0.4883,
"step": 2888
},
{
"epoch": 0.87,
"learning_rate": 6.536144578313254e-06,
"loss": 0.5223,
"step": 2890
},
{
"epoch": 0.87,
"learning_rate": 6.506024096385542e-06,
"loss": 0.4449,
"step": 2892
},
{
"epoch": 0.87,
"learning_rate": 6.475903614457831e-06,
"loss": 0.5159,
"step": 2894
},
{
"epoch": 0.87,
"learning_rate": 6.445783132530121e-06,
"loss": 0.5136,
"step": 2896
},
{
"epoch": 0.87,
"learning_rate": 6.41566265060241e-06,
"loss": 0.4395,
"step": 2898
},
{
"epoch": 0.87,
"learning_rate": 6.385542168674699e-06,
"loss": 0.5728,
"step": 2900
},
{
"epoch": 0.87,
"learning_rate": 6.3554216867469885e-06,
"loss": 0.5897,
"step": 2902
},
{
"epoch": 0.87,
"learning_rate": 6.325301204819277e-06,
"loss": 0.49,
"step": 2904
},
{
"epoch": 0.88,
"learning_rate": 6.295180722891566e-06,
"loss": 0.4421,
"step": 2906
},
{
"epoch": 0.88,
"learning_rate": 6.265060240963856e-06,
"loss": 0.575,
"step": 2908
},
{
"epoch": 0.88,
"learning_rate": 6.234939759036145e-06,
"loss": 0.6445,
"step": 2910
},
{
"epoch": 0.88,
"learning_rate": 6.2048192771084344e-06,
"loss": 0.5586,
"step": 2912
},
{
"epoch": 0.88,
"learning_rate": 6.174698795180723e-06,
"loss": 0.5482,
"step": 2914
},
{
"epoch": 0.88,
"learning_rate": 6.144578313253013e-06,
"loss": 0.9025,
"step": 2916
},
{
"epoch": 0.88,
"learning_rate": 6.114457831325302e-06,
"loss": 0.4403,
"step": 2918
},
{
"epoch": 0.88,
"learning_rate": 6.084337349397591e-06,
"loss": 0.5032,
"step": 2920
},
{
"epoch": 0.88,
"learning_rate": 6.05421686746988e-06,
"loss": 0.5029,
"step": 2922
},
{
"epoch": 0.88,
"learning_rate": 6.024096385542169e-06,
"loss": 0.5098,
"step": 2924
},
{
"epoch": 0.88,
"learning_rate": 5.993975903614458e-06,
"loss": 0.4391,
"step": 2926
},
{
"epoch": 0.88,
"learning_rate": 5.963855421686748e-06,
"loss": 0.6122,
"step": 2928
},
{
"epoch": 0.88,
"learning_rate": 5.933734939759037e-06,
"loss": 0.6701,
"step": 2930
},
{
"epoch": 0.88,
"learning_rate": 5.9036144578313255e-06,
"loss": 0.5178,
"step": 2932
},
{
"epoch": 0.88,
"learning_rate": 5.873493975903615e-06,
"loss": 0.4582,
"step": 2934
},
{
"epoch": 0.88,
"learning_rate": 5.843373493975904e-06,
"loss": 1.3806,
"step": 2936
},
{
"epoch": 0.88,
"learning_rate": 5.813253012048193e-06,
"loss": 0.3716,
"step": 2938
},
{
"epoch": 0.89,
"learning_rate": 5.783132530120483e-06,
"loss": 0.4081,
"step": 2940
},
{
"epoch": 0.89,
"learning_rate": 5.7530120481927715e-06,
"loss": 0.507,
"step": 2942
},
{
"epoch": 0.89,
"learning_rate": 5.72289156626506e-06,
"loss": 0.5409,
"step": 2944
},
{
"epoch": 0.89,
"learning_rate": 5.69277108433735e-06,
"loss": 0.6743,
"step": 2946
},
{
"epoch": 0.89,
"learning_rate": 5.662650602409639e-06,
"loss": 0.5083,
"step": 2948
},
{
"epoch": 0.89,
"learning_rate": 5.632530120481928e-06,
"loss": 0.487,
"step": 2950
},
{
"epoch": 0.89,
"learning_rate": 5.602409638554217e-06,
"loss": 0.555,
"step": 2952
},
{
"epoch": 0.89,
"learning_rate": 5.572289156626506e-06,
"loss": 0.4755,
"step": 2954
},
{
"epoch": 0.89,
"learning_rate": 5.542168674698795e-06,
"loss": 0.3753,
"step": 2956
},
{
"epoch": 0.89,
"learning_rate": 5.512048192771085e-06,
"loss": 0.6542,
"step": 2958
},
{
"epoch": 0.89,
"learning_rate": 5.481927710843374e-06,
"loss": 0.4451,
"step": 2960
},
{
"epoch": 0.89,
"learning_rate": 5.451807228915663e-06,
"loss": 0.444,
"step": 2962
},
{
"epoch": 0.89,
"learning_rate": 5.421686746987952e-06,
"loss": 0.5534,
"step": 2964
},
{
"epoch": 0.89,
"learning_rate": 5.391566265060241e-06,
"loss": 0.5216,
"step": 2966
},
{
"epoch": 0.89,
"learning_rate": 5.361445783132531e-06,
"loss": 0.5913,
"step": 2968
},
{
"epoch": 0.89,
"learning_rate": 5.33132530120482e-06,
"loss": 0.592,
"step": 2970
},
{
"epoch": 0.9,
"learning_rate": 5.3012048192771085e-06,
"loss": 0.4786,
"step": 2972
},
{
"epoch": 0.9,
"learning_rate": 5.271084337349398e-06,
"loss": 0.3734,
"step": 2974
},
{
"epoch": 0.9,
"learning_rate": 5.240963855421687e-06,
"loss": 0.4286,
"step": 2976
},
{
"epoch": 0.9,
"learning_rate": 5.210843373493976e-06,
"loss": 0.6864,
"step": 2978
},
{
"epoch": 0.9,
"learning_rate": 5.180722891566266e-06,
"loss": 0.7851,
"step": 2980
},
{
"epoch": 0.9,
"learning_rate": 5.1506024096385544e-06,
"loss": 0.5432,
"step": 2982
},
{
"epoch": 0.9,
"learning_rate": 5.120481927710843e-06,
"loss": 0.6464,
"step": 2984
},
{
"epoch": 0.9,
"learning_rate": 5.090361445783133e-06,
"loss": 0.3963,
"step": 2986
},
{
"epoch": 0.9,
"learning_rate": 5.060240963855422e-06,
"loss": 0.4815,
"step": 2988
},
{
"epoch": 0.9,
"learning_rate": 5.030120481927711e-06,
"loss": 0.5015,
"step": 2990
},
{
"epoch": 0.9,
"learning_rate": 5e-06,
"loss": 0.3899,
"step": 2992
},
{
"epoch": 0.9,
"learning_rate": 4.969879518072289e-06,
"loss": 0.4682,
"step": 2994
},
{
"epoch": 0.9,
"learning_rate": 4.939759036144578e-06,
"loss": 0.4201,
"step": 2996
},
{
"epoch": 0.9,
"learning_rate": 4.909638554216868e-06,
"loss": 0.4244,
"step": 2998
},
{
"epoch": 0.9,
"learning_rate": 4.879518072289157e-06,
"loss": 0.4937,
"step": 3000
},
{
"epoch": 0.9,
"eval_cer": 0.029656496692980584,
"eval_loss": 0.5131679773330688,
"eval_runtime": 787.753,
"eval_samples_per_second": 3.746,
"eval_steps_per_second": 0.468,
"step": 3000
},
{
"epoch": 0.9,
"learning_rate": 4.8493975903614455e-06,
"loss": 0.4779,
"step": 3002
},
{
"epoch": 0.9,
"learning_rate": 4.819277108433735e-06,
"loss": 0.5577,
"step": 3004
},
{
"epoch": 0.91,
"learning_rate": 4.789156626506024e-06,
"loss": 0.7874,
"step": 3006
},
{
"epoch": 0.91,
"learning_rate": 4.759036144578313e-06,
"loss": 0.4971,
"step": 3008
},
{
"epoch": 0.91,
"learning_rate": 4.728915662650603e-06,
"loss": 0.4488,
"step": 3010
},
{
"epoch": 0.91,
"learning_rate": 4.6987951807228915e-06,
"loss": 0.4523,
"step": 3012
},
{
"epoch": 0.91,
"learning_rate": 4.66867469879518e-06,
"loss": 0.5915,
"step": 3014
},
{
"epoch": 0.91,
"learning_rate": 4.63855421686747e-06,
"loss": 0.5784,
"step": 3016
},
{
"epoch": 0.91,
"learning_rate": 4.608433734939759e-06,
"loss": 0.7348,
"step": 3018
},
{
"epoch": 0.91,
"learning_rate": 4.578313253012049e-06,
"loss": 0.4254,
"step": 3020
},
{
"epoch": 0.91,
"learning_rate": 4.5481927710843374e-06,
"loss": 0.3959,
"step": 3022
},
{
"epoch": 0.91,
"learning_rate": 4.518072289156626e-06,
"loss": 0.3765,
"step": 3024
},
{
"epoch": 0.91,
"learning_rate": 4.487951807228916e-06,
"loss": 0.431,
"step": 3026
},
{
"epoch": 0.91,
"learning_rate": 4.457831325301205e-06,
"loss": 0.528,
"step": 3028
},
{
"epoch": 0.91,
"learning_rate": 4.4277108433734945e-06,
"loss": 0.3413,
"step": 3030
},
{
"epoch": 0.91,
"learning_rate": 4.397590361445783e-06,
"loss": 0.4953,
"step": 3032
},
{
"epoch": 0.91,
"learning_rate": 4.367469879518073e-06,
"loss": 0.4453,
"step": 3034
},
{
"epoch": 0.91,
"learning_rate": 4.337349397590362e-06,
"loss": 0.7223,
"step": 3036
},
{
"epoch": 0.92,
"learning_rate": 4.307228915662651e-06,
"loss": 0.4425,
"step": 3038
},
{
"epoch": 0.92,
"learning_rate": 4.2771084337349405e-06,
"loss": 0.3307,
"step": 3040
},
{
"epoch": 0.92,
"learning_rate": 4.246987951807229e-06,
"loss": 0.6157,
"step": 3042
},
{
"epoch": 0.92,
"learning_rate": 4.216867469879518e-06,
"loss": 0.4238,
"step": 3044
},
{
"epoch": 0.92,
"learning_rate": 4.186746987951808e-06,
"loss": 0.6786,
"step": 3046
},
{
"epoch": 0.92,
"learning_rate": 4.156626506024097e-06,
"loss": 0.4337,
"step": 3048
},
{
"epoch": 0.92,
"learning_rate": 4.126506024096386e-06,
"loss": 0.5159,
"step": 3050
},
{
"epoch": 0.92,
"learning_rate": 4.096385542168675e-06,
"loss": 0.687,
"step": 3052
},
{
"epoch": 0.92,
"learning_rate": 4.066265060240964e-06,
"loss": 0.864,
"step": 3054
},
{
"epoch": 0.92,
"learning_rate": 4.036144578313253e-06,
"loss": 0.3844,
"step": 3056
},
{
"epoch": 0.92,
"learning_rate": 4.006024096385543e-06,
"loss": 0.37,
"step": 3058
},
{
"epoch": 0.92,
"learning_rate": 3.975903614457832e-06,
"loss": 0.3666,
"step": 3060
},
{
"epoch": 0.92,
"learning_rate": 3.9457831325301204e-06,
"loss": 0.4495,
"step": 3062
},
{
"epoch": 0.92,
"learning_rate": 3.91566265060241e-06,
"loss": 0.3817,
"step": 3064
},
{
"epoch": 0.92,
"learning_rate": 3.885542168674699e-06,
"loss": 0.6842,
"step": 3066
},
{
"epoch": 0.92,
"learning_rate": 3.855421686746989e-06,
"loss": 0.5442,
"step": 3068
},
{
"epoch": 0.92,
"learning_rate": 3.8253012048192775e-06,
"loss": 0.3889,
"step": 3070
},
{
"epoch": 0.93,
"learning_rate": 3.7951807228915664e-06,
"loss": 0.605,
"step": 3072
},
{
"epoch": 0.93,
"learning_rate": 3.765060240963856e-06,
"loss": 0.4622,
"step": 3074
},
{
"epoch": 0.93,
"learning_rate": 3.734939759036145e-06,
"loss": 0.404,
"step": 3076
},
{
"epoch": 0.93,
"learning_rate": 3.704819277108434e-06,
"loss": 0.3573,
"step": 3078
},
{
"epoch": 0.93,
"learning_rate": 3.6746987951807235e-06,
"loss": 0.4483,
"step": 3080
},
{
"epoch": 0.93,
"learning_rate": 3.6445783132530124e-06,
"loss": 0.5427,
"step": 3082
},
{
"epoch": 0.93,
"learning_rate": 3.614457831325301e-06,
"loss": 0.3887,
"step": 3084
},
{
"epoch": 0.93,
"learning_rate": 3.584337349397591e-06,
"loss": 0.4402,
"step": 3086
},
{
"epoch": 0.93,
"learning_rate": 3.5542168674698798e-06,
"loss": 0.5547,
"step": 3088
},
{
"epoch": 0.93,
"learning_rate": 3.5240963855421686e-06,
"loss": 0.4765,
"step": 3090
},
{
"epoch": 0.93,
"learning_rate": 3.4939759036144583e-06,
"loss": 0.415,
"step": 3092
},
{
"epoch": 0.93,
"learning_rate": 3.463855421686747e-06,
"loss": 0.4411,
"step": 3094
},
{
"epoch": 0.93,
"learning_rate": 3.433734939759036e-06,
"loss": 0.3931,
"step": 3096
},
{
"epoch": 0.93,
"learning_rate": 3.4036144578313257e-06,
"loss": 0.3582,
"step": 3098
},
{
"epoch": 0.93,
"learning_rate": 3.3734939759036146e-06,
"loss": 0.3797,
"step": 3100
},
{
"epoch": 0.93,
"learning_rate": 3.3433734939759034e-06,
"loss": 0.6859,
"step": 3102
},
{
"epoch": 0.93,
"learning_rate": 3.313253012048193e-06,
"loss": 0.4241,
"step": 3104
},
{
"epoch": 0.94,
"learning_rate": 3.283132530120482e-06,
"loss": 0.5,
"step": 3106
},
{
"epoch": 0.94,
"learning_rate": 3.253012048192771e-06,
"loss": 0.6457,
"step": 3108
},
{
"epoch": 0.94,
"learning_rate": 3.2228915662650605e-06,
"loss": 0.4557,
"step": 3110
},
{
"epoch": 0.94,
"learning_rate": 3.1927710843373494e-06,
"loss": 0.5025,
"step": 3112
},
{
"epoch": 0.94,
"learning_rate": 3.1626506024096387e-06,
"loss": 0.6074,
"step": 3114
},
{
"epoch": 0.94,
"learning_rate": 3.132530120481928e-06,
"loss": 0.3366,
"step": 3116
},
{
"epoch": 0.94,
"learning_rate": 3.1024096385542172e-06,
"loss": 0.3861,
"step": 3118
},
{
"epoch": 0.94,
"learning_rate": 3.0722891566265065e-06,
"loss": 0.4536,
"step": 3120
},
{
"epoch": 0.94,
"learning_rate": 3.0421686746987953e-06,
"loss": 0.4676,
"step": 3122
},
{
"epoch": 0.94,
"learning_rate": 3.0120481927710846e-06,
"loss": 0.5335,
"step": 3124
},
{
"epoch": 0.94,
"learning_rate": 2.981927710843374e-06,
"loss": 0.3932,
"step": 3126
},
{
"epoch": 0.94,
"learning_rate": 2.9518072289156627e-06,
"loss": 0.5856,
"step": 3128
},
{
"epoch": 0.94,
"learning_rate": 2.921686746987952e-06,
"loss": 0.5016,
"step": 3130
},
{
"epoch": 0.94,
"learning_rate": 2.8915662650602413e-06,
"loss": 0.3828,
"step": 3132
},
{
"epoch": 0.94,
"learning_rate": 2.86144578313253e-06,
"loss": 0.5115,
"step": 3134
},
{
"epoch": 0.94,
"learning_rate": 2.8313253012048194e-06,
"loss": 0.4254,
"step": 3136
},
{
"epoch": 0.95,
"learning_rate": 2.8012048192771087e-06,
"loss": 0.4372,
"step": 3138
},
{
"epoch": 0.95,
"learning_rate": 2.7710843373493976e-06,
"loss": 0.7139,
"step": 3140
},
{
"epoch": 0.95,
"learning_rate": 2.740963855421687e-06,
"loss": 0.4562,
"step": 3142
},
{
"epoch": 0.95,
"learning_rate": 2.710843373493976e-06,
"loss": 0.3914,
"step": 3144
},
{
"epoch": 0.95,
"learning_rate": 2.6807228915662654e-06,
"loss": 0.3847,
"step": 3146
},
{
"epoch": 0.95,
"learning_rate": 2.6506024096385542e-06,
"loss": 0.3959,
"step": 3148
},
{
"epoch": 0.95,
"learning_rate": 2.6204819277108435e-06,
"loss": 0.418,
"step": 3150
},
{
"epoch": 0.95,
"learning_rate": 2.590361445783133e-06,
"loss": 0.4108,
"step": 3152
},
{
"epoch": 0.95,
"learning_rate": 2.5602409638554217e-06,
"loss": 0.4908,
"step": 3154
},
{
"epoch": 0.95,
"learning_rate": 2.530120481927711e-06,
"loss": 0.3674,
"step": 3156
},
{
"epoch": 0.95,
"learning_rate": 2.5e-06,
"loss": 0.424,
"step": 3158
},
{
"epoch": 0.95,
"learning_rate": 2.469879518072289e-06,
"loss": 0.5019,
"step": 3160
},
{
"epoch": 0.95,
"learning_rate": 2.4397590361445783e-06,
"loss": 0.4379,
"step": 3162
},
{
"epoch": 0.95,
"learning_rate": 2.4096385542168676e-06,
"loss": 0.4567,
"step": 3164
},
{
"epoch": 0.95,
"learning_rate": 2.3795180722891565e-06,
"loss": 0.474,
"step": 3166
},
{
"epoch": 0.95,
"learning_rate": 2.3493975903614457e-06,
"loss": 0.6564,
"step": 3168
},
{
"epoch": 0.95,
"learning_rate": 2.319277108433735e-06,
"loss": 0.4166,
"step": 3170
},
{
"epoch": 0.96,
"learning_rate": 2.2891566265060243e-06,
"loss": 0.4109,
"step": 3172
},
{
"epoch": 0.96,
"learning_rate": 2.259036144578313e-06,
"loss": 0.3996,
"step": 3174
},
{
"epoch": 0.96,
"learning_rate": 2.2289156626506024e-06,
"loss": 0.7661,
"step": 3176
},
{
"epoch": 0.96,
"learning_rate": 2.1987951807228917e-06,
"loss": 0.7205,
"step": 3178
},
{
"epoch": 0.96,
"learning_rate": 2.168674698795181e-06,
"loss": 0.4874,
"step": 3180
},
{
"epoch": 0.96,
"learning_rate": 2.1385542168674703e-06,
"loss": 0.3582,
"step": 3182
},
{
"epoch": 0.96,
"learning_rate": 2.108433734939759e-06,
"loss": 0.4972,
"step": 3184
},
{
"epoch": 0.96,
"learning_rate": 2.0783132530120484e-06,
"loss": 0.5022,
"step": 3186
},
{
"epoch": 0.96,
"learning_rate": 2.0481927710843377e-06,
"loss": 0.3615,
"step": 3188
},
{
"epoch": 0.96,
"learning_rate": 2.0180722891566265e-06,
"loss": 0.5167,
"step": 3190
},
{
"epoch": 0.96,
"learning_rate": 1.987951807228916e-06,
"loss": 0.5983,
"step": 3192
},
{
"epoch": 0.96,
"learning_rate": 1.957831325301205e-06,
"loss": 0.3595,
"step": 3194
},
{
"epoch": 0.96,
"learning_rate": 1.9277108433734943e-06,
"loss": 0.4943,
"step": 3196
},
{
"epoch": 0.96,
"learning_rate": 1.8975903614457832e-06,
"loss": 0.5132,
"step": 3198
},
{
"epoch": 0.96,
"learning_rate": 1.8674698795180725e-06,
"loss": 0.3815,
"step": 3200
},
{
"epoch": 0.96,
"learning_rate": 1.8373493975903617e-06,
"loss": 0.4878,
"step": 3202
},
{
"epoch": 0.97,
"learning_rate": 1.8072289156626506e-06,
"loss": 0.5628,
"step": 3204
},
{
"epoch": 0.97,
"learning_rate": 1.7771084337349399e-06,
"loss": 0.4442,
"step": 3206
},
{
"epoch": 0.97,
"learning_rate": 1.7469879518072292e-06,
"loss": 0.8291,
"step": 3208
},
{
"epoch": 0.97,
"learning_rate": 1.716867469879518e-06,
"loss": 0.358,
"step": 3210
},
{
"epoch": 0.97,
"learning_rate": 1.6867469879518073e-06,
"loss": 0.4074,
"step": 3212
},
{
"epoch": 0.97,
"learning_rate": 1.6566265060240966e-06,
"loss": 0.5017,
"step": 3214
},
{
"epoch": 0.97,
"learning_rate": 1.6265060240963854e-06,
"loss": 0.5767,
"step": 3216
},
{
"epoch": 0.97,
"learning_rate": 1.5963855421686747e-06,
"loss": 0.5317,
"step": 3218
},
{
"epoch": 0.97,
"learning_rate": 1.566265060240964e-06,
"loss": 0.6054,
"step": 3220
},
{
"epoch": 0.97,
"learning_rate": 1.5361445783132532e-06,
"loss": 0.3636,
"step": 3222
},
{
"epoch": 0.97,
"learning_rate": 1.5060240963855423e-06,
"loss": 0.5551,
"step": 3224
},
{
"epoch": 0.97,
"learning_rate": 1.4759036144578314e-06,
"loss": 0.3758,
"step": 3226
},
{
"epoch": 0.97,
"learning_rate": 1.4457831325301207e-06,
"loss": 0.338,
"step": 3228
},
{
"epoch": 0.97,
"learning_rate": 1.4156626506024097e-06,
"loss": 0.3965,
"step": 3230
},
{
"epoch": 0.97,
"learning_rate": 1.3855421686746988e-06,
"loss": 0.4125,
"step": 3232
},
{
"epoch": 0.97,
"learning_rate": 1.355421686746988e-06,
"loss": 0.382,
"step": 3234
},
{
"epoch": 0.97,
"learning_rate": 1.3253012048192771e-06,
"loss": 0.3946,
"step": 3236
},
{
"epoch": 0.98,
"learning_rate": 1.2951807228915664e-06,
"loss": 0.4157,
"step": 3238
},
{
"epoch": 0.98,
"learning_rate": 1.2650602409638555e-06,
"loss": 0.3808,
"step": 3240
},
{
"epoch": 0.98,
"learning_rate": 1.2349397590361445e-06,
"loss": 0.4715,
"step": 3242
},
{
"epoch": 0.98,
"learning_rate": 1.2048192771084338e-06,
"loss": 0.4858,
"step": 3244
},
{
"epoch": 0.98,
"learning_rate": 1.1746987951807229e-06,
"loss": 0.513,
"step": 3246
},
{
"epoch": 0.98,
"learning_rate": 1.1445783132530121e-06,
"loss": 0.5514,
"step": 3248
},
{
"epoch": 0.98,
"learning_rate": 1.1144578313253012e-06,
"loss": 0.3727,
"step": 3250
},
{
"epoch": 0.98,
"learning_rate": 1.0843373493975905e-06,
"loss": 0.5545,
"step": 3252
},
{
"epoch": 0.98,
"learning_rate": 1.0542168674698796e-06,
"loss": 0.4327,
"step": 3254
},
{
"epoch": 0.98,
"learning_rate": 1.0240963855421688e-06,
"loss": 0.5019,
"step": 3256
},
{
"epoch": 0.98,
"learning_rate": 9.93975903614458e-07,
"loss": 0.4047,
"step": 3258
},
{
"epoch": 0.98,
"learning_rate": 9.638554216867472e-07,
"loss": 0.4119,
"step": 3260
},
{
"epoch": 0.98,
"learning_rate": 9.337349397590362e-07,
"loss": 0.6493,
"step": 3262
},
{
"epoch": 0.98,
"learning_rate": 9.036144578313253e-07,
"loss": 0.3642,
"step": 3264
},
{
"epoch": 0.98,
"learning_rate": 8.734939759036146e-07,
"loss": 0.4044,
"step": 3266
},
{
"epoch": 0.98,
"learning_rate": 8.433734939759036e-07,
"loss": 1.431,
"step": 3268
},
{
"epoch": 0.98,
"learning_rate": 8.132530120481927e-07,
"loss": 0.4005,
"step": 3270
},
{
"epoch": 0.99,
"learning_rate": 7.83132530120482e-07,
"loss": 0.4384,
"step": 3272
},
{
"epoch": 0.99,
"learning_rate": 7.530120481927712e-07,
"loss": 0.7154,
"step": 3274
},
{
"epoch": 0.99,
"learning_rate": 7.228915662650603e-07,
"loss": 0.4118,
"step": 3276
},
{
"epoch": 0.99,
"learning_rate": 6.927710843373494e-07,
"loss": 0.3905,
"step": 3278
},
{
"epoch": 0.99,
"learning_rate": 6.626506024096386e-07,
"loss": 0.4539,
"step": 3280
},
{
"epoch": 0.99,
"learning_rate": 6.325301204819277e-07,
"loss": 0.3882,
"step": 3282
},
{
"epoch": 0.99,
"learning_rate": 6.024096385542169e-07,
"loss": 0.5751,
"step": 3284
},
{
"epoch": 0.99,
"learning_rate": 5.722891566265061e-07,
"loss": 0.4713,
"step": 3286
},
{
"epoch": 0.99,
"learning_rate": 5.421686746987952e-07,
"loss": 0.4606,
"step": 3288
},
{
"epoch": 0.99,
"learning_rate": 5.120481927710844e-07,
"loss": 0.3738,
"step": 3290
},
{
"epoch": 0.99,
"learning_rate": 4.819277108433736e-07,
"loss": 0.557,
"step": 3292
},
{
"epoch": 0.99,
"learning_rate": 4.5180722891566265e-07,
"loss": 0.7135,
"step": 3294
},
{
"epoch": 0.99,
"learning_rate": 4.216867469879518e-07,
"loss": 0.4158,
"step": 3296
},
{
"epoch": 0.99,
"learning_rate": 3.91566265060241e-07,
"loss": 0.5218,
"step": 3298
},
{
"epoch": 0.99,
"learning_rate": 3.6144578313253016e-07,
"loss": 0.4759,
"step": 3300
},
{
"epoch": 0.99,
"learning_rate": 3.313253012048193e-07,
"loss": 0.394,
"step": 3302
},
{
"epoch": 1.0,
"learning_rate": 3.0120481927710845e-07,
"loss": 0.4764,
"step": 3304
},
{
"epoch": 1.0,
"learning_rate": 2.710843373493976e-07,
"loss": 0.369,
"step": 3306
},
{
"epoch": 1.0,
"learning_rate": 2.409638554216868e-07,
"loss": 0.3676,
"step": 3308
},
{
"epoch": 1.0,
"learning_rate": 2.108433734939759e-07,
"loss": 0.347,
"step": 3310
},
{
"epoch": 1.0,
"learning_rate": 1.8072289156626508e-07,
"loss": 0.6015,
"step": 3312
},
{
"epoch": 1.0,
"learning_rate": 1.5060240963855423e-07,
"loss": 0.5245,
"step": 3314
},
{
"epoch": 1.0,
"learning_rate": 1.204819277108434e-07,
"loss": 0.532,
"step": 3316
},
{
"epoch": 1.0,
"learning_rate": 9.036144578313254e-08,
"loss": 0.4304,
"step": 3318
},
{
"epoch": 1.0,
"learning_rate": 6.02409638554217e-08,
"loss": 0.4147,
"step": 3320
},
{
"epoch": 1.0,
"step": 3320,
"total_flos": 2.3497513796848583e+19,
"train_loss": 0.8366967510028058,
"train_runtime": 5913.5726,
"train_samples_per_second": 4.49,
"train_steps_per_second": 0.561
}
],
"max_steps": 3320,
"num_train_epochs": 1,
"total_flos": 2.3497513796848583e+19,
"trial_name": null,
"trial_params": null
}