q-align-quality / trainer_state.json
teowu's picture
Upload folder using huggingface_hub
8e01280
raw
history blame contribute delete
No virus
197 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"global_step": 1626,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.0816326530612243e-07,
"loss": 1.2646,
"step": 1
},
{
"epoch": 0.0,
"learning_rate": 8.163265306122449e-07,
"loss": 1.1865,
"step": 2
},
{
"epoch": 0.0,
"learning_rate": 1.2244897959183673e-06,
"loss": 1.209,
"step": 3
},
{
"epoch": 0.0,
"learning_rate": 1.6326530612244897e-06,
"loss": 1.1436,
"step": 4
},
{
"epoch": 0.01,
"learning_rate": 2.0408163265306125e-06,
"loss": 1.0659,
"step": 5
},
{
"epoch": 0.01,
"learning_rate": 2.4489795918367347e-06,
"loss": 1.0713,
"step": 6
},
{
"epoch": 0.01,
"learning_rate": 2.8571428571428573e-06,
"loss": 0.8657,
"step": 7
},
{
"epoch": 0.01,
"learning_rate": 3.2653061224489794e-06,
"loss": 0.5254,
"step": 8
},
{
"epoch": 0.01,
"learning_rate": 3.6734693877551024e-06,
"loss": 0.4575,
"step": 9
},
{
"epoch": 0.01,
"learning_rate": 4.081632653061225e-06,
"loss": 0.386,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 4.489795918367348e-06,
"loss": 0.2015,
"step": 11
},
{
"epoch": 0.01,
"learning_rate": 4.897959183673469e-06,
"loss": 0.2046,
"step": 12
},
{
"epoch": 0.02,
"learning_rate": 5.306122448979593e-06,
"loss": 0.1884,
"step": 13
},
{
"epoch": 0.02,
"learning_rate": 5.7142857142857145e-06,
"loss": 0.1812,
"step": 14
},
{
"epoch": 0.02,
"learning_rate": 6.122448979591837e-06,
"loss": 0.2687,
"step": 15
},
{
"epoch": 0.02,
"learning_rate": 6.530612244897959e-06,
"loss": 0.2764,
"step": 16
},
{
"epoch": 0.02,
"learning_rate": 6.938775510204082e-06,
"loss": 0.2101,
"step": 17
},
{
"epoch": 0.02,
"learning_rate": 7.346938775510205e-06,
"loss": 0.1816,
"step": 18
},
{
"epoch": 0.02,
"learning_rate": 7.755102040816327e-06,
"loss": 0.1924,
"step": 19
},
{
"epoch": 0.02,
"learning_rate": 8.16326530612245e-06,
"loss": 0.1964,
"step": 20
},
{
"epoch": 0.03,
"learning_rate": 8.571428571428571e-06,
"loss": 0.1799,
"step": 21
},
{
"epoch": 0.03,
"learning_rate": 8.979591836734695e-06,
"loss": 0.1705,
"step": 22
},
{
"epoch": 0.03,
"learning_rate": 9.387755102040818e-06,
"loss": 0.1851,
"step": 23
},
{
"epoch": 0.03,
"learning_rate": 9.795918367346939e-06,
"loss": 0.1763,
"step": 24
},
{
"epoch": 0.03,
"learning_rate": 1.0204081632653063e-05,
"loss": 0.2079,
"step": 25
},
{
"epoch": 0.03,
"learning_rate": 1.0612244897959186e-05,
"loss": 0.1682,
"step": 26
},
{
"epoch": 0.03,
"learning_rate": 1.1020408163265306e-05,
"loss": 0.1788,
"step": 27
},
{
"epoch": 0.03,
"learning_rate": 1.1428571428571429e-05,
"loss": 0.1274,
"step": 28
},
{
"epoch": 0.04,
"learning_rate": 1.1836734693877552e-05,
"loss": 0.1627,
"step": 29
},
{
"epoch": 0.04,
"learning_rate": 1.2244897959183674e-05,
"loss": 0.1804,
"step": 30
},
{
"epoch": 0.04,
"learning_rate": 1.2653061224489798e-05,
"loss": 0.1431,
"step": 31
},
{
"epoch": 0.04,
"learning_rate": 1.3061224489795918e-05,
"loss": 0.1346,
"step": 32
},
{
"epoch": 0.04,
"learning_rate": 1.3469387755102042e-05,
"loss": 0.1613,
"step": 33
},
{
"epoch": 0.04,
"learning_rate": 1.3877551020408165e-05,
"loss": 0.1469,
"step": 34
},
{
"epoch": 0.04,
"learning_rate": 1.4285714285714287e-05,
"loss": 0.1307,
"step": 35
},
{
"epoch": 0.04,
"learning_rate": 1.469387755102041e-05,
"loss": 0.1355,
"step": 36
},
{
"epoch": 0.05,
"learning_rate": 1.510204081632653e-05,
"loss": 0.1324,
"step": 37
},
{
"epoch": 0.05,
"learning_rate": 1.5510204081632655e-05,
"loss": 0.1292,
"step": 38
},
{
"epoch": 0.05,
"learning_rate": 1.5918367346938776e-05,
"loss": 0.1107,
"step": 39
},
{
"epoch": 0.05,
"learning_rate": 1.63265306122449e-05,
"loss": 0.1312,
"step": 40
},
{
"epoch": 0.05,
"learning_rate": 1.673469387755102e-05,
"loss": 0.1328,
"step": 41
},
{
"epoch": 0.05,
"learning_rate": 1.7142857142857142e-05,
"loss": 0.1159,
"step": 42
},
{
"epoch": 0.05,
"learning_rate": 1.7551020408163266e-05,
"loss": 0.1141,
"step": 43
},
{
"epoch": 0.05,
"learning_rate": 1.795918367346939e-05,
"loss": 0.132,
"step": 44
},
{
"epoch": 0.06,
"learning_rate": 1.836734693877551e-05,
"loss": 0.1318,
"step": 45
},
{
"epoch": 0.06,
"learning_rate": 1.8775510204081636e-05,
"loss": 0.105,
"step": 46
},
{
"epoch": 0.06,
"learning_rate": 1.9183673469387756e-05,
"loss": 0.1204,
"step": 47
},
{
"epoch": 0.06,
"learning_rate": 1.9591836734693877e-05,
"loss": 0.1407,
"step": 48
},
{
"epoch": 0.06,
"learning_rate": 2e-05,
"loss": 0.1219,
"step": 49
},
{
"epoch": 0.06,
"learning_rate": 1.9999980157050852e-05,
"loss": 0.1264,
"step": 50
},
{
"epoch": 0.06,
"learning_rate": 1.999992062828215e-05,
"loss": 0.1395,
"step": 51
},
{
"epoch": 0.06,
"learning_rate": 1.9999821413930146e-05,
"loss": 0.1053,
"step": 52
},
{
"epoch": 0.07,
"learning_rate": 1.999968251438858e-05,
"loss": 0.1467,
"step": 53
},
{
"epoch": 0.07,
"learning_rate": 1.999950393020868e-05,
"loss": 0.1345,
"step": 54
},
{
"epoch": 0.07,
"learning_rate": 1.9999285662099182e-05,
"loss": 0.1335,
"step": 55
},
{
"epoch": 0.07,
"learning_rate": 1.99990277109263e-05,
"loss": 0.1137,
"step": 56
},
{
"epoch": 0.07,
"learning_rate": 1.9998730077713732e-05,
"loss": 0.1348,
"step": 57
},
{
"epoch": 0.07,
"learning_rate": 1.999839276364267e-05,
"loss": 0.1172,
"step": 58
},
{
"epoch": 0.07,
"learning_rate": 1.9998015770051766e-05,
"loss": 0.1409,
"step": 59
},
{
"epoch": 0.07,
"learning_rate": 1.9997599098437162e-05,
"loss": 0.1337,
"step": 60
},
{
"epoch": 0.08,
"learning_rate": 1.999714275045245e-05,
"loss": 0.1395,
"step": 61
},
{
"epoch": 0.08,
"learning_rate": 1.999664672790869e-05,
"loss": 0.1086,
"step": 62
},
{
"epoch": 0.08,
"learning_rate": 1.9996111032774398e-05,
"loss": 0.1068,
"step": 63
},
{
"epoch": 0.08,
"learning_rate": 1.9995535667175517e-05,
"loss": 0.1271,
"step": 64
},
{
"epoch": 0.08,
"learning_rate": 1.9994920633395445e-05,
"loss": 0.1198,
"step": 65
},
{
"epoch": 0.08,
"learning_rate": 1.9994265933875e-05,
"loss": 0.1198,
"step": 66
},
{
"epoch": 0.08,
"learning_rate": 1.9993571571212408e-05,
"loss": 0.1202,
"step": 67
},
{
"epoch": 0.08,
"learning_rate": 1.9992837548163315e-05,
"loss": 0.0931,
"step": 68
},
{
"epoch": 0.08,
"learning_rate": 1.9992063867640757e-05,
"loss": 0.1003,
"step": 69
},
{
"epoch": 0.09,
"learning_rate": 1.9991250532715158e-05,
"loss": 0.1183,
"step": 70
},
{
"epoch": 0.09,
"learning_rate": 1.99903975466143e-05,
"loss": 0.1287,
"step": 71
},
{
"epoch": 0.09,
"learning_rate": 1.9989504912723347e-05,
"loss": 0.1229,
"step": 72
},
{
"epoch": 0.09,
"learning_rate": 1.9988572634584792e-05,
"loss": 0.1257,
"step": 73
},
{
"epoch": 0.09,
"learning_rate": 1.9987600715898462e-05,
"loss": 0.0889,
"step": 74
},
{
"epoch": 0.09,
"learning_rate": 1.9986589160521508e-05,
"loss": 0.0991,
"step": 75
},
{
"epoch": 0.09,
"learning_rate": 1.9985537972468374e-05,
"loss": 0.1008,
"step": 76
},
{
"epoch": 0.09,
"learning_rate": 1.9984447155910796e-05,
"loss": 0.0897,
"step": 77
},
{
"epoch": 0.1,
"learning_rate": 1.9983316715177784e-05,
"loss": 0.1328,
"step": 78
},
{
"epoch": 0.1,
"learning_rate": 1.998214665475558e-05,
"loss": 0.12,
"step": 79
},
{
"epoch": 0.1,
"learning_rate": 1.9980936979287685e-05,
"loss": 0.098,
"step": 80
},
{
"epoch": 0.1,
"learning_rate": 1.99796876935748e-05,
"loss": 0.1141,
"step": 81
},
{
"epoch": 0.1,
"learning_rate": 1.997839880257483e-05,
"loss": 0.1628,
"step": 82
},
{
"epoch": 0.1,
"learning_rate": 1.9977070311402854e-05,
"loss": 0.0978,
"step": 83
},
{
"epoch": 0.1,
"learning_rate": 1.9975702225331108e-05,
"loss": 0.1315,
"step": 84
},
{
"epoch": 0.1,
"learning_rate": 1.9974294549788964e-05,
"loss": 0.1412,
"step": 85
},
{
"epoch": 0.11,
"learning_rate": 1.9972847290362906e-05,
"loss": 0.1265,
"step": 86
},
{
"epoch": 0.11,
"learning_rate": 1.9971360452796523e-05,
"loss": 0.1239,
"step": 87
},
{
"epoch": 0.11,
"learning_rate": 1.9969834042990452e-05,
"loss": 0.1092,
"step": 88
},
{
"epoch": 0.11,
"learning_rate": 1.9968268067002396e-05,
"loss": 0.132,
"step": 89
},
{
"epoch": 0.11,
"learning_rate": 1.9966662531047065e-05,
"loss": 0.1251,
"step": 90
},
{
"epoch": 0.11,
"learning_rate": 1.9965017441496174e-05,
"loss": 0.1124,
"step": 91
},
{
"epoch": 0.11,
"learning_rate": 1.996333280487841e-05,
"loss": 0.1107,
"step": 92
},
{
"epoch": 0.11,
"learning_rate": 1.996160862787941e-05,
"loss": 0.1136,
"step": 93
},
{
"epoch": 0.12,
"learning_rate": 1.995984491734172e-05,
"loss": 0.1216,
"step": 94
},
{
"epoch": 0.12,
"learning_rate": 1.9958041680264777e-05,
"loss": 0.0949,
"step": 95
},
{
"epoch": 0.12,
"learning_rate": 1.9956198923804898e-05,
"loss": 0.1031,
"step": 96
},
{
"epoch": 0.12,
"learning_rate": 1.995431665527523e-05,
"loss": 0.0992,
"step": 97
},
{
"epoch": 0.12,
"learning_rate": 1.9952394882145718e-05,
"loss": 0.112,
"step": 98
},
{
"epoch": 0.12,
"learning_rate": 1.9950433612043092e-05,
"loss": 0.111,
"step": 99
},
{
"epoch": 0.12,
"learning_rate": 1.994843285275083e-05,
"loss": 0.0984,
"step": 100
},
{
"epoch": 0.12,
"learning_rate": 1.994639261220913e-05,
"loss": 0.0939,
"step": 101
},
{
"epoch": 0.13,
"learning_rate": 1.9944312898514862e-05,
"loss": 0.0968,
"step": 102
},
{
"epoch": 0.13,
"learning_rate": 1.9942193719921557e-05,
"loss": 0.1065,
"step": 103
},
{
"epoch": 0.13,
"learning_rate": 1.994003508483937e-05,
"loss": 0.0937,
"step": 104
},
{
"epoch": 0.13,
"learning_rate": 1.9937837001835038e-05,
"loss": 0.1018,
"step": 105
},
{
"epoch": 0.13,
"learning_rate": 1.993559947963185e-05,
"loss": 0.1007,
"step": 106
},
{
"epoch": 0.13,
"learning_rate": 1.9933322527109614e-05,
"loss": 0.1023,
"step": 107
},
{
"epoch": 0.13,
"learning_rate": 1.9931006153304618e-05,
"loss": 0.1031,
"step": 108
},
{
"epoch": 0.13,
"learning_rate": 1.9928650367409602e-05,
"loss": 0.1252,
"step": 109
},
{
"epoch": 0.14,
"learning_rate": 1.9926255178773713e-05,
"loss": 0.0925,
"step": 110
},
{
"epoch": 0.14,
"learning_rate": 1.9923820596902473e-05,
"loss": 0.13,
"step": 111
},
{
"epoch": 0.14,
"learning_rate": 1.9921346631457737e-05,
"loss": 0.0961,
"step": 112
},
{
"epoch": 0.14,
"learning_rate": 1.9918833292257663e-05,
"loss": 0.0994,
"step": 113
},
{
"epoch": 0.14,
"learning_rate": 1.991628058927666e-05,
"loss": 0.0879,
"step": 114
},
{
"epoch": 0.14,
"learning_rate": 1.991368853264536e-05,
"loss": 0.0966,
"step": 115
},
{
"epoch": 0.14,
"learning_rate": 1.991105713265057e-05,
"loss": 0.105,
"step": 116
},
{
"epoch": 0.14,
"learning_rate": 1.9908386399735243e-05,
"loss": 0.1266,
"step": 117
},
{
"epoch": 0.15,
"learning_rate": 1.990567634449842e-05,
"loss": 0.1223,
"step": 118
},
{
"epoch": 0.15,
"learning_rate": 1.9902926977695196e-05,
"loss": 0.1089,
"step": 119
},
{
"epoch": 0.15,
"learning_rate": 1.9900138310236683e-05,
"loss": 0.1144,
"step": 120
},
{
"epoch": 0.15,
"learning_rate": 1.9897310353189958e-05,
"loss": 0.0997,
"step": 121
},
{
"epoch": 0.15,
"learning_rate": 1.9894443117778022e-05,
"loss": 0.0906,
"step": 122
},
{
"epoch": 0.15,
"learning_rate": 1.9891536615379756e-05,
"loss": 0.0904,
"step": 123
},
{
"epoch": 0.15,
"learning_rate": 1.9888590857529878e-05,
"loss": 0.1114,
"step": 124
},
{
"epoch": 0.15,
"learning_rate": 1.9885605855918887e-05,
"loss": 0.0946,
"step": 125
},
{
"epoch": 0.15,
"learning_rate": 1.9882581622393036e-05,
"loss": 0.0934,
"step": 126
},
{
"epoch": 0.16,
"learning_rate": 1.9879518168954265e-05,
"loss": 0.088,
"step": 127
},
{
"epoch": 0.16,
"learning_rate": 1.9876415507760164e-05,
"loss": 0.096,
"step": 128
},
{
"epoch": 0.16,
"learning_rate": 1.9873273651123925e-05,
"loss": 0.0897,
"step": 129
},
{
"epoch": 0.16,
"learning_rate": 1.9870092611514287e-05,
"loss": 0.0795,
"step": 130
},
{
"epoch": 0.16,
"learning_rate": 1.986687240155549e-05,
"loss": 0.0791,
"step": 131
},
{
"epoch": 0.16,
"learning_rate": 1.9863613034027224e-05,
"loss": 0.0919,
"step": 132
},
{
"epoch": 0.16,
"learning_rate": 1.986031452186459e-05,
"loss": 0.0885,
"step": 133
},
{
"epoch": 0.16,
"learning_rate": 1.9856976878158024e-05,
"loss": 0.0889,
"step": 134
},
{
"epoch": 0.17,
"learning_rate": 1.985360011615326e-05,
"loss": 0.0799,
"step": 135
},
{
"epoch": 0.17,
"learning_rate": 1.9850184249251298e-05,
"loss": 0.1195,
"step": 136
},
{
"epoch": 0.17,
"learning_rate": 1.9846729291008293e-05,
"loss": 0.1328,
"step": 137
},
{
"epoch": 0.17,
"learning_rate": 1.9843235255135572e-05,
"loss": 0.0981,
"step": 138
},
{
"epoch": 0.17,
"learning_rate": 1.983970215549952e-05,
"loss": 0.0985,
"step": 139
},
{
"epoch": 0.17,
"learning_rate": 1.9836130006121565e-05,
"loss": 0.1119,
"step": 140
},
{
"epoch": 0.17,
"learning_rate": 1.9832518821178103e-05,
"loss": 0.1516,
"step": 141
},
{
"epoch": 0.17,
"learning_rate": 1.9828868615000447e-05,
"loss": 0.0893,
"step": 142
},
{
"epoch": 0.18,
"learning_rate": 1.982517940207476e-05,
"loss": 0.0882,
"step": 143
},
{
"epoch": 0.18,
"learning_rate": 1.9821451197042028e-05,
"loss": 0.1068,
"step": 144
},
{
"epoch": 0.18,
"learning_rate": 1.981768401469796e-05,
"loss": 0.0942,
"step": 145
},
{
"epoch": 0.18,
"learning_rate": 1.9813877869992955e-05,
"loss": 0.0942,
"step": 146
},
{
"epoch": 0.18,
"learning_rate": 1.9810032778032043e-05,
"loss": 0.0958,
"step": 147
},
{
"epoch": 0.18,
"learning_rate": 1.980614875407482e-05,
"loss": 0.0868,
"step": 148
},
{
"epoch": 0.18,
"learning_rate": 1.9802225813535383e-05,
"loss": 0.1014,
"step": 149
},
{
"epoch": 0.18,
"learning_rate": 1.979826397198227e-05,
"loss": 0.0912,
"step": 150
},
{
"epoch": 0.19,
"learning_rate": 1.9794263245138406e-05,
"loss": 0.1119,
"step": 151
},
{
"epoch": 0.19,
"learning_rate": 1.9790223648881036e-05,
"loss": 0.086,
"step": 152
},
{
"epoch": 0.19,
"learning_rate": 1.978614519924166e-05,
"loss": 0.1036,
"step": 153
},
{
"epoch": 0.19,
"learning_rate": 1.9782027912405973e-05,
"loss": 0.0825,
"step": 154
},
{
"epoch": 0.19,
"learning_rate": 1.97778718047138e-05,
"loss": 0.1098,
"step": 155
},
{
"epoch": 0.19,
"learning_rate": 1.9773676892659025e-05,
"loss": 0.0965,
"step": 156
},
{
"epoch": 0.19,
"learning_rate": 1.976944319288953e-05,
"loss": 0.0931,
"step": 157
},
{
"epoch": 0.19,
"learning_rate": 1.9765170722207135e-05,
"loss": 0.0986,
"step": 158
},
{
"epoch": 0.2,
"learning_rate": 1.976085949756753e-05,
"loss": 0.1097,
"step": 159
},
{
"epoch": 0.2,
"learning_rate": 1.9756509536080187e-05,
"loss": 0.1147,
"step": 160
},
{
"epoch": 0.2,
"learning_rate": 1.9752120855008326e-05,
"loss": 0.0898,
"step": 161
},
{
"epoch": 0.2,
"learning_rate": 1.974769347176882e-05,
"loss": 0.1077,
"step": 162
},
{
"epoch": 0.2,
"learning_rate": 1.9743227403932135e-05,
"loss": 0.092,
"step": 163
},
{
"epoch": 0.2,
"learning_rate": 1.9738722669222268e-05,
"loss": 0.1033,
"step": 164
},
{
"epoch": 0.2,
"learning_rate": 1.9734179285516657e-05,
"loss": 0.1052,
"step": 165
},
{
"epoch": 0.2,
"learning_rate": 1.9729597270846134e-05,
"loss": 0.1092,
"step": 166
},
{
"epoch": 0.21,
"learning_rate": 1.972497664339483e-05,
"loss": 0.1016,
"step": 167
},
{
"epoch": 0.21,
"learning_rate": 1.9720317421500123e-05,
"loss": 0.1117,
"step": 168
},
{
"epoch": 0.21,
"learning_rate": 1.9715619623652554e-05,
"loss": 0.0913,
"step": 169
},
{
"epoch": 0.21,
"learning_rate": 1.971088326849576e-05,
"loss": 0.0987,
"step": 170
},
{
"epoch": 0.21,
"learning_rate": 1.9706108374826383e-05,
"loss": 0.1089,
"step": 171
},
{
"epoch": 0.21,
"learning_rate": 1.9701294961594022e-05,
"loss": 0.0965,
"step": 172
},
{
"epoch": 0.21,
"learning_rate": 1.969644304790114e-05,
"loss": 0.1017,
"step": 173
},
{
"epoch": 0.21,
"learning_rate": 1.9691552653002993e-05,
"loss": 0.1177,
"step": 174
},
{
"epoch": 0.22,
"learning_rate": 1.968662379630755e-05,
"loss": 0.1043,
"step": 175
},
{
"epoch": 0.22,
"learning_rate": 1.9681656497375427e-05,
"loss": 0.1058,
"step": 176
},
{
"epoch": 0.22,
"learning_rate": 1.967665077591979e-05,
"loss": 0.108,
"step": 177
},
{
"epoch": 0.22,
"learning_rate": 1.9671606651806293e-05,
"loss": 0.0977,
"step": 178
},
{
"epoch": 0.22,
"learning_rate": 1.9666524145053004e-05,
"loss": 0.1074,
"step": 179
},
{
"epoch": 0.22,
"learning_rate": 1.96614032758303e-05,
"loss": 0.11,
"step": 180
},
{
"epoch": 0.22,
"learning_rate": 1.965624406446081e-05,
"loss": 0.0941,
"step": 181
},
{
"epoch": 0.22,
"learning_rate": 1.9651046531419335e-05,
"loss": 0.0945,
"step": 182
},
{
"epoch": 0.23,
"learning_rate": 1.9645810697332747e-05,
"loss": 0.0912,
"step": 183
},
{
"epoch": 0.23,
"learning_rate": 1.9640536582979924e-05,
"loss": 0.104,
"step": 184
},
{
"epoch": 0.23,
"learning_rate": 1.963522420929166e-05,
"loss": 0.1095,
"step": 185
},
{
"epoch": 0.23,
"learning_rate": 1.96298735973506e-05,
"loss": 0.1027,
"step": 186
},
{
"epoch": 0.23,
"learning_rate": 1.9624484768391106e-05,
"loss": 0.0943,
"step": 187
},
{
"epoch": 0.23,
"learning_rate": 1.961905774379925e-05,
"loss": 0.1064,
"step": 188
},
{
"epoch": 0.23,
"learning_rate": 1.9613592545112657e-05,
"loss": 0.0957,
"step": 189
},
{
"epoch": 0.23,
"learning_rate": 1.960808919402046e-05,
"loss": 0.0861,
"step": 190
},
{
"epoch": 0.23,
"learning_rate": 1.9602547712363205e-05,
"loss": 0.0869,
"step": 191
},
{
"epoch": 0.24,
"learning_rate": 1.9596968122132757e-05,
"loss": 0.118,
"step": 192
},
{
"epoch": 0.24,
"learning_rate": 1.9591350445472218e-05,
"loss": 0.0942,
"step": 193
},
{
"epoch": 0.24,
"learning_rate": 1.958569470467585e-05,
"loss": 0.0959,
"step": 194
},
{
"epoch": 0.24,
"learning_rate": 1.9580000922188967e-05,
"loss": 0.0911,
"step": 195
},
{
"epoch": 0.24,
"learning_rate": 1.957426912060785e-05,
"loss": 0.0946,
"step": 196
},
{
"epoch": 0.24,
"learning_rate": 1.9568499322679676e-05,
"loss": 0.0888,
"step": 197
},
{
"epoch": 0.24,
"learning_rate": 1.95626915513024e-05,
"loss": 0.1021,
"step": 198
},
{
"epoch": 0.24,
"learning_rate": 1.9556845829524683e-05,
"loss": 0.1072,
"step": 199
},
{
"epoch": 0.25,
"learning_rate": 1.9550962180545807e-05,
"loss": 0.1157,
"step": 200
},
{
"epoch": 0.25,
"learning_rate": 1.9545040627715554e-05,
"loss": 0.0898,
"step": 201
},
{
"epoch": 0.25,
"learning_rate": 1.953908119453414e-05,
"loss": 0.0882,
"step": 202
},
{
"epoch": 0.25,
"learning_rate": 1.953308390465211e-05,
"loss": 0.0879,
"step": 203
},
{
"epoch": 0.25,
"learning_rate": 1.9527048781870248e-05,
"loss": 0.0979,
"step": 204
},
{
"epoch": 0.25,
"learning_rate": 1.9520975850139485e-05,
"loss": 0.1176,
"step": 205
},
{
"epoch": 0.25,
"learning_rate": 1.9514865133560788e-05,
"loss": 0.0724,
"step": 206
},
{
"epoch": 0.25,
"learning_rate": 1.950871665638509e-05,
"loss": 0.0881,
"step": 207
},
{
"epoch": 0.26,
"learning_rate": 1.950253044301318e-05,
"loss": 0.0869,
"step": 208
},
{
"epoch": 0.26,
"learning_rate": 1.949630651799559e-05,
"loss": 0.096,
"step": 209
},
{
"epoch": 0.26,
"learning_rate": 1.949004490603253e-05,
"loss": 0.1387,
"step": 210
},
{
"epoch": 0.26,
"learning_rate": 1.9483745631973777e-05,
"loss": 0.111,
"step": 211
},
{
"epoch": 0.26,
"learning_rate": 1.9477408720818553e-05,
"loss": 0.0874,
"step": 212
},
{
"epoch": 0.26,
"learning_rate": 1.947103419771547e-05,
"loss": 0.0999,
"step": 213
},
{
"epoch": 0.26,
"learning_rate": 1.946462208796239e-05,
"loss": 0.09,
"step": 214
},
{
"epoch": 0.26,
"learning_rate": 1.9458172417006347e-05,
"loss": 0.1143,
"step": 215
},
{
"epoch": 0.27,
"learning_rate": 1.9451685210443443e-05,
"loss": 0.0936,
"step": 216
},
{
"epoch": 0.27,
"learning_rate": 1.9445160494018734e-05,
"loss": 0.0827,
"step": 217
},
{
"epoch": 0.27,
"learning_rate": 1.9438598293626148e-05,
"loss": 0.0919,
"step": 218
},
{
"epoch": 0.27,
"learning_rate": 1.9431998635308372e-05,
"loss": 0.1132,
"step": 219
},
{
"epoch": 0.27,
"learning_rate": 1.942536154525673e-05,
"loss": 0.1217,
"step": 220
},
{
"epoch": 0.27,
"learning_rate": 1.9418687049811116e-05,
"loss": 0.0837,
"step": 221
},
{
"epoch": 0.27,
"learning_rate": 1.9411975175459864e-05,
"loss": 0.0896,
"step": 222
},
{
"epoch": 0.27,
"learning_rate": 1.9405225948839657e-05,
"loss": 0.1046,
"step": 223
},
{
"epoch": 0.28,
"learning_rate": 1.9398439396735396e-05,
"loss": 0.0971,
"step": 224
},
{
"epoch": 0.28,
"learning_rate": 1.9391615546080132e-05,
"loss": 0.1008,
"step": 225
},
{
"epoch": 0.28,
"learning_rate": 1.9384754423954927e-05,
"loss": 0.0933,
"step": 226
},
{
"epoch": 0.28,
"learning_rate": 1.9377856057588756e-05,
"loss": 0.0978,
"step": 227
},
{
"epoch": 0.28,
"learning_rate": 1.937092047435841e-05,
"loss": 0.0978,
"step": 228
},
{
"epoch": 0.28,
"learning_rate": 1.9363947701788374e-05,
"loss": 0.0815,
"step": 229
},
{
"epoch": 0.28,
"learning_rate": 1.9356937767550715e-05,
"loss": 0.1035,
"step": 230
},
{
"epoch": 0.28,
"learning_rate": 1.9349890699464997e-05,
"loss": 0.1002,
"step": 231
},
{
"epoch": 0.29,
"learning_rate": 1.934280652549814e-05,
"loss": 0.1176,
"step": 232
},
{
"epoch": 0.29,
"learning_rate": 1.9335685273764322e-05,
"loss": 0.1075,
"step": 233
},
{
"epoch": 0.29,
"learning_rate": 1.932852697252487e-05,
"loss": 0.0876,
"step": 234
},
{
"epoch": 0.29,
"learning_rate": 1.932133165018815e-05,
"loss": 0.0895,
"step": 235
},
{
"epoch": 0.29,
"learning_rate": 1.931409933530944e-05,
"loss": 0.0905,
"step": 236
},
{
"epoch": 0.29,
"learning_rate": 1.9306830056590832e-05,
"loss": 0.0938,
"step": 237
},
{
"epoch": 0.29,
"learning_rate": 1.9299523842881117e-05,
"loss": 0.0899,
"step": 238
},
{
"epoch": 0.29,
"learning_rate": 1.9292180723175656e-05,
"loss": 0.0771,
"step": 239
},
{
"epoch": 0.3,
"learning_rate": 1.9284800726616276e-05,
"loss": 0.1033,
"step": 240
},
{
"epoch": 0.3,
"learning_rate": 1.9277383882491162e-05,
"loss": 0.0999,
"step": 241
},
{
"epoch": 0.3,
"learning_rate": 1.926993022023472e-05,
"loss": 0.0803,
"step": 242
},
{
"epoch": 0.3,
"learning_rate": 1.9262439769427488e-05,
"loss": 0.1154,
"step": 243
},
{
"epoch": 0.3,
"learning_rate": 1.9254912559795984e-05,
"loss": 0.0823,
"step": 244
},
{
"epoch": 0.3,
"learning_rate": 1.924734862121262e-05,
"loss": 0.0935,
"step": 245
},
{
"epoch": 0.3,
"learning_rate": 1.9239747983695564e-05,
"loss": 0.0869,
"step": 246
},
{
"epoch": 0.3,
"learning_rate": 1.9232110677408625e-05,
"loss": 0.097,
"step": 247
},
{
"epoch": 0.31,
"learning_rate": 1.9224436732661148e-05,
"loss": 0.09,
"step": 248
},
{
"epoch": 0.31,
"learning_rate": 1.9216726179907866e-05,
"loss": 0.0753,
"step": 249
},
{
"epoch": 0.31,
"learning_rate": 1.9208979049748808e-05,
"loss": 0.0889,
"step": 250
},
{
"epoch": 0.31,
"learning_rate": 1.920119537292914e-05,
"loss": 0.0893,
"step": 251
},
{
"epoch": 0.31,
"learning_rate": 1.9193375180339094e-05,
"loss": 0.0935,
"step": 252
},
{
"epoch": 0.31,
"learning_rate": 1.918551850301381e-05,
"loss": 0.0858,
"step": 253
},
{
"epoch": 0.31,
"learning_rate": 1.9177625372133208e-05,
"loss": 0.0898,
"step": 254
},
{
"epoch": 0.31,
"learning_rate": 1.9169695819021893e-05,
"loss": 0.0827,
"step": 255
},
{
"epoch": 0.31,
"learning_rate": 1.9161729875149006e-05,
"loss": 0.0902,
"step": 256
},
{
"epoch": 0.32,
"learning_rate": 1.9153727572128113e-05,
"loss": 0.0881,
"step": 257
},
{
"epoch": 0.32,
"learning_rate": 1.9145688941717074e-05,
"loss": 0.1011,
"step": 258
},
{
"epoch": 0.32,
"learning_rate": 1.9137614015817913e-05,
"loss": 0.0789,
"step": 259
},
{
"epoch": 0.32,
"learning_rate": 1.9129502826476697e-05,
"loss": 0.0945,
"step": 260
},
{
"epoch": 0.32,
"learning_rate": 1.9121355405883416e-05,
"loss": 0.0928,
"step": 261
},
{
"epoch": 0.32,
"learning_rate": 1.911317178637183e-05,
"loss": 0.0818,
"step": 262
},
{
"epoch": 0.32,
"learning_rate": 1.910495200041938e-05,
"loss": 0.0779,
"step": 263
},
{
"epoch": 0.32,
"learning_rate": 1.909669608064702e-05,
"loss": 0.0792,
"step": 264
},
{
"epoch": 0.33,
"learning_rate": 1.9088404059819106e-05,
"loss": 0.1046,
"step": 265
},
{
"epoch": 0.33,
"learning_rate": 1.908007597084327e-05,
"loss": 0.0768,
"step": 266
},
{
"epoch": 0.33,
"learning_rate": 1.907171184677028e-05,
"loss": 0.0875,
"step": 267
},
{
"epoch": 0.33,
"learning_rate": 1.906331172079392e-05,
"loss": 0.0781,
"step": 268
},
{
"epoch": 0.33,
"learning_rate": 1.9054875626250836e-05,
"loss": 0.1063,
"step": 269
},
{
"epoch": 0.33,
"learning_rate": 1.9046403596620432e-05,
"loss": 0.0774,
"step": 270
},
{
"epoch": 0.33,
"learning_rate": 1.9037895665524716e-05,
"loss": 0.0821,
"step": 271
},
{
"epoch": 0.33,
"learning_rate": 1.902935186672818e-05,
"loss": 0.0793,
"step": 272
},
{
"epoch": 0.34,
"learning_rate": 1.9020772234137656e-05,
"loss": 0.0998,
"step": 273
},
{
"epoch": 0.34,
"learning_rate": 1.9012156801802184e-05,
"loss": 0.1079,
"step": 274
},
{
"epoch": 0.34,
"learning_rate": 1.9003505603912884e-05,
"loss": 0.1056,
"step": 275
},
{
"epoch": 0.34,
"learning_rate": 1.899481867480281e-05,
"loss": 0.103,
"step": 276
},
{
"epoch": 0.34,
"learning_rate": 1.8986096048946826e-05,
"loss": 0.0781,
"step": 277
},
{
"epoch": 0.34,
"learning_rate": 1.8977337760961444e-05,
"loss": 0.0867,
"step": 278
},
{
"epoch": 0.34,
"learning_rate": 1.896854384560473e-05,
"loss": 0.1021,
"step": 279
},
{
"epoch": 0.34,
"learning_rate": 1.895971433777612e-05,
"loss": 0.0839,
"step": 280
},
{
"epoch": 0.35,
"learning_rate": 1.895084927251631e-05,
"loss": 0.105,
"step": 281
},
{
"epoch": 0.35,
"learning_rate": 1.894194868500711e-05,
"loss": 0.1093,
"step": 282
},
{
"epoch": 0.35,
"learning_rate": 1.8933012610571295e-05,
"loss": 0.11,
"step": 283
},
{
"epoch": 0.35,
"learning_rate": 1.8924041084672486e-05,
"loss": 0.0906,
"step": 284
},
{
"epoch": 0.35,
"learning_rate": 1.8915034142914988e-05,
"loss": 0.0861,
"step": 285
},
{
"epoch": 0.35,
"learning_rate": 1.8905991821043656e-05,
"loss": 0.0892,
"step": 286
},
{
"epoch": 0.35,
"learning_rate": 1.8896914154943756e-05,
"loss": 0.1085,
"step": 287
},
{
"epoch": 0.35,
"learning_rate": 1.8887801180640826e-05,
"loss": 0.0923,
"step": 288
},
{
"epoch": 0.36,
"learning_rate": 1.887865293430052e-05,
"loss": 0.0963,
"step": 289
},
{
"epoch": 0.36,
"learning_rate": 1.8869469452228476e-05,
"loss": 0.1025,
"step": 290
},
{
"epoch": 0.36,
"learning_rate": 1.8860250770870167e-05,
"loss": 0.098,
"step": 291
},
{
"epoch": 0.36,
"learning_rate": 1.885099692681076e-05,
"loss": 0.0775,
"step": 292
},
{
"epoch": 0.36,
"learning_rate": 1.8841707956774965e-05,
"loss": 0.0848,
"step": 293
},
{
"epoch": 0.36,
"learning_rate": 1.8832383897626892e-05,
"loss": 0.0968,
"step": 294
},
{
"epoch": 0.36,
"learning_rate": 1.882302478636991e-05,
"loss": 0.1071,
"step": 295
},
{
"epoch": 0.36,
"learning_rate": 1.881363066014649e-05,
"loss": 0.1039,
"step": 296
},
{
"epoch": 0.37,
"learning_rate": 1.8804201556238068e-05,
"loss": 0.086,
"step": 297
},
{
"epoch": 0.37,
"learning_rate": 1.879473751206489e-05,
"loss": 0.093,
"step": 298
},
{
"epoch": 0.37,
"learning_rate": 1.878523856518587e-05,
"loss": 0.1065,
"step": 299
},
{
"epoch": 0.37,
"learning_rate": 1.8775704753298423e-05,
"loss": 0.0929,
"step": 300
},
{
"epoch": 0.37,
"learning_rate": 1.876613611423834e-05,
"loss": 0.099,
"step": 301
},
{
"epoch": 0.37,
"learning_rate": 1.875653268597963e-05,
"loss": 0.0836,
"step": 302
},
{
"epoch": 0.37,
"learning_rate": 1.8746894506634355e-05,
"loss": 0.0891,
"step": 303
},
{
"epoch": 0.37,
"learning_rate": 1.8737221614452497e-05,
"loss": 0.0778,
"step": 304
},
{
"epoch": 0.38,
"learning_rate": 1.87275140478218e-05,
"loss": 0.1147,
"step": 305
},
{
"epoch": 0.38,
"learning_rate": 1.8717771845267615e-05,
"loss": 0.104,
"step": 306
},
{
"epoch": 0.38,
"learning_rate": 1.8707995045452744e-05,
"loss": 0.0832,
"step": 307
},
{
"epoch": 0.38,
"learning_rate": 1.8698183687177295e-05,
"loss": 0.0939,
"step": 308
},
{
"epoch": 0.38,
"learning_rate": 1.868833780937853e-05,
"loss": 0.0778,
"step": 309
},
{
"epoch": 0.38,
"learning_rate": 1.8678457451130693e-05,
"loss": 0.0812,
"step": 310
},
{
"epoch": 0.38,
"learning_rate": 1.866854265164488e-05,
"loss": 0.0859,
"step": 311
},
{
"epoch": 0.38,
"learning_rate": 1.8658593450268853e-05,
"loss": 0.0765,
"step": 312
},
{
"epoch": 0.38,
"learning_rate": 1.8648609886486923e-05,
"loss": 0.0912,
"step": 313
},
{
"epoch": 0.39,
"learning_rate": 1.8638591999919755e-05,
"loss": 0.0885,
"step": 314
},
{
"epoch": 0.39,
"learning_rate": 1.862853983032423e-05,
"loss": 0.0799,
"step": 315
},
{
"epoch": 0.39,
"learning_rate": 1.8618453417593287e-05,
"loss": 0.0827,
"step": 316
},
{
"epoch": 0.39,
"learning_rate": 1.8608332801755764e-05,
"loss": 0.1009,
"step": 317
},
{
"epoch": 0.39,
"learning_rate": 1.859817802297623e-05,
"loss": 0.1001,
"step": 318
},
{
"epoch": 0.39,
"learning_rate": 1.8587989121554837e-05,
"loss": 0.1012,
"step": 319
},
{
"epoch": 0.39,
"learning_rate": 1.8577766137927163e-05,
"loss": 0.0791,
"step": 320
},
{
"epoch": 0.39,
"learning_rate": 1.8567509112664024e-05,
"loss": 0.0901,
"step": 321
},
{
"epoch": 0.4,
"learning_rate": 1.8557218086471356e-05,
"loss": 0.0767,
"step": 322
},
{
"epoch": 0.4,
"learning_rate": 1.854689310019002e-05,
"loss": 0.0936,
"step": 323
},
{
"epoch": 0.4,
"learning_rate": 1.8536534194795647e-05,
"loss": 0.0967,
"step": 324
},
{
"epoch": 0.4,
"learning_rate": 1.8526141411398482e-05,
"loss": 0.0651,
"step": 325
},
{
"epoch": 0.4,
"learning_rate": 1.851571479124323e-05,
"loss": 0.1057,
"step": 326
},
{
"epoch": 0.4,
"learning_rate": 1.850525437570886e-05,
"loss": 0.1047,
"step": 327
},
{
"epoch": 0.4,
"learning_rate": 1.8494760206308476e-05,
"loss": 0.0876,
"step": 328
},
{
"epoch": 0.4,
"learning_rate": 1.8484232324689127e-05,
"loss": 0.0858,
"step": 329
},
{
"epoch": 0.41,
"learning_rate": 1.847367077263166e-05,
"loss": 0.0903,
"step": 330
},
{
"epoch": 0.41,
"learning_rate": 1.846307559205055e-05,
"loss": 0.1116,
"step": 331
},
{
"epoch": 0.41,
"learning_rate": 1.8452446824993704e-05,
"loss": 0.0996,
"step": 332
},
{
"epoch": 0.41,
"learning_rate": 1.844178451364236e-05,
"loss": 0.0898,
"step": 333
},
{
"epoch": 0.41,
"learning_rate": 1.8431088700310846e-05,
"loss": 0.0746,
"step": 334
},
{
"epoch": 0.41,
"learning_rate": 1.842035942744646e-05,
"loss": 0.0701,
"step": 335
},
{
"epoch": 0.41,
"learning_rate": 1.840959673762929e-05,
"loss": 0.1063,
"step": 336
},
{
"epoch": 0.41,
"learning_rate": 1.8398800673572032e-05,
"loss": 0.0771,
"step": 337
},
{
"epoch": 0.42,
"learning_rate": 1.8387971278119834e-05,
"loss": 0.0981,
"step": 338
},
{
"epoch": 0.42,
"learning_rate": 1.837710859425013e-05,
"loss": 0.0967,
"step": 339
},
{
"epoch": 0.42,
"learning_rate": 1.8366212665072455e-05,
"loss": 0.0949,
"step": 340
},
{
"epoch": 0.42,
"learning_rate": 1.8355283533828285e-05,
"loss": 0.0977,
"step": 341
},
{
"epoch": 0.42,
"learning_rate": 1.8344321243890856e-05,
"loss": 0.0936,
"step": 342
},
{
"epoch": 0.42,
"learning_rate": 1.8333325838765e-05,
"loss": 0.0785,
"step": 343
},
{
"epoch": 0.42,
"learning_rate": 1.8322297362086972e-05,
"loss": 0.0927,
"step": 344
},
{
"epoch": 0.42,
"learning_rate": 1.831123585762427e-05,
"loss": 0.0868,
"step": 345
},
{
"epoch": 0.43,
"learning_rate": 1.8300141369275472e-05,
"loss": 0.0973,
"step": 346
},
{
"epoch": 0.43,
"learning_rate": 1.8289013941070046e-05,
"loss": 0.0775,
"step": 347
},
{
"epoch": 0.43,
"learning_rate": 1.8277853617168197e-05,
"loss": 0.0939,
"step": 348
},
{
"epoch": 0.43,
"learning_rate": 1.8266660441860666e-05,
"loss": 0.0839,
"step": 349
},
{
"epoch": 0.43,
"learning_rate": 1.825543445956858e-05,
"loss": 0.0847,
"step": 350
},
{
"epoch": 0.43,
"learning_rate": 1.8244175714843256e-05,
"loss": 0.0966,
"step": 351
},
{
"epoch": 0.43,
"learning_rate": 1.8232884252366036e-05,
"loss": 0.1016,
"step": 352
},
{
"epoch": 0.43,
"learning_rate": 1.8221560116948103e-05,
"loss": 0.0744,
"step": 353
},
{
"epoch": 0.44,
"learning_rate": 1.82102033535303e-05,
"loss": 0.0894,
"step": 354
},
{
"epoch": 0.44,
"learning_rate": 1.819881400718297e-05,
"loss": 0.0811,
"step": 355
},
{
"epoch": 0.44,
"learning_rate": 1.8187392123105752e-05,
"loss": 0.0927,
"step": 356
},
{
"epoch": 0.44,
"learning_rate": 1.817593774662742e-05,
"loss": 0.0945,
"step": 357
},
{
"epoch": 0.44,
"learning_rate": 1.8164450923205697e-05,
"loss": 0.0894,
"step": 358
},
{
"epoch": 0.44,
"learning_rate": 1.8152931698427076e-05,
"loss": 0.0939,
"step": 359
},
{
"epoch": 0.44,
"learning_rate": 1.8141380118006632e-05,
"loss": 0.0999,
"step": 360
},
{
"epoch": 0.44,
"learning_rate": 1.812979622778785e-05,
"loss": 0.089,
"step": 361
},
{
"epoch": 0.45,
"learning_rate": 1.8118180073742442e-05,
"loss": 0.0957,
"step": 362
},
{
"epoch": 0.45,
"learning_rate": 1.810653170197015e-05,
"loss": 0.0742,
"step": 363
},
{
"epoch": 0.45,
"learning_rate": 1.8094851158698597e-05,
"loss": 0.1087,
"step": 364
},
{
"epoch": 0.45,
"learning_rate": 1.8083138490283057e-05,
"loss": 0.0948,
"step": 365
},
{
"epoch": 0.45,
"learning_rate": 1.807139374320631e-05,
"loss": 0.0744,
"step": 366
},
{
"epoch": 0.45,
"learning_rate": 1.8059616964078443e-05,
"loss": 0.0789,
"step": 367
},
{
"epoch": 0.45,
"learning_rate": 1.804780819963666e-05,
"loss": 0.0861,
"step": 368
},
{
"epoch": 0.45,
"learning_rate": 1.80359674967451e-05,
"loss": 0.1033,
"step": 369
},
{
"epoch": 0.46,
"learning_rate": 1.802409490239466e-05,
"loss": 0.0928,
"step": 370
},
{
"epoch": 0.46,
"learning_rate": 1.80121904637028e-05,
"loss": 0.0718,
"step": 371
},
{
"epoch": 0.46,
"learning_rate": 1.8000254227913346e-05,
"loss": 0.0928,
"step": 372
},
{
"epoch": 0.46,
"learning_rate": 1.798828624239633e-05,
"loss": 0.0926,
"step": 373
},
{
"epoch": 0.46,
"learning_rate": 1.7976286554647773e-05,
"loss": 0.0819,
"step": 374
},
{
"epoch": 0.46,
"learning_rate": 1.7964255212289513e-05,
"loss": 0.0741,
"step": 375
},
{
"epoch": 0.46,
"learning_rate": 1.7952192263069018e-05,
"loss": 0.0729,
"step": 376
},
{
"epoch": 0.46,
"learning_rate": 1.7940097754859177e-05,
"loss": 0.0887,
"step": 377
},
{
"epoch": 0.46,
"learning_rate": 1.7927971735658143e-05,
"loss": 0.0862,
"step": 378
},
{
"epoch": 0.47,
"learning_rate": 1.791581425358911e-05,
"loss": 0.0792,
"step": 379
},
{
"epoch": 0.47,
"learning_rate": 1.790362535690013e-05,
"loss": 0.0837,
"step": 380
},
{
"epoch": 0.47,
"learning_rate": 1.789140509396394e-05,
"loss": 0.0854,
"step": 381
},
{
"epoch": 0.47,
"learning_rate": 1.787915351327775e-05,
"loss": 0.1124,
"step": 382
},
{
"epoch": 0.47,
"learning_rate": 1.7866870663463057e-05,
"loss": 0.102,
"step": 383
},
{
"epoch": 0.47,
"learning_rate": 1.785455659326546e-05,
"loss": 0.081,
"step": 384
},
{
"epoch": 0.47,
"learning_rate": 1.784221135155445e-05,
"loss": 0.0797,
"step": 385
},
{
"epoch": 0.47,
"learning_rate": 1.782983498732322e-05,
"loss": 0.0941,
"step": 386
},
{
"epoch": 0.48,
"learning_rate": 1.7817427549688493e-05,
"loss": 0.0719,
"step": 387
},
{
"epoch": 0.48,
"learning_rate": 1.78049890878903e-05,
"loss": 0.0892,
"step": 388
},
{
"epoch": 0.48,
"learning_rate": 1.7792519651291783e-05,
"loss": 0.0747,
"step": 389
},
{
"epoch": 0.48,
"learning_rate": 1.7780019289379033e-05,
"loss": 0.0829,
"step": 390
},
{
"epoch": 0.48,
"learning_rate": 1.7767488051760858e-05,
"loss": 0.0736,
"step": 391
},
{
"epoch": 0.48,
"learning_rate": 1.7754925988168592e-05,
"loss": 0.0901,
"step": 392
},
{
"epoch": 0.48,
"learning_rate": 1.7742333148455922e-05,
"loss": 0.0973,
"step": 393
},
{
"epoch": 0.48,
"learning_rate": 1.7729709582598655e-05,
"loss": 0.082,
"step": 394
},
{
"epoch": 0.49,
"learning_rate": 1.7717055340694555e-05,
"loss": 0.1004,
"step": 395
},
{
"epoch": 0.49,
"learning_rate": 1.7704370472963107e-05,
"loss": 0.0738,
"step": 396
},
{
"epoch": 0.49,
"learning_rate": 1.769165502974536e-05,
"loss": 0.0884,
"step": 397
},
{
"epoch": 0.49,
"learning_rate": 1.7678909061503685e-05,
"loss": 0.0656,
"step": 398
},
{
"epoch": 0.49,
"learning_rate": 1.7666132618821605e-05,
"loss": 0.0684,
"step": 399
},
{
"epoch": 0.49,
"learning_rate": 1.7653325752403576e-05,
"loss": 0.0775,
"step": 400
},
{
"epoch": 0.49,
"learning_rate": 1.7640488513074804e-05,
"loss": 0.0779,
"step": 401
},
{
"epoch": 0.49,
"learning_rate": 1.7627620951781024e-05,
"loss": 0.0899,
"step": 402
},
{
"epoch": 0.5,
"learning_rate": 1.7614723119588305e-05,
"loss": 0.063,
"step": 403
},
{
"epoch": 0.5,
"learning_rate": 1.760179506768286e-05,
"loss": 0.0573,
"step": 404
},
{
"epoch": 0.5,
"learning_rate": 1.7588836847370817e-05,
"loss": 0.0805,
"step": 405
},
{
"epoch": 0.5,
"learning_rate": 1.7575848510078047e-05,
"loss": 0.0758,
"step": 406
},
{
"epoch": 0.5,
"learning_rate": 1.7562830107349923e-05,
"loss": 0.1009,
"step": 407
},
{
"epoch": 0.5,
"learning_rate": 1.7549781690851148e-05,
"loss": 0.0785,
"step": 408
},
{
"epoch": 0.5,
"learning_rate": 1.753670331236554e-05,
"loss": 0.1014,
"step": 409
},
{
"epoch": 0.5,
"learning_rate": 1.7523595023795814e-05,
"loss": 0.0843,
"step": 410
},
{
"epoch": 0.51,
"learning_rate": 1.7510456877163394e-05,
"loss": 0.0962,
"step": 411
},
{
"epoch": 0.51,
"learning_rate": 1.7497288924608192e-05,
"loss": 0.0856,
"step": 412
},
{
"epoch": 0.51,
"learning_rate": 1.7484091218388414e-05,
"loss": 0.0955,
"step": 413
},
{
"epoch": 0.51,
"learning_rate": 1.7470863810880335e-05,
"loss": 0.09,
"step": 414
},
{
"epoch": 0.51,
"learning_rate": 1.7457606754578122e-05,
"loss": 0.1079,
"step": 415
},
{
"epoch": 0.51,
"learning_rate": 1.7444320102093586e-05,
"loss": 0.0975,
"step": 416
},
{
"epoch": 0.51,
"learning_rate": 1.7431003906156e-05,
"loss": 0.085,
"step": 417
},
{
"epoch": 0.51,
"learning_rate": 1.7417658219611892e-05,
"loss": 0.082,
"step": 418
},
{
"epoch": 0.52,
"learning_rate": 1.7404283095424803e-05,
"loss": 0.0768,
"step": 419
},
{
"epoch": 0.52,
"learning_rate": 1.7390878586675127e-05,
"loss": 0.0977,
"step": 420
},
{
"epoch": 0.52,
"learning_rate": 1.7377444746559865e-05,
"loss": 0.0636,
"step": 421
},
{
"epoch": 0.52,
"learning_rate": 1.7363981628392405e-05,
"loss": 0.0862,
"step": 422
},
{
"epoch": 0.52,
"learning_rate": 1.7350489285602345e-05,
"loss": 0.0921,
"step": 423
},
{
"epoch": 0.52,
"learning_rate": 1.7336967771735266e-05,
"loss": 0.0857,
"step": 424
},
{
"epoch": 0.52,
"learning_rate": 1.7323417140452507e-05,
"loss": 0.0764,
"step": 425
},
{
"epoch": 0.52,
"learning_rate": 1.7309837445530958e-05,
"loss": 0.1004,
"step": 426
},
{
"epoch": 0.53,
"learning_rate": 1.7296228740862872e-05,
"loss": 0.0869,
"step": 427
},
{
"epoch": 0.53,
"learning_rate": 1.7282591080455602e-05,
"loss": 0.085,
"step": 428
},
{
"epoch": 0.53,
"learning_rate": 1.7268924518431437e-05,
"loss": 0.0819,
"step": 429
},
{
"epoch": 0.53,
"learning_rate": 1.7255229109027357e-05,
"loss": 0.0714,
"step": 430
},
{
"epoch": 0.53,
"learning_rate": 1.724150490659482e-05,
"loss": 0.0966,
"step": 431
},
{
"epoch": 0.53,
"learning_rate": 1.7227751965599556e-05,
"loss": 0.0691,
"step": 432
},
{
"epoch": 0.53,
"learning_rate": 1.7213970340621346e-05,
"loss": 0.0778,
"step": 433
},
{
"epoch": 0.53,
"learning_rate": 1.7200160086353815e-05,
"loss": 0.0892,
"step": 434
},
{
"epoch": 0.54,
"learning_rate": 1.7186321257604186e-05,
"loss": 0.0899,
"step": 435
},
{
"epoch": 0.54,
"learning_rate": 1.7172453909293105e-05,
"loss": 0.0754,
"step": 436
},
{
"epoch": 0.54,
"learning_rate": 1.715855809645438e-05,
"loss": 0.1022,
"step": 437
},
{
"epoch": 0.54,
"learning_rate": 1.71446338742348e-05,
"loss": 0.0779,
"step": 438
},
{
"epoch": 0.54,
"learning_rate": 1.7130681297893884e-05,
"loss": 0.1035,
"step": 439
},
{
"epoch": 0.54,
"learning_rate": 1.7116700422803694e-05,
"loss": 0.0765,
"step": 440
},
{
"epoch": 0.54,
"learning_rate": 1.710269130444858e-05,
"loss": 0.0885,
"step": 441
},
{
"epoch": 0.54,
"learning_rate": 1.7088653998424995e-05,
"loss": 0.1034,
"step": 442
},
{
"epoch": 0.54,
"learning_rate": 1.707458856044124e-05,
"loss": 0.0789,
"step": 443
},
{
"epoch": 0.55,
"learning_rate": 1.7060495046317273e-05,
"loss": 0.1259,
"step": 444
},
{
"epoch": 0.55,
"learning_rate": 1.7046373511984477e-05,
"loss": 0.1019,
"step": 445
},
{
"epoch": 0.55,
"learning_rate": 1.7032224013485417e-05,
"loss": 0.0759,
"step": 446
},
{
"epoch": 0.55,
"learning_rate": 1.701804660697366e-05,
"loss": 0.0869,
"step": 447
},
{
"epoch": 0.55,
"learning_rate": 1.700384134871351e-05,
"loss": 0.0926,
"step": 448
},
{
"epoch": 0.55,
"learning_rate": 1.6989608295079817e-05,
"loss": 0.1235,
"step": 449
},
{
"epoch": 0.55,
"learning_rate": 1.6975347502557724e-05,
"loss": 0.0898,
"step": 450
},
{
"epoch": 0.55,
"learning_rate": 1.6961059027742474e-05,
"loss": 0.1056,
"step": 451
},
{
"epoch": 0.56,
"learning_rate": 1.6946742927339165e-05,
"loss": 0.108,
"step": 452
},
{
"epoch": 0.56,
"learning_rate": 1.6932399258162518e-05,
"loss": 0.0877,
"step": 453
},
{
"epoch": 0.56,
"learning_rate": 1.691802807713668e-05,
"loss": 0.0891,
"step": 454
},
{
"epoch": 0.56,
"learning_rate": 1.6903629441294978e-05,
"loss": 0.081,
"step": 455
},
{
"epoch": 0.56,
"learning_rate": 1.6889203407779678e-05,
"loss": 0.0975,
"step": 456
},
{
"epoch": 0.56,
"learning_rate": 1.6874750033841802e-05,
"loss": 0.0823,
"step": 457
},
{
"epoch": 0.56,
"learning_rate": 1.6860269376840856e-05,
"loss": 0.0913,
"step": 458
},
{
"epoch": 0.56,
"learning_rate": 1.6845761494244633e-05,
"loss": 0.0668,
"step": 459
},
{
"epoch": 0.57,
"learning_rate": 1.6831226443628966e-05,
"loss": 0.1065,
"step": 460
},
{
"epoch": 0.57,
"learning_rate": 1.681666428267751e-05,
"loss": 0.098,
"step": 461
},
{
"epoch": 0.57,
"learning_rate": 1.680207506918151e-05,
"loss": 0.0768,
"step": 462
},
{
"epoch": 0.57,
"learning_rate": 1.6787458861039562e-05,
"loss": 0.0893,
"step": 463
},
{
"epoch": 0.57,
"learning_rate": 1.6772815716257414e-05,
"loss": 0.0721,
"step": 464
},
{
"epoch": 0.57,
"learning_rate": 1.675814569294769e-05,
"loss": 0.0936,
"step": 465
},
{
"epoch": 0.57,
"learning_rate": 1.6743448849329702e-05,
"loss": 0.0724,
"step": 466
},
{
"epoch": 0.57,
"learning_rate": 1.672872524372919e-05,
"loss": 0.075,
"step": 467
},
{
"epoch": 0.58,
"learning_rate": 1.671397493457811e-05,
"loss": 0.0751,
"step": 468
},
{
"epoch": 0.58,
"learning_rate": 1.6699197980414385e-05,
"loss": 0.0825,
"step": 469
},
{
"epoch": 0.58,
"learning_rate": 1.6684394439881688e-05,
"loss": 0.0908,
"step": 470
},
{
"epoch": 0.58,
"learning_rate": 1.6669564371729196e-05,
"loss": 0.0822,
"step": 471
},
{
"epoch": 0.58,
"learning_rate": 1.665470783481137e-05,
"loss": 0.0824,
"step": 472
},
{
"epoch": 0.58,
"learning_rate": 1.663982488808771e-05,
"loss": 0.0953,
"step": 473
},
{
"epoch": 0.58,
"learning_rate": 1.6624915590622527e-05,
"loss": 0.0791,
"step": 474
},
{
"epoch": 0.58,
"learning_rate": 1.6609980001584708e-05,
"loss": 0.0733,
"step": 475
},
{
"epoch": 0.59,
"learning_rate": 1.6595018180247478e-05,
"loss": 0.1086,
"step": 476
},
{
"epoch": 0.59,
"learning_rate": 1.6580030185988167e-05,
"loss": 0.0836,
"step": 477
},
{
"epoch": 0.59,
"learning_rate": 1.6565016078287984e-05,
"loss": 0.0847,
"step": 478
},
{
"epoch": 0.59,
"learning_rate": 1.654997591673176e-05,
"loss": 0.0902,
"step": 479
},
{
"epoch": 0.59,
"learning_rate": 1.6534909761007723e-05,
"loss": 0.0702,
"step": 480
},
{
"epoch": 0.59,
"learning_rate": 1.651981767090727e-05,
"loss": 0.0858,
"step": 481
},
{
"epoch": 0.59,
"learning_rate": 1.6504699706324717e-05,
"loss": 0.0799,
"step": 482
},
{
"epoch": 0.59,
"learning_rate": 1.6489555927257063e-05,
"loss": 0.0693,
"step": 483
},
{
"epoch": 0.6,
"learning_rate": 1.647438639380375e-05,
"loss": 0.1123,
"step": 484
},
{
"epoch": 0.6,
"learning_rate": 1.645919116616645e-05,
"loss": 0.0936,
"step": 485
},
{
"epoch": 0.6,
"learning_rate": 1.644397030464877e-05,
"loss": 0.1063,
"step": 486
},
{
"epoch": 0.6,
"learning_rate": 1.642872386965608e-05,
"loss": 0.0695,
"step": 487
},
{
"epoch": 0.6,
"learning_rate": 1.641345192169522e-05,
"loss": 0.0672,
"step": 488
},
{
"epoch": 0.6,
"learning_rate": 1.639815452137429e-05,
"loss": 0.0824,
"step": 489
},
{
"epoch": 0.6,
"learning_rate": 1.6382831729402395e-05,
"loss": 0.0959,
"step": 490
},
{
"epoch": 0.6,
"learning_rate": 1.6367483606589413e-05,
"loss": 0.0889,
"step": 491
},
{
"epoch": 0.61,
"learning_rate": 1.6352110213845746e-05,
"loss": 0.0782,
"step": 492
},
{
"epoch": 0.61,
"learning_rate": 1.633671161218209e-05,
"loss": 0.0632,
"step": 493
},
{
"epoch": 0.61,
"learning_rate": 1.6321287862709173e-05,
"loss": 0.0774,
"step": 494
},
{
"epoch": 0.61,
"learning_rate": 1.6305839026637532e-05,
"loss": 0.1093,
"step": 495
},
{
"epoch": 0.61,
"learning_rate": 1.6290365165277263e-05,
"loss": 0.0845,
"step": 496
},
{
"epoch": 0.61,
"learning_rate": 1.627486634003777e-05,
"loss": 0.0668,
"step": 497
},
{
"epoch": 0.61,
"learning_rate": 1.625934261242754e-05,
"loss": 0.0862,
"step": 498
},
{
"epoch": 0.61,
"learning_rate": 1.624379404405387e-05,
"loss": 0.0817,
"step": 499
},
{
"epoch": 0.62,
"learning_rate": 1.622822069662266e-05,
"loss": 0.0754,
"step": 500
},
{
"epoch": 0.62,
"learning_rate": 1.6212622631938137e-05,
"loss": 0.0861,
"step": 501
},
{
"epoch": 0.62,
"learning_rate": 1.619699991190262e-05,
"loss": 0.0885,
"step": 502
},
{
"epoch": 0.62,
"learning_rate": 1.6181352598516275e-05,
"loss": 0.0726,
"step": 503
},
{
"epoch": 0.62,
"learning_rate": 1.6165680753876872e-05,
"loss": 0.0966,
"step": 504
},
{
"epoch": 0.62,
"learning_rate": 1.614998444017954e-05,
"loss": 0.0753,
"step": 505
},
{
"epoch": 0.62,
"learning_rate": 1.61342637197165e-05,
"loss": 0.0747,
"step": 506
},
{
"epoch": 0.62,
"learning_rate": 1.611851865487685e-05,
"loss": 0.09,
"step": 507
},
{
"epoch": 0.62,
"learning_rate": 1.6102749308146285e-05,
"loss": 0.0911,
"step": 508
},
{
"epoch": 0.63,
"learning_rate": 1.608695574210689e-05,
"loss": 0.084,
"step": 509
},
{
"epoch": 0.63,
"learning_rate": 1.607113801943684e-05,
"loss": 0.0721,
"step": 510
},
{
"epoch": 0.63,
"learning_rate": 1.605529620291019e-05,
"loss": 0.0927,
"step": 511
},
{
"epoch": 0.63,
"learning_rate": 1.603943035539661e-05,
"loss": 0.0732,
"step": 512
},
{
"epoch": 0.63,
"learning_rate": 1.6023540539861144e-05,
"loss": 0.0688,
"step": 513
},
{
"epoch": 0.63,
"learning_rate": 1.6007626819363955e-05,
"loss": 0.0683,
"step": 514
},
{
"epoch": 0.63,
"learning_rate": 1.5991689257060067e-05,
"loss": 0.0739,
"step": 515
},
{
"epoch": 0.63,
"learning_rate": 1.5975727916199128e-05,
"loss": 0.0953,
"step": 516
},
{
"epoch": 0.64,
"learning_rate": 1.5959742860125153e-05,
"loss": 0.0833,
"step": 517
},
{
"epoch": 0.64,
"learning_rate": 1.5943734152276277e-05,
"loss": 0.0616,
"step": 518
},
{
"epoch": 0.64,
"learning_rate": 1.5927701856184494e-05,
"loss": 0.0848,
"step": 519
},
{
"epoch": 0.64,
"learning_rate": 1.591164603547541e-05,
"loss": 0.0801,
"step": 520
},
{
"epoch": 0.64,
"learning_rate": 1.589556675386799e-05,
"loss": 0.0747,
"step": 521
},
{
"epoch": 0.64,
"learning_rate": 1.587946407517431e-05,
"loss": 0.0627,
"step": 522
},
{
"epoch": 0.64,
"learning_rate": 1.5863338063299296e-05,
"loss": 0.0657,
"step": 523
},
{
"epoch": 0.64,
"learning_rate": 1.5847188782240473e-05,
"loss": 0.0676,
"step": 524
},
{
"epoch": 0.65,
"learning_rate": 1.5831016296087714e-05,
"loss": 0.0775,
"step": 525
},
{
"epoch": 0.65,
"learning_rate": 1.5814820669022988e-05,
"loss": 0.0668,
"step": 526
},
{
"epoch": 0.65,
"learning_rate": 1.5798601965320096e-05,
"loss": 0.0826,
"step": 527
},
{
"epoch": 0.65,
"learning_rate": 1.578236024934441e-05,
"loss": 0.0708,
"step": 528
},
{
"epoch": 0.65,
"learning_rate": 1.5766095585552648e-05,
"loss": 0.0992,
"step": 529
},
{
"epoch": 0.65,
"learning_rate": 1.5749808038492587e-05,
"loss": 0.0591,
"step": 530
},
{
"epoch": 0.65,
"learning_rate": 1.573349767280282e-05,
"loss": 0.067,
"step": 531
},
{
"epoch": 0.65,
"learning_rate": 1.57171645532125e-05,
"loss": 0.074,
"step": 532
},
{
"epoch": 0.66,
"learning_rate": 1.570080874454108e-05,
"loss": 0.0922,
"step": 533
},
{
"epoch": 0.66,
"learning_rate": 1.568443031169805e-05,
"loss": 0.0903,
"step": 534
},
{
"epoch": 0.66,
"learning_rate": 1.5668029319682698e-05,
"loss": 0.0968,
"step": 535
},
{
"epoch": 0.66,
"learning_rate": 1.5651605833583834e-05,
"loss": 0.0839,
"step": 536
},
{
"epoch": 0.66,
"learning_rate": 1.5635159918579537e-05,
"loss": 0.0827,
"step": 537
},
{
"epoch": 0.66,
"learning_rate": 1.5618691639936897e-05,
"loss": 0.0935,
"step": 538
},
{
"epoch": 0.66,
"learning_rate": 1.5602201063011754e-05,
"loss": 0.0938,
"step": 539
},
{
"epoch": 0.66,
"learning_rate": 1.558568825324845e-05,
"loss": 0.0834,
"step": 540
},
{
"epoch": 0.67,
"learning_rate": 1.5569153276179546e-05,
"loss": 0.0829,
"step": 541
},
{
"epoch": 0.67,
"learning_rate": 1.5552596197425596e-05,
"loss": 0.0757,
"step": 542
},
{
"epoch": 0.67,
"learning_rate": 1.5536017082694846e-05,
"loss": 0.0796,
"step": 543
},
{
"epoch": 0.67,
"learning_rate": 1.5519415997783003e-05,
"loss": 0.0959,
"step": 544
},
{
"epoch": 0.67,
"learning_rate": 1.5502793008572964e-05,
"loss": 0.0686,
"step": 545
},
{
"epoch": 0.67,
"learning_rate": 1.5486148181034555e-05,
"loss": 0.0927,
"step": 546
},
{
"epoch": 0.67,
"learning_rate": 1.5469481581224274e-05,
"loss": 0.0837,
"step": 547
},
{
"epoch": 0.67,
"learning_rate": 1.5452793275285006e-05,
"loss": 0.0677,
"step": 548
},
{
"epoch": 0.68,
"learning_rate": 1.5436083329445807e-05,
"loss": 0.0806,
"step": 549
},
{
"epoch": 0.68,
"learning_rate": 1.541935181002159e-05,
"loss": 0.0696,
"step": 550
},
{
"epoch": 0.68,
"learning_rate": 1.5402598783412897e-05,
"loss": 0.0777,
"step": 551
},
{
"epoch": 0.68,
"learning_rate": 1.5385824316105615e-05,
"loss": 0.0859,
"step": 552
},
{
"epoch": 0.68,
"learning_rate": 1.536902847467073e-05,
"loss": 0.0651,
"step": 553
},
{
"epoch": 0.68,
"learning_rate": 1.5352211325764045e-05,
"loss": 0.0799,
"step": 554
},
{
"epoch": 0.68,
"learning_rate": 1.533537293612592e-05,
"loss": 0.0856,
"step": 555
},
{
"epoch": 0.68,
"learning_rate": 1.5318513372581026e-05,
"loss": 0.0809,
"step": 556
},
{
"epoch": 0.69,
"learning_rate": 1.5301632702038047e-05,
"loss": 0.092,
"step": 557
},
{
"epoch": 0.69,
"learning_rate": 1.5284730991489448e-05,
"loss": 0.0685,
"step": 558
},
{
"epoch": 0.69,
"learning_rate": 1.5267808308011183e-05,
"loss": 0.0849,
"step": 559
},
{
"epoch": 0.69,
"learning_rate": 1.525086471876244e-05,
"loss": 0.0819,
"step": 560
},
{
"epoch": 0.69,
"learning_rate": 1.5233900290985375e-05,
"loss": 0.0855,
"step": 561
},
{
"epoch": 0.69,
"learning_rate": 1.5216915092004847e-05,
"loss": 0.0922,
"step": 562
},
{
"epoch": 0.69,
"learning_rate": 1.5199909189228139e-05,
"loss": 0.0982,
"step": 563
},
{
"epoch": 0.69,
"learning_rate": 1.518288265014471e-05,
"loss": 0.0703,
"step": 564
},
{
"epoch": 0.69,
"learning_rate": 1.5165835542325901e-05,
"loss": 0.0814,
"step": 565
},
{
"epoch": 0.7,
"learning_rate": 1.5148767933424697e-05,
"loss": 0.075,
"step": 566
},
{
"epoch": 0.7,
"learning_rate": 1.513167989117544e-05,
"loss": 0.06,
"step": 567
},
{
"epoch": 0.7,
"learning_rate": 1.5114571483393554e-05,
"loss": 0.0829,
"step": 568
},
{
"epoch": 0.7,
"learning_rate": 1.5097442777975295e-05,
"loss": 0.0938,
"step": 569
},
{
"epoch": 0.7,
"learning_rate": 1.508029384289747e-05,
"loss": 0.0826,
"step": 570
},
{
"epoch": 0.7,
"learning_rate": 1.5063124746217165e-05,
"loss": 0.0596,
"step": 571
},
{
"epoch": 0.7,
"learning_rate": 1.5045935556071487e-05,
"loss": 0.0837,
"step": 572
},
{
"epoch": 0.7,
"learning_rate": 1.5028726340677278e-05,
"loss": 0.069,
"step": 573
},
{
"epoch": 0.71,
"learning_rate": 1.5011497168330853e-05,
"loss": 0.076,
"step": 574
},
{
"epoch": 0.71,
"learning_rate": 1.4994248107407736e-05,
"loss": 0.1051,
"step": 575
},
{
"epoch": 0.71,
"learning_rate": 1.4976979226362372e-05,
"loss": 0.0864,
"step": 576
},
{
"epoch": 0.71,
"learning_rate": 1.4959690593727867e-05,
"loss": 0.0729,
"step": 577
},
{
"epoch": 0.71,
"learning_rate": 1.4942382278115713e-05,
"loss": 0.0778,
"step": 578
},
{
"epoch": 0.71,
"learning_rate": 1.4925054348215514e-05,
"loss": 0.0801,
"step": 579
},
{
"epoch": 0.71,
"learning_rate": 1.4907706872794717e-05,
"loss": 0.0569,
"step": 580
},
{
"epoch": 0.71,
"learning_rate": 1.4890339920698334e-05,
"loss": 0.0748,
"step": 581
},
{
"epoch": 0.72,
"learning_rate": 1.4872953560848678e-05,
"loss": 0.0768,
"step": 582
},
{
"epoch": 0.72,
"learning_rate": 1.4855547862245082e-05,
"loss": 0.0943,
"step": 583
},
{
"epoch": 0.72,
"learning_rate": 1.4838122893963619e-05,
"loss": 0.0739,
"step": 584
},
{
"epoch": 0.72,
"learning_rate": 1.4820678725156844e-05,
"loss": 0.0809,
"step": 585
},
{
"epoch": 0.72,
"learning_rate": 1.4803215425053505e-05,
"loss": 0.086,
"step": 586
},
{
"epoch": 0.72,
"learning_rate": 1.4785733062958284e-05,
"loss": 0.0972,
"step": 587
},
{
"epoch": 0.72,
"learning_rate": 1.4768231708251497e-05,
"loss": 0.0846,
"step": 588
},
{
"epoch": 0.72,
"learning_rate": 1.4750711430388846e-05,
"loss": 0.0782,
"step": 589
},
{
"epoch": 0.73,
"learning_rate": 1.473317229890113e-05,
"loss": 0.0685,
"step": 590
},
{
"epoch": 0.73,
"learning_rate": 1.4715614383393963e-05,
"loss": 0.0742,
"step": 591
},
{
"epoch": 0.73,
"learning_rate": 1.4698037753547513e-05,
"loss": 0.077,
"step": 592
},
{
"epoch": 0.73,
"learning_rate": 1.4680442479116215e-05,
"loss": 0.0718,
"step": 593
},
{
"epoch": 0.73,
"learning_rate": 1.4662828629928496e-05,
"loss": 0.0983,
"step": 594
},
{
"epoch": 0.73,
"learning_rate": 1.4645196275886497e-05,
"loss": 0.0732,
"step": 595
},
{
"epoch": 0.73,
"learning_rate": 1.4627545486965799e-05,
"loss": 0.0865,
"step": 596
},
{
"epoch": 0.73,
"learning_rate": 1.4609876333215143e-05,
"loss": 0.0994,
"step": 597
},
{
"epoch": 0.74,
"learning_rate": 1.4592188884756155e-05,
"loss": 0.0811,
"step": 598
},
{
"epoch": 0.74,
"learning_rate": 1.4574483211783061e-05,
"loss": 0.0928,
"step": 599
},
{
"epoch": 0.74,
"learning_rate": 1.4556759384562418e-05,
"loss": 0.1023,
"step": 600
},
{
"epoch": 0.74,
"learning_rate": 1.4539017473432822e-05,
"loss": 0.0704,
"step": 601
},
{
"epoch": 0.74,
"learning_rate": 1.4521257548804644e-05,
"loss": 0.0734,
"step": 602
},
{
"epoch": 0.74,
"learning_rate": 1.450347968115974e-05,
"loss": 0.0878,
"step": 603
},
{
"epoch": 0.74,
"learning_rate": 1.4485683941051174e-05,
"loss": 0.0966,
"step": 604
},
{
"epoch": 0.74,
"learning_rate": 1.446787039910294e-05,
"loss": 0.0792,
"step": 605
},
{
"epoch": 0.75,
"learning_rate": 1.4450039126009679e-05,
"loss": 0.086,
"step": 606
},
{
"epoch": 0.75,
"learning_rate": 1.4432190192536398e-05,
"loss": 0.0756,
"step": 607
},
{
"epoch": 0.75,
"learning_rate": 1.4414323669518194e-05,
"loss": 0.072,
"step": 608
},
{
"epoch": 0.75,
"learning_rate": 1.439643962785997e-05,
"loss": 0.0839,
"step": 609
},
{
"epoch": 0.75,
"learning_rate": 1.4378538138536154e-05,
"loss": 0.0833,
"step": 610
},
{
"epoch": 0.75,
"learning_rate": 1.4360619272590413e-05,
"loss": 0.077,
"step": 611
},
{
"epoch": 0.75,
"learning_rate": 1.434268310113537e-05,
"loss": 0.082,
"step": 612
},
{
"epoch": 0.75,
"learning_rate": 1.4324729695352338e-05,
"loss": 0.0713,
"step": 613
},
{
"epoch": 0.76,
"learning_rate": 1.4306759126491021e-05,
"loss": 0.0855,
"step": 614
},
{
"epoch": 0.76,
"learning_rate": 1.4288771465869236e-05,
"loss": 0.0626,
"step": 615
},
{
"epoch": 0.76,
"learning_rate": 1.4270766784872627e-05,
"loss": 0.0873,
"step": 616
},
{
"epoch": 0.76,
"learning_rate": 1.4252745154954393e-05,
"loss": 0.0771,
"step": 617
},
{
"epoch": 0.76,
"learning_rate": 1.4234706647634982e-05,
"loss": 0.0602,
"step": 618
},
{
"epoch": 0.76,
"learning_rate": 1.421665133450184e-05,
"loss": 0.0777,
"step": 619
},
{
"epoch": 0.76,
"learning_rate": 1.4198579287209098e-05,
"loss": 0.0768,
"step": 620
},
{
"epoch": 0.76,
"learning_rate": 1.4180490577477295e-05,
"loss": 0.0785,
"step": 621
},
{
"epoch": 0.77,
"learning_rate": 1.4162385277093102e-05,
"loss": 0.077,
"step": 622
},
{
"epoch": 0.77,
"learning_rate": 1.4144263457909028e-05,
"loss": 0.0874,
"step": 623
},
{
"epoch": 0.77,
"learning_rate": 1.4126125191843146e-05,
"loss": 0.0868,
"step": 624
},
{
"epoch": 0.77,
"learning_rate": 1.410797055087879e-05,
"loss": 0.1035,
"step": 625
},
{
"epoch": 0.77,
"learning_rate": 1.4089799607064282e-05,
"loss": 0.091,
"step": 626
},
{
"epoch": 0.77,
"learning_rate": 1.4071612432512652e-05,
"loss": 0.0817,
"step": 627
},
{
"epoch": 0.77,
"learning_rate": 1.4053409099401325e-05,
"loss": 0.0782,
"step": 628
},
{
"epoch": 0.77,
"learning_rate": 1.4035189679971874e-05,
"loss": 0.0683,
"step": 629
},
{
"epoch": 0.77,
"learning_rate": 1.4016954246529697e-05,
"loss": 0.0681,
"step": 630
},
{
"epoch": 0.78,
"learning_rate": 1.399870287144375e-05,
"loss": 0.0854,
"step": 631
},
{
"epoch": 0.78,
"learning_rate": 1.3980435627146252e-05,
"loss": 0.0875,
"step": 632
},
{
"epoch": 0.78,
"learning_rate": 1.3962152586132409e-05,
"loss": 0.0905,
"step": 633
},
{
"epoch": 0.78,
"learning_rate": 1.3943853820960104e-05,
"loss": 0.0985,
"step": 634
},
{
"epoch": 0.78,
"learning_rate": 1.3925539404249638e-05,
"loss": 0.085,
"step": 635
},
{
"epoch": 0.78,
"learning_rate": 1.3907209408683415e-05,
"loss": 0.0953,
"step": 636
},
{
"epoch": 0.78,
"learning_rate": 1.3888863907005669e-05,
"loss": 0.0796,
"step": 637
},
{
"epoch": 0.78,
"learning_rate": 1.3870502972022175e-05,
"loss": 0.075,
"step": 638
},
{
"epoch": 0.79,
"learning_rate": 1.3852126676599944e-05,
"loss": 0.0712,
"step": 639
},
{
"epoch": 0.79,
"learning_rate": 1.3833735093666962e-05,
"loss": 0.0847,
"step": 640
},
{
"epoch": 0.79,
"learning_rate": 1.3815328296211878e-05,
"loss": 0.0783,
"step": 641
},
{
"epoch": 0.79,
"learning_rate": 1.3796906357283723e-05,
"loss": 0.0901,
"step": 642
},
{
"epoch": 0.79,
"learning_rate": 1.3778469349991612e-05,
"loss": 0.097,
"step": 643
},
{
"epoch": 0.79,
"learning_rate": 1.3760017347504462e-05,
"loss": 0.0814,
"step": 644
},
{
"epoch": 0.79,
"learning_rate": 1.3741550423050711e-05,
"loss": 0.072,
"step": 645
},
{
"epoch": 0.79,
"learning_rate": 1.3723068649918e-05,
"loss": 0.0859,
"step": 646
},
{
"epoch": 0.8,
"learning_rate": 1.3704572101452911e-05,
"loss": 0.0704,
"step": 647
},
{
"epoch": 0.8,
"learning_rate": 1.3686060851060657e-05,
"loss": 0.0822,
"step": 648
},
{
"epoch": 0.8,
"learning_rate": 1.3667534972204795e-05,
"loss": 0.086,
"step": 649
},
{
"epoch": 0.8,
"learning_rate": 1.364899453840694e-05,
"loss": 0.0628,
"step": 650
},
{
"epoch": 0.8,
"learning_rate": 1.3630439623246474e-05,
"loss": 0.0668,
"step": 651
},
{
"epoch": 0.8,
"learning_rate": 1.361187030036024e-05,
"loss": 0.0688,
"step": 652
},
{
"epoch": 0.8,
"learning_rate": 1.3593286643442265e-05,
"loss": 0.0773,
"step": 653
},
{
"epoch": 0.8,
"learning_rate": 1.3574688726243462e-05,
"loss": 0.0883,
"step": 654
},
{
"epoch": 0.81,
"learning_rate": 1.3556076622571332e-05,
"loss": 0.0902,
"step": 655
},
{
"epoch": 0.81,
"learning_rate": 1.3537450406289686e-05,
"loss": 0.0685,
"step": 656
},
{
"epoch": 0.81,
"learning_rate": 1.351881015131833e-05,
"loss": 0.0714,
"step": 657
},
{
"epoch": 0.81,
"learning_rate": 1.3500155931632799e-05,
"loss": 0.1048,
"step": 658
},
{
"epoch": 0.81,
"learning_rate": 1.3481487821264034e-05,
"loss": 0.0834,
"step": 659
},
{
"epoch": 0.81,
"learning_rate": 1.3462805894298105e-05,
"loss": 0.0802,
"step": 660
},
{
"epoch": 0.81,
"learning_rate": 1.3444110224875926e-05,
"loss": 0.1218,
"step": 661
},
{
"epoch": 0.81,
"learning_rate": 1.3425400887192934e-05,
"loss": 0.0936,
"step": 662
},
{
"epoch": 0.82,
"learning_rate": 1.3406677955498817e-05,
"loss": 0.0942,
"step": 663
},
{
"epoch": 0.82,
"learning_rate": 1.3387941504097213e-05,
"loss": 0.0864,
"step": 664
},
{
"epoch": 0.82,
"learning_rate": 1.336919160734541e-05,
"loss": 0.0802,
"step": 665
},
{
"epoch": 0.82,
"learning_rate": 1.3350428339654057e-05,
"loss": 0.0757,
"step": 666
},
{
"epoch": 0.82,
"learning_rate": 1.3331651775486874e-05,
"loss": 0.0853,
"step": 667
},
{
"epoch": 0.82,
"learning_rate": 1.3312861989360336e-05,
"loss": 0.072,
"step": 668
},
{
"epoch": 0.82,
"learning_rate": 1.32940590558434e-05,
"loss": 0.0948,
"step": 669
},
{
"epoch": 0.82,
"learning_rate": 1.3275243049557195e-05,
"loss": 0.0751,
"step": 670
},
{
"epoch": 0.83,
"learning_rate": 1.3256414045174734e-05,
"loss": 0.0796,
"step": 671
},
{
"epoch": 0.83,
"learning_rate": 1.3237572117420612e-05,
"loss": 0.0924,
"step": 672
},
{
"epoch": 0.83,
"learning_rate": 1.3218717341070709e-05,
"loss": 0.0884,
"step": 673
},
{
"epoch": 0.83,
"learning_rate": 1.3199849790951901e-05,
"loss": 0.0778,
"step": 674
},
{
"epoch": 0.83,
"learning_rate": 1.318096954194176e-05,
"loss": 0.1088,
"step": 675
},
{
"epoch": 0.83,
"learning_rate": 1.316207666896824e-05,
"loss": 0.0738,
"step": 676
},
{
"epoch": 0.83,
"learning_rate": 1.3143171247009414e-05,
"loss": 0.0818,
"step": 677
},
{
"epoch": 0.83,
"learning_rate": 1.3124253351093141e-05,
"loss": 0.0872,
"step": 678
},
{
"epoch": 0.84,
"learning_rate": 1.3105323056296797e-05,
"loss": 0.0835,
"step": 679
},
{
"epoch": 0.84,
"learning_rate": 1.3086380437746948e-05,
"loss": 0.0842,
"step": 680
},
{
"epoch": 0.84,
"learning_rate": 1.3067425570619083e-05,
"loss": 0.0822,
"step": 681
},
{
"epoch": 0.84,
"learning_rate": 1.3048458530137299e-05,
"loss": 0.1031,
"step": 682
},
{
"epoch": 0.84,
"learning_rate": 1.3029479391573991e-05,
"loss": 0.0943,
"step": 683
},
{
"epoch": 0.84,
"learning_rate": 1.3010488230249583e-05,
"loss": 0.0728,
"step": 684
},
{
"epoch": 0.84,
"learning_rate": 1.29914851215322e-05,
"loss": 0.0755,
"step": 685
},
{
"epoch": 0.84,
"learning_rate": 1.2972470140837386e-05,
"loss": 0.0753,
"step": 686
},
{
"epoch": 0.85,
"learning_rate": 1.2953443363627805e-05,
"loss": 0.0835,
"step": 687
},
{
"epoch": 0.85,
"learning_rate": 1.2934404865412924e-05,
"loss": 0.067,
"step": 688
},
{
"epoch": 0.85,
"learning_rate": 1.2915354721748739e-05,
"loss": 0.0862,
"step": 689
},
{
"epoch": 0.85,
"learning_rate": 1.2896293008237454e-05,
"loss": 0.0893,
"step": 690
},
{
"epoch": 0.85,
"learning_rate": 1.2877219800527194e-05,
"loss": 0.075,
"step": 691
},
{
"epoch": 0.85,
"learning_rate": 1.2858135174311692e-05,
"loss": 0.0863,
"step": 692
},
{
"epoch": 0.85,
"learning_rate": 1.283903920533001e-05,
"loss": 0.0872,
"step": 693
},
{
"epoch": 0.85,
"learning_rate": 1.2819931969366208e-05,
"loss": 0.0775,
"step": 694
},
{
"epoch": 0.85,
"learning_rate": 1.2800813542249073e-05,
"loss": 0.079,
"step": 695
},
{
"epoch": 0.86,
"learning_rate": 1.2781683999851794e-05,
"loss": 0.0773,
"step": 696
},
{
"epoch": 0.86,
"learning_rate": 1.2762543418091688e-05,
"loss": 0.0782,
"step": 697
},
{
"epoch": 0.86,
"learning_rate": 1.2743391872929864e-05,
"loss": 0.1129,
"step": 698
},
{
"epoch": 0.86,
"learning_rate": 1.2724229440370959e-05,
"loss": 0.0717,
"step": 699
},
{
"epoch": 0.86,
"learning_rate": 1.2705056196462801e-05,
"loss": 0.0704,
"step": 700
},
{
"epoch": 0.86,
"learning_rate": 1.2685872217296131e-05,
"loss": 0.0724,
"step": 701
},
{
"epoch": 0.86,
"learning_rate": 1.2666677579004296e-05,
"loss": 0.0721,
"step": 702
},
{
"epoch": 0.86,
"learning_rate": 1.264747235776294e-05,
"loss": 0.1017,
"step": 703
},
{
"epoch": 0.87,
"learning_rate": 1.2628256629789712e-05,
"loss": 0.0685,
"step": 704
},
{
"epoch": 0.87,
"learning_rate": 1.2609030471343953e-05,
"loss": 0.0841,
"step": 705
},
{
"epoch": 0.87,
"learning_rate": 1.2589793958726398e-05,
"loss": 0.0722,
"step": 706
},
{
"epoch": 0.87,
"learning_rate": 1.2570547168278875e-05,
"loss": 0.0823,
"step": 707
},
{
"epoch": 0.87,
"learning_rate": 1.2551290176384005e-05,
"loss": 0.0836,
"step": 708
},
{
"epoch": 0.87,
"learning_rate": 1.253202305946489e-05,
"loss": 0.0731,
"step": 709
},
{
"epoch": 0.87,
"learning_rate": 1.251274589398481e-05,
"loss": 0.0535,
"step": 710
},
{
"epoch": 0.87,
"learning_rate": 1.249345875644693e-05,
"loss": 0.0787,
"step": 711
},
{
"epoch": 0.88,
"learning_rate": 1.2474161723393987e-05,
"loss": 0.0844,
"step": 712
},
{
"epoch": 0.88,
"learning_rate": 1.2454854871407993e-05,
"loss": 0.077,
"step": 713
},
{
"epoch": 0.88,
"learning_rate": 1.2435538277109919e-05,
"loss": 0.0858,
"step": 714
},
{
"epoch": 0.88,
"learning_rate": 1.2416212017159413e-05,
"loss": 0.0847,
"step": 715
},
{
"epoch": 0.88,
"learning_rate": 1.2396876168254468e-05,
"loss": 0.0807,
"step": 716
},
{
"epoch": 0.88,
"learning_rate": 1.2377530807131138e-05,
"loss": 0.0827,
"step": 717
},
{
"epoch": 0.88,
"learning_rate": 1.2358176010563223e-05,
"loss": 0.071,
"step": 718
},
{
"epoch": 0.88,
"learning_rate": 1.2338811855361978e-05,
"loss": 0.0819,
"step": 719
},
{
"epoch": 0.89,
"learning_rate": 1.2319438418375792e-05,
"loss": 0.0788,
"step": 720
},
{
"epoch": 0.89,
"learning_rate": 1.2300055776489883e-05,
"loss": 0.0839,
"step": 721
},
{
"epoch": 0.89,
"learning_rate": 1.2280664006626012e-05,
"loss": 0.0717,
"step": 722
},
{
"epoch": 0.89,
"learning_rate": 1.226126318574216e-05,
"loss": 0.0718,
"step": 723
},
{
"epoch": 0.89,
"learning_rate": 1.2241853390832229e-05,
"loss": 0.085,
"step": 724
},
{
"epoch": 0.89,
"learning_rate": 1.2222434698925728e-05,
"loss": 0.0916,
"step": 725
},
{
"epoch": 0.89,
"learning_rate": 1.2203007187087485e-05,
"loss": 0.0892,
"step": 726
},
{
"epoch": 0.89,
"learning_rate": 1.2183570932417324e-05,
"loss": 0.0726,
"step": 727
},
{
"epoch": 0.9,
"learning_rate": 1.2164126012049766e-05,
"loss": 0.0924,
"step": 728
},
{
"epoch": 0.9,
"learning_rate": 1.2144672503153728e-05,
"loss": 0.0666,
"step": 729
},
{
"epoch": 0.9,
"learning_rate": 1.2125210482932204e-05,
"loss": 0.0716,
"step": 730
},
{
"epoch": 0.9,
"learning_rate": 1.2105740028621971e-05,
"loss": 0.0593,
"step": 731
},
{
"epoch": 0.9,
"learning_rate": 1.2086261217493276e-05,
"loss": 0.0822,
"step": 732
},
{
"epoch": 0.9,
"learning_rate": 1.206677412684953e-05,
"loss": 0.1001,
"step": 733
},
{
"epoch": 0.9,
"learning_rate": 1.2047278834027006e-05,
"loss": 0.0839,
"step": 734
},
{
"epoch": 0.9,
"learning_rate": 1.2027775416394523e-05,
"loss": 0.0941,
"step": 735
},
{
"epoch": 0.91,
"learning_rate": 1.2008263951353143e-05,
"loss": 0.0696,
"step": 736
},
{
"epoch": 0.91,
"learning_rate": 1.198874451633587e-05,
"loss": 0.0889,
"step": 737
},
{
"epoch": 0.91,
"learning_rate": 1.1969217188807334e-05,
"loss": 0.0728,
"step": 738
},
{
"epoch": 0.91,
"learning_rate": 1.194968204626349e-05,
"loss": 0.0891,
"step": 739
},
{
"epoch": 0.91,
"learning_rate": 1.193013916623131e-05,
"loss": 0.0585,
"step": 740
},
{
"epoch": 0.91,
"learning_rate": 1.191058862626846e-05,
"loss": 0.0811,
"step": 741
},
{
"epoch": 0.91,
"learning_rate": 1.1891030503963022e-05,
"loss": 0.0806,
"step": 742
},
{
"epoch": 0.91,
"learning_rate": 1.1871464876933156e-05,
"loss": 0.0866,
"step": 743
},
{
"epoch": 0.92,
"learning_rate": 1.185189182282681e-05,
"loss": 0.0919,
"step": 744
},
{
"epoch": 0.92,
"learning_rate": 1.1832311419321414e-05,
"loss": 0.0688,
"step": 745
},
{
"epoch": 0.92,
"learning_rate": 1.1812723744123553e-05,
"loss": 0.0819,
"step": 746
},
{
"epoch": 0.92,
"learning_rate": 1.1793128874968675e-05,
"loss": 0.0857,
"step": 747
},
{
"epoch": 0.92,
"learning_rate": 1.1773526889620782e-05,
"loss": 0.0874,
"step": 748
},
{
"epoch": 0.92,
"learning_rate": 1.1753917865872111e-05,
"loss": 0.0915,
"step": 749
},
{
"epoch": 0.92,
"learning_rate": 1.1734301881542836e-05,
"loss": 0.0774,
"step": 750
},
{
"epoch": 0.92,
"learning_rate": 1.1714679014480752e-05,
"loss": 0.0598,
"step": 751
},
{
"epoch": 0.92,
"learning_rate": 1.1695049342560969e-05,
"loss": 0.0685,
"step": 752
},
{
"epoch": 0.93,
"learning_rate": 1.1675412943685605e-05,
"loss": 0.0818,
"step": 753
},
{
"epoch": 0.93,
"learning_rate": 1.1655769895783469e-05,
"loss": 0.0755,
"step": 754
},
{
"epoch": 0.93,
"learning_rate": 1.1636120276809764e-05,
"loss": 0.0809,
"step": 755
},
{
"epoch": 0.93,
"learning_rate": 1.1616464164745768e-05,
"loss": 0.0835,
"step": 756
},
{
"epoch": 0.93,
"learning_rate": 1.1596801637598532e-05,
"loss": 0.0994,
"step": 757
},
{
"epoch": 0.93,
"learning_rate": 1.1577132773400552e-05,
"loss": 0.0767,
"step": 758
},
{
"epoch": 0.93,
"learning_rate": 1.155745765020949e-05,
"loss": 0.0808,
"step": 759
},
{
"epoch": 0.93,
"learning_rate": 1.1537776346107834e-05,
"loss": 0.0757,
"step": 760
},
{
"epoch": 0.94,
"learning_rate": 1.1518088939202613e-05,
"loss": 0.0901,
"step": 761
},
{
"epoch": 0.94,
"learning_rate": 1.1498395507625066e-05,
"loss": 0.0935,
"step": 762
},
{
"epoch": 0.94,
"learning_rate": 1.1478696129530347e-05,
"loss": 0.0842,
"step": 763
},
{
"epoch": 0.94,
"learning_rate": 1.1458990883097205e-05,
"loss": 0.0769,
"step": 764
},
{
"epoch": 0.94,
"learning_rate": 1.1439279846527682e-05,
"loss": 0.0823,
"step": 765
},
{
"epoch": 0.94,
"learning_rate": 1.14195630980468e-05,
"loss": 0.0793,
"step": 766
},
{
"epoch": 0.94,
"learning_rate": 1.1399840715902243e-05,
"loss": 0.0738,
"step": 767
},
{
"epoch": 0.94,
"learning_rate": 1.1380112778364058e-05,
"loss": 0.0834,
"step": 768
},
{
"epoch": 0.95,
"learning_rate": 1.1360379363724338e-05,
"loss": 0.0816,
"step": 769
},
{
"epoch": 0.95,
"learning_rate": 1.1340640550296906e-05,
"loss": 0.0762,
"step": 770
},
{
"epoch": 0.95,
"learning_rate": 1.1320896416417026e-05,
"loss": 0.1129,
"step": 771
},
{
"epoch": 0.95,
"learning_rate": 1.1301147040441056e-05,
"loss": 0.0711,
"step": 772
},
{
"epoch": 0.95,
"learning_rate": 1.1281392500746178e-05,
"loss": 0.0804,
"step": 773
},
{
"epoch": 0.95,
"learning_rate": 1.1261632875730052e-05,
"loss": 0.0819,
"step": 774
},
{
"epoch": 0.95,
"learning_rate": 1.1241868243810525e-05,
"loss": 0.0673,
"step": 775
},
{
"epoch": 0.95,
"learning_rate": 1.122209868342532e-05,
"loss": 0.0754,
"step": 776
},
{
"epoch": 0.96,
"learning_rate": 1.1202324273031706e-05,
"loss": 0.0901,
"step": 777
},
{
"epoch": 0.96,
"learning_rate": 1.118254509110621e-05,
"loss": 0.1003,
"step": 778
},
{
"epoch": 0.96,
"learning_rate": 1.1162761216144293e-05,
"loss": 0.0957,
"step": 779
},
{
"epoch": 0.96,
"learning_rate": 1.1142972726660038e-05,
"loss": 0.0937,
"step": 780
},
{
"epoch": 0.96,
"learning_rate": 1.112317970118584e-05,
"loss": 0.0779,
"step": 781
},
{
"epoch": 0.96,
"learning_rate": 1.1103382218272107e-05,
"loss": 0.0804,
"step": 782
},
{
"epoch": 0.96,
"learning_rate": 1.1083580356486925e-05,
"loss": 0.0921,
"step": 783
},
{
"epoch": 0.96,
"learning_rate": 1.1063774194415762e-05,
"loss": 0.0757,
"step": 784
},
{
"epoch": 0.97,
"learning_rate": 1.1043963810661144e-05,
"loss": 0.0827,
"step": 785
},
{
"epoch": 0.97,
"learning_rate": 1.102414928384237e-05,
"loss": 0.0916,
"step": 786
},
{
"epoch": 0.97,
"learning_rate": 1.1004330692595159e-05,
"loss": 0.0871,
"step": 787
},
{
"epoch": 0.97,
"learning_rate": 1.0984508115571377e-05,
"loss": 0.0871,
"step": 788
},
{
"epoch": 0.97,
"learning_rate": 1.0964681631438703e-05,
"loss": 0.0848,
"step": 789
},
{
"epoch": 0.97,
"learning_rate": 1.0944851318880314e-05,
"loss": 0.0828,
"step": 790
},
{
"epoch": 0.97,
"learning_rate": 1.092501725659459e-05,
"loss": 0.0935,
"step": 791
},
{
"epoch": 0.97,
"learning_rate": 1.0905179523294791e-05,
"loss": 0.0785,
"step": 792
},
{
"epoch": 0.98,
"learning_rate": 1.0885338197708741e-05,
"loss": 0.0846,
"step": 793
},
{
"epoch": 0.98,
"learning_rate": 1.0865493358578525e-05,
"loss": 0.0601,
"step": 794
},
{
"epoch": 0.98,
"learning_rate": 1.0845645084660169e-05,
"loss": 0.062,
"step": 795
},
{
"epoch": 0.98,
"learning_rate": 1.0825793454723325e-05,
"loss": 0.0764,
"step": 796
},
{
"epoch": 0.98,
"learning_rate": 1.0805938547550976e-05,
"loss": 0.0867,
"step": 797
},
{
"epoch": 0.98,
"learning_rate": 1.0786080441939107e-05,
"loss": 0.0812,
"step": 798
},
{
"epoch": 0.98,
"learning_rate": 1.076621921669639e-05,
"loss": 0.081,
"step": 799
},
{
"epoch": 0.98,
"learning_rate": 1.0746354950643883e-05,
"loss": 0.0605,
"step": 800
},
{
"epoch": 0.99,
"learning_rate": 1.0726487722614704e-05,
"loss": 0.0871,
"step": 801
},
{
"epoch": 0.99,
"learning_rate": 1.0706617611453745e-05,
"loss": 0.0757,
"step": 802
},
{
"epoch": 0.99,
"learning_rate": 1.0686744696017314e-05,
"loss": 0.0721,
"step": 803
},
{
"epoch": 0.99,
"learning_rate": 1.066686905517287e-05,
"loss": 0.0715,
"step": 804
},
{
"epoch": 0.99,
"learning_rate": 1.0646990767798672e-05,
"loss": 0.0887,
"step": 805
},
{
"epoch": 0.99,
"learning_rate": 1.0627109912783497e-05,
"loss": 0.0834,
"step": 806
},
{
"epoch": 0.99,
"learning_rate": 1.0607226569026297e-05,
"loss": 0.0783,
"step": 807
},
{
"epoch": 0.99,
"learning_rate": 1.0587340815435913e-05,
"loss": 0.0661,
"step": 808
},
{
"epoch": 1.0,
"learning_rate": 1.0567452730930743e-05,
"loss": 0.0891,
"step": 809
},
{
"epoch": 1.0,
"learning_rate": 1.0547562394438434e-05,
"loss": 0.0586,
"step": 810
},
{
"epoch": 1.0,
"learning_rate": 1.0527669884895573e-05,
"loss": 0.083,
"step": 811
},
{
"epoch": 1.0,
"learning_rate": 1.0507775281247376e-05,
"loss": 0.0794,
"step": 812
},
{
"epoch": 1.0,
"learning_rate": 1.0487878662447361e-05,
"loss": 0.0768,
"step": 813
},
{
"epoch": 1.0,
"learning_rate": 1.046798010745705e-05,
"loss": 0.0605,
"step": 814
},
{
"epoch": 1.0,
"learning_rate": 1.0448079695245643e-05,
"loss": 0.0593,
"step": 815
},
{
"epoch": 1.0,
"learning_rate": 1.0428177504789713e-05,
"loss": 0.0526,
"step": 816
},
{
"epoch": 1.0,
"learning_rate": 1.0408273615072893e-05,
"loss": 0.063,
"step": 817
},
{
"epoch": 1.01,
"learning_rate": 1.0388368105085558e-05,
"loss": 0.0728,
"step": 818
},
{
"epoch": 1.01,
"learning_rate": 1.036846105382451e-05,
"loss": 0.0747,
"step": 819
},
{
"epoch": 1.01,
"learning_rate": 1.034855254029267e-05,
"loss": 0.0661,
"step": 820
},
{
"epoch": 1.01,
"learning_rate": 1.0328642643498763e-05,
"loss": 0.0829,
"step": 821
},
{
"epoch": 1.01,
"learning_rate": 1.0308731442457004e-05,
"loss": 0.0784,
"step": 822
},
{
"epoch": 1.01,
"learning_rate": 1.0288819016186782e-05,
"loss": 0.0592,
"step": 823
},
{
"epoch": 1.01,
"learning_rate": 1.0268905443712351e-05,
"loss": 0.0831,
"step": 824
},
{
"epoch": 1.01,
"learning_rate": 1.024899080406251e-05,
"loss": 0.0753,
"step": 825
},
{
"epoch": 1.02,
"learning_rate": 1.0229075176270297e-05,
"loss": 0.0849,
"step": 826
},
{
"epoch": 1.02,
"learning_rate": 1.020915863937267e-05,
"loss": 0.0646,
"step": 827
},
{
"epoch": 1.02,
"learning_rate": 1.0189241272410191e-05,
"loss": 0.0718,
"step": 828
},
{
"epoch": 1.02,
"learning_rate": 1.0169323154426726e-05,
"loss": 0.0677,
"step": 829
},
{
"epoch": 1.02,
"learning_rate": 1.0149404364469108e-05,
"loss": 0.0726,
"step": 830
},
{
"epoch": 1.02,
"learning_rate": 1.0129484981586853e-05,
"loss": 0.069,
"step": 831
},
{
"epoch": 1.02,
"learning_rate": 1.0109565084831817e-05,
"loss": 0.067,
"step": 832
},
{
"epoch": 1.02,
"learning_rate": 1.0089644753257897e-05,
"loss": 0.0669,
"step": 833
},
{
"epoch": 1.03,
"learning_rate": 1.006972406592072e-05,
"loss": 0.077,
"step": 834
},
{
"epoch": 1.03,
"learning_rate": 1.004980310187733e-05,
"loss": 0.0579,
"step": 835
},
{
"epoch": 1.03,
"learning_rate": 1.0029881940185851e-05,
"loss": 0.051,
"step": 836
},
{
"epoch": 1.03,
"learning_rate": 1.0009960659905212e-05,
"loss": 0.0714,
"step": 837
},
{
"epoch": 1.03,
"learning_rate": 9.990039340094793e-06,
"loss": 0.0589,
"step": 838
},
{
"epoch": 1.03,
"learning_rate": 9.97011805981415e-06,
"loss": 0.0613,
"step": 839
},
{
"epoch": 1.03,
"learning_rate": 9.950196898122677e-06,
"loss": 0.0489,
"step": 840
},
{
"epoch": 1.03,
"learning_rate": 9.930275934079281e-06,
"loss": 0.0657,
"step": 841
},
{
"epoch": 1.04,
"learning_rate": 9.910355246742105e-06,
"loss": 0.0698,
"step": 842
},
{
"epoch": 1.04,
"learning_rate": 9.890434915168187e-06,
"loss": 0.0691,
"step": 843
},
{
"epoch": 1.04,
"learning_rate": 9.870515018413147e-06,
"loss": 0.0733,
"step": 844
},
{
"epoch": 1.04,
"learning_rate": 9.850595635530894e-06,
"loss": 0.0618,
"step": 845
},
{
"epoch": 1.04,
"learning_rate": 9.830676845573277e-06,
"loss": 0.0928,
"step": 846
},
{
"epoch": 1.04,
"learning_rate": 9.810758727589814e-06,
"loss": 0.0799,
"step": 847
},
{
"epoch": 1.04,
"learning_rate": 9.790841360627335e-06,
"loss": 0.0538,
"step": 848
},
{
"epoch": 1.04,
"learning_rate": 9.770924823729708e-06,
"loss": 0.0669,
"step": 849
},
{
"epoch": 1.05,
"learning_rate": 9.751009195937493e-06,
"loss": 0.1058,
"step": 850
},
{
"epoch": 1.05,
"learning_rate": 9.731094556287649e-06,
"loss": 0.0659,
"step": 851
},
{
"epoch": 1.05,
"learning_rate": 9.711180983813222e-06,
"loss": 0.0677,
"step": 852
},
{
"epoch": 1.05,
"learning_rate": 9.691268557542997e-06,
"loss": 0.093,
"step": 853
},
{
"epoch": 1.05,
"learning_rate": 9.67135735650124e-06,
"loss": 0.071,
"step": 854
},
{
"epoch": 1.05,
"learning_rate": 9.651447459707333e-06,
"loss": 0.0553,
"step": 855
},
{
"epoch": 1.05,
"learning_rate": 9.631538946175496e-06,
"loss": 0.0541,
"step": 856
},
{
"epoch": 1.05,
"learning_rate": 9.611631894914445e-06,
"loss": 0.0654,
"step": 857
},
{
"epoch": 1.06,
"learning_rate": 9.59172638492711e-06,
"loss": 0.0668,
"step": 858
},
{
"epoch": 1.06,
"learning_rate": 9.571822495210288e-06,
"loss": 0.061,
"step": 859
},
{
"epoch": 1.06,
"learning_rate": 9.551920304754359e-06,
"loss": 0.0612,
"step": 860
},
{
"epoch": 1.06,
"learning_rate": 9.532019892542953e-06,
"loss": 0.0678,
"step": 861
},
{
"epoch": 1.06,
"learning_rate": 9.51212133755264e-06,
"loss": 0.0608,
"step": 862
},
{
"epoch": 1.06,
"learning_rate": 9.492224718752629e-06,
"loss": 0.0617,
"step": 863
},
{
"epoch": 1.06,
"learning_rate": 9.472330115104428e-06,
"loss": 0.0646,
"step": 864
},
{
"epoch": 1.06,
"learning_rate": 9.452437605561571e-06,
"loss": 0.0683,
"step": 865
},
{
"epoch": 1.07,
"learning_rate": 9.43254726906926e-06,
"loss": 0.0643,
"step": 866
},
{
"epoch": 1.07,
"learning_rate": 9.412659184564088e-06,
"loss": 0.0768,
"step": 867
},
{
"epoch": 1.07,
"learning_rate": 9.392773430973705e-06,
"loss": 0.0636,
"step": 868
},
{
"epoch": 1.07,
"learning_rate": 9.372890087216505e-06,
"loss": 0.0675,
"step": 869
},
{
"epoch": 1.07,
"learning_rate": 9.35300923220133e-06,
"loss": 0.0754,
"step": 870
},
{
"epoch": 1.07,
"learning_rate": 9.333130944827132e-06,
"loss": 0.0565,
"step": 871
},
{
"epoch": 1.07,
"learning_rate": 9.31325530398269e-06,
"loss": 0.0712,
"step": 872
},
{
"epoch": 1.07,
"learning_rate": 9.29338238854626e-06,
"loss": 0.0648,
"step": 873
},
{
"epoch": 1.08,
"learning_rate": 9.273512277385297e-06,
"loss": 0.0816,
"step": 874
},
{
"epoch": 1.08,
"learning_rate": 9.25364504935612e-06,
"loss": 0.0739,
"step": 875
},
{
"epoch": 1.08,
"learning_rate": 9.233780783303611e-06,
"loss": 0.0663,
"step": 876
},
{
"epoch": 1.08,
"learning_rate": 9.213919558060897e-06,
"loss": 0.0724,
"step": 877
},
{
"epoch": 1.08,
"learning_rate": 9.194061452449024e-06,
"loss": 0.0611,
"step": 878
},
{
"epoch": 1.08,
"learning_rate": 9.174206545276678e-06,
"loss": 0.0451,
"step": 879
},
{
"epoch": 1.08,
"learning_rate": 9.154354915339836e-06,
"loss": 0.0607,
"step": 880
},
{
"epoch": 1.08,
"learning_rate": 9.13450664142148e-06,
"loss": 0.0635,
"step": 881
},
{
"epoch": 1.08,
"learning_rate": 9.114661802291262e-06,
"loss": 0.0797,
"step": 882
},
{
"epoch": 1.09,
"learning_rate": 9.094820476705209e-06,
"loss": 0.0552,
"step": 883
},
{
"epoch": 1.09,
"learning_rate": 9.074982743405413e-06,
"loss": 0.0657,
"step": 884
},
{
"epoch": 1.09,
"learning_rate": 9.055148681119688e-06,
"loss": 0.0617,
"step": 885
},
{
"epoch": 1.09,
"learning_rate": 9.035318368561302e-06,
"loss": 0.0636,
"step": 886
},
{
"epoch": 1.09,
"learning_rate": 9.015491884428623e-06,
"loss": 0.0532,
"step": 887
},
{
"epoch": 1.09,
"learning_rate": 8.995669307404844e-06,
"loss": 0.056,
"step": 888
},
{
"epoch": 1.09,
"learning_rate": 8.975850716157634e-06,
"loss": 0.064,
"step": 889
},
{
"epoch": 1.09,
"learning_rate": 8.956036189338857e-06,
"loss": 0.0856,
"step": 890
},
{
"epoch": 1.1,
"learning_rate": 8.936225805584241e-06,
"loss": 0.064,
"step": 891
},
{
"epoch": 1.1,
"learning_rate": 8.916419643513075e-06,
"loss": 0.0797,
"step": 892
},
{
"epoch": 1.1,
"learning_rate": 8.896617781727895e-06,
"loss": 0.068,
"step": 893
},
{
"epoch": 1.1,
"learning_rate": 8.87682029881416e-06,
"loss": 0.0681,
"step": 894
},
{
"epoch": 1.1,
"learning_rate": 8.857027273339967e-06,
"loss": 0.0736,
"step": 895
},
{
"epoch": 1.1,
"learning_rate": 8.837238783855709e-06,
"loss": 0.0697,
"step": 896
},
{
"epoch": 1.1,
"learning_rate": 8.817454908893794e-06,
"loss": 0.0675,
"step": 897
},
{
"epoch": 1.1,
"learning_rate": 8.797675726968297e-06,
"loss": 0.0531,
"step": 898
},
{
"epoch": 1.11,
"learning_rate": 8.777901316574687e-06,
"loss": 0.0576,
"step": 899
},
{
"epoch": 1.11,
"learning_rate": 8.758131756189477e-06,
"loss": 0.0576,
"step": 900
},
{
"epoch": 1.11,
"learning_rate": 8.73836712426995e-06,
"loss": 0.0632,
"step": 901
},
{
"epoch": 1.11,
"learning_rate": 8.718607499253825e-06,
"loss": 0.0739,
"step": 902
},
{
"epoch": 1.11,
"learning_rate": 8.698852959558944e-06,
"loss": 0.0591,
"step": 903
},
{
"epoch": 1.11,
"learning_rate": 8.67910358358298e-06,
"loss": 0.0519,
"step": 904
},
{
"epoch": 1.11,
"learning_rate": 8.659359449703095e-06,
"loss": 0.0628,
"step": 905
},
{
"epoch": 1.11,
"learning_rate": 8.639620636275667e-06,
"loss": 0.0693,
"step": 906
},
{
"epoch": 1.12,
"learning_rate": 8.619887221635943e-06,
"loss": 0.062,
"step": 907
},
{
"epoch": 1.12,
"learning_rate": 8.600159284097756e-06,
"loss": 0.0705,
"step": 908
},
{
"epoch": 1.12,
"learning_rate": 8.580436901953203e-06,
"loss": 0.0814,
"step": 909
},
{
"epoch": 1.12,
"learning_rate": 8.56072015347232e-06,
"loss": 0.069,
"step": 910
},
{
"epoch": 1.12,
"learning_rate": 8.541009116902798e-06,
"loss": 0.0564,
"step": 911
},
{
"epoch": 1.12,
"learning_rate": 8.521303870469655e-06,
"loss": 0.0672,
"step": 912
},
{
"epoch": 1.12,
"learning_rate": 8.501604492374939e-06,
"loss": 0.0739,
"step": 913
},
{
"epoch": 1.12,
"learning_rate": 8.48191106079739e-06,
"loss": 0.1009,
"step": 914
},
{
"epoch": 1.13,
"learning_rate": 8.462223653892169e-06,
"loss": 0.0469,
"step": 915
},
{
"epoch": 1.13,
"learning_rate": 8.442542349790515e-06,
"loss": 0.071,
"step": 916
},
{
"epoch": 1.13,
"learning_rate": 8.42286722659945e-06,
"loss": 0.0629,
"step": 917
},
{
"epoch": 1.13,
"learning_rate": 8.403198362401472e-06,
"loss": 0.0612,
"step": 918
},
{
"epoch": 1.13,
"learning_rate": 8.38353583525423e-06,
"loss": 0.0573,
"step": 919
},
{
"epoch": 1.13,
"learning_rate": 8.363879723190241e-06,
"loss": 0.051,
"step": 920
},
{
"epoch": 1.13,
"learning_rate": 8.344230104216536e-06,
"loss": 0.0487,
"step": 921
},
{
"epoch": 1.13,
"learning_rate": 8.324587056314402e-06,
"loss": 0.0579,
"step": 922
},
{
"epoch": 1.14,
"learning_rate": 8.304950657439034e-06,
"loss": 0.0534,
"step": 923
},
{
"epoch": 1.14,
"learning_rate": 8.285320985519254e-06,
"loss": 0.0626,
"step": 924
},
{
"epoch": 1.14,
"learning_rate": 8.265698118457167e-06,
"loss": 0.0774,
"step": 925
},
{
"epoch": 1.14,
"learning_rate": 8.24608213412789e-06,
"loss": 0.0598,
"step": 926
},
{
"epoch": 1.14,
"learning_rate": 8.226473110379221e-06,
"loss": 0.0838,
"step": 927
},
{
"epoch": 1.14,
"learning_rate": 8.206871125031324e-06,
"loss": 0.0661,
"step": 928
},
{
"epoch": 1.14,
"learning_rate": 8.18727625587645e-06,
"loss": 0.0715,
"step": 929
},
{
"epoch": 1.14,
"learning_rate": 8.167688580678587e-06,
"loss": 0.0689,
"step": 930
},
{
"epoch": 1.15,
"learning_rate": 8.148108177173191e-06,
"loss": 0.0521,
"step": 931
},
{
"epoch": 1.15,
"learning_rate": 8.128535123066846e-06,
"loss": 0.0575,
"step": 932
},
{
"epoch": 1.15,
"learning_rate": 8.10896949603698e-06,
"loss": 0.0607,
"step": 933
},
{
"epoch": 1.15,
"learning_rate": 8.089411373731542e-06,
"loss": 0.0499,
"step": 934
},
{
"epoch": 1.15,
"learning_rate": 8.069860833768692e-06,
"loss": 0.0822,
"step": 935
},
{
"epoch": 1.15,
"learning_rate": 8.050317953736511e-06,
"loss": 0.0607,
"step": 936
},
{
"epoch": 1.15,
"learning_rate": 8.030782811192668e-06,
"loss": 0.0467,
"step": 937
},
{
"epoch": 1.15,
"learning_rate": 8.011255483664135e-06,
"loss": 0.0704,
"step": 938
},
{
"epoch": 1.15,
"learning_rate": 7.991736048646859e-06,
"loss": 0.0633,
"step": 939
},
{
"epoch": 1.16,
"learning_rate": 7.972224583605484e-06,
"loss": 0.0717,
"step": 940
},
{
"epoch": 1.16,
"learning_rate": 7.952721165972996e-06,
"loss": 0.0719,
"step": 941
},
{
"epoch": 1.16,
"learning_rate": 7.93322587315047e-06,
"loss": 0.0747,
"step": 942
},
{
"epoch": 1.16,
"learning_rate": 7.913738782506727e-06,
"loss": 0.0873,
"step": 943
},
{
"epoch": 1.16,
"learning_rate": 7.89425997137803e-06,
"loss": 0.0648,
"step": 944
},
{
"epoch": 1.16,
"learning_rate": 7.874789517067801e-06,
"loss": 0.0647,
"step": 945
},
{
"epoch": 1.16,
"learning_rate": 7.855327496846277e-06,
"loss": 0.0767,
"step": 946
},
{
"epoch": 1.16,
"learning_rate": 7.835873987950238e-06,
"loss": 0.0564,
"step": 947
},
{
"epoch": 1.17,
"learning_rate": 7.81642906758268e-06,
"loss": 0.0498,
"step": 948
},
{
"epoch": 1.17,
"learning_rate": 7.796992812912517e-06,
"loss": 0.0703,
"step": 949
},
{
"epoch": 1.17,
"learning_rate": 7.777565301074274e-06,
"loss": 0.0517,
"step": 950
},
{
"epoch": 1.17,
"learning_rate": 7.758146609167773e-06,
"loss": 0.0613,
"step": 951
},
{
"epoch": 1.17,
"learning_rate": 7.738736814257844e-06,
"loss": 0.0588,
"step": 952
},
{
"epoch": 1.17,
"learning_rate": 7.719335993373989e-06,
"loss": 0.0609,
"step": 953
},
{
"epoch": 1.17,
"learning_rate": 7.69994422351012e-06,
"loss": 0.0494,
"step": 954
},
{
"epoch": 1.17,
"learning_rate": 7.680561581624213e-06,
"loss": 0.0546,
"step": 955
},
{
"epoch": 1.18,
"learning_rate": 7.661188144638027e-06,
"loss": 0.0644,
"step": 956
},
{
"epoch": 1.18,
"learning_rate": 7.64182398943678e-06,
"loss": 0.0641,
"step": 957
},
{
"epoch": 1.18,
"learning_rate": 7.622469192868866e-06,
"loss": 0.0786,
"step": 958
},
{
"epoch": 1.18,
"learning_rate": 7.603123831745536e-06,
"loss": 0.0593,
"step": 959
},
{
"epoch": 1.18,
"learning_rate": 7.583787982840588e-06,
"loss": 0.0574,
"step": 960
},
{
"epoch": 1.18,
"learning_rate": 7.564461722890082e-06,
"loss": 0.0661,
"step": 961
},
{
"epoch": 1.18,
"learning_rate": 7.545145128592009e-06,
"loss": 0.0699,
"step": 962
},
{
"epoch": 1.18,
"learning_rate": 7.525838276606016e-06,
"loss": 0.0471,
"step": 963
},
{
"epoch": 1.19,
"learning_rate": 7.506541243553072e-06,
"loss": 0.0404,
"step": 964
},
{
"epoch": 1.19,
"learning_rate": 7.4872541060151945e-06,
"loss": 0.0597,
"step": 965
},
{
"epoch": 1.19,
"learning_rate": 7.467976940535113e-06,
"loss": 0.0667,
"step": 966
},
{
"epoch": 1.19,
"learning_rate": 7.448709823615995e-06,
"loss": 0.0618,
"step": 967
},
{
"epoch": 1.19,
"learning_rate": 7.429452831721128e-06,
"loss": 0.0623,
"step": 968
},
{
"epoch": 1.19,
"learning_rate": 7.410206041273605e-06,
"loss": 0.0769,
"step": 969
},
{
"epoch": 1.19,
"learning_rate": 7.390969528656051e-06,
"loss": 0.0589,
"step": 970
},
{
"epoch": 1.19,
"learning_rate": 7.371743370210289e-06,
"loss": 0.0584,
"step": 971
},
{
"epoch": 1.2,
"learning_rate": 7.352527642237064e-06,
"loss": 0.0507,
"step": 972
},
{
"epoch": 1.2,
"learning_rate": 7.333322420995708e-06,
"loss": 0.0691,
"step": 973
},
{
"epoch": 1.2,
"learning_rate": 7.31412778270387e-06,
"loss": 0.0597,
"step": 974
},
{
"epoch": 1.2,
"learning_rate": 7.294943803537202e-06,
"loss": 0.0677,
"step": 975
},
{
"epoch": 1.2,
"learning_rate": 7.275770559629042e-06,
"loss": 0.0586,
"step": 976
},
{
"epoch": 1.2,
"learning_rate": 7.256608127070137e-06,
"loss": 0.0721,
"step": 977
},
{
"epoch": 1.2,
"learning_rate": 7.2374565819083155e-06,
"loss": 0.0423,
"step": 978
},
{
"epoch": 1.2,
"learning_rate": 7.2183160001482075e-06,
"loss": 0.058,
"step": 979
},
{
"epoch": 1.21,
"learning_rate": 7.199186457750931e-06,
"loss": 0.0751,
"step": 980
},
{
"epoch": 1.21,
"learning_rate": 7.180068030633798e-06,
"loss": 0.0601,
"step": 981
},
{
"epoch": 1.21,
"learning_rate": 7.160960794669993e-06,
"loss": 0.0616,
"step": 982
},
{
"epoch": 1.21,
"learning_rate": 7.141864825688307e-06,
"loss": 0.0595,
"step": 983
},
{
"epoch": 1.21,
"learning_rate": 7.122780199472808e-06,
"loss": 0.0511,
"step": 984
},
{
"epoch": 1.21,
"learning_rate": 7.1037069917625456e-06,
"loss": 0.0504,
"step": 985
},
{
"epoch": 1.21,
"learning_rate": 7.084645278251264e-06,
"loss": 0.0649,
"step": 986
},
{
"epoch": 1.21,
"learning_rate": 7.065595134587078e-06,
"loss": 0.0634,
"step": 987
},
{
"epoch": 1.22,
"learning_rate": 7.046556636372202e-06,
"loss": 0.0647,
"step": 988
},
{
"epoch": 1.22,
"learning_rate": 7.027529859162616e-06,
"loss": 0.0488,
"step": 989
},
{
"epoch": 1.22,
"learning_rate": 7.008514878467805e-06,
"loss": 0.0489,
"step": 990
},
{
"epoch": 1.22,
"learning_rate": 6.989511769750421e-06,
"loss": 0.0687,
"step": 991
},
{
"epoch": 1.22,
"learning_rate": 6.97052060842601e-06,
"loss": 0.0668,
"step": 992
},
{
"epoch": 1.22,
"learning_rate": 6.951541469862706e-06,
"loss": 0.0637,
"step": 993
},
{
"epoch": 1.22,
"learning_rate": 6.932574429380918e-06,
"loss": 0.0567,
"step": 994
},
{
"epoch": 1.22,
"learning_rate": 6.913619562253055e-06,
"loss": 0.0778,
"step": 995
},
{
"epoch": 1.23,
"learning_rate": 6.894676943703207e-06,
"loss": 0.0471,
"step": 996
},
{
"epoch": 1.23,
"learning_rate": 6.875746648906862e-06,
"loss": 0.0712,
"step": 997
},
{
"epoch": 1.23,
"learning_rate": 6.85682875299059e-06,
"loss": 0.0723,
"step": 998
},
{
"epoch": 1.23,
"learning_rate": 6.837923331031761e-06,
"loss": 0.0777,
"step": 999
},
{
"epoch": 1.23,
"learning_rate": 6.819030458058243e-06,
"loss": 0.0796,
"step": 1000
},
{
"epoch": 1.23,
"learning_rate": 6.800150209048098e-06,
"loss": 0.0655,
"step": 1001
},
{
"epoch": 1.23,
"learning_rate": 6.7812826589292936e-06,
"loss": 0.0837,
"step": 1002
},
{
"epoch": 1.23,
"learning_rate": 6.76242788257939e-06,
"loss": 0.0505,
"step": 1003
},
{
"epoch": 1.23,
"learning_rate": 6.74358595482527e-06,
"loss": 0.0891,
"step": 1004
},
{
"epoch": 1.24,
"learning_rate": 6.724756950442808e-06,
"loss": 0.0752,
"step": 1005
},
{
"epoch": 1.24,
"learning_rate": 6.705940944156604e-06,
"loss": 0.0557,
"step": 1006
},
{
"epoch": 1.24,
"learning_rate": 6.687138010639667e-06,
"loss": 0.0687,
"step": 1007
},
{
"epoch": 1.24,
"learning_rate": 6.668348224513127e-06,
"loss": 0.0693,
"step": 1008
},
{
"epoch": 1.24,
"learning_rate": 6.649571660345945e-06,
"loss": 0.0605,
"step": 1009
},
{
"epoch": 1.24,
"learning_rate": 6.630808392654593e-06,
"loss": 0.0778,
"step": 1010
},
{
"epoch": 1.24,
"learning_rate": 6.612058495902791e-06,
"loss": 0.0795,
"step": 1011
},
{
"epoch": 1.24,
"learning_rate": 6.593322044501185e-06,
"loss": 0.0762,
"step": 1012
},
{
"epoch": 1.25,
"learning_rate": 6.574599112807071e-06,
"loss": 0.0679,
"step": 1013
},
{
"epoch": 1.25,
"learning_rate": 6.555889775124077e-06,
"loss": 0.0766,
"step": 1014
},
{
"epoch": 1.25,
"learning_rate": 6.537194105701896e-06,
"loss": 0.0645,
"step": 1015
},
{
"epoch": 1.25,
"learning_rate": 6.5185121787359686e-06,
"loss": 0.0544,
"step": 1016
},
{
"epoch": 1.25,
"learning_rate": 6.4998440683672e-06,
"loss": 0.0605,
"step": 1017
},
{
"epoch": 1.25,
"learning_rate": 6.48118984868167e-06,
"loss": 0.0668,
"step": 1018
},
{
"epoch": 1.25,
"learning_rate": 6.462549593710317e-06,
"loss": 0.0666,
"step": 1019
},
{
"epoch": 1.25,
"learning_rate": 6.443923377428672e-06,
"loss": 0.0677,
"step": 1020
},
{
"epoch": 1.26,
"learning_rate": 6.4253112737565426e-06,
"loss": 0.0697,
"step": 1021
},
{
"epoch": 1.26,
"learning_rate": 6.406713356557739e-06,
"loss": 0.0598,
"step": 1022
},
{
"epoch": 1.26,
"learning_rate": 6.3881296996397624e-06,
"loss": 0.0642,
"step": 1023
},
{
"epoch": 1.26,
"learning_rate": 6.3695603767535275e-06,
"loss": 0.0724,
"step": 1024
},
{
"epoch": 1.26,
"learning_rate": 6.351005461593063e-06,
"loss": 0.0522,
"step": 1025
},
{
"epoch": 1.26,
"learning_rate": 6.332465027795208e-06,
"loss": 0.0602,
"step": 1026
},
{
"epoch": 1.26,
"learning_rate": 6.313939148939348e-06,
"loss": 0.0692,
"step": 1027
},
{
"epoch": 1.26,
"learning_rate": 6.295427898547091e-06,
"loss": 0.0582,
"step": 1028
},
{
"epoch": 1.27,
"learning_rate": 6.2769313500820025e-06,
"loss": 0.0572,
"step": 1029
},
{
"epoch": 1.27,
"learning_rate": 6.258449576949292e-06,
"loss": 0.0653,
"step": 1030
},
{
"epoch": 1.27,
"learning_rate": 6.239982652495539e-06,
"loss": 0.0697,
"step": 1031
},
{
"epoch": 1.27,
"learning_rate": 6.221530650008391e-06,
"loss": 0.0685,
"step": 1032
},
{
"epoch": 1.27,
"learning_rate": 6.203093642716278e-06,
"loss": 0.0521,
"step": 1033
},
{
"epoch": 1.27,
"learning_rate": 6.184671703788123e-06,
"loss": 0.0616,
"step": 1034
},
{
"epoch": 1.27,
"learning_rate": 6.166264906333038e-06,
"loss": 0.0595,
"step": 1035
},
{
"epoch": 1.27,
"learning_rate": 6.147873323400058e-06,
"loss": 0.0721,
"step": 1036
},
{
"epoch": 1.28,
"learning_rate": 6.129497027977829e-06,
"loss": 0.0582,
"step": 1037
},
{
"epoch": 1.28,
"learning_rate": 6.111136092994334e-06,
"loss": 0.0598,
"step": 1038
},
{
"epoch": 1.28,
"learning_rate": 6.092790591316586e-06,
"loss": 0.059,
"step": 1039
},
{
"epoch": 1.28,
"learning_rate": 6.074460595750362e-06,
"loss": 0.0546,
"step": 1040
},
{
"epoch": 1.28,
"learning_rate": 6.056146179039899e-06,
"loss": 0.0611,
"step": 1041
},
{
"epoch": 1.28,
"learning_rate": 6.037847413867595e-06,
"loss": 0.0522,
"step": 1042
},
{
"epoch": 1.28,
"learning_rate": 6.01956437285375e-06,
"loss": 0.0569,
"step": 1043
},
{
"epoch": 1.28,
"learning_rate": 6.0012971285562535e-06,
"loss": 0.0681,
"step": 1044
},
{
"epoch": 1.29,
"learning_rate": 5.983045753470308e-06,
"loss": 0.0727,
"step": 1045
},
{
"epoch": 1.29,
"learning_rate": 5.964810320028129e-06,
"loss": 0.0707,
"step": 1046
},
{
"epoch": 1.29,
"learning_rate": 5.9465909005986765e-06,
"loss": 0.0721,
"step": 1047
},
{
"epoch": 1.29,
"learning_rate": 5.9283875674873524e-06,
"loss": 0.053,
"step": 1048
},
{
"epoch": 1.29,
"learning_rate": 5.910200392935718e-06,
"loss": 0.0577,
"step": 1049
},
{
"epoch": 1.29,
"learning_rate": 5.892029449121214e-06,
"loss": 0.0568,
"step": 1050
},
{
"epoch": 1.29,
"learning_rate": 5.873874808156856e-06,
"loss": 0.0635,
"step": 1051
},
{
"epoch": 1.29,
"learning_rate": 5.8557365420909725e-06,
"loss": 0.0669,
"step": 1052
},
{
"epoch": 1.3,
"learning_rate": 5.8376147229069e-06,
"loss": 0.0545,
"step": 1053
},
{
"epoch": 1.3,
"learning_rate": 5.819509422522711e-06,
"loss": 0.0674,
"step": 1054
},
{
"epoch": 1.3,
"learning_rate": 5.801420712790905e-06,
"loss": 0.0704,
"step": 1055
},
{
"epoch": 1.3,
"learning_rate": 5.78334866549816e-06,
"loss": 0.0511,
"step": 1056
},
{
"epoch": 1.3,
"learning_rate": 5.7652933523650205e-06,
"loss": 0.0637,
"step": 1057
},
{
"epoch": 1.3,
"learning_rate": 5.747254845045609e-06,
"loss": 0.0676,
"step": 1058
},
{
"epoch": 1.3,
"learning_rate": 5.729233215127378e-06,
"loss": 0.0579,
"step": 1059
},
{
"epoch": 1.3,
"learning_rate": 5.711228534130766e-06,
"loss": 0.0523,
"step": 1060
},
{
"epoch": 1.31,
"learning_rate": 5.693240873508981e-06,
"loss": 0.0526,
"step": 1061
},
{
"epoch": 1.31,
"learning_rate": 5.675270304647665e-06,
"loss": 0.0657,
"step": 1062
},
{
"epoch": 1.31,
"learning_rate": 5.657316898864634e-06,
"loss": 0.0522,
"step": 1063
},
{
"epoch": 1.31,
"learning_rate": 5.639380727409594e-06,
"loss": 0.0811,
"step": 1064
},
{
"epoch": 1.31,
"learning_rate": 5.621461861463846e-06,
"loss": 0.0542,
"step": 1065
},
{
"epoch": 1.31,
"learning_rate": 5.603560372140029e-06,
"loss": 0.0523,
"step": 1066
},
{
"epoch": 1.31,
"learning_rate": 5.585676330481806e-06,
"loss": 0.0573,
"step": 1067
},
{
"epoch": 1.31,
"learning_rate": 5.567809807463606e-06,
"loss": 0.0479,
"step": 1068
},
{
"epoch": 1.31,
"learning_rate": 5.5499608739903256e-06,
"loss": 0.0478,
"step": 1069
},
{
"epoch": 1.32,
"learning_rate": 5.532129600897065e-06,
"loss": 0.0645,
"step": 1070
},
{
"epoch": 1.32,
"learning_rate": 5.514316058948828e-06,
"loss": 0.0688,
"step": 1071
},
{
"epoch": 1.32,
"learning_rate": 5.496520318840266e-06,
"loss": 0.0655,
"step": 1072
},
{
"epoch": 1.32,
"learning_rate": 5.478742451195358e-06,
"loss": 0.0582,
"step": 1073
},
{
"epoch": 1.32,
"learning_rate": 5.46098252656718e-06,
"loss": 0.0622,
"step": 1074
},
{
"epoch": 1.32,
"learning_rate": 5.443240615437586e-06,
"loss": 0.0649,
"step": 1075
},
{
"epoch": 1.32,
"learning_rate": 5.425516788216942e-06,
"loss": 0.0471,
"step": 1076
},
{
"epoch": 1.32,
"learning_rate": 5.40781111524385e-06,
"loss": 0.0639,
"step": 1077
},
{
"epoch": 1.33,
"learning_rate": 5.3901236667848586e-06,
"loss": 0.0646,
"step": 1078
},
{
"epoch": 1.33,
"learning_rate": 5.372454513034208e-06,
"loss": 0.0519,
"step": 1079
},
{
"epoch": 1.33,
"learning_rate": 5.354803724113506e-06,
"loss": 0.0593,
"step": 1080
},
{
"epoch": 1.33,
"learning_rate": 5.337171370071508e-06,
"loss": 0.0443,
"step": 1081
},
{
"epoch": 1.33,
"learning_rate": 5.3195575208837865e-06,
"loss": 0.0629,
"step": 1082
},
{
"epoch": 1.33,
"learning_rate": 5.301962246452486e-06,
"loss": 0.0726,
"step": 1083
},
{
"epoch": 1.33,
"learning_rate": 5.28438561660604e-06,
"loss": 0.0683,
"step": 1084
},
{
"epoch": 1.33,
"learning_rate": 5.266827701098871e-06,
"loss": 0.0511,
"step": 1085
},
{
"epoch": 1.34,
"learning_rate": 5.249288569611155e-06,
"loss": 0.0679,
"step": 1086
},
{
"epoch": 1.34,
"learning_rate": 5.231768291748506e-06,
"loss": 0.0536,
"step": 1087
},
{
"epoch": 1.34,
"learning_rate": 5.214266937041721e-06,
"loss": 0.0593,
"step": 1088
},
{
"epoch": 1.34,
"learning_rate": 5.196784574946496e-06,
"loss": 0.0654,
"step": 1089
},
{
"epoch": 1.34,
"learning_rate": 5.1793212748431565e-06,
"loss": 0.0866,
"step": 1090
},
{
"epoch": 1.34,
"learning_rate": 5.161877106036386e-06,
"loss": 0.053,
"step": 1091
},
{
"epoch": 1.34,
"learning_rate": 5.144452137754921e-06,
"loss": 0.0696,
"step": 1092
},
{
"epoch": 1.34,
"learning_rate": 5.127046439151322e-06,
"loss": 0.0608,
"step": 1093
},
{
"epoch": 1.35,
"learning_rate": 5.109660079301668e-06,
"loss": 0.0619,
"step": 1094
},
{
"epoch": 1.35,
"learning_rate": 5.092293127205287e-06,
"loss": 0.0487,
"step": 1095
},
{
"epoch": 1.35,
"learning_rate": 5.074945651784491e-06,
"loss": 0.0568,
"step": 1096
},
{
"epoch": 1.35,
"learning_rate": 5.057617721884293e-06,
"loss": 0.0666,
"step": 1097
},
{
"epoch": 1.35,
"learning_rate": 5.040309406272135e-06,
"loss": 0.0825,
"step": 1098
},
{
"epoch": 1.35,
"learning_rate": 5.02302077363763e-06,
"loss": 0.075,
"step": 1099
},
{
"epoch": 1.35,
"learning_rate": 5.005751892592266e-06,
"loss": 0.0663,
"step": 1100
},
{
"epoch": 1.35,
"learning_rate": 4.98850283166915e-06,
"loss": 0.0498,
"step": 1101
},
{
"epoch": 1.36,
"learning_rate": 4.9712736593227285e-06,
"loss": 0.0471,
"step": 1102
},
{
"epoch": 1.36,
"learning_rate": 4.954064443928515e-06,
"loss": 0.0767,
"step": 1103
},
{
"epoch": 1.36,
"learning_rate": 4.936875253782841e-06,
"loss": 0.0521,
"step": 1104
},
{
"epoch": 1.36,
"learning_rate": 4.919706157102533e-06,
"loss": 0.0494,
"step": 1105
},
{
"epoch": 1.36,
"learning_rate": 4.9025572220247074e-06,
"loss": 0.0665,
"step": 1106
},
{
"epoch": 1.36,
"learning_rate": 4.885428516606449e-06,
"loss": 0.0611,
"step": 1107
},
{
"epoch": 1.36,
"learning_rate": 4.868320108824564e-06,
"loss": 0.0559,
"step": 1108
},
{
"epoch": 1.36,
"learning_rate": 4.851232066575304e-06,
"loss": 0.0532,
"step": 1109
},
{
"epoch": 1.37,
"learning_rate": 4.834164457674099e-06,
"loss": 0.0531,
"step": 1110
},
{
"epoch": 1.37,
"learning_rate": 4.817117349855297e-06,
"loss": 0.0596,
"step": 1111
},
{
"epoch": 1.37,
"learning_rate": 4.8000908107718625e-06,
"loss": 0.0684,
"step": 1112
},
{
"epoch": 1.37,
"learning_rate": 4.783084907995156e-06,
"loss": 0.063,
"step": 1113
},
{
"epoch": 1.37,
"learning_rate": 4.766099709014628e-06,
"loss": 0.0634,
"step": 1114
},
{
"epoch": 1.37,
"learning_rate": 4.74913528123756e-06,
"loss": 0.0522,
"step": 1115
},
{
"epoch": 1.37,
"learning_rate": 4.732191691988822e-06,
"loss": 0.0612,
"step": 1116
},
{
"epoch": 1.37,
"learning_rate": 4.715269008510552e-06,
"loss": 0.0597,
"step": 1117
},
{
"epoch": 1.38,
"learning_rate": 4.698367297961954e-06,
"loss": 0.0604,
"step": 1118
},
{
"epoch": 1.38,
"learning_rate": 4.681486627418979e-06,
"loss": 0.0656,
"step": 1119
},
{
"epoch": 1.38,
"learning_rate": 4.664627063874083e-06,
"loss": 0.0619,
"step": 1120
},
{
"epoch": 1.38,
"learning_rate": 4.647788674235961e-06,
"loss": 0.0743,
"step": 1121
},
{
"epoch": 1.38,
"learning_rate": 4.630971525329274e-06,
"loss": 0.0485,
"step": 1122
},
{
"epoch": 1.38,
"learning_rate": 4.614175683894384e-06,
"loss": 0.0691,
"step": 1123
},
{
"epoch": 1.38,
"learning_rate": 4.597401216587104e-06,
"loss": 0.0716,
"step": 1124
},
{
"epoch": 1.38,
"learning_rate": 4.58064818997841e-06,
"loss": 0.0459,
"step": 1125
},
{
"epoch": 1.38,
"learning_rate": 4.563916670554196e-06,
"loss": 0.0554,
"step": 1126
},
{
"epoch": 1.39,
"learning_rate": 4.547206724714996e-06,
"loss": 0.0534,
"step": 1127
},
{
"epoch": 1.39,
"learning_rate": 4.530518418775734e-06,
"loss": 0.07,
"step": 1128
},
{
"epoch": 1.39,
"learning_rate": 4.513851818965449e-06,
"loss": 0.0471,
"step": 1129
},
{
"epoch": 1.39,
"learning_rate": 4.497206991427036e-06,
"loss": 0.0541,
"step": 1130
},
{
"epoch": 1.39,
"learning_rate": 4.480584002216999e-06,
"loss": 0.0701,
"step": 1131
},
{
"epoch": 1.39,
"learning_rate": 4.463982917305155e-06,
"loss": 0.0693,
"step": 1132
},
{
"epoch": 1.39,
"learning_rate": 4.447403802574406e-06,
"loss": 0.0579,
"step": 1133
},
{
"epoch": 1.39,
"learning_rate": 4.430846723820453e-06,
"loss": 0.0448,
"step": 1134
},
{
"epoch": 1.4,
"learning_rate": 4.4143117467515514e-06,
"loss": 0.0685,
"step": 1135
},
{
"epoch": 1.4,
"learning_rate": 4.397798936988251e-06,
"loss": 0.0611,
"step": 1136
},
{
"epoch": 1.4,
"learning_rate": 4.381308360063107e-06,
"loss": 0.0721,
"step": 1137
},
{
"epoch": 1.4,
"learning_rate": 4.3648400814204665e-06,
"loss": 0.0766,
"step": 1138
},
{
"epoch": 1.4,
"learning_rate": 4.348394166416169e-06,
"loss": 0.0606,
"step": 1139
},
{
"epoch": 1.4,
"learning_rate": 4.3319706803173e-06,
"loss": 0.0483,
"step": 1140
},
{
"epoch": 1.4,
"learning_rate": 4.315569688301953e-06,
"loss": 0.0504,
"step": 1141
},
{
"epoch": 1.4,
"learning_rate": 4.299191255458922e-06,
"loss": 0.0694,
"step": 1142
},
{
"epoch": 1.41,
"learning_rate": 4.282835446787504e-06,
"loss": 0.0657,
"step": 1143
},
{
"epoch": 1.41,
"learning_rate": 4.266502327197182e-06,
"loss": 0.0719,
"step": 1144
},
{
"epoch": 1.41,
"learning_rate": 4.250191961507416e-06,
"loss": 0.0594,
"step": 1145
},
{
"epoch": 1.41,
"learning_rate": 4.233904414447355e-06,
"loss": 0.0582,
"step": 1146
},
{
"epoch": 1.41,
"learning_rate": 4.217639750655595e-06,
"loss": 0.0509,
"step": 1147
},
{
"epoch": 1.41,
"learning_rate": 4.201398034679911e-06,
"loss": 0.0595,
"step": 1148
},
{
"epoch": 1.41,
"learning_rate": 4.185179330977011e-06,
"loss": 0.0438,
"step": 1149
},
{
"epoch": 1.41,
"learning_rate": 4.168983703912285e-06,
"loss": 0.0575,
"step": 1150
},
{
"epoch": 1.42,
"learning_rate": 4.152811217759529e-06,
"loss": 0.0618,
"step": 1151
},
{
"epoch": 1.42,
"learning_rate": 4.1366619367007086e-06,
"loss": 0.0573,
"step": 1152
},
{
"epoch": 1.42,
"learning_rate": 4.1205359248256946e-06,
"loss": 0.0616,
"step": 1153
},
{
"epoch": 1.42,
"learning_rate": 4.104433246132015e-06,
"loss": 0.0583,
"step": 1154
},
{
"epoch": 1.42,
"learning_rate": 4.088353964524593e-06,
"loss": 0.0597,
"step": 1155
},
{
"epoch": 1.42,
"learning_rate": 4.072298143815508e-06,
"loss": 0.0559,
"step": 1156
},
{
"epoch": 1.42,
"learning_rate": 4.0562658477237235e-06,
"loss": 0.0726,
"step": 1157
},
{
"epoch": 1.42,
"learning_rate": 4.040257139874848e-06,
"loss": 0.0652,
"step": 1158
},
{
"epoch": 1.43,
"learning_rate": 4.024272083800877e-06,
"loss": 0.052,
"step": 1159
},
{
"epoch": 1.43,
"learning_rate": 4.008310742939939e-06,
"loss": 0.0564,
"step": 1160
},
{
"epoch": 1.43,
"learning_rate": 3.992373180636051e-06,
"loss": 0.0594,
"step": 1161
},
{
"epoch": 1.43,
"learning_rate": 3.9764594601388565e-06,
"loss": 0.0636,
"step": 1162
},
{
"epoch": 1.43,
"learning_rate": 3.960569644603395e-06,
"loss": 0.0832,
"step": 1163
},
{
"epoch": 1.43,
"learning_rate": 3.944703797089814e-06,
"loss": 0.0829,
"step": 1164
},
{
"epoch": 1.43,
"learning_rate": 3.928861980563164e-06,
"loss": 0.0464,
"step": 1165
},
{
"epoch": 1.43,
"learning_rate": 3.913044257893114e-06,
"loss": 0.0753,
"step": 1166
},
{
"epoch": 1.44,
"learning_rate": 3.8972506918537125e-06,
"loss": 0.0564,
"step": 1167
},
{
"epoch": 1.44,
"learning_rate": 3.881481345123158e-06,
"loss": 0.063,
"step": 1168
},
{
"epoch": 1.44,
"learning_rate": 3.865736280283503e-06,
"loss": 0.0547,
"step": 1169
},
{
"epoch": 1.44,
"learning_rate": 3.850015559820465e-06,
"loss": 0.0652,
"step": 1170
},
{
"epoch": 1.44,
"learning_rate": 3.834319246123129e-06,
"loss": 0.065,
"step": 1171
},
{
"epoch": 1.44,
"learning_rate": 3.818647401483724e-06,
"loss": 0.049,
"step": 1172
},
{
"epoch": 1.44,
"learning_rate": 3.803000088097384e-06,
"loss": 0.0616,
"step": 1173
},
{
"epoch": 1.44,
"learning_rate": 3.7873773680618618e-06,
"loss": 0.0694,
"step": 1174
},
{
"epoch": 1.45,
"learning_rate": 3.771779303377342e-06,
"loss": 0.0598,
"step": 1175
},
{
"epoch": 1.45,
"learning_rate": 3.7562059559461293e-06,
"loss": 0.0716,
"step": 1176
},
{
"epoch": 1.45,
"learning_rate": 3.740657387572464e-06,
"loss": 0.0474,
"step": 1177
},
{
"epoch": 1.45,
"learning_rate": 3.7251336599622323e-06,
"loss": 0.057,
"step": 1178
},
{
"epoch": 1.45,
"learning_rate": 3.7096348347227406e-06,
"loss": 0.048,
"step": 1179
},
{
"epoch": 1.45,
"learning_rate": 3.6941609733624707e-06,
"loss": 0.0584,
"step": 1180
},
{
"epoch": 1.45,
"learning_rate": 3.6787121372908274e-06,
"loss": 0.0494,
"step": 1181
},
{
"epoch": 1.45,
"learning_rate": 3.6632883878179103e-06,
"loss": 0.071,
"step": 1182
},
{
"epoch": 1.46,
"learning_rate": 3.647889786154254e-06,
"loss": 0.0613,
"step": 1183
},
{
"epoch": 1.46,
"learning_rate": 3.632516393410589e-06,
"loss": 0.0466,
"step": 1184
},
{
"epoch": 1.46,
"learning_rate": 3.6171682705976086e-06,
"loss": 0.0637,
"step": 1185
},
{
"epoch": 1.46,
"learning_rate": 3.601845478625714e-06,
"loss": 0.0583,
"step": 1186
},
{
"epoch": 1.46,
"learning_rate": 3.5865480783047802e-06,
"loss": 0.07,
"step": 1187
},
{
"epoch": 1.46,
"learning_rate": 3.5712761303439246e-06,
"loss": 0.0622,
"step": 1188
},
{
"epoch": 1.46,
"learning_rate": 3.5560296953512296e-06,
"loss": 0.0658,
"step": 1189
},
{
"epoch": 1.46,
"learning_rate": 3.5408088338335543e-06,
"loss": 0.0517,
"step": 1190
},
{
"epoch": 1.46,
"learning_rate": 3.525613606196249e-06,
"loss": 0.0583,
"step": 1191
},
{
"epoch": 1.47,
"learning_rate": 3.5104440727429378e-06,
"loss": 0.0609,
"step": 1192
},
{
"epoch": 1.47,
"learning_rate": 3.4953002936752876e-06,
"loss": 0.0499,
"step": 1193
},
{
"epoch": 1.47,
"learning_rate": 3.480182329092732e-06,
"loss": 0.0478,
"step": 1194
},
{
"epoch": 1.47,
"learning_rate": 3.4650902389922826e-06,
"loss": 0.0569,
"step": 1195
},
{
"epoch": 1.47,
"learning_rate": 3.4500240832682454e-06,
"loss": 0.0615,
"step": 1196
},
{
"epoch": 1.47,
"learning_rate": 3.4349839217120194e-06,
"loss": 0.0635,
"step": 1197
},
{
"epoch": 1.47,
"learning_rate": 3.4199698140118354e-06,
"loss": 0.0603,
"step": 1198
},
{
"epoch": 1.47,
"learning_rate": 3.404981819752524e-06,
"loss": 0.0578,
"step": 1199
},
{
"epoch": 1.48,
"learning_rate": 3.390019998415297e-06,
"loss": 0.0502,
"step": 1200
},
{
"epoch": 1.48,
"learning_rate": 3.3750844093774736e-06,
"loss": 0.0737,
"step": 1201
},
{
"epoch": 1.48,
"learning_rate": 3.360175111912292e-06,
"loss": 0.063,
"step": 1202
},
{
"epoch": 1.48,
"learning_rate": 3.3452921651886315e-06,
"loss": 0.0596,
"step": 1203
},
{
"epoch": 1.48,
"learning_rate": 3.330435628270806e-06,
"loss": 0.0569,
"step": 1204
},
{
"epoch": 1.48,
"learning_rate": 3.3156055601183156e-06,
"loss": 0.0742,
"step": 1205
},
{
"epoch": 1.48,
"learning_rate": 3.300802019585615e-06,
"loss": 0.0562,
"step": 1206
},
{
"epoch": 1.48,
"learning_rate": 3.2860250654218916e-06,
"loss": 0.0644,
"step": 1207
},
{
"epoch": 1.49,
"learning_rate": 3.2712747562708115e-06,
"loss": 0.0612,
"step": 1208
},
{
"epoch": 1.49,
"learning_rate": 3.2565511506703007e-06,
"loss": 0.0546,
"step": 1209
},
{
"epoch": 1.49,
"learning_rate": 3.2418543070523134e-06,
"loss": 0.0539,
"step": 1210
},
{
"epoch": 1.49,
"learning_rate": 3.2271842837425917e-06,
"loss": 0.0539,
"step": 1211
},
{
"epoch": 1.49,
"learning_rate": 3.2125411389604397e-06,
"loss": 0.0691,
"step": 1212
},
{
"epoch": 1.49,
"learning_rate": 3.1979249308184955e-06,
"loss": 0.0588,
"step": 1213
},
{
"epoch": 1.49,
"learning_rate": 3.18333571732249e-06,
"loss": 0.0727,
"step": 1214
},
{
"epoch": 1.49,
"learning_rate": 3.168773556371034e-06,
"loss": 0.0572,
"step": 1215
},
{
"epoch": 1.5,
"learning_rate": 3.154238505755367e-06,
"loss": 0.0721,
"step": 1216
},
{
"epoch": 1.5,
"learning_rate": 3.139730623159144e-06,
"loss": 0.0673,
"step": 1217
},
{
"epoch": 1.5,
"learning_rate": 3.1252499661582016e-06,
"loss": 0.0627,
"step": 1218
},
{
"epoch": 1.5,
"learning_rate": 3.110796592220322e-06,
"loss": 0.0856,
"step": 1219
},
{
"epoch": 1.5,
"learning_rate": 3.0963705587050286e-06,
"loss": 0.0603,
"step": 1220
},
{
"epoch": 1.5,
"learning_rate": 3.081971922863319e-06,
"loss": 0.0895,
"step": 1221
},
{
"epoch": 1.5,
"learning_rate": 3.067600741837483e-06,
"loss": 0.0665,
"step": 1222
},
{
"epoch": 1.5,
"learning_rate": 3.05325707266084e-06,
"loss": 0.0794,
"step": 1223
},
{
"epoch": 1.51,
"learning_rate": 3.0389409722575258e-06,
"loss": 0.0684,
"step": 1224
},
{
"epoch": 1.51,
"learning_rate": 3.0246524974422808e-06,
"loss": 0.0544,
"step": 1225
},
{
"epoch": 1.51,
"learning_rate": 3.010391704920187e-06,
"loss": 0.0552,
"step": 1226
},
{
"epoch": 1.51,
"learning_rate": 2.9961586512864947e-06,
"loss": 0.0617,
"step": 1227
},
{
"epoch": 1.51,
"learning_rate": 2.9819533930263434e-06,
"loss": 0.0457,
"step": 1228
},
{
"epoch": 1.51,
"learning_rate": 2.967775986514585e-06,
"loss": 0.0671,
"step": 1229
},
{
"epoch": 1.51,
"learning_rate": 2.9536264880155286e-06,
"loss": 0.0571,
"step": 1230
},
{
"epoch": 1.51,
"learning_rate": 2.939504953682726e-06,
"loss": 0.0442,
"step": 1231
},
{
"epoch": 1.52,
"learning_rate": 2.925411439558764e-06,
"loss": 0.0519,
"step": 1232
},
{
"epoch": 1.52,
"learning_rate": 2.9113460015750073e-06,
"loss": 0.0651,
"step": 1233
},
{
"epoch": 1.52,
"learning_rate": 2.8973086955514194e-06,
"loss": 0.0581,
"step": 1234
},
{
"epoch": 1.52,
"learning_rate": 2.883299577196308e-06,
"loss": 0.0485,
"step": 1235
},
{
"epoch": 1.52,
"learning_rate": 2.8693187021061164e-06,
"loss": 0.0631,
"step": 1236
},
{
"epoch": 1.52,
"learning_rate": 2.8553661257652045e-06,
"loss": 0.0595,
"step": 1237
},
{
"epoch": 1.52,
"learning_rate": 2.8414419035456197e-06,
"loss": 0.0545,
"step": 1238
},
{
"epoch": 1.52,
"learning_rate": 2.8275460907068974e-06,
"loss": 0.0613,
"step": 1239
},
{
"epoch": 1.53,
"learning_rate": 2.8136787423958147e-06,
"loss": 0.064,
"step": 1240
},
{
"epoch": 1.53,
"learning_rate": 2.7998399136461884e-06,
"loss": 0.0581,
"step": 1241
},
{
"epoch": 1.53,
"learning_rate": 2.7860296593786553e-06,
"loss": 0.0463,
"step": 1242
},
{
"epoch": 1.53,
"learning_rate": 2.7722480344004487e-06,
"loss": 0.0515,
"step": 1243
},
{
"epoch": 1.53,
"learning_rate": 2.7584950934051823e-06,
"loss": 0.067,
"step": 1244
},
{
"epoch": 1.53,
"learning_rate": 2.744770890972648e-06,
"loss": 0.0496,
"step": 1245
},
{
"epoch": 1.53,
"learning_rate": 2.7310754815685627e-06,
"loss": 0.056,
"step": 1246
},
{
"epoch": 1.53,
"learning_rate": 2.7174089195443985e-06,
"loss": 0.0485,
"step": 1247
},
{
"epoch": 1.54,
"learning_rate": 2.7037712591371324e-06,
"loss": 0.0484,
"step": 1248
},
{
"epoch": 1.54,
"learning_rate": 2.6901625544690436e-06,
"loss": 0.0641,
"step": 1249
},
{
"epoch": 1.54,
"learning_rate": 2.6765828595474985e-06,
"loss": 0.0571,
"step": 1250
},
{
"epoch": 1.54,
"learning_rate": 2.6630322282647336e-06,
"loss": 0.0438,
"step": 1251
},
{
"epoch": 1.54,
"learning_rate": 2.6495107143976573e-06,
"loss": 0.045,
"step": 1252
},
{
"epoch": 1.54,
"learning_rate": 2.6360183716075984e-06,
"loss": 0.0474,
"step": 1253
},
{
"epoch": 1.54,
"learning_rate": 2.622555253440139e-06,
"loss": 0.0626,
"step": 1254
},
{
"epoch": 1.54,
"learning_rate": 2.609121413324872e-06,
"loss": 0.0658,
"step": 1255
},
{
"epoch": 1.54,
"learning_rate": 2.5957169045751962e-06,
"loss": 0.0655,
"step": 1256
},
{
"epoch": 1.55,
"learning_rate": 2.5823417803881145e-06,
"loss": 0.0471,
"step": 1257
},
{
"epoch": 1.55,
"learning_rate": 2.5689960938440006e-06,
"loss": 0.0724,
"step": 1258
},
{
"epoch": 1.55,
"learning_rate": 2.555679897906417e-06,
"loss": 0.0554,
"step": 1259
},
{
"epoch": 1.55,
"learning_rate": 2.5423932454218804e-06,
"loss": 0.063,
"step": 1260
},
{
"epoch": 1.55,
"learning_rate": 2.5291361891196654e-06,
"loss": 0.0719,
"step": 1261
},
{
"epoch": 1.55,
"learning_rate": 2.515908781611591e-06,
"loss": 0.0546,
"step": 1262
},
{
"epoch": 1.55,
"learning_rate": 2.5027110753918093e-06,
"loss": 0.0465,
"step": 1263
},
{
"epoch": 1.55,
"learning_rate": 2.4895431228366106e-06,
"loss": 0.0504,
"step": 1264
},
{
"epoch": 1.56,
"learning_rate": 2.4764049762041874e-06,
"loss": 0.05,
"step": 1265
},
{
"epoch": 1.56,
"learning_rate": 2.463296687634462e-06,
"loss": 0.073,
"step": 1266
},
{
"epoch": 1.56,
"learning_rate": 2.450218309148853e-06,
"loss": 0.0618,
"step": 1267
},
{
"epoch": 1.56,
"learning_rate": 2.437169892650081e-06,
"loss": 0.0597,
"step": 1268
},
{
"epoch": 1.56,
"learning_rate": 2.424151489921958e-06,
"loss": 0.0561,
"step": 1269
},
{
"epoch": 1.56,
"learning_rate": 2.4111631526291846e-06,
"loss": 0.0673,
"step": 1270
},
{
"epoch": 1.56,
"learning_rate": 2.398204932317141e-06,
"loss": 0.0507,
"step": 1271
},
{
"epoch": 1.56,
"learning_rate": 2.3852768804116955e-06,
"loss": 0.0752,
"step": 1272
},
{
"epoch": 1.57,
"learning_rate": 2.372379048218979e-06,
"loss": 0.0512,
"step": 1273
},
{
"epoch": 1.57,
"learning_rate": 2.359511486925199e-06,
"loss": 0.068,
"step": 1274
},
{
"epoch": 1.57,
"learning_rate": 2.3466742475964266e-06,
"loss": 0.0636,
"step": 1275
},
{
"epoch": 1.57,
"learning_rate": 2.3338673811783974e-06,
"loss": 0.061,
"step": 1276
},
{
"epoch": 1.57,
"learning_rate": 2.32109093849632e-06,
"loss": 0.0418,
"step": 1277
},
{
"epoch": 1.57,
"learning_rate": 2.3083449702546425e-06,
"loss": 0.0718,
"step": 1278
},
{
"epoch": 1.57,
"learning_rate": 2.2956295270368965e-06,
"loss": 0.0744,
"step": 1279
},
{
"epoch": 1.57,
"learning_rate": 2.2829446593054495e-06,
"loss": 0.0828,
"step": 1280
},
{
"epoch": 1.58,
"learning_rate": 2.2702904174013475e-06,
"loss": 0.0636,
"step": 1281
},
{
"epoch": 1.58,
"learning_rate": 2.2576668515440824e-06,
"loss": 0.0489,
"step": 1282
},
{
"epoch": 1.58,
"learning_rate": 2.2450740118314083e-06,
"loss": 0.0513,
"step": 1283
},
{
"epoch": 1.58,
"learning_rate": 2.2325119482391466e-06,
"loss": 0.05,
"step": 1284
},
{
"epoch": 1.58,
"learning_rate": 2.219980710620967e-06,
"loss": 0.0573,
"step": 1285
},
{
"epoch": 1.58,
"learning_rate": 2.2074803487082164e-06,
"loss": 0.0681,
"step": 1286
},
{
"epoch": 1.58,
"learning_rate": 2.1950109121097043e-06,
"loss": 0.0617,
"step": 1287
},
{
"epoch": 1.58,
"learning_rate": 2.1825724503115064e-06,
"loss": 0.0714,
"step": 1288
},
{
"epoch": 1.59,
"learning_rate": 2.1701650126767824e-06,
"loss": 0.0522,
"step": 1289
},
{
"epoch": 1.59,
"learning_rate": 2.1577886484455535e-06,
"loss": 0.0427,
"step": 1290
},
{
"epoch": 1.59,
"learning_rate": 2.145443406734542e-06,
"loss": 0.0444,
"step": 1291
},
{
"epoch": 1.59,
"learning_rate": 2.1331293365369444e-06,
"loss": 0.0484,
"step": 1292
},
{
"epoch": 1.59,
"learning_rate": 2.1208464867222544e-06,
"loss": 0.0598,
"step": 1293
},
{
"epoch": 1.59,
"learning_rate": 2.1085949060360654e-06,
"loss": 0.0467,
"step": 1294
},
{
"epoch": 1.59,
"learning_rate": 2.0963746430998753e-06,
"loss": 0.0446,
"step": 1295
},
{
"epoch": 1.59,
"learning_rate": 2.084185746410894e-06,
"loss": 0.0539,
"step": 1296
},
{
"epoch": 1.6,
"learning_rate": 2.0720282643418577e-06,
"loss": 0.0592,
"step": 1297
},
{
"epoch": 1.6,
"learning_rate": 2.0599022451408226e-06,
"loss": 0.0683,
"step": 1298
},
{
"epoch": 1.6,
"learning_rate": 2.0478077369309856e-06,
"loss": 0.0474,
"step": 1299
},
{
"epoch": 1.6,
"learning_rate": 2.0357447877104897e-06,
"loss": 0.0561,
"step": 1300
},
{
"epoch": 1.6,
"learning_rate": 2.023713445352232e-06,
"loss": 0.0514,
"step": 1301
},
{
"epoch": 1.6,
"learning_rate": 2.011713757603675e-06,
"loss": 0.0619,
"step": 1302
},
{
"epoch": 1.6,
"learning_rate": 1.9997457720866554e-06,
"loss": 0.0679,
"step": 1303
},
{
"epoch": 1.6,
"learning_rate": 1.9878095362972037e-06,
"loss": 0.0452,
"step": 1304
},
{
"epoch": 1.61,
"learning_rate": 1.9759050976053407e-06,
"loss": 0.0645,
"step": 1305
},
{
"epoch": 1.61,
"learning_rate": 1.9640325032549023e-06,
"loss": 0.0489,
"step": 1306
},
{
"epoch": 1.61,
"learning_rate": 1.9521918003633446e-06,
"loss": 0.0537,
"step": 1307
},
{
"epoch": 1.61,
"learning_rate": 1.940383035921558e-06,
"loss": 0.0704,
"step": 1308
},
{
"epoch": 1.61,
"learning_rate": 1.9286062567936935e-06,
"loss": 0.0615,
"step": 1309
},
{
"epoch": 1.61,
"learning_rate": 1.916861509716945e-06,
"loss": 0.0522,
"step": 1310
},
{
"epoch": 1.61,
"learning_rate": 1.9051488413014063e-06,
"loss": 0.058,
"step": 1311
},
{
"epoch": 1.61,
"learning_rate": 1.8934682980298502e-06,
"loss": 0.0448,
"step": 1312
},
{
"epoch": 1.62,
"learning_rate": 1.8818199262575598e-06,
"loss": 0.0421,
"step": 1313
},
{
"epoch": 1.62,
"learning_rate": 1.8702037722121523e-06,
"loss": 0.0561,
"step": 1314
},
{
"epoch": 1.62,
"learning_rate": 1.8586198819933688e-06,
"loss": 0.062,
"step": 1315
},
{
"epoch": 1.62,
"learning_rate": 1.8470683015729273e-06,
"loss": 0.0693,
"step": 1316
},
{
"epoch": 1.62,
"learning_rate": 1.835549076794303e-06,
"loss": 0.0711,
"step": 1317
},
{
"epoch": 1.62,
"learning_rate": 1.8240622533725816e-06,
"loss": 0.0674,
"step": 1318
},
{
"epoch": 1.62,
"learning_rate": 1.8126078768942512e-06,
"loss": 0.0562,
"step": 1319
},
{
"epoch": 1.62,
"learning_rate": 1.8011859928170338e-06,
"loss": 0.0743,
"step": 1320
},
{
"epoch": 1.62,
"learning_rate": 1.7897966464697036e-06,
"loss": 0.066,
"step": 1321
},
{
"epoch": 1.63,
"learning_rate": 1.7784398830519002e-06,
"loss": 0.0647,
"step": 1322
},
{
"epoch": 1.63,
"learning_rate": 1.767115747633965e-06,
"loss": 0.0597,
"step": 1323
},
{
"epoch": 1.63,
"learning_rate": 1.7558242851567442e-06,
"loss": 0.0717,
"step": 1324
},
{
"epoch": 1.63,
"learning_rate": 1.744565540431421e-06,
"loss": 0.0728,
"step": 1325
},
{
"epoch": 1.63,
"learning_rate": 1.7333395581393364e-06,
"loss": 0.046,
"step": 1326
},
{
"epoch": 1.63,
"learning_rate": 1.7221463828318074e-06,
"loss": 0.0685,
"step": 1327
},
{
"epoch": 1.63,
"learning_rate": 1.7109860589299554e-06,
"loss": 0.0498,
"step": 1328
},
{
"epoch": 1.63,
"learning_rate": 1.6998586307245313e-06,
"loss": 0.0641,
"step": 1329
},
{
"epoch": 1.64,
"learning_rate": 1.6887641423757328e-06,
"loss": 0.0558,
"step": 1330
},
{
"epoch": 1.64,
"learning_rate": 1.6777026379130323e-06,
"loss": 0.0692,
"step": 1331
},
{
"epoch": 1.64,
"learning_rate": 1.6666741612350034e-06,
"loss": 0.079,
"step": 1332
},
{
"epoch": 1.64,
"learning_rate": 1.6556787561091492e-06,
"loss": 0.0392,
"step": 1333
},
{
"epoch": 1.64,
"learning_rate": 1.6447164661717197e-06,
"loss": 0.0639,
"step": 1334
},
{
"epoch": 1.64,
"learning_rate": 1.6337873349275457e-06,
"loss": 0.0684,
"step": 1335
},
{
"epoch": 1.64,
"learning_rate": 1.6228914057498747e-06,
"loss": 0.0651,
"step": 1336
},
{
"epoch": 1.64,
"learning_rate": 1.612028721880169e-06,
"loss": 0.0634,
"step": 1337
},
{
"epoch": 1.65,
"learning_rate": 1.6011993264279735e-06,
"loss": 0.0488,
"step": 1338
},
{
"epoch": 1.65,
"learning_rate": 1.5904032623707144e-06,
"loss": 0.0654,
"step": 1339
},
{
"epoch": 1.65,
"learning_rate": 1.5796405725535401e-06,
"loss": 0.0587,
"step": 1340
},
{
"epoch": 1.65,
"learning_rate": 1.5689112996891576e-06,
"loss": 0.0528,
"step": 1341
},
{
"epoch": 1.65,
"learning_rate": 1.5582154863576415e-06,
"loss": 0.0566,
"step": 1342
},
{
"epoch": 1.65,
"learning_rate": 1.5475531750062955e-06,
"loss": 0.0617,
"step": 1343
},
{
"epoch": 1.65,
"learning_rate": 1.5369244079494561e-06,
"loss": 0.0666,
"step": 1344
},
{
"epoch": 1.65,
"learning_rate": 1.5263292273683405e-06,
"loss": 0.0535,
"step": 1345
},
{
"epoch": 1.66,
"learning_rate": 1.5157676753108752e-06,
"loss": 0.0536,
"step": 1346
},
{
"epoch": 1.66,
"learning_rate": 1.5052397936915264e-06,
"loss": 0.0457,
"step": 1347
},
{
"epoch": 1.66,
"learning_rate": 1.4947456242911407e-06,
"loss": 0.0645,
"step": 1348
},
{
"epoch": 1.66,
"learning_rate": 1.4842852087567727e-06,
"loss": 0.0584,
"step": 1349
},
{
"epoch": 1.66,
"learning_rate": 1.4738585886015178e-06,
"loss": 0.0468,
"step": 1350
},
{
"epoch": 1.66,
"learning_rate": 1.4634658052043581e-06,
"loss": 0.0669,
"step": 1351
},
{
"epoch": 1.66,
"learning_rate": 1.453106899809985e-06,
"loss": 0.0624,
"step": 1352
},
{
"epoch": 1.66,
"learning_rate": 1.442781913528647e-06,
"loss": 0.0612,
"step": 1353
},
{
"epoch": 1.67,
"learning_rate": 1.4324908873359766e-06,
"loss": 0.0515,
"step": 1354
},
{
"epoch": 1.67,
"learning_rate": 1.4222338620728405e-06,
"loss": 0.0671,
"step": 1355
},
{
"epoch": 1.67,
"learning_rate": 1.4120108784451625e-06,
"loss": 0.0502,
"step": 1356
},
{
"epoch": 1.67,
"learning_rate": 1.4018219770237718e-06,
"loss": 0.0562,
"step": 1357
},
{
"epoch": 1.67,
"learning_rate": 1.3916671982442387e-06,
"loss": 0.0561,
"step": 1358
},
{
"epoch": 1.67,
"learning_rate": 1.3815465824067154e-06,
"loss": 0.071,
"step": 1359
},
{
"epoch": 1.67,
"learning_rate": 1.3714601696757713e-06,
"loss": 0.0671,
"step": 1360
},
{
"epoch": 1.67,
"learning_rate": 1.3614080000802488e-06,
"loss": 0.0699,
"step": 1361
},
{
"epoch": 1.68,
"learning_rate": 1.351390113513078e-06,
"loss": 0.0429,
"step": 1362
},
{
"epoch": 1.68,
"learning_rate": 1.341406549731148e-06,
"loss": 0.0447,
"step": 1363
},
{
"epoch": 1.68,
"learning_rate": 1.3314573483551253e-06,
"loss": 0.0499,
"step": 1364
},
{
"epoch": 1.68,
"learning_rate": 1.321542548869308e-06,
"loss": 0.056,
"step": 1365
},
{
"epoch": 1.68,
"learning_rate": 1.3116621906214744e-06,
"loss": 0.0495,
"step": 1366
},
{
"epoch": 1.68,
"learning_rate": 1.3018163128227058e-06,
"loss": 0.042,
"step": 1367
},
{
"epoch": 1.68,
"learning_rate": 1.2920049545472602e-06,
"loss": 0.0697,
"step": 1368
},
{
"epoch": 1.68,
"learning_rate": 1.2822281547323867e-06,
"loss": 0.0571,
"step": 1369
},
{
"epoch": 1.69,
"learning_rate": 1.2724859521781996e-06,
"loss": 0.044,
"step": 1370
},
{
"epoch": 1.69,
"learning_rate": 1.262778385547504e-06,
"loss": 0.0781,
"step": 1371
},
{
"epoch": 1.69,
"learning_rate": 1.2531054933656462e-06,
"loss": 0.0408,
"step": 1372
},
{
"epoch": 1.69,
"learning_rate": 1.2434673140203746e-06,
"loss": 0.0575,
"step": 1373
},
{
"epoch": 1.69,
"learning_rate": 1.2338638857616615e-06,
"loss": 0.0536,
"step": 1374
},
{
"epoch": 1.69,
"learning_rate": 1.22429524670158e-06,
"loss": 0.041,
"step": 1375
},
{
"epoch": 1.69,
"learning_rate": 1.2147614348141334e-06,
"loss": 0.0581,
"step": 1376
},
{
"epoch": 1.69,
"learning_rate": 1.2052624879351105e-06,
"loss": 0.0815,
"step": 1377
},
{
"epoch": 1.69,
"learning_rate": 1.195798443761933e-06,
"loss": 0.0559,
"step": 1378
},
{
"epoch": 1.7,
"learning_rate": 1.1863693398535115e-06,
"loss": 0.0434,
"step": 1379
},
{
"epoch": 1.7,
"learning_rate": 1.1769752136300927e-06,
"loss": 0.0806,
"step": 1380
},
{
"epoch": 1.7,
"learning_rate": 1.1676161023731115e-06,
"loss": 0.0751,
"step": 1381
},
{
"epoch": 1.7,
"learning_rate": 1.158292043225039e-06,
"loss": 0.058,
"step": 1382
},
{
"epoch": 1.7,
"learning_rate": 1.1490030731892422e-06,
"loss": 0.0448,
"step": 1383
},
{
"epoch": 1.7,
"learning_rate": 1.139749229129834e-06,
"loss": 0.0447,
"step": 1384
},
{
"epoch": 1.7,
"learning_rate": 1.1305305477715256e-06,
"loss": 0.0462,
"step": 1385
},
{
"epoch": 1.7,
"learning_rate": 1.1213470656994818e-06,
"loss": 0.0427,
"step": 1386
},
{
"epoch": 1.71,
"learning_rate": 1.1121988193591737e-06,
"loss": 0.0607,
"step": 1387
},
{
"epoch": 1.71,
"learning_rate": 1.1030858450562443e-06,
"loss": 0.0585,
"step": 1388
},
{
"epoch": 1.71,
"learning_rate": 1.0940081789563462e-06,
"loss": 0.0449,
"step": 1389
},
{
"epoch": 1.71,
"learning_rate": 1.0849658570850153e-06,
"loss": 0.0553,
"step": 1390
},
{
"epoch": 1.71,
"learning_rate": 1.0759589153275162e-06,
"loss": 0.0675,
"step": 1391
},
{
"epoch": 1.71,
"learning_rate": 1.0669873894287052e-06,
"loss": 0.0531,
"step": 1392
},
{
"epoch": 1.71,
"learning_rate": 1.0580513149928961e-06,
"loss": 0.0551,
"step": 1393
},
{
"epoch": 1.71,
"learning_rate": 1.0491507274836922e-06,
"loss": 0.052,
"step": 1394
},
{
"epoch": 1.72,
"learning_rate": 1.0402856622238832e-06,
"loss": 0.0431,
"step": 1395
},
{
"epoch": 1.72,
"learning_rate": 1.0314561543952728e-06,
"loss": 0.0527,
"step": 1396
},
{
"epoch": 1.72,
"learning_rate": 1.0226622390385553e-06,
"loss": 0.0531,
"step": 1397
},
{
"epoch": 1.72,
"learning_rate": 1.01390395105318e-06,
"loss": 0.0528,
"step": 1398
},
{
"epoch": 1.72,
"learning_rate": 1.0051813251971897e-06,
"loss": 0.0748,
"step": 1399
},
{
"epoch": 1.72,
"learning_rate": 9.964943960871187e-07,
"loss": 0.0414,
"step": 1400
},
{
"epoch": 1.72,
"learning_rate": 9.878431981978177e-07,
"loss": 0.0418,
"step": 1401
},
{
"epoch": 1.72,
"learning_rate": 9.792277658623461e-07,
"loss": 0.0383,
"step": 1402
},
{
"epoch": 1.73,
"learning_rate": 9.70648133271821e-07,
"loss": 0.0653,
"step": 1403
},
{
"epoch": 1.73,
"learning_rate": 9.621043344752834e-07,
"loss": 0.0468,
"step": 1404
},
{
"epoch": 1.73,
"learning_rate": 9.535964033795709e-07,
"loss": 0.0393,
"step": 1405
},
{
"epoch": 1.73,
"learning_rate": 9.451243737491656e-07,
"loss": 0.0463,
"step": 1406
},
{
"epoch": 1.73,
"learning_rate": 9.366882792060827e-07,
"loss": 0.0546,
"step": 1407
},
{
"epoch": 1.73,
"learning_rate": 9.282881532297205e-07,
"loss": 0.0699,
"step": 1408
},
{
"epoch": 1.73,
"learning_rate": 9.199240291567335e-07,
"loss": 0.0528,
"step": 1409
},
{
"epoch": 1.73,
"learning_rate": 9.115959401808983e-07,
"loss": 0.065,
"step": 1410
},
{
"epoch": 1.74,
"learning_rate": 9.033039193529858e-07,
"loss": 0.049,
"step": 1411
},
{
"epoch": 1.74,
"learning_rate": 8.950479995806216e-07,
"loss": 0.0518,
"step": 1412
},
{
"epoch": 1.74,
"learning_rate": 8.868282136281703e-07,
"loss": 0.0409,
"step": 1413
},
{
"epoch": 1.74,
"learning_rate": 8.786445941165878e-07,
"loss": 0.037,
"step": 1414
},
{
"epoch": 1.74,
"learning_rate": 8.704971735233048e-07,
"loss": 0.0394,
"step": 1415
},
{
"epoch": 1.74,
"learning_rate": 8.623859841820902e-07,
"loss": 0.044,
"step": 1416
},
{
"epoch": 1.74,
"learning_rate": 8.543110582829272e-07,
"loss": 0.056,
"step": 1417
},
{
"epoch": 1.74,
"learning_rate": 8.462724278718882e-07,
"loss": 0.064,
"step": 1418
},
{
"epoch": 1.75,
"learning_rate": 8.38270124850995e-07,
"loss": 0.084,
"step": 1419
},
{
"epoch": 1.75,
"learning_rate": 8.303041809781087e-07,
"loss": 0.0408,
"step": 1420
},
{
"epoch": 1.75,
"learning_rate": 8.223746278667944e-07,
"loss": 0.0529,
"step": 1421
},
{
"epoch": 1.75,
"learning_rate": 8.144814969861936e-07,
"loss": 0.0571,
"step": 1422
},
{
"epoch": 1.75,
"learning_rate": 8.066248196609073e-07,
"loss": 0.0502,
"step": 1423
},
{
"epoch": 1.75,
"learning_rate": 7.988046270708615e-07,
"loss": 0.0439,
"step": 1424
},
{
"epoch": 1.75,
"learning_rate": 7.91020950251199e-07,
"loss": 0.0636,
"step": 1425
},
{
"epoch": 1.75,
"learning_rate": 7.83273820092133e-07,
"loss": 0.0573,
"step": 1426
},
{
"epoch": 1.76,
"learning_rate": 7.755632673388524e-07,
"loss": 0.0514,
"step": 1427
},
{
"epoch": 1.76,
"learning_rate": 7.678893225913742e-07,
"loss": 0.0587,
"step": 1428
},
{
"epoch": 1.76,
"learning_rate": 7.60252016304438e-07,
"loss": 0.0695,
"step": 1429
},
{
"epoch": 1.76,
"learning_rate": 7.526513787873834e-07,
"loss": 0.0399,
"step": 1430
},
{
"epoch": 1.76,
"learning_rate": 7.450874402040176e-07,
"loss": 0.0418,
"step": 1431
},
{
"epoch": 1.76,
"learning_rate": 7.375602305725138e-07,
"loss": 0.0556,
"step": 1432
},
{
"epoch": 1.76,
"learning_rate": 7.3006977976528e-07,
"loss": 0.0585,
"step": 1433
},
{
"epoch": 1.76,
"learning_rate": 7.22616117508842e-07,
"loss": 0.0542,
"step": 1434
},
{
"epoch": 1.77,
"learning_rate": 7.151992733837276e-07,
"loss": 0.0523,
"step": 1435
},
{
"epoch": 1.77,
"learning_rate": 7.078192768243486e-07,
"loss": 0.0642,
"step": 1436
},
{
"epoch": 1.77,
"learning_rate": 7.004761571188856e-07,
"loss": 0.0692,
"step": 1437
},
{
"epoch": 1.77,
"learning_rate": 6.931699434091676e-07,
"loss": 0.0489,
"step": 1438
},
{
"epoch": 1.77,
"learning_rate": 6.85900664690562e-07,
"loss": 0.0693,
"step": 1439
},
{
"epoch": 1.77,
"learning_rate": 6.786683498118518e-07,
"loss": 0.0504,
"step": 1440
},
{
"epoch": 1.77,
"learning_rate": 6.714730274751303e-07,
"loss": 0.0378,
"step": 1441
},
{
"epoch": 1.77,
"learning_rate": 6.643147262356809e-07,
"loss": 0.0536,
"step": 1442
},
{
"epoch": 1.77,
"learning_rate": 6.571934745018627e-07,
"loss": 0.0619,
"step": 1443
},
{
"epoch": 1.78,
"learning_rate": 6.501093005350023e-07,
"loss": 0.0528,
"step": 1444
},
{
"epoch": 1.78,
"learning_rate": 6.430622324492853e-07,
"loss": 0.0501,
"step": 1445
},
{
"epoch": 1.78,
"learning_rate": 6.360522982116301e-07,
"loss": 0.0505,
"step": 1446
},
{
"epoch": 1.78,
"learning_rate": 6.290795256415927e-07,
"loss": 0.0631,
"step": 1447
},
{
"epoch": 1.78,
"learning_rate": 6.221439424112463e-07,
"loss": 0.0431,
"step": 1448
},
{
"epoch": 1.78,
"learning_rate": 6.152455760450749e-07,
"loss": 0.0626,
"step": 1449
},
{
"epoch": 1.78,
"learning_rate": 6.083844539198691e-07,
"loss": 0.0562,
"step": 1450
},
{
"epoch": 1.78,
"learning_rate": 6.015606032646026e-07,
"loss": 0.0785,
"step": 1451
},
{
"epoch": 1.79,
"learning_rate": 5.947740511603461e-07,
"loss": 0.0458,
"step": 1452
},
{
"epoch": 1.79,
"learning_rate": 5.880248245401354e-07,
"loss": 0.0663,
"step": 1453
},
{
"epoch": 1.79,
"learning_rate": 5.813129501888859e-07,
"loss": 0.07,
"step": 1454
},
{
"epoch": 1.79,
"learning_rate": 5.746384547432738e-07,
"loss": 0.0753,
"step": 1455
},
{
"epoch": 1.79,
"learning_rate": 5.680013646916316e-07,
"loss": 0.0505,
"step": 1456
},
{
"epoch": 1.79,
"learning_rate": 5.614017063738519e-07,
"loss": 0.0543,
"step": 1457
},
{
"epoch": 1.79,
"learning_rate": 5.54839505981265e-07,
"loss": 0.0735,
"step": 1458
},
{
"epoch": 1.79,
"learning_rate": 5.483147895565589e-07,
"loss": 0.0554,
"step": 1459
},
{
"epoch": 1.8,
"learning_rate": 5.418275829936537e-07,
"loss": 0.0665,
"step": 1460
},
{
"epoch": 1.8,
"learning_rate": 5.353779120376102e-07,
"loss": 0.0541,
"step": 1461
},
{
"epoch": 1.8,
"learning_rate": 5.289658022845323e-07,
"loss": 0.0795,
"step": 1462
},
{
"epoch": 1.8,
"learning_rate": 5.22591279181447e-07,
"loss": 0.0708,
"step": 1463
},
{
"epoch": 1.8,
"learning_rate": 5.162543680262267e-07,
"loss": 0.0357,
"step": 1464
},
{
"epoch": 1.8,
"learning_rate": 5.099550939674691e-07,
"loss": 0.047,
"step": 1465
},
{
"epoch": 1.8,
"learning_rate": 5.036934820044126e-07,
"loss": 0.0578,
"step": 1466
},
{
"epoch": 1.8,
"learning_rate": 4.974695569868238e-07,
"loss": 0.0611,
"step": 1467
},
{
"epoch": 1.81,
"learning_rate": 4.9128334361491e-07,
"loss": 0.0453,
"step": 1468
},
{
"epoch": 1.81,
"learning_rate": 4.85134866439212e-07,
"loss": 0.0666,
"step": 1469
},
{
"epoch": 1.81,
"learning_rate": 4.790241498605175e-07,
"loss": 0.0497,
"step": 1470
},
{
"epoch": 1.81,
"learning_rate": 4.729512181297524e-07,
"loss": 0.0534,
"step": 1471
},
{
"epoch": 1.81,
"learning_rate": 4.669160953478913e-07,
"loss": 0.0584,
"step": 1472
},
{
"epoch": 1.81,
"learning_rate": 4.6091880546586307e-07,
"loss": 0.0515,
"step": 1473
},
{
"epoch": 1.81,
"learning_rate": 4.549593722844492e-07,
"loss": 0.0729,
"step": 1474
},
{
"epoch": 1.81,
"learning_rate": 4.4903781945419556e-07,
"loss": 0.0506,
"step": 1475
},
{
"epoch": 1.82,
"learning_rate": 4.431541704753173e-07,
"loss": 0.0455,
"step": 1476
},
{
"epoch": 1.82,
"learning_rate": 4.3730844869760535e-07,
"loss": 0.0496,
"step": 1477
},
{
"epoch": 1.82,
"learning_rate": 4.3150067732032895e-07,
"loss": 0.0689,
"step": 1478
},
{
"epoch": 1.82,
"learning_rate": 4.2573087939215217e-07,
"loss": 0.0435,
"step": 1479
},
{
"epoch": 1.82,
"learning_rate": 4.199990778110363e-07,
"loss": 0.0607,
"step": 1480
},
{
"epoch": 1.82,
"learning_rate": 4.1430529532414886e-07,
"loss": 0.0649,
"step": 1481
},
{
"epoch": 1.82,
"learning_rate": 4.0864955452778244e-07,
"loss": 0.0516,
"step": 1482
},
{
"epoch": 1.82,
"learning_rate": 4.0303187786724486e-07,
"loss": 0.0416,
"step": 1483
},
{
"epoch": 1.83,
"learning_rate": 3.97452287636797e-07,
"loss": 0.0468,
"step": 1484
},
{
"epoch": 1.83,
"learning_rate": 3.919108059795407e-07,
"loss": 0.0622,
"step": 1485
},
{
"epoch": 1.83,
"learning_rate": 3.864074548873431e-07,
"loss": 0.0449,
"step": 1486
},
{
"epoch": 1.83,
"learning_rate": 3.809422562007525e-07,
"loss": 0.0676,
"step": 1487
},
{
"epoch": 1.83,
"learning_rate": 3.755152316088928e-07,
"loss": 0.0494,
"step": 1488
},
{
"epoch": 1.83,
"learning_rate": 3.701264026494067e-07,
"loss": 0.0607,
"step": 1489
},
{
"epoch": 1.83,
"learning_rate": 3.6477579070833934e-07,
"loss": 0.0487,
"step": 1490
},
{
"epoch": 1.83,
"learning_rate": 3.5946341702007836e-07,
"loss": 0.0601,
"step": 1491
},
{
"epoch": 1.84,
"learning_rate": 3.5418930266725606e-07,
"loss": 0.0572,
"step": 1492
},
{
"epoch": 1.84,
"learning_rate": 3.4895346858066723e-07,
"loss": 0.0537,
"step": 1493
},
{
"epoch": 1.84,
"learning_rate": 3.437559355391917e-07,
"loss": 0.0629,
"step": 1494
},
{
"epoch": 1.84,
"learning_rate": 3.385967241697041e-07,
"loss": 0.0461,
"step": 1495
},
{
"epoch": 1.84,
"learning_rate": 3.3347585494699966e-07,
"loss": 0.0558,
"step": 1496
},
{
"epoch": 1.84,
"learning_rate": 3.283933481937085e-07,
"loss": 0.0887,
"step": 1497
},
{
"epoch": 1.84,
"learning_rate": 3.2334922408021385e-07,
"loss": 0.0471,
"step": 1498
},
{
"epoch": 1.84,
"learning_rate": 3.1834350262457624e-07,
"loss": 0.0577,
"step": 1499
},
{
"epoch": 1.85,
"learning_rate": 3.1337620369245034e-07,
"loss": 0.0532,
"step": 1500
},
{
"epoch": 1.85,
"learning_rate": 3.0844734699700727e-07,
"loss": 0.0556,
"step": 1501
},
{
"epoch": 1.85,
"learning_rate": 3.0355695209886125e-07,
"loss": 0.0505,
"step": 1502
},
{
"epoch": 1.85,
"learning_rate": 2.987050384059798e-07,
"loss": 0.0478,
"step": 1503
},
{
"epoch": 1.85,
"learning_rate": 2.93891625173619e-07,
"loss": 0.0691,
"step": 1504
},
{
"epoch": 1.85,
"learning_rate": 2.8911673150424314e-07,
"loss": 0.0704,
"step": 1505
},
{
"epoch": 1.85,
"learning_rate": 2.843803763474462e-07,
"loss": 0.0631,
"step": 1506
},
{
"epoch": 1.85,
"learning_rate": 2.796825784998791e-07,
"loss": 0.0491,
"step": 1507
},
{
"epoch": 1.85,
"learning_rate": 2.750233566051719e-07,
"loss": 0.0571,
"step": 1508
},
{
"epoch": 1.86,
"learning_rate": 2.7040272915387024e-07,
"loss": 0.0562,
"step": 1509
},
{
"epoch": 1.86,
"learning_rate": 2.6582071448334466e-07,
"loss": 0.0565,
"step": 1510
},
{
"epoch": 1.86,
"learning_rate": 2.61277330777735e-07,
"loss": 0.0483,
"step": 1511
},
{
"epoch": 1.86,
"learning_rate": 2.5677259606786686e-07,
"loss": 0.0615,
"step": 1512
},
{
"epoch": 1.86,
"learning_rate": 2.5230652823118204e-07,
"loss": 0.0457,
"step": 1513
},
{
"epoch": 1.86,
"learning_rate": 2.478791449916773e-07,
"loss": 0.0494,
"step": 1514
},
{
"epoch": 1.86,
"learning_rate": 2.434904639198155e-07,
"loss": 0.0583,
"step": 1515
},
{
"epoch": 1.86,
"learning_rate": 2.3914050243247446e-07,
"loss": 0.0656,
"step": 1516
},
{
"epoch": 1.87,
"learning_rate": 2.3482927779286624e-07,
"loss": 0.0665,
"step": 1517
},
{
"epoch": 1.87,
"learning_rate": 2.3055680711047356e-07,
"loss": 0.0488,
"step": 1518
},
{
"epoch": 1.87,
"learning_rate": 2.2632310734097994e-07,
"loss": 0.0768,
"step": 1519
},
{
"epoch": 1.87,
"learning_rate": 2.2212819528620199e-07,
"loss": 0.0589,
"step": 1520
},
{
"epoch": 1.87,
"learning_rate": 2.179720875940272e-07,
"loss": 0.0533,
"step": 1521
},
{
"epoch": 1.87,
"learning_rate": 2.1385480075834076e-07,
"loss": 0.0491,
"step": 1522
},
{
"epoch": 1.87,
"learning_rate": 2.0977635111896656e-07,
"loss": 0.0449,
"step": 1523
},
{
"epoch": 1.87,
"learning_rate": 2.0573675486159738e-07,
"loss": 0.0428,
"step": 1524
},
{
"epoch": 1.88,
"learning_rate": 2.0173602801773496e-07,
"loss": 0.0582,
"step": 1525
},
{
"epoch": 1.88,
"learning_rate": 1.97774186464621e-07,
"loss": 0.0623,
"step": 1526
},
{
"epoch": 1.88,
"learning_rate": 1.9385124592518068e-07,
"loss": 0.0418,
"step": 1527
},
{
"epoch": 1.88,
"learning_rate": 1.8996722196795715e-07,
"loss": 0.0615,
"step": 1528
},
{
"epoch": 1.88,
"learning_rate": 1.8612213000704705e-07,
"loss": 0.0887,
"step": 1529
},
{
"epoch": 1.88,
"learning_rate": 1.8231598530204287e-07,
"loss": 0.0585,
"step": 1530
},
{
"epoch": 1.88,
"learning_rate": 1.7854880295797406e-07,
"loss": 0.061,
"step": 1531
},
{
"epoch": 1.88,
"learning_rate": 1.748205979252393e-07,
"loss": 0.0435,
"step": 1532
},
{
"epoch": 1.89,
"learning_rate": 1.711313849995555e-07,
"loss": 0.0513,
"step": 1533
},
{
"epoch": 1.89,
"learning_rate": 1.6748117882189886e-07,
"loss": 0.0505,
"step": 1534
},
{
"epoch": 1.89,
"learning_rate": 1.6386999387843717e-07,
"loss": 0.0451,
"step": 1535
},
{
"epoch": 1.89,
"learning_rate": 1.6029784450048325e-07,
"loss": 0.0591,
"step": 1536
},
{
"epoch": 1.89,
"learning_rate": 1.567647448644327e-07,
"loss": 0.0599,
"step": 1537
},
{
"epoch": 1.89,
"learning_rate": 1.5327070899170736e-07,
"loss": 0.0618,
"step": 1538
},
{
"epoch": 1.89,
"learning_rate": 1.4981575074870636e-07,
"loss": 0.0658,
"step": 1539
},
{
"epoch": 1.89,
"learning_rate": 1.4639988384673843e-07,
"loss": 0.0428,
"step": 1540
},
{
"epoch": 1.9,
"learning_rate": 1.4302312184197975e-07,
"loss": 0.0583,
"step": 1541
},
{
"epoch": 1.9,
"learning_rate": 1.396854781354129e-07,
"loss": 0.0538,
"step": 1542
},
{
"epoch": 1.9,
"learning_rate": 1.3638696597277678e-07,
"loss": 0.0729,
"step": 1543
},
{
"epoch": 1.9,
"learning_rate": 1.3312759844451352e-07,
"loss": 0.0376,
"step": 1544
},
{
"epoch": 1.9,
"learning_rate": 1.2990738848571493e-07,
"loss": 0.0392,
"step": 1545
},
{
"epoch": 1.9,
"learning_rate": 1.2672634887607616e-07,
"loss": 0.062,
"step": 1546
},
{
"epoch": 1.9,
"learning_rate": 1.235844922398355e-07,
"loss": 0.0567,
"step": 1547
},
{
"epoch": 1.9,
"learning_rate": 1.2048183104573563e-07,
"loss": 0.0629,
"step": 1548
},
{
"epoch": 1.91,
"learning_rate": 1.1741837760696595e-07,
"loss": 0.0553,
"step": 1549
},
{
"epoch": 1.91,
"learning_rate": 1.1439414408111471e-07,
"loss": 0.0469,
"step": 1550
},
{
"epoch": 1.91,
"learning_rate": 1.114091424701258e-07,
"loss": 0.0535,
"step": 1551
},
{
"epoch": 1.91,
"learning_rate": 1.0846338462024542e-07,
"loss": 0.0526,
"step": 1552
},
{
"epoch": 1.91,
"learning_rate": 1.055568822219799e-07,
"loss": 0.0626,
"step": 1553
},
{
"epoch": 1.91,
"learning_rate": 1.0268964681004356e-07,
"loss": 0.0417,
"step": 1554
},
{
"epoch": 1.91,
"learning_rate": 9.986168976331866e-08,
"loss": 0.0585,
"step": 1555
},
{
"epoch": 1.91,
"learning_rate": 9.707302230480553e-08,
"loss": 0.0592,
"step": 1556
},
{
"epoch": 1.92,
"learning_rate": 9.432365550158251e-08,
"loss": 0.0675,
"step": 1557
},
{
"epoch": 1.92,
"learning_rate": 9.16136002647583e-08,
"loss": 0.0584,
"step": 1558
},
{
"epoch": 1.92,
"learning_rate": 8.89428673494308e-08,
"loss": 0.0486,
"step": 1559
},
{
"epoch": 1.92,
"learning_rate": 8.631146735464279e-08,
"loss": 0.0583,
"step": 1560
},
{
"epoch": 1.92,
"learning_rate": 8.3719410723343e-08,
"loss": 0.052,
"step": 1561
},
{
"epoch": 1.92,
"learning_rate": 8.116670774234059e-08,
"loss": 0.0648,
"step": 1562
},
{
"epoch": 1.92,
"learning_rate": 7.865336854226525e-08,
"loss": 0.0599,
"step": 1563
},
{
"epoch": 1.92,
"learning_rate": 7.617940309753047e-08,
"loss": 0.0588,
"step": 1564
},
{
"epoch": 1.92,
"learning_rate": 7.374482122628923e-08,
"loss": 0.0413,
"step": 1565
},
{
"epoch": 1.93,
"learning_rate": 7.134963259040173e-08,
"loss": 0.0804,
"step": 1566
},
{
"epoch": 1.93,
"learning_rate": 6.899384669538434e-08,
"loss": 0.0612,
"step": 1567
},
{
"epoch": 1.93,
"learning_rate": 6.66774728903885e-08,
"loss": 0.0409,
"step": 1568
},
{
"epoch": 1.93,
"learning_rate": 6.440052036815081e-08,
"loss": 0.075,
"step": 1569
},
{
"epoch": 1.93,
"learning_rate": 6.216299816496186e-08,
"loss": 0.058,
"step": 1570
},
{
"epoch": 1.93,
"learning_rate": 5.996491516062964e-08,
"loss": 0.0829,
"step": 1571
},
{
"epoch": 1.93,
"learning_rate": 5.7806280078444024e-08,
"loss": 0.07,
"step": 1572
},
{
"epoch": 1.93,
"learning_rate": 5.568710148514123e-08,
"loss": 0.0383,
"step": 1573
},
{
"epoch": 1.94,
"learning_rate": 5.360738779087382e-08,
"loss": 0.0439,
"step": 1574
},
{
"epoch": 1.94,
"learning_rate": 5.156714724917078e-08,
"loss": 0.0503,
"step": 1575
},
{
"epoch": 1.94,
"learning_rate": 4.9566387956909713e-08,
"loss": 0.0511,
"step": 1576
},
{
"epoch": 1.94,
"learning_rate": 4.760511785428468e-08,
"loss": 0.0505,
"step": 1577
},
{
"epoch": 1.94,
"learning_rate": 4.568334472477287e-08,
"loss": 0.0631,
"step": 1578
},
{
"epoch": 1.94,
"learning_rate": 4.380107619510243e-08,
"loss": 0.0595,
"step": 1579
},
{
"epoch": 1.94,
"learning_rate": 4.195831973522468e-08,
"loss": 0.0586,
"step": 1580
},
{
"epoch": 1.94,
"learning_rate": 4.015508265828527e-08,
"loss": 0.0582,
"step": 1581
},
{
"epoch": 1.95,
"learning_rate": 3.8391372120591964e-08,
"loss": 0.0601,
"step": 1582
},
{
"epoch": 1.95,
"learning_rate": 3.666719512158912e-08,
"loss": 0.0605,
"step": 1583
},
{
"epoch": 1.95,
"learning_rate": 3.498255850382659e-08,
"loss": 0.0696,
"step": 1584
},
{
"epoch": 1.95,
"learning_rate": 3.333746895293755e-08,
"loss": 0.0612,
"step": 1585
},
{
"epoch": 1.95,
"learning_rate": 3.173193299760735e-08,
"loss": 0.0604,
"step": 1586
},
{
"epoch": 1.95,
"learning_rate": 3.016595700954916e-08,
"loss": 0.068,
"step": 1587
},
{
"epoch": 1.95,
"learning_rate": 2.86395472034795e-08,
"loss": 0.0478,
"step": 1588
},
{
"epoch": 1.95,
"learning_rate": 2.715270963709382e-08,
"loss": 0.0582,
"step": 1589
},
{
"epoch": 1.96,
"learning_rate": 2.5705450211038764e-08,
"loss": 0.0615,
"step": 1590
},
{
"epoch": 1.96,
"learning_rate": 2.429777466889438e-08,
"loss": 0.0779,
"step": 1591
},
{
"epoch": 1.96,
"learning_rate": 2.2929688597147505e-08,
"loss": 0.0458,
"step": 1592
},
{
"epoch": 1.96,
"learning_rate": 2.160119742517064e-08,
"loss": 0.045,
"step": 1593
},
{
"epoch": 1.96,
"learning_rate": 2.031230642520088e-08,
"loss": 0.067,
"step": 1594
},
{
"epoch": 1.96,
"learning_rate": 1.9063020712316582e-08,
"loss": 0.0642,
"step": 1595
},
{
"epoch": 1.96,
"learning_rate": 1.785334524442073e-08,
"loss": 0.0516,
"step": 1596
},
{
"epoch": 1.96,
"learning_rate": 1.6683284822219814e-08,
"loss": 0.0549,
"step": 1597
},
{
"epoch": 1.97,
"learning_rate": 1.555284408920388e-08,
"loss": 0.0508,
"step": 1598
},
{
"epoch": 1.97,
"learning_rate": 1.4462027531627621e-08,
"loss": 0.05,
"step": 1599
},
{
"epoch": 1.97,
"learning_rate": 1.3410839478493753e-08,
"loss": 0.0448,
"step": 1600
},
{
"epoch": 1.97,
"learning_rate": 1.2399284101538566e-08,
"loss": 0.0556,
"step": 1601
},
{
"epoch": 1.97,
"learning_rate": 1.1427365415209723e-08,
"loss": 0.0582,
"step": 1602
},
{
"epoch": 1.97,
"learning_rate": 1.0495087276654049e-08,
"loss": 0.0722,
"step": 1603
},
{
"epoch": 1.97,
"learning_rate": 9.602453385699762e-09,
"loss": 0.0654,
"step": 1604
},
{
"epoch": 1.97,
"learning_rate": 8.74946728484538e-09,
"loss": 0.0486,
"step": 1605
},
{
"epoch": 1.98,
"learning_rate": 7.936132359243066e-09,
"loss": 0.0503,
"step": 1606
},
{
"epoch": 1.98,
"learning_rate": 7.162451836685291e-09,
"loss": 0.0516,
"step": 1607
},
{
"epoch": 1.98,
"learning_rate": 6.428428787593755e-09,
"loss": 0.0434,
"step": 1608
},
{
"epoch": 1.98,
"learning_rate": 5.7340661250038235e-09,
"loss": 0.0446,
"step": 1609
},
{
"epoch": 1.98,
"learning_rate": 5.079366604555658e-09,
"loss": 0.0518,
"step": 1610
},
{
"epoch": 1.98,
"learning_rate": 4.464332824483108e-09,
"loss": 0.045,
"step": 1611
},
{
"epoch": 1.98,
"learning_rate": 3.888967225604834e-09,
"loss": 0.054,
"step": 1612
},
{
"epoch": 1.98,
"learning_rate": 3.353272091309867e-09,
"loss": 0.0743,
"step": 1613
},
{
"epoch": 1.99,
"learning_rate": 2.8572495475509553e-09,
"loss": 0.046,
"step": 1614
},
{
"epoch": 1.99,
"learning_rate": 2.400901562840119e-09,
"loss": 0.0793,
"step": 1615
},
{
"epoch": 1.99,
"learning_rate": 1.984229948235328e-09,
"loss": 0.035,
"step": 1616
},
{
"epoch": 1.99,
"learning_rate": 1.6072363573338412e-09,
"loss": 0.0519,
"step": 1617
},
{
"epoch": 1.99,
"learning_rate": 1.2699222862699868e-09,
"loss": 0.0559,
"step": 1618
},
{
"epoch": 1.99,
"learning_rate": 9.722890737029478e-10,
"loss": 0.0643,
"step": 1619
},
{
"epoch": 1.99,
"learning_rate": 7.143379008200946e-10,
"loss": 0.0642,
"step": 1620
},
{
"epoch": 1.99,
"learning_rate": 4.960697913203305e-10,
"loss": 0.0362,
"step": 1621
},
{
"epoch": 2.0,
"learning_rate": 3.174856114229741e-10,
"loss": 0.053,
"step": 1622
},
{
"epoch": 2.0,
"learning_rate": 1.7858606985443616e-10,
"loss": 0.0692,
"step": 1623
},
{
"epoch": 2.0,
"learning_rate": 7.937171784933028e-11,
"loss": 0.0524,
"step": 1624
},
{
"epoch": 2.0,
"learning_rate": 1.9842949149362358e-11,
"loss": 0.0488,
"step": 1625
},
{
"epoch": 2.0,
"learning_rate": 0.0,
"loss": 0.0335,
"step": 1626
},
{
"epoch": 2.0,
"step": 1626,
"total_flos": 43698468323328.0,
"train_loss": 0.08158567177560204,
"train_runtime": 9449.8527,
"train_samples_per_second": 11.005,
"train_steps_per_second": 0.172
}
],
"max_steps": 1626,
"num_train_epochs": 2,
"total_flos": 43698468323328.0,
"trial_name": null,
"trial_params": null
}