MBTI_ESFP / checkpoint-1000 /trainer_state.json
ToBeWithYou's picture
output_ESFP
924e54c verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 12.658227848101266,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 5e-05,
"loss": 1.0641,
"step": 1
},
{
"epoch": 0.03,
"learning_rate": 0.0001,
"loss": 0.9056,
"step": 2
},
{
"epoch": 0.04,
"learning_rate": 9.999975227016531e-05,
"loss": 0.9593,
"step": 3
},
{
"epoch": 0.05,
"learning_rate": 9.999900908311602e-05,
"loss": 0.973,
"step": 4
},
{
"epoch": 0.06,
"learning_rate": 9.999777044621652e-05,
"loss": 1.0652,
"step": 5
},
{
"epoch": 0.08,
"learning_rate": 9.999603637174071e-05,
"loss": 0.9212,
"step": 6
},
{
"epoch": 0.09,
"learning_rate": 9.999380687687188e-05,
"loss": 0.899,
"step": 7
},
{
"epoch": 0.1,
"learning_rate": 9.999108198370249e-05,
"loss": 0.7079,
"step": 8
},
{
"epoch": 0.11,
"learning_rate": 9.998786171923407e-05,
"loss": 0.7139,
"step": 9
},
{
"epoch": 0.13,
"learning_rate": 9.998414611537681e-05,
"loss": 0.8258,
"step": 10
},
{
"epoch": 0.14,
"learning_rate": 9.997993520894937e-05,
"loss": 0.7415,
"step": 11
},
{
"epoch": 0.15,
"learning_rate": 9.997522904167844e-05,
"loss": 0.8233,
"step": 12
},
{
"epoch": 0.16,
"learning_rate": 9.997002766019832e-05,
"loss": 0.8698,
"step": 13
},
{
"epoch": 0.18,
"learning_rate": 9.996433111605052e-05,
"loss": 0.6354,
"step": 14
},
{
"epoch": 0.19,
"learning_rate": 9.99581394656832e-05,
"loss": 0.6136,
"step": 15
},
{
"epoch": 0.2,
"learning_rate": 9.995145277045061e-05,
"loss": 0.7363,
"step": 16
},
{
"epoch": 0.22,
"learning_rate": 9.994427109661253e-05,
"loss": 0.6065,
"step": 17
},
{
"epoch": 0.23,
"learning_rate": 9.993659451533353e-05,
"loss": 0.6152,
"step": 18
},
{
"epoch": 0.24,
"learning_rate": 9.992842310268233e-05,
"loss": 0.7021,
"step": 19
},
{
"epoch": 0.25,
"learning_rate": 9.991975693963107e-05,
"loss": 0.5468,
"step": 20
},
{
"epoch": 0.27,
"learning_rate": 9.99105961120544e-05,
"loss": 0.5437,
"step": 21
},
{
"epoch": 0.28,
"learning_rate": 9.990094071072877e-05,
"loss": 0.6458,
"step": 22
},
{
"epoch": 0.29,
"learning_rate": 9.989079083133139e-05,
"loss": 0.6744,
"step": 23
},
{
"epoch": 0.3,
"learning_rate": 9.988014657443941e-05,
"loss": 0.5592,
"step": 24
},
{
"epoch": 0.32,
"learning_rate": 9.986900804552878e-05,
"loss": 0.5351,
"step": 25
},
{
"epoch": 0.33,
"learning_rate": 9.985737535497337e-05,
"loss": 0.5705,
"step": 26
},
{
"epoch": 0.34,
"learning_rate": 9.984524861804376e-05,
"loss": 0.5696,
"step": 27
},
{
"epoch": 0.35,
"learning_rate": 9.983262795490613e-05,
"loss": 0.588,
"step": 28
},
{
"epoch": 0.37,
"learning_rate": 9.981951349062106e-05,
"loss": 0.627,
"step": 29
},
{
"epoch": 0.38,
"learning_rate": 9.980590535514233e-05,
"loss": 0.5702,
"step": 30
},
{
"epoch": 0.39,
"learning_rate": 9.979180368331558e-05,
"loss": 0.5907,
"step": 31
},
{
"epoch": 0.41,
"learning_rate": 9.9777208614877e-05,
"loss": 0.4817,
"step": 32
},
{
"epoch": 0.42,
"learning_rate": 9.976212029445194e-05,
"loss": 0.5789,
"step": 33
},
{
"epoch": 0.43,
"learning_rate": 9.97465388715535e-05,
"loss": 0.5108,
"step": 34
},
{
"epoch": 0.44,
"learning_rate": 9.9730464500581e-05,
"loss": 0.557,
"step": 35
},
{
"epoch": 0.46,
"learning_rate": 9.971389734081848e-05,
"loss": 0.5771,
"step": 36
},
{
"epoch": 0.47,
"learning_rate": 9.969683755643317e-05,
"loss": 0.5524,
"step": 37
},
{
"epoch": 0.48,
"learning_rate": 9.967928531647374e-05,
"loss": 0.5648,
"step": 38
},
{
"epoch": 0.49,
"learning_rate": 9.966124079486872e-05,
"loss": 0.4951,
"step": 39
},
{
"epoch": 0.51,
"learning_rate": 9.96427041704248e-05,
"loss": 0.5516,
"step": 40
},
{
"epoch": 0.52,
"learning_rate": 9.962367562682496e-05,
"loss": 0.5582,
"step": 41
},
{
"epoch": 0.53,
"learning_rate": 9.960415535262671e-05,
"loss": 0.5352,
"step": 42
},
{
"epoch": 0.54,
"learning_rate": 9.958414354126022e-05,
"loss": 0.737,
"step": 43
},
{
"epoch": 0.56,
"learning_rate": 9.956364039102642e-05,
"loss": 0.4762,
"step": 44
},
{
"epoch": 0.57,
"learning_rate": 9.954264610509497e-05,
"loss": 0.4896,
"step": 45
},
{
"epoch": 0.58,
"learning_rate": 9.952116089150232e-05,
"loss": 0.4975,
"step": 46
},
{
"epoch": 0.59,
"learning_rate": 9.94991849631496e-05,
"loss": 0.5646,
"step": 47
},
{
"epoch": 0.61,
"learning_rate": 9.947671853780054e-05,
"loss": 0.5599,
"step": 48
},
{
"epoch": 0.62,
"learning_rate": 9.94537618380793e-05,
"loss": 0.5255,
"step": 49
},
{
"epoch": 0.63,
"learning_rate": 9.943031509146825e-05,
"loss": 0.5971,
"step": 50
},
{
"epoch": 0.65,
"learning_rate": 9.940637853030572e-05,
"loss": 0.5932,
"step": 51
},
{
"epoch": 0.66,
"learning_rate": 9.938195239178374e-05,
"loss": 0.5979,
"step": 52
},
{
"epoch": 0.67,
"learning_rate": 9.935703691794565e-05,
"loss": 0.496,
"step": 53
},
{
"epoch": 0.68,
"learning_rate": 9.933163235568367e-05,
"loss": 0.5341,
"step": 54
},
{
"epoch": 0.7,
"learning_rate": 9.930573895673657e-05,
"loss": 0.4661,
"step": 55
},
{
"epoch": 0.71,
"learning_rate": 9.927935697768698e-05,
"loss": 0.6222,
"step": 56
},
{
"epoch": 0.72,
"learning_rate": 9.925248667995907e-05,
"loss": 0.5015,
"step": 57
},
{
"epoch": 0.73,
"learning_rate": 9.922512832981584e-05,
"loss": 0.654,
"step": 58
},
{
"epoch": 0.75,
"learning_rate": 9.919728219835643e-05,
"loss": 0.4863,
"step": 59
},
{
"epoch": 0.76,
"learning_rate": 9.916894856151357e-05,
"loss": 0.6024,
"step": 60
},
{
"epoch": 0.77,
"learning_rate": 9.914012770005072e-05,
"loss": 0.4767,
"step": 61
},
{
"epoch": 0.78,
"learning_rate": 9.91108198995594e-05,
"loss": 0.5065,
"step": 62
},
{
"epoch": 0.8,
"learning_rate": 9.908102545045625e-05,
"loss": 0.6043,
"step": 63
},
{
"epoch": 0.81,
"learning_rate": 9.905074464798024e-05,
"loss": 0.6733,
"step": 64
},
{
"epoch": 0.82,
"learning_rate": 9.901997779218967e-05,
"loss": 0.522,
"step": 65
},
{
"epoch": 0.84,
"learning_rate": 9.898872518795932e-05,
"loss": 0.5205,
"step": 66
},
{
"epoch": 0.85,
"learning_rate": 9.895698714497724e-05,
"loss": 0.5216,
"step": 67
},
{
"epoch": 0.86,
"learning_rate": 9.892476397774186e-05,
"loss": 0.6014,
"step": 68
},
{
"epoch": 0.87,
"learning_rate": 9.889205600555877e-05,
"loss": 0.5321,
"step": 69
},
{
"epoch": 0.89,
"learning_rate": 9.885886355253758e-05,
"loss": 0.545,
"step": 70
},
{
"epoch": 0.9,
"learning_rate": 9.882518694758875e-05,
"loss": 0.4868,
"step": 71
},
{
"epoch": 0.91,
"learning_rate": 9.879102652442024e-05,
"loss": 0.5366,
"step": 72
},
{
"epoch": 0.92,
"learning_rate": 9.875638262153431e-05,
"loss": 0.5805,
"step": 73
},
{
"epoch": 0.94,
"learning_rate": 9.872125558222409e-05,
"loss": 0.585,
"step": 74
},
{
"epoch": 0.95,
"learning_rate": 9.868564575457023e-05,
"loss": 0.6058,
"step": 75
},
{
"epoch": 0.96,
"learning_rate": 9.864955349143734e-05,
"loss": 0.4402,
"step": 76
},
{
"epoch": 0.97,
"learning_rate": 9.861297915047069e-05,
"loss": 0.5349,
"step": 77
},
{
"epoch": 0.99,
"learning_rate": 9.857592309409247e-05,
"loss": 0.5443,
"step": 78
},
{
"epoch": 1.0,
"learning_rate": 9.853838568949831e-05,
"loss": 0.6198,
"step": 79
},
{
"epoch": 1.01,
"learning_rate": 9.850036730865364e-05,
"loss": 0.6008,
"step": 80
},
{
"epoch": 1.03,
"learning_rate": 9.846186832828989e-05,
"loss": 0.4168,
"step": 81
},
{
"epoch": 1.04,
"learning_rate": 9.842288912990096e-05,
"loss": 0.4738,
"step": 82
},
{
"epoch": 1.05,
"learning_rate": 9.838343009973925e-05,
"loss": 0.5146,
"step": 83
},
{
"epoch": 1.06,
"learning_rate": 9.83434916288119e-05,
"loss": 0.481,
"step": 84
},
{
"epoch": 1.08,
"learning_rate": 9.830307411287695e-05,
"loss": 0.5005,
"step": 85
},
{
"epoch": 1.09,
"learning_rate": 9.82621779524394e-05,
"loss": 0.4643,
"step": 86
},
{
"epoch": 1.1,
"learning_rate": 9.822080355274719e-05,
"loss": 0.5201,
"step": 87
},
{
"epoch": 1.11,
"learning_rate": 9.817895132378725e-05,
"loss": 0.4168,
"step": 88
},
{
"epoch": 1.13,
"learning_rate": 9.813662168028144e-05,
"loss": 0.4409,
"step": 89
},
{
"epoch": 1.14,
"learning_rate": 9.809381504168234e-05,
"loss": 0.4593,
"step": 90
},
{
"epoch": 1.15,
"learning_rate": 9.805053183216923e-05,
"loss": 0.4755,
"step": 91
},
{
"epoch": 1.16,
"learning_rate": 9.800677248064382e-05,
"loss": 0.4565,
"step": 92
},
{
"epoch": 1.18,
"learning_rate": 9.796253742072596e-05,
"loss": 0.4718,
"step": 93
},
{
"epoch": 1.19,
"learning_rate": 9.791782709074944e-05,
"loss": 0.4884,
"step": 94
},
{
"epoch": 1.2,
"learning_rate": 9.787264193375753e-05,
"loss": 0.5204,
"step": 95
},
{
"epoch": 1.22,
"learning_rate": 9.782698239749873e-05,
"loss": 0.4807,
"step": 96
},
{
"epoch": 1.23,
"learning_rate": 9.778084893442218e-05,
"loss": 0.4089,
"step": 97
},
{
"epoch": 1.24,
"learning_rate": 9.77342420016733e-05,
"loss": 0.4371,
"step": 98
},
{
"epoch": 1.25,
"learning_rate": 9.768716206108921e-05,
"loss": 0.4959,
"step": 99
},
{
"epoch": 1.27,
"learning_rate": 9.763960957919413e-05,
"loss": 0.4446,
"step": 100
},
{
"epoch": 1.28,
"learning_rate": 9.759158502719481e-05,
"loss": 0.4576,
"step": 101
},
{
"epoch": 1.29,
"learning_rate": 9.754308888097583e-05,
"loss": 0.3678,
"step": 102
},
{
"epoch": 1.3,
"learning_rate": 9.749412162109485e-05,
"loss": 0.3743,
"step": 103
},
{
"epoch": 1.32,
"learning_rate": 9.744468373277797e-05,
"loss": 0.4571,
"step": 104
},
{
"epoch": 1.33,
"learning_rate": 9.739477570591473e-05,
"loss": 0.3944,
"step": 105
},
{
"epoch": 1.34,
"learning_rate": 9.734439803505345e-05,
"loss": 0.3904,
"step": 106
},
{
"epoch": 1.35,
"learning_rate": 9.729355121939621e-05,
"loss": 0.494,
"step": 107
},
{
"epoch": 1.37,
"learning_rate": 9.724223576279395e-05,
"loss": 0.4904,
"step": 108
},
{
"epoch": 1.38,
"learning_rate": 9.719045217374143e-05,
"loss": 0.3727,
"step": 109
},
{
"epoch": 1.39,
"learning_rate": 9.713820096537225e-05,
"loss": 0.4916,
"step": 110
},
{
"epoch": 1.41,
"learning_rate": 9.708548265545375e-05,
"loss": 0.5386,
"step": 111
},
{
"epoch": 1.42,
"learning_rate": 9.703229776638185e-05,
"loss": 0.3815,
"step": 112
},
{
"epoch": 1.43,
"learning_rate": 9.697864682517592e-05,
"loss": 0.4243,
"step": 113
},
{
"epoch": 1.44,
"learning_rate": 9.692453036347351e-05,
"loss": 0.5205,
"step": 114
},
{
"epoch": 1.46,
"learning_rate": 9.686994891752508e-05,
"loss": 0.5158,
"step": 115
},
{
"epoch": 1.47,
"learning_rate": 9.681490302818874e-05,
"loss": 0.4424,
"step": 116
},
{
"epoch": 1.48,
"learning_rate": 9.675939324092486e-05,
"loss": 0.5825,
"step": 117
},
{
"epoch": 1.49,
"learning_rate": 9.670342010579065e-05,
"loss": 0.6082,
"step": 118
},
{
"epoch": 1.51,
"learning_rate": 9.664698417743475e-05,
"loss": 0.3913,
"step": 119
},
{
"epoch": 1.52,
"learning_rate": 9.659008601509168e-05,
"loss": 0.4178,
"step": 120
},
{
"epoch": 1.53,
"learning_rate": 9.653272618257631e-05,
"loss": 0.5543,
"step": 121
},
{
"epoch": 1.54,
"learning_rate": 9.647490524827834e-05,
"loss": 0.4127,
"step": 122
},
{
"epoch": 1.56,
"learning_rate": 9.641662378515659e-05,
"loss": 0.4079,
"step": 123
},
{
"epoch": 1.57,
"learning_rate": 9.635788237073334e-05,
"loss": 0.4811,
"step": 124
},
{
"epoch": 1.58,
"learning_rate": 9.629868158708861e-05,
"loss": 0.4229,
"step": 125
},
{
"epoch": 1.59,
"learning_rate": 9.623902202085444e-05,
"loss": 0.4332,
"step": 126
},
{
"epoch": 1.61,
"learning_rate": 9.617890426320899e-05,
"loss": 0.4451,
"step": 127
},
{
"epoch": 1.62,
"learning_rate": 9.611832890987076e-05,
"loss": 0.4018,
"step": 128
},
{
"epoch": 1.63,
"learning_rate": 9.605729656109265e-05,
"loss": 0.4175,
"step": 129
},
{
"epoch": 1.65,
"learning_rate": 9.599580782165598e-05,
"loss": 0.4841,
"step": 130
},
{
"epoch": 1.66,
"learning_rate": 9.593386330086458e-05,
"loss": 0.4353,
"step": 131
},
{
"epoch": 1.67,
"learning_rate": 9.587146361253868e-05,
"loss": 0.4262,
"step": 132
},
{
"epoch": 1.68,
"learning_rate": 9.580860937500884e-05,
"loss": 0.4572,
"step": 133
},
{
"epoch": 1.7,
"learning_rate": 9.57453012111099e-05,
"loss": 0.4856,
"step": 134
},
{
"epoch": 1.71,
"learning_rate": 9.568153974817464e-05,
"loss": 0.4451,
"step": 135
},
{
"epoch": 1.72,
"learning_rate": 9.561732561802778e-05,
"loss": 0.5691,
"step": 136
},
{
"epoch": 1.73,
"learning_rate": 9.555265945697953e-05,
"loss": 0.3989,
"step": 137
},
{
"epoch": 1.75,
"learning_rate": 9.548754190581939e-05,
"loss": 0.4935,
"step": 138
},
{
"epoch": 1.76,
"learning_rate": 9.542197360980978e-05,
"loss": 0.4637,
"step": 139
},
{
"epoch": 1.77,
"learning_rate": 9.53559552186796e-05,
"loss": 0.4895,
"step": 140
},
{
"epoch": 1.78,
"learning_rate": 9.528948738661784e-05,
"loss": 0.4625,
"step": 141
},
{
"epoch": 1.8,
"learning_rate": 9.522257077226717e-05,
"loss": 0.4409,
"step": 142
},
{
"epoch": 1.81,
"learning_rate": 9.51552060387172e-05,
"loss": 0.4123,
"step": 143
},
{
"epoch": 1.82,
"learning_rate": 9.508739385349812e-05,
"loss": 0.4801,
"step": 144
},
{
"epoch": 1.84,
"learning_rate": 9.501913488857399e-05,
"loss": 0.3965,
"step": 145
},
{
"epoch": 1.85,
"learning_rate": 9.49504298203361e-05,
"loss": 0.4027,
"step": 146
},
{
"epoch": 1.86,
"learning_rate": 9.488127932959625e-05,
"loss": 0.4779,
"step": 147
},
{
"epoch": 1.87,
"learning_rate": 9.481168410158003e-05,
"loss": 0.4168,
"step": 148
},
{
"epoch": 1.89,
"learning_rate": 9.474164482592002e-05,
"loss": 0.4352,
"step": 149
},
{
"epoch": 1.9,
"learning_rate": 9.467116219664894e-05,
"loss": 0.5395,
"step": 150
},
{
"epoch": 1.91,
"learning_rate": 9.460023691219277e-05,
"loss": 0.5874,
"step": 151
},
{
"epoch": 1.92,
"learning_rate": 9.45288696753639e-05,
"loss": 0.5014,
"step": 152
},
{
"epoch": 1.94,
"learning_rate": 9.445706119335407e-05,
"loss": 0.4106,
"step": 153
},
{
"epoch": 1.95,
"learning_rate": 9.438481217772744e-05,
"loss": 0.4717,
"step": 154
},
{
"epoch": 1.96,
"learning_rate": 9.431212334441343e-05,
"loss": 0.4105,
"step": 155
},
{
"epoch": 1.97,
"learning_rate": 9.423899541369978e-05,
"loss": 0.4421,
"step": 156
},
{
"epoch": 1.99,
"learning_rate": 9.41654291102253e-05,
"loss": 0.491,
"step": 157
},
{
"epoch": 2.0,
"learning_rate": 9.409142516297269e-05,
"loss": 0.4624,
"step": 158
},
{
"epoch": 2.01,
"learning_rate": 9.401698430526142e-05,
"loss": 0.2957,
"step": 159
},
{
"epoch": 2.03,
"learning_rate": 9.394210727474028e-05,
"loss": 0.3436,
"step": 160
},
{
"epoch": 2.04,
"learning_rate": 9.386679481338033e-05,
"loss": 0.3312,
"step": 161
},
{
"epoch": 2.05,
"learning_rate": 9.379104766746722e-05,
"loss": 0.3232,
"step": 162
},
{
"epoch": 2.06,
"learning_rate": 9.371486658759416e-05,
"loss": 0.3306,
"step": 163
},
{
"epoch": 2.08,
"learning_rate": 9.363825232865413e-05,
"loss": 0.3507,
"step": 164
},
{
"epoch": 2.09,
"learning_rate": 9.356120564983266e-05,
"loss": 0.3474,
"step": 165
},
{
"epoch": 2.1,
"learning_rate": 9.348372731460023e-05,
"loss": 0.3206,
"step": 166
},
{
"epoch": 2.11,
"learning_rate": 9.340581809070459e-05,
"loss": 0.2675,
"step": 167
},
{
"epoch": 2.13,
"learning_rate": 9.332747875016332e-05,
"loss": 0.3432,
"step": 168
},
{
"epoch": 2.14,
"learning_rate": 9.324871006925613e-05,
"loss": 0.3829,
"step": 169
},
{
"epoch": 2.15,
"learning_rate": 9.316951282851707e-05,
"loss": 0.3136,
"step": 170
},
{
"epoch": 2.16,
"learning_rate": 9.308988781272694e-05,
"loss": 0.3381,
"step": 171
},
{
"epoch": 2.18,
"learning_rate": 9.300983581090541e-05,
"loss": 0.3394,
"step": 172
},
{
"epoch": 2.19,
"learning_rate": 9.292935761630326e-05,
"loss": 0.3277,
"step": 173
},
{
"epoch": 2.2,
"learning_rate": 9.284845402639446e-05,
"loss": 0.3286,
"step": 174
},
{
"epoch": 2.22,
"learning_rate": 9.276712584286833e-05,
"loss": 0.3151,
"step": 175
},
{
"epoch": 2.23,
"learning_rate": 9.26853738716216e-05,
"loss": 0.3557,
"step": 176
},
{
"epoch": 2.24,
"learning_rate": 9.260319892275034e-05,
"loss": 0.2099,
"step": 177
},
{
"epoch": 2.25,
"learning_rate": 9.2520601810542e-05,
"loss": 0.3627,
"step": 178
},
{
"epoch": 2.27,
"learning_rate": 9.243758335346735e-05,
"loss": 0.285,
"step": 179
},
{
"epoch": 2.28,
"learning_rate": 9.235414437417234e-05,
"loss": 0.3189,
"step": 180
},
{
"epoch": 2.29,
"learning_rate": 9.227028569946996e-05,
"loss": 0.288,
"step": 181
},
{
"epoch": 2.3,
"learning_rate": 9.2186008160332e-05,
"loss": 0.2462,
"step": 182
},
{
"epoch": 2.32,
"learning_rate": 9.210131259188095e-05,
"loss": 0.2527,
"step": 183
},
{
"epoch": 2.33,
"learning_rate": 9.201619983338153e-05,
"loss": 0.3712,
"step": 184
},
{
"epoch": 2.34,
"learning_rate": 9.193067072823251e-05,
"loss": 0.3526,
"step": 185
},
{
"epoch": 2.35,
"learning_rate": 9.18447261239584e-05,
"loss": 0.2721,
"step": 186
},
{
"epoch": 2.37,
"learning_rate": 9.175836687220084e-05,
"loss": 0.2738,
"step": 187
},
{
"epoch": 2.38,
"learning_rate": 9.167159382871039e-05,
"loss": 0.3511,
"step": 188
},
{
"epoch": 2.39,
"learning_rate": 9.15844078533379e-05,
"loss": 0.3712,
"step": 189
},
{
"epoch": 2.41,
"learning_rate": 9.149680981002609e-05,
"loss": 0.3244,
"step": 190
},
{
"epoch": 2.42,
"learning_rate": 9.140880056680088e-05,
"loss": 0.3304,
"step": 191
},
{
"epoch": 2.43,
"learning_rate": 9.13203809957629e-05,
"loss": 0.3476,
"step": 192
},
{
"epoch": 2.44,
"learning_rate": 9.123155197307876e-05,
"loss": 0.2981,
"step": 193
},
{
"epoch": 2.46,
"learning_rate": 9.114231437897244e-05,
"loss": 0.2947,
"step": 194
},
{
"epoch": 2.47,
"learning_rate": 9.105266909771653e-05,
"loss": 0.2627,
"step": 195
},
{
"epoch": 2.48,
"learning_rate": 9.096261701762342e-05,
"loss": 0.306,
"step": 196
},
{
"epoch": 2.49,
"learning_rate": 9.087215903103662e-05,
"loss": 0.3326,
"step": 197
},
{
"epoch": 2.51,
"learning_rate": 9.078129603432181e-05,
"loss": 0.2348,
"step": 198
},
{
"epoch": 2.52,
"learning_rate": 9.069002892785797e-05,
"loss": 0.4223,
"step": 199
},
{
"epoch": 2.53,
"learning_rate": 9.059835861602853e-05,
"loss": 0.294,
"step": 200
},
{
"epoch": 2.54,
"learning_rate": 9.050628600721234e-05,
"loss": 0.269,
"step": 201
},
{
"epoch": 2.56,
"learning_rate": 9.041381201377468e-05,
"loss": 0.3375,
"step": 202
},
{
"epoch": 2.57,
"learning_rate": 9.032093755205822e-05,
"loss": 0.3022,
"step": 203
},
{
"epoch": 2.58,
"learning_rate": 9.0227663542374e-05,
"loss": 0.3297,
"step": 204
},
{
"epoch": 2.59,
"learning_rate": 9.013399090899217e-05,
"loss": 0.3108,
"step": 205
},
{
"epoch": 2.61,
"learning_rate": 9.003992058013302e-05,
"loss": 0.2948,
"step": 206
},
{
"epoch": 2.62,
"learning_rate": 8.994545348795759e-05,
"loss": 0.2719,
"step": 207
},
{
"epoch": 2.63,
"learning_rate": 8.985059056855858e-05,
"loss": 0.3116,
"step": 208
},
{
"epoch": 2.65,
"learning_rate": 8.975533276195102e-05,
"loss": 0.2567,
"step": 209
},
{
"epoch": 2.66,
"learning_rate": 8.965968101206291e-05,
"loss": 0.3751,
"step": 210
},
{
"epoch": 2.67,
"learning_rate": 8.956363626672595e-05,
"loss": 0.2538,
"step": 211
},
{
"epoch": 2.68,
"learning_rate": 8.94671994776661e-05,
"loss": 0.2973,
"step": 212
},
{
"epoch": 2.7,
"learning_rate": 8.937037160049416e-05,
"loss": 0.4477,
"step": 213
},
{
"epoch": 2.71,
"learning_rate": 8.927315359469626e-05,
"loss": 0.3517,
"step": 214
},
{
"epoch": 2.72,
"learning_rate": 8.917554642362443e-05,
"loss": 0.2779,
"step": 215
},
{
"epoch": 2.73,
"learning_rate": 8.907755105448704e-05,
"loss": 0.3196,
"step": 216
},
{
"epoch": 2.75,
"learning_rate": 8.89791684583391e-05,
"loss": 0.31,
"step": 217
},
{
"epoch": 2.76,
"learning_rate": 8.888039961007282e-05,
"loss": 0.2522,
"step": 218
},
{
"epoch": 2.77,
"learning_rate": 8.87812454884078e-05,
"loss": 0.2974,
"step": 219
},
{
"epoch": 2.78,
"learning_rate": 8.868170707588142e-05,
"loss": 0.2817,
"step": 220
},
{
"epoch": 2.8,
"learning_rate": 8.858178535883905e-05,
"loss": 0.3853,
"step": 221
},
{
"epoch": 2.81,
"learning_rate": 8.848148132742431e-05,
"loss": 0.2393,
"step": 222
},
{
"epoch": 2.82,
"learning_rate": 8.838079597556925e-05,
"loss": 0.3607,
"step": 223
},
{
"epoch": 2.84,
"learning_rate": 8.827973030098448e-05,
"loss": 0.3263,
"step": 224
},
{
"epoch": 2.85,
"learning_rate": 8.81782853051493e-05,
"loss": 0.3084,
"step": 225
},
{
"epoch": 2.86,
"learning_rate": 8.807646199330187e-05,
"loss": 0.3019,
"step": 226
},
{
"epoch": 2.87,
"learning_rate": 8.797426137442897e-05,
"loss": 0.3646,
"step": 227
},
{
"epoch": 2.89,
"learning_rate": 8.787168446125638e-05,
"loss": 0.3149,
"step": 228
},
{
"epoch": 2.9,
"learning_rate": 8.776873227023852e-05,
"loss": 0.326,
"step": 229
},
{
"epoch": 2.91,
"learning_rate": 8.766540582154859e-05,
"loss": 0.3216,
"step": 230
},
{
"epoch": 2.92,
"learning_rate": 8.756170613906833e-05,
"loss": 0.3348,
"step": 231
},
{
"epoch": 2.94,
"learning_rate": 8.745763425037797e-05,
"loss": 0.2524,
"step": 232
},
{
"epoch": 2.95,
"learning_rate": 8.735319118674596e-05,
"loss": 0.2725,
"step": 233
},
{
"epoch": 2.96,
"learning_rate": 8.724837798311882e-05,
"loss": 0.2733,
"step": 234
},
{
"epoch": 2.97,
"learning_rate": 8.714319567811088e-05,
"loss": 0.287,
"step": 235
},
{
"epoch": 2.99,
"learning_rate": 8.703764531399392e-05,
"loss": 0.2891,
"step": 236
},
{
"epoch": 3.0,
"learning_rate": 8.69317279366869e-05,
"loss": 0.2329,
"step": 237
},
{
"epoch": 3.01,
"learning_rate": 8.682544459574562e-05,
"loss": 0.1752,
"step": 238
},
{
"epoch": 3.03,
"learning_rate": 8.671879634435224e-05,
"loss": 0.1686,
"step": 239
},
{
"epoch": 3.04,
"learning_rate": 8.661178423930491e-05,
"loss": 0.2164,
"step": 240
},
{
"epoch": 3.05,
"learning_rate": 8.650440934100728e-05,
"loss": 0.1909,
"step": 241
},
{
"epoch": 3.06,
"learning_rate": 8.639667271345798e-05,
"loss": 0.1765,
"step": 242
},
{
"epoch": 3.08,
"learning_rate": 8.628857542424009e-05,
"loss": 0.1558,
"step": 243
},
{
"epoch": 3.09,
"learning_rate": 8.618011854451056e-05,
"loss": 0.1471,
"step": 244
},
{
"epoch": 3.1,
"learning_rate": 8.607130314898956e-05,
"loss": 0.175,
"step": 245
},
{
"epoch": 3.11,
"learning_rate": 8.596213031594991e-05,
"loss": 0.1692,
"step": 246
},
{
"epoch": 3.13,
"learning_rate": 8.585260112720631e-05,
"loss": 0.1176,
"step": 247
},
{
"epoch": 3.14,
"learning_rate": 8.57427166681047e-05,
"loss": 0.1773,
"step": 248
},
{
"epoch": 3.15,
"learning_rate": 8.56324780275114e-05,
"loss": 0.1821,
"step": 249
},
{
"epoch": 3.16,
"learning_rate": 8.552188629780244e-05,
"loss": 0.1471,
"step": 250
},
{
"epoch": 3.18,
"learning_rate": 8.541094257485265e-05,
"loss": 0.1949,
"step": 251
},
{
"epoch": 3.19,
"learning_rate": 8.529964795802485e-05,
"loss": 0.1234,
"step": 252
},
{
"epoch": 3.2,
"learning_rate": 8.518800355015892e-05,
"loss": 0.1625,
"step": 253
},
{
"epoch": 3.22,
"learning_rate": 8.507601045756085e-05,
"loss": 0.1417,
"step": 254
},
{
"epoch": 3.23,
"learning_rate": 8.49636697899919e-05,
"loss": 0.1088,
"step": 255
},
{
"epoch": 3.24,
"learning_rate": 8.485098266065744e-05,
"loss": 0.1524,
"step": 256
},
{
"epoch": 3.25,
"learning_rate": 8.473795018619604e-05,
"loss": 0.1711,
"step": 257
},
{
"epoch": 3.27,
"learning_rate": 8.462457348666835e-05,
"loss": 0.161,
"step": 258
},
{
"epoch": 3.28,
"learning_rate": 8.4510853685546e-05,
"loss": 0.1641,
"step": 259
},
{
"epoch": 3.29,
"learning_rate": 8.439679190970052e-05,
"loss": 0.1633,
"step": 260
},
{
"epoch": 3.3,
"learning_rate": 8.428238928939207e-05,
"loss": 0.1673,
"step": 261
},
{
"epoch": 3.32,
"learning_rate": 8.416764695825835e-05,
"loss": 0.1629,
"step": 262
},
{
"epoch": 3.33,
"learning_rate": 8.405256605330331e-05,
"loss": 0.177,
"step": 263
},
{
"epoch": 3.34,
"learning_rate": 8.39371477148859e-05,
"loss": 0.1188,
"step": 264
},
{
"epoch": 3.35,
"learning_rate": 8.382139308670875e-05,
"loss": 0.1934,
"step": 265
},
{
"epoch": 3.37,
"learning_rate": 8.370530331580686e-05,
"loss": 0.1049,
"step": 266
},
{
"epoch": 3.38,
"learning_rate": 8.35888795525362e-05,
"loss": 0.1376,
"step": 267
},
{
"epoch": 3.39,
"learning_rate": 8.347212295056239e-05,
"loss": 0.1574,
"step": 268
},
{
"epoch": 3.41,
"learning_rate": 8.335503466684915e-05,
"loss": 0.1222,
"step": 269
},
{
"epoch": 3.42,
"learning_rate": 8.323761586164695e-05,
"loss": 0.1634,
"step": 270
},
{
"epoch": 3.43,
"learning_rate": 8.311986769848141e-05,
"loss": 0.1764,
"step": 271
},
{
"epoch": 3.44,
"learning_rate": 8.300179134414188e-05,
"loss": 0.1621,
"step": 272
},
{
"epoch": 3.46,
"learning_rate": 8.288338796866976e-05,
"loss": 0.1549,
"step": 273
},
{
"epoch": 3.47,
"learning_rate": 8.276465874534702e-05,
"loss": 0.1053,
"step": 274
},
{
"epoch": 3.48,
"learning_rate": 8.264560485068446e-05,
"loss": 0.1224,
"step": 275
},
{
"epoch": 3.49,
"learning_rate": 8.252622746441021e-05,
"loss": 0.1361,
"step": 276
},
{
"epoch": 3.51,
"learning_rate": 8.240652776945781e-05,
"loss": 0.1753,
"step": 277
},
{
"epoch": 3.52,
"learning_rate": 8.228650695195472e-05,
"loss": 0.1539,
"step": 278
},
{
"epoch": 3.53,
"learning_rate": 8.216616620121043e-05,
"loss": 0.1603,
"step": 279
},
{
"epoch": 3.54,
"learning_rate": 8.204550670970469e-05,
"loss": 0.1519,
"step": 280
},
{
"epoch": 3.56,
"learning_rate": 8.192452967307576e-05,
"loss": 0.1494,
"step": 281
},
{
"epoch": 3.57,
"learning_rate": 8.180323629010848e-05,
"loss": 0.1695,
"step": 282
},
{
"epoch": 3.58,
"learning_rate": 8.168162776272244e-05,
"loss": 0.1384,
"step": 283
},
{
"epoch": 3.59,
"learning_rate": 8.155970529596006e-05,
"loss": 0.1552,
"step": 284
},
{
"epoch": 3.61,
"learning_rate": 8.143747009797464e-05,
"loss": 0.166,
"step": 285
},
{
"epoch": 3.62,
"learning_rate": 8.131492338001839e-05,
"loss": 0.2007,
"step": 286
},
{
"epoch": 3.63,
"learning_rate": 8.119206635643045e-05,
"loss": 0.1331,
"step": 287
},
{
"epoch": 3.65,
"learning_rate": 8.106890024462481e-05,
"loss": 0.2133,
"step": 288
},
{
"epoch": 3.66,
"learning_rate": 8.094542626507828e-05,
"loss": 0.1507,
"step": 289
},
{
"epoch": 3.67,
"learning_rate": 8.082164564131845e-05,
"loss": 0.1465,
"step": 290
},
{
"epoch": 3.68,
"learning_rate": 8.069755959991142e-05,
"loss": 0.1344,
"step": 291
},
{
"epoch": 3.7,
"learning_rate": 8.057316937044977e-05,
"loss": 0.1623,
"step": 292
},
{
"epoch": 3.71,
"learning_rate": 8.044847618554034e-05,
"loss": 0.1823,
"step": 293
},
{
"epoch": 3.72,
"learning_rate": 8.032348128079203e-05,
"loss": 0.1191,
"step": 294
},
{
"epoch": 3.73,
"learning_rate": 8.019818589480352e-05,
"loss": 0.1536,
"step": 295
},
{
"epoch": 3.75,
"learning_rate": 8.0072591269151e-05,
"loss": 0.1745,
"step": 296
},
{
"epoch": 3.76,
"learning_rate": 7.994669864837594e-05,
"loss": 0.1265,
"step": 297
},
{
"epoch": 3.77,
"learning_rate": 7.982050927997264e-05,
"loss": 0.1475,
"step": 298
},
{
"epoch": 3.78,
"learning_rate": 7.969402441437594e-05,
"loss": 0.1597,
"step": 299
},
{
"epoch": 3.8,
"learning_rate": 7.956724530494887e-05,
"loss": 0.1716,
"step": 300
},
{
"epoch": 3.81,
"learning_rate": 7.944017320797013e-05,
"loss": 0.1569,
"step": 301
},
{
"epoch": 3.82,
"learning_rate": 7.931280938262169e-05,
"loss": 0.1696,
"step": 302
},
{
"epoch": 3.84,
"learning_rate": 7.918515509097634e-05,
"loss": 0.1879,
"step": 303
},
{
"epoch": 3.85,
"learning_rate": 7.905721159798513e-05,
"loss": 0.1568,
"step": 304
},
{
"epoch": 3.86,
"learning_rate": 7.89289801714649e-05,
"loss": 0.1276,
"step": 305
},
{
"epoch": 3.87,
"learning_rate": 7.880046208208563e-05,
"loss": 0.1508,
"step": 306
},
{
"epoch": 3.89,
"learning_rate": 7.867165860335792e-05,
"loss": 0.1641,
"step": 307
},
{
"epoch": 3.9,
"learning_rate": 7.854257101162037e-05,
"loss": 0.1571,
"step": 308
},
{
"epoch": 3.91,
"learning_rate": 7.841320058602688e-05,
"loss": 0.1642,
"step": 309
},
{
"epoch": 3.92,
"learning_rate": 7.828354860853399e-05,
"loss": 0.2288,
"step": 310
},
{
"epoch": 3.94,
"learning_rate": 7.815361636388827e-05,
"loss": 0.1195,
"step": 311
},
{
"epoch": 3.95,
"learning_rate": 7.802340513961342e-05,
"loss": 0.1699,
"step": 312
},
{
"epoch": 3.96,
"learning_rate": 7.789291622599767e-05,
"loss": 0.1761,
"step": 313
},
{
"epoch": 3.97,
"learning_rate": 7.776215091608085e-05,
"loss": 0.2214,
"step": 314
},
{
"epoch": 3.99,
"learning_rate": 7.763111050564178e-05,
"loss": 0.2109,
"step": 315
},
{
"epoch": 4.0,
"learning_rate": 7.749979629318516e-05,
"loss": 0.1801,
"step": 316
},
{
"epoch": 4.01,
"learning_rate": 7.736820957992895e-05,
"loss": 0.0728,
"step": 317
},
{
"epoch": 4.03,
"learning_rate": 7.723635166979133e-05,
"loss": 0.0642,
"step": 318
},
{
"epoch": 4.04,
"learning_rate": 7.710422386937784e-05,
"loss": 0.0868,
"step": 319
},
{
"epoch": 4.05,
"learning_rate": 7.697182748796841e-05,
"loss": 0.0672,
"step": 320
},
{
"epoch": 4.06,
"learning_rate": 7.683916383750436e-05,
"loss": 0.0645,
"step": 321
},
{
"epoch": 4.08,
"learning_rate": 7.670623423257548e-05,
"loss": 0.0746,
"step": 322
},
{
"epoch": 4.09,
"learning_rate": 7.657303999040693e-05,
"loss": 0.0801,
"step": 323
},
{
"epoch": 4.1,
"learning_rate": 7.64395824308462e-05,
"loss": 0.0557,
"step": 324
},
{
"epoch": 4.11,
"learning_rate": 7.630586287635008e-05,
"loss": 0.061,
"step": 325
},
{
"epoch": 4.13,
"learning_rate": 7.617188265197148e-05,
"loss": 0.0666,
"step": 326
},
{
"epoch": 4.14,
"learning_rate": 7.603764308534636e-05,
"loss": 0.0495,
"step": 327
},
{
"epoch": 4.15,
"learning_rate": 7.590314550668054e-05,
"loss": 0.0591,
"step": 328
},
{
"epoch": 4.16,
"learning_rate": 7.576839124873653e-05,
"loss": 0.0914,
"step": 329
},
{
"epoch": 4.18,
"learning_rate": 7.563338164682036e-05,
"loss": 0.1023,
"step": 330
},
{
"epoch": 4.19,
"learning_rate": 7.549811803876825e-05,
"loss": 0.1043,
"step": 331
},
{
"epoch": 4.2,
"learning_rate": 7.536260176493348e-05,
"loss": 0.054,
"step": 332
},
{
"epoch": 4.22,
"learning_rate": 7.5226834168173e-05,
"loss": 0.0712,
"step": 333
},
{
"epoch": 4.23,
"learning_rate": 7.509081659383417e-05,
"loss": 0.0814,
"step": 334
},
{
"epoch": 4.24,
"learning_rate": 7.495455038974146e-05,
"loss": 0.0573,
"step": 335
},
{
"epoch": 4.25,
"learning_rate": 7.481803690618304e-05,
"loss": 0.0603,
"step": 336
},
{
"epoch": 4.27,
"learning_rate": 7.46812774958974e-05,
"loss": 0.0565,
"step": 337
},
{
"epoch": 4.28,
"learning_rate": 7.454427351405999e-05,
"loss": 0.063,
"step": 338
},
{
"epoch": 4.29,
"learning_rate": 7.440702631826977e-05,
"loss": 0.0666,
"step": 339
},
{
"epoch": 4.3,
"learning_rate": 7.426953726853574e-05,
"loss": 0.0669,
"step": 340
},
{
"epoch": 4.32,
"learning_rate": 7.413180772726348e-05,
"loss": 0.0812,
"step": 341
},
{
"epoch": 4.33,
"learning_rate": 7.399383905924165e-05,
"loss": 0.081,
"step": 342
},
{
"epoch": 4.34,
"learning_rate": 7.385563263162847e-05,
"loss": 0.0697,
"step": 343
},
{
"epoch": 4.35,
"learning_rate": 7.371718981393815e-05,
"loss": 0.0639,
"step": 344
},
{
"epoch": 4.37,
"learning_rate": 7.357851197802735e-05,
"loss": 0.0792,
"step": 345
},
{
"epoch": 4.38,
"learning_rate": 7.343960049808156e-05,
"loss": 0.0665,
"step": 346
},
{
"epoch": 4.39,
"learning_rate": 7.330045675060149e-05,
"loss": 0.0593,
"step": 347
},
{
"epoch": 4.41,
"learning_rate": 7.316108211438945e-05,
"loss": 0.0307,
"step": 348
},
{
"epoch": 4.42,
"learning_rate": 7.302147797053569e-05,
"loss": 0.0656,
"step": 349
},
{
"epoch": 4.43,
"learning_rate": 7.288164570240463e-05,
"loss": 0.049,
"step": 350
},
{
"epoch": 4.44,
"learning_rate": 7.274158669562126e-05,
"loss": 0.0653,
"step": 351
},
{
"epoch": 4.46,
"learning_rate": 7.26013023380574e-05,
"loss": 0.0668,
"step": 352
},
{
"epoch": 4.47,
"learning_rate": 7.246079401981784e-05,
"loss": 0.0894,
"step": 353
},
{
"epoch": 4.48,
"learning_rate": 7.232006313322667e-05,
"loss": 0.0641,
"step": 354
},
{
"epoch": 4.49,
"learning_rate": 7.217911107281352e-05,
"loss": 0.0905,
"step": 355
},
{
"epoch": 4.51,
"learning_rate": 7.203793923529956e-05,
"loss": 0.0753,
"step": 356
},
{
"epoch": 4.52,
"learning_rate": 7.189654901958385e-05,
"loss": 0.0737,
"step": 357
},
{
"epoch": 4.53,
"learning_rate": 7.175494182672939e-05,
"loss": 0.0828,
"step": 358
},
{
"epoch": 4.54,
"learning_rate": 7.161311905994922e-05,
"loss": 0.0597,
"step": 359
},
{
"epoch": 4.56,
"learning_rate": 7.147108212459257e-05,
"loss": 0.0623,
"step": 360
},
{
"epoch": 4.57,
"learning_rate": 7.13288324281309e-05,
"loss": 0.0751,
"step": 361
},
{
"epoch": 4.58,
"learning_rate": 7.118637138014396e-05,
"loss": 0.0586,
"step": 362
},
{
"epoch": 4.59,
"learning_rate": 7.104370039230583e-05,
"loss": 0.0794,
"step": 363
},
{
"epoch": 4.61,
"learning_rate": 7.090082087837092e-05,
"loss": 0.0554,
"step": 364
},
{
"epoch": 4.62,
"learning_rate": 7.075773425415994e-05,
"loss": 0.0601,
"step": 365
},
{
"epoch": 4.63,
"learning_rate": 7.061444193754596e-05,
"loss": 0.0867,
"step": 366
},
{
"epoch": 4.65,
"learning_rate": 7.047094534844023e-05,
"loss": 0.0828,
"step": 367
},
{
"epoch": 4.66,
"learning_rate": 7.032724590877821e-05,
"loss": 0.0879,
"step": 368
},
{
"epoch": 4.67,
"learning_rate": 7.018334504250545e-05,
"loss": 0.068,
"step": 369
},
{
"epoch": 4.68,
"learning_rate": 7.003924417556343e-05,
"loss": 0.0586,
"step": 370
},
{
"epoch": 4.7,
"learning_rate": 6.989494473587554e-05,
"loss": 0.0971,
"step": 371
},
{
"epoch": 4.71,
"learning_rate": 6.975044815333282e-05,
"loss": 0.0864,
"step": 372
},
{
"epoch": 4.72,
"learning_rate": 6.960575585977984e-05,
"loss": 0.0482,
"step": 373
},
{
"epoch": 4.73,
"learning_rate": 6.946086928900054e-05,
"loss": 0.0812,
"step": 374
},
{
"epoch": 4.75,
"learning_rate": 6.931578987670396e-05,
"loss": 0.0756,
"step": 375
},
{
"epoch": 4.76,
"learning_rate": 6.917051906051006e-05,
"loss": 0.0585,
"step": 376
},
{
"epoch": 4.77,
"learning_rate": 6.902505827993541e-05,
"loss": 0.0653,
"step": 377
},
{
"epoch": 4.78,
"learning_rate": 6.887940897637908e-05,
"loss": 0.0783,
"step": 378
},
{
"epoch": 4.8,
"learning_rate": 6.873357259310815e-05,
"loss": 0.0849,
"step": 379
},
{
"epoch": 4.81,
"learning_rate": 6.858755057524354e-05,
"loss": 0.0642,
"step": 380
},
{
"epoch": 4.82,
"learning_rate": 6.844134436974567e-05,
"loss": 0.0651,
"step": 381
},
{
"epoch": 4.84,
"learning_rate": 6.829495542540013e-05,
"loss": 0.0511,
"step": 382
},
{
"epoch": 4.85,
"learning_rate": 6.814838519280324e-05,
"loss": 0.0713,
"step": 383
},
{
"epoch": 4.86,
"learning_rate": 6.80016351243478e-05,
"loss": 0.0853,
"step": 384
},
{
"epoch": 4.87,
"learning_rate": 6.785470667420862e-05,
"loss": 0.0777,
"step": 385
},
{
"epoch": 4.89,
"learning_rate": 6.77076012983281e-05,
"loss": 0.0714,
"step": 386
},
{
"epoch": 4.9,
"learning_rate": 6.75603204544019e-05,
"loss": 0.051,
"step": 387
},
{
"epoch": 4.91,
"learning_rate": 6.741286560186437e-05,
"loss": 0.0831,
"step": 388
},
{
"epoch": 4.92,
"learning_rate": 6.726523820187413e-05,
"loss": 0.0613,
"step": 389
},
{
"epoch": 4.94,
"learning_rate": 6.711743971729967e-05,
"loss": 0.092,
"step": 390
},
{
"epoch": 4.95,
"learning_rate": 6.696947161270476e-05,
"loss": 0.0457,
"step": 391
},
{
"epoch": 4.96,
"learning_rate": 6.682133535433393e-05,
"loss": 0.0491,
"step": 392
},
{
"epoch": 4.97,
"learning_rate": 6.667303241009803e-05,
"loss": 0.0788,
"step": 393
},
{
"epoch": 4.99,
"learning_rate": 6.652456424955963e-05,
"loss": 0.0598,
"step": 394
},
{
"epoch": 5.0,
"learning_rate": 6.637593234391843e-05,
"loss": 0.0446,
"step": 395
},
{
"epoch": 5.01,
"learning_rate": 6.622713816599673e-05,
"loss": 0.02,
"step": 396
},
{
"epoch": 5.03,
"learning_rate": 6.60781831902248e-05,
"loss": 0.03,
"step": 397
},
{
"epoch": 5.04,
"learning_rate": 6.592906889262632e-05,
"loss": 0.0296,
"step": 398
},
{
"epoch": 5.05,
"learning_rate": 6.577979675080369e-05,
"loss": 0.0267,
"step": 399
},
{
"epoch": 5.06,
"learning_rate": 6.563036824392344e-05,
"loss": 0.0358,
"step": 400
},
{
"epoch": 5.08,
"learning_rate": 6.548078485270152e-05,
"loss": 0.0387,
"step": 401
},
{
"epoch": 5.09,
"learning_rate": 6.533104805938873e-05,
"loss": 0.0335,
"step": 402
},
{
"epoch": 5.1,
"learning_rate": 6.518115934775585e-05,
"loss": 0.0275,
"step": 403
},
{
"epoch": 5.11,
"learning_rate": 6.503112020307916e-05,
"loss": 0.0258,
"step": 404
},
{
"epoch": 5.13,
"learning_rate": 6.488093211212555e-05,
"loss": 0.0534,
"step": 405
},
{
"epoch": 5.14,
"learning_rate": 6.473059656313782e-05,
"loss": 0.0176,
"step": 406
},
{
"epoch": 5.15,
"learning_rate": 6.458011504582005e-05,
"loss": 0.025,
"step": 407
},
{
"epoch": 5.16,
"learning_rate": 6.442948905132266e-05,
"loss": 0.0172,
"step": 408
},
{
"epoch": 5.18,
"learning_rate": 6.427872007222777e-05,
"loss": 0.0312,
"step": 409
},
{
"epoch": 5.19,
"learning_rate": 6.412780960253436e-05,
"loss": 0.0279,
"step": 410
},
{
"epoch": 5.2,
"learning_rate": 6.397675913764347e-05,
"loss": 0.0279,
"step": 411
},
{
"epoch": 5.22,
"learning_rate": 6.382557017434332e-05,
"loss": 0.0198,
"step": 412
},
{
"epoch": 5.23,
"learning_rate": 6.367424421079463e-05,
"loss": 0.0404,
"step": 413
},
{
"epoch": 5.24,
"learning_rate": 6.352278274651561e-05,
"loss": 0.0266,
"step": 414
},
{
"epoch": 5.25,
"learning_rate": 6.337118728236721e-05,
"loss": 0.0341,
"step": 415
},
{
"epoch": 5.27,
"learning_rate": 6.321945932053822e-05,
"loss": 0.0205,
"step": 416
},
{
"epoch": 5.28,
"learning_rate": 6.306760036453035e-05,
"loss": 0.0341,
"step": 417
},
{
"epoch": 5.29,
"learning_rate": 6.291561191914333e-05,
"loss": 0.0398,
"step": 418
},
{
"epoch": 5.3,
"learning_rate": 6.276349549046007e-05,
"loss": 0.0287,
"step": 419
},
{
"epoch": 5.32,
"learning_rate": 6.261125258583171e-05,
"loss": 0.0491,
"step": 420
},
{
"epoch": 5.33,
"learning_rate": 6.245888471386263e-05,
"loss": 0.0235,
"step": 421
},
{
"epoch": 5.34,
"learning_rate": 6.230639338439549e-05,
"loss": 0.0312,
"step": 422
},
{
"epoch": 5.35,
"learning_rate": 6.215378010849641e-05,
"loss": 0.0413,
"step": 423
},
{
"epoch": 5.37,
"learning_rate": 6.200104639843985e-05,
"loss": 0.0316,
"step": 424
},
{
"epoch": 5.38,
"learning_rate": 6.184819376769364e-05,
"loss": 0.044,
"step": 425
},
{
"epoch": 5.39,
"learning_rate": 6.169522373090412e-05,
"loss": 0.0233,
"step": 426
},
{
"epoch": 5.41,
"learning_rate": 6.154213780388092e-05,
"loss": 0.0152,
"step": 427
},
{
"epoch": 5.42,
"learning_rate": 6.138893750358212e-05,
"loss": 0.0589,
"step": 428
},
{
"epoch": 5.43,
"learning_rate": 6.123562434809912e-05,
"loss": 0.0534,
"step": 429
},
{
"epoch": 5.44,
"learning_rate": 6.108219985664161e-05,
"loss": 0.0453,
"step": 430
},
{
"epoch": 5.46,
"learning_rate": 6.0928665549522554e-05,
"loss": 0.0252,
"step": 431
},
{
"epoch": 5.47,
"learning_rate": 6.0775022948143115e-05,
"loss": 0.0337,
"step": 432
},
{
"epoch": 5.48,
"learning_rate": 6.06212735749775e-05,
"loss": 0.0406,
"step": 433
},
{
"epoch": 5.49,
"learning_rate": 6.046741895355802e-05,
"loss": 0.044,
"step": 434
},
{
"epoch": 5.51,
"learning_rate": 6.031346060845986e-05,
"loss": 0.0646,
"step": 435
},
{
"epoch": 5.52,
"learning_rate": 6.015940006528602e-05,
"loss": 0.0238,
"step": 436
},
{
"epoch": 5.53,
"learning_rate": 6.0005238850652234e-05,
"loss": 0.0517,
"step": 437
},
{
"epoch": 5.54,
"learning_rate": 5.9850978492171794e-05,
"loss": 0.0274,
"step": 438
},
{
"epoch": 5.56,
"learning_rate": 5.96966205184404e-05,
"loss": 0.0247,
"step": 439
},
{
"epoch": 5.57,
"learning_rate": 5.954216645902109e-05,
"loss": 0.0648,
"step": 440
},
{
"epoch": 5.58,
"learning_rate": 5.9387617844429e-05,
"loss": 0.0282,
"step": 441
},
{
"epoch": 5.59,
"learning_rate": 5.923297620611623e-05,
"loss": 0.0537,
"step": 442
},
{
"epoch": 5.61,
"learning_rate": 5.907824307645669e-05,
"loss": 0.048,
"step": 443
},
{
"epoch": 5.62,
"learning_rate": 5.892341998873089e-05,
"loss": 0.0227,
"step": 444
},
{
"epoch": 5.63,
"learning_rate": 5.876850847711073e-05,
"loss": 0.0211,
"step": 445
},
{
"epoch": 5.65,
"learning_rate": 5.861351007664434e-05,
"loss": 0.0318,
"step": 446
},
{
"epoch": 5.66,
"learning_rate": 5.845842632324088e-05,
"loss": 0.0225,
"step": 447
},
{
"epoch": 5.67,
"learning_rate": 5.83032587536552e-05,
"loss": 0.0482,
"step": 448
},
{
"epoch": 5.68,
"learning_rate": 5.814800890547278e-05,
"loss": 0.0543,
"step": 449
},
{
"epoch": 5.7,
"learning_rate": 5.799267831709442e-05,
"loss": 0.0264,
"step": 450
},
{
"epoch": 5.71,
"learning_rate": 5.78372685277209e-05,
"loss": 0.0311,
"step": 451
},
{
"epoch": 5.72,
"learning_rate": 5.7681781077337905e-05,
"loss": 0.0524,
"step": 452
},
{
"epoch": 5.73,
"learning_rate": 5.752621750670068e-05,
"loss": 0.0186,
"step": 453
},
{
"epoch": 5.75,
"learning_rate": 5.737057935731868e-05,
"loss": 0.0269,
"step": 454
},
{
"epoch": 5.76,
"learning_rate": 5.721486817144044e-05,
"loss": 0.0312,
"step": 455
},
{
"epoch": 5.77,
"learning_rate": 5.705908549203823e-05,
"loss": 0.0356,
"step": 456
},
{
"epoch": 5.78,
"learning_rate": 5.690323286279274e-05,
"loss": 0.0507,
"step": 457
},
{
"epoch": 5.8,
"learning_rate": 5.674731182807781e-05,
"loss": 0.0399,
"step": 458
},
{
"epoch": 5.81,
"learning_rate": 5.659132393294514e-05,
"loss": 0.0364,
"step": 459
},
{
"epoch": 5.82,
"learning_rate": 5.643527072310891e-05,
"loss": 0.0453,
"step": 460
},
{
"epoch": 5.84,
"learning_rate": 5.627915374493061e-05,
"loss": 0.021,
"step": 461
},
{
"epoch": 5.85,
"learning_rate": 5.612297454540352e-05,
"loss": 0.0155,
"step": 462
},
{
"epoch": 5.86,
"learning_rate": 5.596673467213756e-05,
"loss": 0.0336,
"step": 463
},
{
"epoch": 5.87,
"learning_rate": 5.581043567334383e-05,
"loss": 0.0245,
"step": 464
},
{
"epoch": 5.89,
"learning_rate": 5.5654079097819345e-05,
"loss": 0.0359,
"step": 465
},
{
"epoch": 5.9,
"learning_rate": 5.5497666494931654e-05,
"loss": 0.0245,
"step": 466
},
{
"epoch": 5.91,
"learning_rate": 5.5341199414603493e-05,
"loss": 0.0246,
"step": 467
},
{
"epoch": 5.92,
"learning_rate": 5.518467940729739e-05,
"loss": 0.0222,
"step": 468
},
{
"epoch": 5.94,
"learning_rate": 5.502810802400039e-05,
"loss": 0.0317,
"step": 469
},
{
"epoch": 5.95,
"learning_rate": 5.487148681620862e-05,
"loss": 0.0199,
"step": 470
},
{
"epoch": 5.96,
"learning_rate": 5.4714817335911894e-05,
"loss": 0.0559,
"step": 471
},
{
"epoch": 5.97,
"learning_rate": 5.455810113557839e-05,
"loss": 0.0405,
"step": 472
},
{
"epoch": 5.99,
"learning_rate": 5.440133976813926e-05,
"loss": 0.0416,
"step": 473
},
{
"epoch": 6.0,
"learning_rate": 5.4244534786973214e-05,
"loss": 0.0208,
"step": 474
},
{
"epoch": 6.01,
"learning_rate": 5.40876877458911e-05,
"loss": 0.0295,
"step": 475
},
{
"epoch": 6.03,
"learning_rate": 5.3930800199120616e-05,
"loss": 0.0106,
"step": 476
},
{
"epoch": 6.04,
"learning_rate": 5.377387370129079e-05,
"loss": 0.0165,
"step": 477
},
{
"epoch": 6.05,
"learning_rate": 5.361690980741663e-05,
"loss": 0.0098,
"step": 478
},
{
"epoch": 6.06,
"learning_rate": 5.345991007288371e-05,
"loss": 0.0259,
"step": 479
},
{
"epoch": 6.08,
"learning_rate": 5.330287605343279e-05,
"loss": 0.0065,
"step": 480
},
{
"epoch": 6.09,
"learning_rate": 5.314580930514431e-05,
"loss": 0.0162,
"step": 481
},
{
"epoch": 6.1,
"learning_rate": 5.298871138442307e-05,
"loss": 0.0107,
"step": 482
},
{
"epoch": 6.11,
"learning_rate": 5.283158384798275e-05,
"loss": 0.0126,
"step": 483
},
{
"epoch": 6.13,
"learning_rate": 5.267442825283048e-05,
"loss": 0.0275,
"step": 484
},
{
"epoch": 6.14,
"learning_rate": 5.2517246156251455e-05,
"loss": 0.022,
"step": 485
},
{
"epoch": 6.15,
"learning_rate": 5.236003911579345e-05,
"loss": 0.0101,
"step": 486
},
{
"epoch": 6.16,
"learning_rate": 5.220280868925145e-05,
"loss": 0.0189,
"step": 487
},
{
"epoch": 6.18,
"learning_rate": 5.204555643465215e-05,
"loss": 0.0205,
"step": 488
},
{
"epoch": 6.19,
"learning_rate": 5.1888283910238555e-05,
"loss": 0.0123,
"step": 489
},
{
"epoch": 6.2,
"learning_rate": 5.173099267445451e-05,
"loss": 0.015,
"step": 490
},
{
"epoch": 6.22,
"learning_rate": 5.157368428592933e-05,
"loss": 0.0116,
"step": 491
},
{
"epoch": 6.23,
"learning_rate": 5.1416360303462206e-05,
"loss": 0.0219,
"step": 492
},
{
"epoch": 6.24,
"learning_rate": 5.125902228600693e-05,
"loss": 0.0095,
"step": 493
},
{
"epoch": 6.25,
"learning_rate": 5.110167179265636e-05,
"loss": 0.0287,
"step": 494
},
{
"epoch": 6.27,
"learning_rate": 5.094431038262693e-05,
"loss": 0.0184,
"step": 495
},
{
"epoch": 6.28,
"learning_rate": 5.078693961524329e-05,
"loss": 0.0118,
"step": 496
},
{
"epoch": 6.29,
"learning_rate": 5.062956104992285e-05,
"loss": 0.0147,
"step": 497
},
{
"epoch": 6.3,
"learning_rate": 5.0472176246160184e-05,
"loss": 0.02,
"step": 498
},
{
"epoch": 6.32,
"learning_rate": 5.031478676351179e-05,
"loss": 0.0126,
"step": 499
},
{
"epoch": 6.33,
"learning_rate": 5.01573941615805e-05,
"loss": 0.0118,
"step": 500
},
{
"epoch": 6.34,
"learning_rate": 5e-05,
"loss": 0.0133,
"step": 501
},
{
"epoch": 6.35,
"learning_rate": 4.984260583841953e-05,
"loss": 0.0237,
"step": 502
},
{
"epoch": 6.37,
"learning_rate": 4.9685213236488216e-05,
"loss": 0.0089,
"step": 503
},
{
"epoch": 6.38,
"learning_rate": 4.9527823753839834e-05,
"loss": 0.0106,
"step": 504
},
{
"epoch": 6.39,
"learning_rate": 4.937043895007717e-05,
"loss": 0.008,
"step": 505
},
{
"epoch": 6.41,
"learning_rate": 4.9213060384756716e-05,
"loss": 0.0117,
"step": 506
},
{
"epoch": 6.42,
"learning_rate": 4.9055689617373084e-05,
"loss": 0.0253,
"step": 507
},
{
"epoch": 6.43,
"learning_rate": 4.8898328207343666e-05,
"loss": 0.0134,
"step": 508
},
{
"epoch": 6.44,
"learning_rate": 4.874097771399308e-05,
"loss": 0.0133,
"step": 509
},
{
"epoch": 6.46,
"learning_rate": 4.858363969653781e-05,
"loss": 0.0103,
"step": 510
},
{
"epoch": 6.47,
"learning_rate": 4.8426315714070684e-05,
"loss": 0.0247,
"step": 511
},
{
"epoch": 6.48,
"learning_rate": 4.8269007325545506e-05,
"loss": 0.0187,
"step": 512
},
{
"epoch": 6.49,
"learning_rate": 4.8111716089761456e-05,
"loss": 0.0308,
"step": 513
},
{
"epoch": 6.51,
"learning_rate": 4.7954443565347865e-05,
"loss": 0.0157,
"step": 514
},
{
"epoch": 6.52,
"learning_rate": 4.779719131074857e-05,
"loss": 0.0131,
"step": 515
},
{
"epoch": 6.53,
"learning_rate": 4.7639960884206576e-05,
"loss": 0.019,
"step": 516
},
{
"epoch": 6.54,
"learning_rate": 4.7482753843748564e-05,
"loss": 0.0252,
"step": 517
},
{
"epoch": 6.56,
"learning_rate": 4.7325571747169545e-05,
"loss": 0.0105,
"step": 518
},
{
"epoch": 6.57,
"learning_rate": 4.716841615201726e-05,
"loss": 0.0082,
"step": 519
},
{
"epoch": 6.58,
"learning_rate": 4.7011288615576934e-05,
"loss": 0.0233,
"step": 520
},
{
"epoch": 6.59,
"learning_rate": 4.6854190694855694e-05,
"loss": 0.006,
"step": 521
},
{
"epoch": 6.61,
"learning_rate": 4.6697123946567227e-05,
"loss": 0.0242,
"step": 522
},
{
"epoch": 6.62,
"learning_rate": 4.65400899271163e-05,
"loss": 0.0136,
"step": 523
},
{
"epoch": 6.63,
"learning_rate": 4.63830901925834e-05,
"loss": 0.0145,
"step": 524
},
{
"epoch": 6.65,
"learning_rate": 4.6226126298709224e-05,
"loss": 0.0194,
"step": 525
},
{
"epoch": 6.66,
"learning_rate": 4.60691998008794e-05,
"loss": 0.0284,
"step": 526
},
{
"epoch": 6.67,
"learning_rate": 4.5912312254108905e-05,
"loss": 0.0179,
"step": 527
},
{
"epoch": 6.68,
"learning_rate": 4.575546521302681e-05,
"loss": 0.0099,
"step": 528
},
{
"epoch": 6.7,
"learning_rate": 4.5598660231860746e-05,
"loss": 0.0278,
"step": 529
},
{
"epoch": 6.71,
"learning_rate": 4.544189886442162e-05,
"loss": 0.0177,
"step": 530
},
{
"epoch": 6.72,
"learning_rate": 4.528518266408811e-05,
"loss": 0.0135,
"step": 531
},
{
"epoch": 6.73,
"learning_rate": 4.5128513183791386e-05,
"loss": 0.0151,
"step": 532
},
{
"epoch": 6.75,
"learning_rate": 4.49718919759996e-05,
"loss": 0.011,
"step": 533
},
{
"epoch": 6.76,
"learning_rate": 4.481532059270262e-05,
"loss": 0.019,
"step": 534
},
{
"epoch": 6.77,
"learning_rate": 4.465880058539652e-05,
"loss": 0.0063,
"step": 535
},
{
"epoch": 6.78,
"learning_rate": 4.450233350506836e-05,
"loss": 0.0142,
"step": 536
},
{
"epoch": 6.8,
"learning_rate": 4.4345920902180647e-05,
"loss": 0.0207,
"step": 537
},
{
"epoch": 6.81,
"learning_rate": 4.418956432665618e-05,
"loss": 0.0142,
"step": 538
},
{
"epoch": 6.82,
"learning_rate": 4.403326532786245e-05,
"loss": 0.0264,
"step": 539
},
{
"epoch": 6.84,
"learning_rate": 4.387702545459649e-05,
"loss": 0.0193,
"step": 540
},
{
"epoch": 6.85,
"learning_rate": 4.3720846255069406e-05,
"loss": 0.013,
"step": 541
},
{
"epoch": 6.86,
"learning_rate": 4.356472927689109e-05,
"loss": 0.0275,
"step": 542
},
{
"epoch": 6.87,
"learning_rate": 4.3408676067054866e-05,
"loss": 0.0122,
"step": 543
},
{
"epoch": 6.89,
"learning_rate": 4.32526881719222e-05,
"loss": 0.0094,
"step": 544
},
{
"epoch": 6.9,
"learning_rate": 4.3096767137207256e-05,
"loss": 0.0101,
"step": 545
},
{
"epoch": 6.91,
"learning_rate": 4.2940914507961775e-05,
"loss": 0.026,
"step": 546
},
{
"epoch": 6.92,
"learning_rate": 4.278513182855956e-05,
"loss": 0.0283,
"step": 547
},
{
"epoch": 6.94,
"learning_rate": 4.262942064268134e-05,
"loss": 0.0126,
"step": 548
},
{
"epoch": 6.95,
"learning_rate": 4.247378249329933e-05,
"loss": 0.0089,
"step": 549
},
{
"epoch": 6.96,
"learning_rate": 4.23182189226621e-05,
"loss": 0.0065,
"step": 550
},
{
"epoch": 6.97,
"learning_rate": 4.21627314722791e-05,
"loss": 0.0121,
"step": 551
},
{
"epoch": 6.99,
"learning_rate": 4.20073216829056e-05,
"loss": 0.0219,
"step": 552
},
{
"epoch": 7.0,
"learning_rate": 4.185199109452721e-05,
"loss": 0.0093,
"step": 553
},
{
"epoch": 7.01,
"learning_rate": 4.169674124634481e-05,
"loss": 0.0086,
"step": 554
},
{
"epoch": 7.03,
"learning_rate": 4.1541573676759126e-05,
"loss": 0.0115,
"step": 555
},
{
"epoch": 7.04,
"learning_rate": 4.138648992335566e-05,
"loss": 0.0077,
"step": 556
},
{
"epoch": 7.05,
"learning_rate": 4.12314915228893e-05,
"loss": 0.0094,
"step": 557
},
{
"epoch": 7.06,
"learning_rate": 4.107658001126913e-05,
"loss": 0.0111,
"step": 558
},
{
"epoch": 7.08,
"learning_rate": 4.092175692354333e-05,
"loss": 0.0113,
"step": 559
},
{
"epoch": 7.09,
"learning_rate": 4.0767023793883785e-05,
"loss": 0.0049,
"step": 560
},
{
"epoch": 7.1,
"learning_rate": 4.0612382155571026e-05,
"loss": 0.0119,
"step": 561
},
{
"epoch": 7.11,
"learning_rate": 4.045783354097893e-05,
"loss": 0.0054,
"step": 562
},
{
"epoch": 7.13,
"learning_rate": 4.0303379481559623e-05,
"loss": 0.0044,
"step": 563
},
{
"epoch": 7.14,
"learning_rate": 4.0149021507828224e-05,
"loss": 0.015,
"step": 564
},
{
"epoch": 7.15,
"learning_rate": 3.9994761149347784e-05,
"loss": 0.0097,
"step": 565
},
{
"epoch": 7.16,
"learning_rate": 3.984059993471399e-05,
"loss": 0.005,
"step": 566
},
{
"epoch": 7.18,
"learning_rate": 3.968653939154016e-05,
"loss": 0.0232,
"step": 567
},
{
"epoch": 7.19,
"learning_rate": 3.9532581046442e-05,
"loss": 0.0282,
"step": 568
},
{
"epoch": 7.2,
"learning_rate": 3.937872642502252e-05,
"loss": 0.0072,
"step": 569
},
{
"epoch": 7.22,
"learning_rate": 3.9224977051856904e-05,
"loss": 0.0071,
"step": 570
},
{
"epoch": 7.23,
"learning_rate": 3.907133445047747e-05,
"loss": 0.0049,
"step": 571
},
{
"epoch": 7.24,
"learning_rate": 3.8917800143358404e-05,
"loss": 0.0107,
"step": 572
},
{
"epoch": 7.25,
"learning_rate": 3.8764375651900906e-05,
"loss": 0.0043,
"step": 573
},
{
"epoch": 7.27,
"learning_rate": 3.861106249641789e-05,
"loss": 0.0035,
"step": 574
},
{
"epoch": 7.28,
"learning_rate": 3.84578621961191e-05,
"loss": 0.0046,
"step": 575
},
{
"epoch": 7.29,
"learning_rate": 3.830477626909589e-05,
"loss": 0.007,
"step": 576
},
{
"epoch": 7.3,
"learning_rate": 3.8151806232306374e-05,
"loss": 0.0116,
"step": 577
},
{
"epoch": 7.32,
"learning_rate": 3.7998953601560175e-05,
"loss": 0.0069,
"step": 578
},
{
"epoch": 7.33,
"learning_rate": 3.784621989150361e-05,
"loss": 0.0038,
"step": 579
},
{
"epoch": 7.34,
"learning_rate": 3.769360661560453e-05,
"loss": 0.0138,
"step": 580
},
{
"epoch": 7.35,
"learning_rate": 3.75411152861374e-05,
"loss": 0.0063,
"step": 581
},
{
"epoch": 7.37,
"learning_rate": 3.73887474141683e-05,
"loss": 0.012,
"step": 582
},
{
"epoch": 7.38,
"learning_rate": 3.723650450953994e-05,
"loss": 0.0058,
"step": 583
},
{
"epoch": 7.39,
"learning_rate": 3.708438808085668e-05,
"loss": 0.0084,
"step": 584
},
{
"epoch": 7.41,
"learning_rate": 3.693239963546967e-05,
"loss": 0.0049,
"step": 585
},
{
"epoch": 7.42,
"learning_rate": 3.6780540679461784e-05,
"loss": 0.0054,
"step": 586
},
{
"epoch": 7.43,
"learning_rate": 3.662881271763279e-05,
"loss": 0.0116,
"step": 587
},
{
"epoch": 7.44,
"learning_rate": 3.64772172534844e-05,
"loss": 0.0077,
"step": 588
},
{
"epoch": 7.46,
"learning_rate": 3.63257557892054e-05,
"loss": 0.0122,
"step": 589
},
{
"epoch": 7.47,
"learning_rate": 3.6174429825656685e-05,
"loss": 0.0052,
"step": 590
},
{
"epoch": 7.48,
"learning_rate": 3.602324086235655e-05,
"loss": 0.0046,
"step": 591
},
{
"epoch": 7.49,
"learning_rate": 3.587219039746564e-05,
"loss": 0.0177,
"step": 592
},
{
"epoch": 7.51,
"learning_rate": 3.572127992777223e-05,
"loss": 0.0059,
"step": 593
},
{
"epoch": 7.52,
"learning_rate": 3.557051094867735e-05,
"loss": 0.0073,
"step": 594
},
{
"epoch": 7.53,
"learning_rate": 3.541988495417997e-05,
"loss": 0.0054,
"step": 595
},
{
"epoch": 7.54,
"learning_rate": 3.5269403436862175e-05,
"loss": 0.0046,
"step": 596
},
{
"epoch": 7.56,
"learning_rate": 3.511906788787447e-05,
"loss": 0.0091,
"step": 597
},
{
"epoch": 7.57,
"learning_rate": 3.496887979692084e-05,
"loss": 0.0124,
"step": 598
},
{
"epoch": 7.58,
"learning_rate": 3.481884065224415e-05,
"loss": 0.01,
"step": 599
},
{
"epoch": 7.59,
"learning_rate": 3.466895194061128e-05,
"loss": 0.0093,
"step": 600
},
{
"epoch": 7.61,
"learning_rate": 3.451921514729848e-05,
"loss": 0.0074,
"step": 601
},
{
"epoch": 7.62,
"learning_rate": 3.436963175607656e-05,
"loss": 0.0082,
"step": 602
},
{
"epoch": 7.63,
"learning_rate": 3.422020324919632e-05,
"loss": 0.0042,
"step": 603
},
{
"epoch": 7.65,
"learning_rate": 3.4070931107373675e-05,
"loss": 0.0079,
"step": 604
},
{
"epoch": 7.66,
"learning_rate": 3.39218168097752e-05,
"loss": 0.0179,
"step": 605
},
{
"epoch": 7.67,
"learning_rate": 3.377286183400328e-05,
"loss": 0.0151,
"step": 606
},
{
"epoch": 7.68,
"learning_rate": 3.362406765608158e-05,
"loss": 0.0098,
"step": 607
},
{
"epoch": 7.7,
"learning_rate": 3.3475435750440356e-05,
"loss": 0.0067,
"step": 608
},
{
"epoch": 7.71,
"learning_rate": 3.332696758990197e-05,
"loss": 0.0094,
"step": 609
},
{
"epoch": 7.72,
"learning_rate": 3.3178664645666066e-05,
"loss": 0.009,
"step": 610
},
{
"epoch": 7.73,
"learning_rate": 3.303052838729525e-05,
"loss": 0.0036,
"step": 611
},
{
"epoch": 7.75,
"learning_rate": 3.2882560282700336e-05,
"loss": 0.0071,
"step": 612
},
{
"epoch": 7.76,
"learning_rate": 3.273476179812588e-05,
"loss": 0.0136,
"step": 613
},
{
"epoch": 7.77,
"learning_rate": 3.258713439813566e-05,
"loss": 0.0078,
"step": 614
},
{
"epoch": 7.78,
"learning_rate": 3.243967954559811e-05,
"loss": 0.0052,
"step": 615
},
{
"epoch": 7.8,
"learning_rate": 3.229239870167191e-05,
"loss": 0.005,
"step": 616
},
{
"epoch": 7.81,
"learning_rate": 3.2145293325791395e-05,
"loss": 0.0136,
"step": 617
},
{
"epoch": 7.82,
"learning_rate": 3.199836487565222e-05,
"loss": 0.012,
"step": 618
},
{
"epoch": 7.84,
"learning_rate": 3.1851614807196774e-05,
"loss": 0.0124,
"step": 619
},
{
"epoch": 7.85,
"learning_rate": 3.170504457459989e-05,
"loss": 0.0115,
"step": 620
},
{
"epoch": 7.86,
"learning_rate": 3.155865563025433e-05,
"loss": 0.0162,
"step": 621
},
{
"epoch": 7.87,
"learning_rate": 3.1412449424756474e-05,
"loss": 0.0144,
"step": 622
},
{
"epoch": 7.89,
"learning_rate": 3.1266427406891856e-05,
"loss": 0.0035,
"step": 623
},
{
"epoch": 7.9,
"learning_rate": 3.112059102362093e-05,
"loss": 0.0089,
"step": 624
},
{
"epoch": 7.91,
"learning_rate": 3.0974941720064585e-05,
"loss": 0.0086,
"step": 625
},
{
"epoch": 7.92,
"learning_rate": 3.082948093948997e-05,
"loss": 0.0048,
"step": 626
},
{
"epoch": 7.94,
"learning_rate": 3.0684210123296055e-05,
"loss": 0.0103,
"step": 627
},
{
"epoch": 7.95,
"learning_rate": 3.053913071099947e-05,
"loss": 0.0196,
"step": 628
},
{
"epoch": 7.96,
"learning_rate": 3.0394244140220163e-05,
"loss": 0.0043,
"step": 629
},
{
"epoch": 7.97,
"learning_rate": 3.0249551846667207e-05,
"loss": 0.0074,
"step": 630
},
{
"epoch": 7.99,
"learning_rate": 3.010505526412447e-05,
"loss": 0.0129,
"step": 631
},
{
"epoch": 8.0,
"learning_rate": 2.996075582443658e-05,
"loss": 0.0035,
"step": 632
},
{
"epoch": 8.01,
"learning_rate": 2.981665495749457e-05,
"loss": 0.0053,
"step": 633
},
{
"epoch": 8.03,
"learning_rate": 2.9672754091221805e-05,
"loss": 0.0079,
"step": 634
},
{
"epoch": 8.04,
"learning_rate": 2.9529054651559772e-05,
"loss": 0.0029,
"step": 635
},
{
"epoch": 8.05,
"learning_rate": 2.938555806245406e-05,
"loss": 0.0061,
"step": 636
},
{
"epoch": 8.06,
"learning_rate": 2.9242265745840063e-05,
"loss": 0.0077,
"step": 637
},
{
"epoch": 8.08,
"learning_rate": 2.9099179121629117e-05,
"loss": 0.0035,
"step": 638
},
{
"epoch": 8.09,
"learning_rate": 2.895629960769417e-05,
"loss": 0.0027,
"step": 639
},
{
"epoch": 8.1,
"learning_rate": 2.881362861985606e-05,
"loss": 0.0063,
"step": 640
},
{
"epoch": 8.11,
"learning_rate": 2.867116757186911e-05,
"loss": 0.0135,
"step": 641
},
{
"epoch": 8.13,
"learning_rate": 2.8528917875407433e-05,
"loss": 0.0049,
"step": 642
},
{
"epoch": 8.14,
"learning_rate": 2.838688094005078e-05,
"loss": 0.0027,
"step": 643
},
{
"epoch": 8.15,
"learning_rate": 2.8245058173270622e-05,
"loss": 0.0089,
"step": 644
},
{
"epoch": 8.16,
"learning_rate": 2.8103450980416136e-05,
"loss": 0.0056,
"step": 645
},
{
"epoch": 8.18,
"learning_rate": 2.796206076470044e-05,
"loss": 0.0072,
"step": 646
},
{
"epoch": 8.19,
"learning_rate": 2.7820888927186483e-05,
"loss": 0.0089,
"step": 647
},
{
"epoch": 8.2,
"learning_rate": 2.7679936866773315e-05,
"loss": 0.0029,
"step": 648
},
{
"epoch": 8.22,
"learning_rate": 2.753920598018217e-05,
"loss": 0.0045,
"step": 649
},
{
"epoch": 8.23,
"learning_rate": 2.739869766194263e-05,
"loss": 0.0029,
"step": 650
},
{
"epoch": 8.24,
"learning_rate": 2.7258413304378734e-05,
"loss": 0.0059,
"step": 651
},
{
"epoch": 8.25,
"learning_rate": 2.7118354297595396e-05,
"loss": 0.012,
"step": 652
},
{
"epoch": 8.27,
"learning_rate": 2.6978522029464325e-05,
"loss": 0.0115,
"step": 653
},
{
"epoch": 8.28,
"learning_rate": 2.683891788561055e-05,
"loss": 0.0038,
"step": 654
},
{
"epoch": 8.29,
"learning_rate": 2.669954324939852e-05,
"loss": 0.0068,
"step": 655
},
{
"epoch": 8.3,
"learning_rate": 2.6560399501918465e-05,
"loss": 0.0059,
"step": 656
},
{
"epoch": 8.32,
"learning_rate": 2.6421488021972673e-05,
"loss": 0.0072,
"step": 657
},
{
"epoch": 8.33,
"learning_rate": 2.6282810186061862e-05,
"loss": 0.0039,
"step": 658
},
{
"epoch": 8.34,
"learning_rate": 2.6144367368371535e-05,
"loss": 0.0029,
"step": 659
},
{
"epoch": 8.35,
"learning_rate": 2.600616094075835e-05,
"loss": 0.0031,
"step": 660
},
{
"epoch": 8.37,
"learning_rate": 2.5868192272736514e-05,
"loss": 0.0037,
"step": 661
},
{
"epoch": 8.38,
"learning_rate": 2.5730462731464273e-05,
"loss": 0.006,
"step": 662
},
{
"epoch": 8.39,
"learning_rate": 2.5592973681730236e-05,
"loss": 0.0029,
"step": 663
},
{
"epoch": 8.41,
"learning_rate": 2.5455726485940012e-05,
"loss": 0.004,
"step": 664
},
{
"epoch": 8.42,
"learning_rate": 2.5318722504102604e-05,
"loss": 0.0034,
"step": 665
},
{
"epoch": 8.43,
"learning_rate": 2.5181963093816962e-05,
"loss": 0.0049,
"step": 666
},
{
"epoch": 8.44,
"learning_rate": 2.504544961025853e-05,
"loss": 0.0053,
"step": 667
},
{
"epoch": 8.46,
"learning_rate": 2.4909183406165836e-05,
"loss": 0.0113,
"step": 668
},
{
"epoch": 8.47,
"learning_rate": 2.4773165831827018e-05,
"loss": 0.0086,
"step": 669
},
{
"epoch": 8.48,
"learning_rate": 2.4637398235066527e-05,
"loss": 0.0053,
"step": 670
},
{
"epoch": 8.49,
"learning_rate": 2.450188196123177e-05,
"loss": 0.0029,
"step": 671
},
{
"epoch": 8.51,
"learning_rate": 2.4366618353179644e-05,
"loss": 0.0119,
"step": 672
},
{
"epoch": 8.52,
"learning_rate": 2.423160875126348e-05,
"loss": 0.0025,
"step": 673
},
{
"epoch": 8.53,
"learning_rate": 2.4096854493319477e-05,
"loss": 0.0021,
"step": 674
},
{
"epoch": 8.54,
"learning_rate": 2.3962356914653657e-05,
"loss": 0.0033,
"step": 675
},
{
"epoch": 8.56,
"learning_rate": 2.3828117348028528e-05,
"loss": 0.0037,
"step": 676
},
{
"epoch": 8.57,
"learning_rate": 2.3694137123649946e-05,
"loss": 0.0025,
"step": 677
},
{
"epoch": 8.58,
"learning_rate": 2.3560417569153796e-05,
"loss": 0.0036,
"step": 678
},
{
"epoch": 8.59,
"learning_rate": 2.342696000959309e-05,
"loss": 0.003,
"step": 679
},
{
"epoch": 8.61,
"learning_rate": 2.3293765767424537e-05,
"loss": 0.0023,
"step": 680
},
{
"epoch": 8.62,
"learning_rate": 2.3160836162495653e-05,
"loss": 0.0023,
"step": 681
},
{
"epoch": 8.63,
"learning_rate": 2.3028172512031604e-05,
"loss": 0.0122,
"step": 682
},
{
"epoch": 8.65,
"learning_rate": 2.289577613062218e-05,
"loss": 0.0083,
"step": 683
},
{
"epoch": 8.66,
"learning_rate": 2.276364833020868e-05,
"loss": 0.0059,
"step": 684
},
{
"epoch": 8.67,
"learning_rate": 2.2631790420071064e-05,
"loss": 0.0028,
"step": 685
},
{
"epoch": 8.68,
"learning_rate": 2.2500203706814856e-05,
"loss": 0.006,
"step": 686
},
{
"epoch": 8.7,
"learning_rate": 2.2368889494358235e-05,
"loss": 0.0103,
"step": 687
},
{
"epoch": 8.71,
"learning_rate": 2.2237849083919142e-05,
"loss": 0.0043,
"step": 688
},
{
"epoch": 8.72,
"learning_rate": 2.2107083774002364e-05,
"loss": 0.0138,
"step": 689
},
{
"epoch": 8.73,
"learning_rate": 2.1976594860386597e-05,
"loss": 0.0134,
"step": 690
},
{
"epoch": 8.75,
"learning_rate": 2.1846383636111743e-05,
"loss": 0.0069,
"step": 691
},
{
"epoch": 8.76,
"learning_rate": 2.1716451391466008e-05,
"loss": 0.0024,
"step": 692
},
{
"epoch": 8.77,
"learning_rate": 2.1586799413973135e-05,
"loss": 0.0033,
"step": 693
},
{
"epoch": 8.78,
"learning_rate": 2.1457428988379635e-05,
"loss": 0.0033,
"step": 694
},
{
"epoch": 8.8,
"learning_rate": 2.1328341396642093e-05,
"loss": 0.0116,
"step": 695
},
{
"epoch": 8.81,
"learning_rate": 2.1199537917914386e-05,
"loss": 0.0071,
"step": 696
},
{
"epoch": 8.82,
"learning_rate": 2.107101982853511e-05,
"loss": 0.0067,
"step": 697
},
{
"epoch": 8.84,
"learning_rate": 2.0942788402014867e-05,
"loss": 0.008,
"step": 698
},
{
"epoch": 8.85,
"learning_rate": 2.0814844909023663e-05,
"loss": 0.0059,
"step": 699
},
{
"epoch": 8.86,
"learning_rate": 2.068719061737831e-05,
"loss": 0.0054,
"step": 700
},
{
"epoch": 8.87,
"learning_rate": 2.0559826792029884e-05,
"loss": 0.0021,
"step": 701
},
{
"epoch": 8.89,
"learning_rate": 2.0432754695051136e-05,
"loss": 0.0058,
"step": 702
},
{
"epoch": 8.9,
"learning_rate": 2.0305975585624058e-05,
"loss": 0.0081,
"step": 703
},
{
"epoch": 8.91,
"learning_rate": 2.0179490720027372e-05,
"loss": 0.0051,
"step": 704
},
{
"epoch": 8.92,
"learning_rate": 2.005330135162408e-05,
"loss": 0.0056,
"step": 705
},
{
"epoch": 8.94,
"learning_rate": 1.992740873084899e-05,
"loss": 0.0045,
"step": 706
},
{
"epoch": 8.95,
"learning_rate": 1.9801814105196497e-05,
"loss": 0.0043,
"step": 707
},
{
"epoch": 8.96,
"learning_rate": 1.9676518719207977e-05,
"loss": 0.0051,
"step": 708
},
{
"epoch": 8.97,
"learning_rate": 1.9551523814459665e-05,
"loss": 0.0038,
"step": 709
},
{
"epoch": 8.99,
"learning_rate": 1.9426830629550242e-05,
"loss": 0.0052,
"step": 710
},
{
"epoch": 9.0,
"learning_rate": 1.9302440400088606e-05,
"loss": 0.0167,
"step": 711
},
{
"epoch": 9.01,
"learning_rate": 1.917835435868155e-05,
"loss": 0.0058,
"step": 712
},
{
"epoch": 9.03,
"learning_rate": 1.9054573734921714e-05,
"loss": 0.0025,
"step": 713
},
{
"epoch": 9.04,
"learning_rate": 1.8931099755375203e-05,
"loss": 0.0059,
"step": 714
},
{
"epoch": 9.05,
"learning_rate": 1.880793364356956e-05,
"loss": 0.0063,
"step": 715
},
{
"epoch": 9.06,
"learning_rate": 1.8685076619981608e-05,
"loss": 0.0034,
"step": 716
},
{
"epoch": 9.08,
"learning_rate": 1.8562529902025372e-05,
"loss": 0.0048,
"step": 717
},
{
"epoch": 9.09,
"learning_rate": 1.844029470403993e-05,
"loss": 0.0059,
"step": 718
},
{
"epoch": 9.1,
"learning_rate": 1.8318372237277565e-05,
"loss": 0.003,
"step": 719
},
{
"epoch": 9.11,
"learning_rate": 1.8196763709891524e-05,
"loss": 0.0019,
"step": 720
},
{
"epoch": 9.13,
"learning_rate": 1.8075470326924243e-05,
"loss": 0.0024,
"step": 721
},
{
"epoch": 9.14,
"learning_rate": 1.795449329029531e-05,
"loss": 0.0123,
"step": 722
},
{
"epoch": 9.15,
"learning_rate": 1.7833833798789595e-05,
"loss": 0.0025,
"step": 723
},
{
"epoch": 9.16,
"learning_rate": 1.7713493048045294e-05,
"loss": 0.0057,
"step": 724
},
{
"epoch": 9.18,
"learning_rate": 1.7593472230542202e-05,
"loss": 0.0083,
"step": 725
},
{
"epoch": 9.19,
"learning_rate": 1.747377253558982e-05,
"loss": 0.002,
"step": 726
},
{
"epoch": 9.2,
"learning_rate": 1.7354395149315534e-05,
"loss": 0.0057,
"step": 727
},
{
"epoch": 9.22,
"learning_rate": 1.7235341254653005e-05,
"loss": 0.0019,
"step": 728
},
{
"epoch": 9.23,
"learning_rate": 1.7116612031330252e-05,
"loss": 0.0018,
"step": 729
},
{
"epoch": 9.24,
"learning_rate": 1.6998208655858137e-05,
"loss": 0.0047,
"step": 730
},
{
"epoch": 9.25,
"learning_rate": 1.6880132301518598e-05,
"loss": 0.004,
"step": 731
},
{
"epoch": 9.27,
"learning_rate": 1.6762384138353078e-05,
"loss": 0.0034,
"step": 732
},
{
"epoch": 9.28,
"learning_rate": 1.6644965333150847e-05,
"loss": 0.0073,
"step": 733
},
{
"epoch": 9.29,
"learning_rate": 1.6527877049437622e-05,
"loss": 0.0155,
"step": 734
},
{
"epoch": 9.3,
"learning_rate": 1.6411120447463807e-05,
"loss": 0.0086,
"step": 735
},
{
"epoch": 9.32,
"learning_rate": 1.6294696684193154e-05,
"loss": 0.0108,
"step": 736
},
{
"epoch": 9.33,
"learning_rate": 1.617860691329126e-05,
"loss": 0.0112,
"step": 737
},
{
"epoch": 9.34,
"learning_rate": 1.6062852285114123e-05,
"loss": 0.0023,
"step": 738
},
{
"epoch": 9.35,
"learning_rate": 1.5947433946696693e-05,
"loss": 0.002,
"step": 739
},
{
"epoch": 9.37,
"learning_rate": 1.583235304174167e-05,
"loss": 0.0025,
"step": 740
},
{
"epoch": 9.38,
"learning_rate": 1.5717610710607948e-05,
"loss": 0.004,
"step": 741
},
{
"epoch": 9.39,
"learning_rate": 1.5603208090299498e-05,
"loss": 0.004,
"step": 742
},
{
"epoch": 9.41,
"learning_rate": 1.5489146314454002e-05,
"loss": 0.002,
"step": 743
},
{
"epoch": 9.42,
"learning_rate": 1.537542651333167e-05,
"loss": 0.0019,
"step": 744
},
{
"epoch": 9.43,
"learning_rate": 1.5262049813803958e-05,
"loss": 0.009,
"step": 745
},
{
"epoch": 9.44,
"learning_rate": 1.5149017339342574e-05,
"loss": 0.0113,
"step": 746
},
{
"epoch": 9.46,
"learning_rate": 1.5036330210008115e-05,
"loss": 0.0025,
"step": 747
},
{
"epoch": 9.47,
"learning_rate": 1.4923989542439159e-05,
"loss": 0.0044,
"step": 748
},
{
"epoch": 9.48,
"learning_rate": 1.4811996449841098e-05,
"loss": 0.0029,
"step": 749
},
{
"epoch": 9.49,
"learning_rate": 1.4700352041975168e-05,
"loss": 0.0023,
"step": 750
},
{
"epoch": 9.51,
"learning_rate": 1.458905742514734e-05,
"loss": 0.0046,
"step": 751
},
{
"epoch": 9.52,
"learning_rate": 1.447811370219757e-05,
"loss": 0.0028,
"step": 752
},
{
"epoch": 9.53,
"learning_rate": 1.4367521972488612e-05,
"loss": 0.0031,
"step": 753
},
{
"epoch": 9.54,
"learning_rate": 1.4257283331895315e-05,
"loss": 0.0037,
"step": 754
},
{
"epoch": 9.56,
"learning_rate": 1.4147398872793693e-05,
"loss": 0.0023,
"step": 755
},
{
"epoch": 9.57,
"learning_rate": 1.4037869684050115e-05,
"loss": 0.0018,
"step": 756
},
{
"epoch": 9.58,
"learning_rate": 1.3928696851010443e-05,
"loss": 0.0048,
"step": 757
},
{
"epoch": 9.59,
"learning_rate": 1.3819881455489458e-05,
"loss": 0.0101,
"step": 758
},
{
"epoch": 9.61,
"learning_rate": 1.3711424575759912e-05,
"loss": 0.003,
"step": 759
},
{
"epoch": 9.62,
"learning_rate": 1.3603327286542023e-05,
"loss": 0.0023,
"step": 760
},
{
"epoch": 9.63,
"learning_rate": 1.3495590658992718e-05,
"loss": 0.0029,
"step": 761
},
{
"epoch": 9.65,
"learning_rate": 1.33882157606951e-05,
"loss": 0.0042,
"step": 762
},
{
"epoch": 9.66,
"learning_rate": 1.3281203655647756e-05,
"loss": 0.0059,
"step": 763
},
{
"epoch": 9.67,
"learning_rate": 1.317455540425439e-05,
"loss": 0.0021,
"step": 764
},
{
"epoch": 9.68,
"learning_rate": 1.3068272063313102e-05,
"loss": 0.0034,
"step": 765
},
{
"epoch": 9.7,
"learning_rate": 1.2962354686006084e-05,
"loss": 0.013,
"step": 766
},
{
"epoch": 9.71,
"learning_rate": 1.2856804321889115e-05,
"loss": 0.0031,
"step": 767
},
{
"epoch": 9.72,
"learning_rate": 1.2751622016881182e-05,
"loss": 0.0028,
"step": 768
},
{
"epoch": 9.73,
"learning_rate": 1.2646808813254035e-05,
"loss": 0.0061,
"step": 769
},
{
"epoch": 9.75,
"learning_rate": 1.2542365749622049e-05,
"loss": 0.0037,
"step": 770
},
{
"epoch": 9.76,
"learning_rate": 1.2438293860931677e-05,
"loss": 0.0029,
"step": 771
},
{
"epoch": 9.77,
"learning_rate": 1.2334594178451425e-05,
"loss": 0.0062,
"step": 772
},
{
"epoch": 9.78,
"learning_rate": 1.2231267729761487e-05,
"loss": 0.0047,
"step": 773
},
{
"epoch": 9.8,
"learning_rate": 1.2128315538743646e-05,
"loss": 0.0059,
"step": 774
},
{
"epoch": 9.81,
"learning_rate": 1.2025738625571026e-05,
"loss": 0.0057,
"step": 775
},
{
"epoch": 9.82,
"learning_rate": 1.1923538006698154e-05,
"loss": 0.0047,
"step": 776
},
{
"epoch": 9.84,
"learning_rate": 1.1821714694850689e-05,
"loss": 0.0035,
"step": 777
},
{
"epoch": 9.85,
"learning_rate": 1.172026969901553e-05,
"loss": 0.0022,
"step": 778
},
{
"epoch": 9.86,
"learning_rate": 1.161920402443077e-05,
"loss": 0.0052,
"step": 779
},
{
"epoch": 9.87,
"learning_rate": 1.1518518672575701e-05,
"loss": 0.0025,
"step": 780
},
{
"epoch": 9.89,
"learning_rate": 1.1418214641160958e-05,
"loss": 0.005,
"step": 781
},
{
"epoch": 9.9,
"learning_rate": 1.1318292924118584e-05,
"loss": 0.0022,
"step": 782
},
{
"epoch": 9.91,
"learning_rate": 1.121875451159221e-05,
"loss": 0.0023,
"step": 783
},
{
"epoch": 9.92,
"learning_rate": 1.1119600389927182e-05,
"loss": 0.0029,
"step": 784
},
{
"epoch": 9.94,
"learning_rate": 1.1020831541660915e-05,
"loss": 0.0056,
"step": 785
},
{
"epoch": 9.95,
"learning_rate": 1.092244894551298e-05,
"loss": 0.0039,
"step": 786
},
{
"epoch": 9.96,
"learning_rate": 1.0824453576375576e-05,
"loss": 0.0071,
"step": 787
},
{
"epoch": 9.97,
"learning_rate": 1.0726846405303754e-05,
"loss": 0.0027,
"step": 788
},
{
"epoch": 9.99,
"learning_rate": 1.062962839950587e-05,
"loss": 0.002,
"step": 789
},
{
"epoch": 10.0,
"learning_rate": 1.0532800522333902e-05,
"loss": 0.0023,
"step": 790
},
{
"epoch": 10.01,
"learning_rate": 1.0436363733274057e-05,
"loss": 0.0056,
"step": 791
},
{
"epoch": 10.03,
"learning_rate": 1.0340318987937097e-05,
"loss": 0.0102,
"step": 792
},
{
"epoch": 10.04,
"learning_rate": 1.0244667238048988e-05,
"loss": 0.0019,
"step": 793
},
{
"epoch": 10.05,
"learning_rate": 1.014940943144142e-05,
"loss": 0.0044,
"step": 794
},
{
"epoch": 10.06,
"learning_rate": 1.0054546512042424e-05,
"loss": 0.0017,
"step": 795
},
{
"epoch": 10.08,
"learning_rate": 9.960079419866985e-06,
"loss": 0.0048,
"step": 796
},
{
"epoch": 10.09,
"learning_rate": 9.866009091007833e-06,
"loss": 0.0021,
"step": 797
},
{
"epoch": 10.1,
"learning_rate": 9.772336457626014e-06,
"loss": 0.012,
"step": 798
},
{
"epoch": 10.11,
"learning_rate": 9.679062447941778e-06,
"loss": 0.006,
"step": 799
},
{
"epoch": 10.13,
"learning_rate": 9.586187986225325e-06,
"loss": 0.0023,
"step": 800
},
{
"epoch": 10.14,
"learning_rate": 9.493713992787672e-06,
"loss": 0.002,
"step": 801
},
{
"epoch": 10.15,
"learning_rate": 9.401641383971477e-06,
"loss": 0.0019,
"step": 802
},
{
"epoch": 10.16,
"learning_rate": 9.309971072142038e-06,
"loss": 0.0018,
"step": 803
},
{
"epoch": 10.18,
"learning_rate": 9.218703965678204e-06,
"loss": 0.0049,
"step": 804
},
{
"epoch": 10.19,
"learning_rate": 9.127840968963381e-06,
"loss": 0.0025,
"step": 805
},
{
"epoch": 10.2,
"learning_rate": 9.03738298237658e-06,
"loss": 0.0041,
"step": 806
},
{
"epoch": 10.22,
"learning_rate": 8.94733090228349e-06,
"loss": 0.0055,
"step": 807
},
{
"epoch": 10.23,
"learning_rate": 8.857685621027568e-06,
"loss": 0.002,
"step": 808
},
{
"epoch": 10.24,
"learning_rate": 8.768448026921245e-06,
"loss": 0.002,
"step": 809
},
{
"epoch": 10.25,
"learning_rate": 8.67961900423711e-06,
"loss": 0.0038,
"step": 810
},
{
"epoch": 10.27,
"learning_rate": 8.591199433199126e-06,
"loss": 0.0043,
"step": 811
},
{
"epoch": 10.28,
"learning_rate": 8.503190189973914e-06,
"loss": 0.0017,
"step": 812
},
{
"epoch": 10.29,
"learning_rate": 8.415592146662104e-06,
"loss": 0.0036,
"step": 813
},
{
"epoch": 10.3,
"learning_rate": 8.328406171289621e-06,
"loss": 0.0044,
"step": 814
},
{
"epoch": 10.32,
"learning_rate": 8.24163312779917e-06,
"loss": 0.0019,
"step": 815
},
{
"epoch": 10.33,
"learning_rate": 8.155273876041614e-06,
"loss": 0.0029,
"step": 816
},
{
"epoch": 10.34,
"learning_rate": 8.069329271767484e-06,
"loss": 0.0022,
"step": 817
},
{
"epoch": 10.35,
"learning_rate": 7.983800166618482e-06,
"loss": 0.004,
"step": 818
},
{
"epoch": 10.37,
"learning_rate": 7.898687408119065e-06,
"loss": 0.0028,
"step": 819
},
{
"epoch": 10.38,
"learning_rate": 7.813991839667995e-06,
"loss": 0.0023,
"step": 820
},
{
"epoch": 10.39,
"learning_rate": 7.72971430053005e-06,
"loss": 0.0019,
"step": 821
},
{
"epoch": 10.41,
"learning_rate": 7.645855625827658e-06,
"loss": 0.0104,
"step": 822
},
{
"epoch": 10.42,
"learning_rate": 7.56241664653266e-06,
"loss": 0.0031,
"step": 823
},
{
"epoch": 10.43,
"learning_rate": 7.4793981894580034e-06,
"loss": 0.0024,
"step": 824
},
{
"epoch": 10.44,
"learning_rate": 7.396801077249676e-06,
"loss": 0.0018,
"step": 825
},
{
"epoch": 10.46,
"learning_rate": 7.3146261283784104e-06,
"loss": 0.0018,
"step": 826
},
{
"epoch": 10.47,
"learning_rate": 7.2328741571316696e-06,
"loss": 0.0041,
"step": 827
},
{
"epoch": 10.48,
"learning_rate": 7.1515459736055505e-06,
"loss": 0.0038,
"step": 828
},
{
"epoch": 10.49,
"learning_rate": 7.070642383696763e-06,
"loss": 0.0031,
"step": 829
},
{
"epoch": 10.51,
"learning_rate": 6.990164189094589e-06,
"loss": 0.0019,
"step": 830
},
{
"epoch": 10.52,
"learning_rate": 6.910112187273066e-06,
"loss": 0.002,
"step": 831
},
{
"epoch": 10.53,
"learning_rate": 6.830487171482935e-06,
"loss": 0.0083,
"step": 832
},
{
"epoch": 10.54,
"learning_rate": 6.751289930743882e-06,
"loss": 0.0021,
"step": 833
},
{
"epoch": 10.56,
"learning_rate": 6.6725212498366885e-06,
"loss": 0.0026,
"step": 834
},
{
"epoch": 10.57,
"learning_rate": 6.594181909295427e-06,
"loss": 0.0078,
"step": 835
},
{
"epoch": 10.58,
"learning_rate": 6.516272685399793e-06,
"loss": 0.0028,
"step": 836
},
{
"epoch": 10.59,
"learning_rate": 6.438794350167337e-06,
"loss": 0.0019,
"step": 837
},
{
"epoch": 10.61,
"learning_rate": 6.36174767134588e-06,
"loss": 0.004,
"step": 838
},
{
"epoch": 10.62,
"learning_rate": 6.285133412405858e-06,
"loss": 0.0029,
"step": 839
},
{
"epoch": 10.63,
"learning_rate": 6.208952332532786e-06,
"loss": 0.0036,
"step": 840
},
{
"epoch": 10.65,
"learning_rate": 6.133205186619695e-06,
"loss": 0.002,
"step": 841
},
{
"epoch": 10.66,
"learning_rate": 6.057892725259717e-06,
"loss": 0.0018,
"step": 842
},
{
"epoch": 10.67,
"learning_rate": 5.983015694738597e-06,
"loss": 0.0048,
"step": 843
},
{
"epoch": 10.68,
"learning_rate": 5.908574837027309e-06,
"loss": 0.0018,
"step": 844
},
{
"epoch": 10.7,
"learning_rate": 5.83457088977471e-06,
"loss": 0.0029,
"step": 845
},
{
"epoch": 10.71,
"learning_rate": 5.761004586300234e-06,
"loss": 0.0033,
"step": 846
},
{
"epoch": 10.72,
"learning_rate": 5.687876655586583e-06,
"loss": 0.0026,
"step": 847
},
{
"epoch": 10.73,
"learning_rate": 5.615187822272583e-06,
"loss": 0.0053,
"step": 848
},
{
"epoch": 10.75,
"learning_rate": 5.542938806645931e-06,
"loss": 0.0022,
"step": 849
},
{
"epoch": 10.76,
"learning_rate": 5.4711303246361144e-06,
"loss": 0.0058,
"step": 850
},
{
"epoch": 10.77,
"learning_rate": 5.399763087807236e-06,
"loss": 0.0086,
"step": 851
},
{
"epoch": 10.78,
"learning_rate": 5.328837803351083e-06,
"loss": 0.0035,
"step": 852
},
{
"epoch": 10.8,
"learning_rate": 5.258355174079993e-06,
"loss": 0.0023,
"step": 853
},
{
"epoch": 10.81,
"learning_rate": 5.188315898419971e-06,
"loss": 0.0023,
"step": 854
},
{
"epoch": 10.82,
"learning_rate": 5.118720670403748e-06,
"loss": 0.0041,
"step": 855
},
{
"epoch": 10.84,
"learning_rate": 5.04957017966391e-06,
"loss": 0.0045,
"step": 856
},
{
"epoch": 10.85,
"learning_rate": 4.980865111426003e-06,
"loss": 0.0022,
"step": 857
},
{
"epoch": 10.86,
"learning_rate": 4.912606146501886e-06,
"loss": 0.0025,
"step": 858
},
{
"epoch": 10.87,
"learning_rate": 4.844793961282812e-06,
"loss": 0.0021,
"step": 859
},
{
"epoch": 10.89,
"learning_rate": 4.777429227732844e-06,
"loss": 0.0024,
"step": 860
},
{
"epoch": 10.9,
"learning_rate": 4.710512613382151e-06,
"loss": 0.0053,
"step": 861
},
{
"epoch": 10.91,
"learning_rate": 4.644044781320422e-06,
"loss": 0.0021,
"step": 862
},
{
"epoch": 10.92,
"learning_rate": 4.578026390190232e-06,
"loss": 0.002,
"step": 863
},
{
"epoch": 10.94,
"learning_rate": 4.5124580941806165e-06,
"loss": 0.0039,
"step": 864
},
{
"epoch": 10.95,
"learning_rate": 4.447340543020473e-06,
"loss": 0.0077,
"step": 865
},
{
"epoch": 10.96,
"learning_rate": 4.382674381972224e-06,
"loss": 0.0018,
"step": 866
},
{
"epoch": 10.97,
"learning_rate": 4.318460251825357e-06,
"loss": 0.002,
"step": 867
},
{
"epoch": 10.99,
"learning_rate": 4.254698788890127e-06,
"loss": 0.0042,
"step": 868
},
{
"epoch": 11.0,
"learning_rate": 4.191390624991159e-06,
"loss": 0.0024,
"step": 869
},
{
"epoch": 11.01,
"learning_rate": 4.12853638746134e-06,
"loss": 0.0021,
"step": 870
},
{
"epoch": 11.03,
"learning_rate": 4.0661366991354365e-06,
"loss": 0.0059,
"step": 871
},
{
"epoch": 11.04,
"learning_rate": 4.004192178344029e-06,
"loss": 0.0064,
"step": 872
},
{
"epoch": 11.05,
"learning_rate": 3.942703438907358e-06,
"loss": 0.0015,
"step": 873
},
{
"epoch": 11.06,
"learning_rate": 3.881671090129247e-06,
"loss": 0.0044,
"step": 874
},
{
"epoch": 11.08,
"learning_rate": 3.821095736791008e-06,
"loss": 0.0017,
"step": 875
},
{
"epoch": 11.09,
"learning_rate": 3.7609779791455744e-06,
"loss": 0.0049,
"step": 876
},
{
"epoch": 11.1,
"learning_rate": 3.7013184129113976e-06,
"loss": 0.0021,
"step": 877
},
{
"epoch": 11.11,
"learning_rate": 3.6421176292666783e-06,
"loss": 0.002,
"step": 878
},
{
"epoch": 11.13,
"learning_rate": 3.58337621484342e-06,
"loss": 0.0015,
"step": 879
},
{
"epoch": 11.14,
"learning_rate": 3.5250947517216637e-06,
"loss": 0.0021,
"step": 880
},
{
"epoch": 11.15,
"learning_rate": 3.4672738174236884e-06,
"loss": 0.0018,
"step": 881
},
{
"epoch": 11.16,
"learning_rate": 3.4099139849083307e-06,
"loss": 0.0049,
"step": 882
},
{
"epoch": 11.18,
"learning_rate": 3.353015822565253e-06,
"loss": 0.0022,
"step": 883
},
{
"epoch": 11.19,
"learning_rate": 3.296579894209345e-06,
"loss": 0.0016,
"step": 884
},
{
"epoch": 11.2,
"learning_rate": 3.2406067590751433e-06,
"loss": 0.0052,
"step": 885
},
{
"epoch": 11.22,
"learning_rate": 3.1850969718112745e-06,
"loss": 0.0019,
"step": 886
},
{
"epoch": 11.23,
"learning_rate": 3.1300510824749273e-06,
"loss": 0.0019,
"step": 887
},
{
"epoch": 11.24,
"learning_rate": 3.0754696365265068e-06,
"loss": 0.0034,
"step": 888
},
{
"epoch": 11.25,
"learning_rate": 3.0213531748240764e-06,
"loss": 0.0065,
"step": 889
},
{
"epoch": 11.27,
"learning_rate": 2.9677022336181413e-06,
"loss": 0.0028,
"step": 890
},
{
"epoch": 11.28,
"learning_rate": 2.914517344546258e-06,
"loss": 0.0024,
"step": 891
},
{
"epoch": 11.29,
"learning_rate": 2.8617990346277657e-06,
"loss": 0.0019,
"step": 892
},
{
"epoch": 11.3,
"learning_rate": 2.8095478262585907e-06,
"loss": 0.0038,
"step": 893
},
{
"epoch": 11.32,
"learning_rate": 2.7577642372060673e-06,
"loss": 0.0024,
"step": 894
},
{
"epoch": 11.33,
"learning_rate": 2.7064487806037985e-06,
"loss": 0.0085,
"step": 895
},
{
"epoch": 11.34,
"learning_rate": 2.6556019649465525e-06,
"loss": 0.0016,
"step": 896
},
{
"epoch": 11.35,
"learning_rate": 2.6052242940852787e-06,
"loss": 0.0019,
"step": 897
},
{
"epoch": 11.37,
"learning_rate": 2.5553162672220465e-06,
"loss": 0.0021,
"step": 898
},
{
"epoch": 11.38,
"learning_rate": 2.5058783789051467e-06,
"loss": 0.0041,
"step": 899
},
{
"epoch": 11.39,
"learning_rate": 2.45691111902418e-06,
"loss": 0.0071,
"step": 900
},
{
"epoch": 11.41,
"learning_rate": 2.4084149728051952e-06,
"loss": 0.0039,
"step": 901
},
{
"epoch": 11.42,
"learning_rate": 2.360390420805869e-06,
"loss": 0.002,
"step": 902
},
{
"epoch": 11.43,
"learning_rate": 2.3128379389108e-06,
"loss": 0.0015,
"step": 903
},
{
"epoch": 11.44,
"learning_rate": 2.2657579983267064e-06,
"loss": 0.0016,
"step": 904
},
{
"epoch": 11.46,
"learning_rate": 2.219151065577829e-06,
"loss": 0.0036,
"step": 905
},
{
"epoch": 11.47,
"learning_rate": 2.1730176025012816e-06,
"loss": 0.0032,
"step": 906
},
{
"epoch": 11.48,
"learning_rate": 2.1273580662424796e-06,
"loss": 0.0035,
"step": 907
},
{
"epoch": 11.49,
"learning_rate": 2.082172909250568e-06,
"loss": 0.0022,
"step": 908
},
{
"epoch": 11.51,
"learning_rate": 2.0374625792740464e-06,
"loss": 0.0067,
"step": 909
},
{
"epoch": 11.52,
"learning_rate": 1.993227519356189e-06,
"loss": 0.0021,
"step": 910
},
{
"epoch": 11.53,
"learning_rate": 1.9494681678307703e-06,
"loss": 0.004,
"step": 911
},
{
"epoch": 11.54,
"learning_rate": 1.906184958317664e-06,
"loss": 0.0074,
"step": 912
},
{
"epoch": 11.56,
"learning_rate": 1.8633783197185783e-06,
"loss": 0.004,
"step": 913
},
{
"epoch": 11.57,
"learning_rate": 1.8210486762127499e-06,
"loss": 0.0031,
"step": 914
},
{
"epoch": 11.58,
"learning_rate": 1.7791964472528232e-06,
"loss": 0.0037,
"step": 915
},
{
"epoch": 11.59,
"learning_rate": 1.737822047560611e-06,
"loss": 0.0026,
"step": 916
},
{
"epoch": 11.61,
"learning_rate": 1.696925887123052e-06,
"loss": 0.0033,
"step": 917
},
{
"epoch": 11.62,
"learning_rate": 1.656508371188109e-06,
"loss": 0.0029,
"step": 918
},
{
"epoch": 11.63,
"learning_rate": 1.6165699002607671e-06,
"loss": 0.0038,
"step": 919
},
{
"epoch": 11.65,
"learning_rate": 1.5771108700990412e-06,
"loss": 0.0038,
"step": 920
},
{
"epoch": 11.66,
"learning_rate": 1.538131671710108e-06,
"loss": 0.0018,
"step": 921
},
{
"epoch": 11.67,
"learning_rate": 1.4996326913463754e-06,
"loss": 0.0041,
"step": 922
},
{
"epoch": 11.68,
"learning_rate": 1.461614310501691e-06,
"loss": 0.0057,
"step": 923
},
{
"epoch": 11.7,
"learning_rate": 1.4240769059075342e-06,
"loss": 0.004,
"step": 924
},
{
"epoch": 11.71,
"learning_rate": 1.387020849529319e-06,
"loss": 0.0037,
"step": 925
},
{
"epoch": 11.72,
"learning_rate": 1.3504465085626638e-06,
"loss": 0.0023,
"step": 926
},
{
"epoch": 11.73,
"learning_rate": 1.3143542454297885e-06,
"loss": 0.0033,
"step": 927
},
{
"epoch": 11.75,
"learning_rate": 1.2787444177759068e-06,
"loss": 0.0015,
"step": 928
},
{
"epoch": 11.76,
"learning_rate": 1.243617378465689e-06,
"loss": 0.0014,
"step": 929
},
{
"epoch": 11.77,
"learning_rate": 1.208973475579761e-06,
"loss": 0.004,
"step": 930
},
{
"epoch": 11.78,
"learning_rate": 1.1748130524112666e-06,
"loss": 0.0039,
"step": 931
},
{
"epoch": 11.8,
"learning_rate": 1.1411364474624264e-06,
"loss": 0.0033,
"step": 932
},
{
"epoch": 11.81,
"learning_rate": 1.1079439944412406e-06,
"loss": 0.0035,
"step": 933
},
{
"epoch": 11.82,
"learning_rate": 1.075236022258147e-06,
"loss": 0.004,
"step": 934
},
{
"epoch": 11.84,
"learning_rate": 1.0430128550227625e-06,
"loss": 0.0019,
"step": 935
},
{
"epoch": 11.85,
"learning_rate": 1.0112748120406856e-06,
"loss": 0.0022,
"step": 936
},
{
"epoch": 11.86,
"learning_rate": 9.800222078103271e-07,
"loss": 0.0026,
"step": 937
},
{
"epoch": 11.87,
"learning_rate": 9.492553520197733e-07,
"loss": 0.0016,
"step": 938
},
{
"epoch": 11.89,
"learning_rate": 9.189745495437608e-07,
"loss": 0.0016,
"step": 939
},
{
"epoch": 11.9,
"learning_rate": 8.891801004406119e-07,
"loss": 0.0025,
"step": 940
},
{
"epoch": 11.91,
"learning_rate": 8.59872299949288e-07,
"loss": 0.0023,
"step": 941
},
{
"epoch": 11.92,
"learning_rate": 8.31051438486441e-07,
"loss": 0.0023,
"step": 942
},
{
"epoch": 11.94,
"learning_rate": 8.027178016435765e-07,
"loss": 0.0019,
"step": 943
},
{
"epoch": 11.95,
"learning_rate": 7.748716701841685e-07,
"loss": 0.0024,
"step": 944
},
{
"epoch": 11.96,
"learning_rate": 7.475133200409212e-07,
"loss": 0.002,
"step": 945
},
{
"epoch": 11.97,
"learning_rate": 7.206430223130278e-07,
"loss": 0.0024,
"step": 946
},
{
"epoch": 11.99,
"learning_rate": 6.9426104326345e-07,
"loss": 0.002,
"step": 947
},
{
"epoch": 12.0,
"learning_rate": 6.683676443163311e-07,
"loss": 0.0016,
"step": 948
},
{
"epoch": 12.01,
"learning_rate": 6.429630820543598e-07,
"loss": 0.0044,
"step": 949
},
{
"epoch": 12.03,
"learning_rate": 6.180476082162656e-07,
"loss": 0.003,
"step": 950
},
{
"epoch": 12.04,
"learning_rate": 5.936214696942887e-07,
"loss": 0.0024,
"step": 951
},
{
"epoch": 12.05,
"learning_rate": 5.696849085317646e-07,
"loss": 0.0032,
"step": 952
},
{
"epoch": 12.06,
"learning_rate": 5.462381619207091e-07,
"loss": 0.0047,
"step": 953
},
{
"epoch": 12.08,
"learning_rate": 5.232814621994598e-07,
"loss": 0.0081,
"step": 954
},
{
"epoch": 12.09,
"learning_rate": 5.008150368503994e-07,
"loss": 0.0021,
"step": 955
},
{
"epoch": 12.1,
"learning_rate": 4.788391084976862e-07,
"loss": 0.0016,
"step": 956
},
{
"epoch": 12.11,
"learning_rate": 4.573538949050327e-07,
"loss": 0.0017,
"step": 957
},
{
"epoch": 12.13,
"learning_rate": 4.363596089735911e-07,
"loss": 0.0029,
"step": 958
},
{
"epoch": 12.14,
"learning_rate": 4.1585645873978284e-07,
"loss": 0.0018,
"step": 959
},
{
"epoch": 12.15,
"learning_rate": 3.958446473733002e-07,
"loss": 0.0029,
"step": 960
},
{
"epoch": 12.16,
"learning_rate": 3.7632437317505207e-07,
"loss": 0.0015,
"step": 961
},
{
"epoch": 12.18,
"learning_rate": 3.572958295752049e-07,
"loss": 0.0016,
"step": 962
},
{
"epoch": 12.19,
"learning_rate": 3.387592051312782e-07,
"loss": 0.0053,
"step": 963
},
{
"epoch": 12.2,
"learning_rate": 3.207146835262742e-07,
"loss": 0.0072,
"step": 964
},
{
"epoch": 12.22,
"learning_rate": 3.0316244356683454e-07,
"loss": 0.0022,
"step": 965
},
{
"epoch": 12.23,
"learning_rate": 2.8610265918151414e-07,
"loss": 0.0015,
"step": 966
},
{
"epoch": 12.24,
"learning_rate": 2.695354994190047e-07,
"loss": 0.0021,
"step": 967
},
{
"epoch": 12.25,
"learning_rate": 2.534611284465083e-07,
"loss": 0.0016,
"step": 968
},
{
"epoch": 12.27,
"learning_rate": 2.3787970554806084e-07,
"loss": 0.0044,
"step": 969
},
{
"epoch": 12.28,
"learning_rate": 2.2279138512300567e-07,
"loss": 0.002,
"step": 970
},
{
"epoch": 12.29,
"learning_rate": 2.0819631668442253e-07,
"loss": 0.0025,
"step": 971
},
{
"epoch": 12.3,
"learning_rate": 1.940946448576675e-07,
"loss": 0.0037,
"step": 972
},
{
"epoch": 12.32,
"learning_rate": 1.8048650937893542e-07,
"loss": 0.0024,
"step": 973
},
{
"epoch": 12.33,
"learning_rate": 1.6737204509387206e-07,
"loss": 0.0025,
"step": 974
},
{
"epoch": 12.34,
"learning_rate": 1.5475138195623629e-07,
"loss": 0.0046,
"step": 975
},
{
"epoch": 12.35,
"learning_rate": 1.4262464502663443e-07,
"loss": 0.0018,
"step": 976
},
{
"epoch": 12.37,
"learning_rate": 1.309919544712268e-07,
"loss": 0.0066,
"step": 977
},
{
"epoch": 12.38,
"learning_rate": 1.1985342556060652e-07,
"loss": 0.0026,
"step": 978
},
{
"epoch": 12.39,
"learning_rate": 1.0920916866861142e-07,
"loss": 0.0054,
"step": 979
},
{
"epoch": 12.41,
"learning_rate": 9.905928927123609e-08,
"loss": 0.0032,
"step": 980
},
{
"epoch": 12.42,
"learning_rate": 8.940388794559939e-08,
"loss": 0.0031,
"step": 981
},
{
"epoch": 12.43,
"learning_rate": 8.02430603689397e-08,
"loss": 0.0019,
"step": 982
},
{
"epoch": 12.44,
"learning_rate": 7.157689731767669e-08,
"loss": 0.0029,
"step": 983
},
{
"epoch": 12.46,
"learning_rate": 6.340548466648443e-08,
"loss": 0.0044,
"step": 984
},
{
"epoch": 12.47,
"learning_rate": 5.572890338748082e-08,
"loss": 0.0057,
"step": 985
},
{
"epoch": 12.48,
"learning_rate": 4.8547229549383844e-08,
"loss": 0.0018,
"step": 986
},
{
"epoch": 12.49,
"learning_rate": 4.186053431680104e-08,
"loss": 0.0033,
"step": 987
},
{
"epoch": 12.51,
"learning_rate": 3.566888394948009e-08,
"loss": 0.0032,
"step": 988
},
{
"epoch": 12.52,
"learning_rate": 2.997233980168157e-08,
"loss": 0.0032,
"step": 989
},
{
"epoch": 12.53,
"learning_rate": 2.4770958321568283e-08,
"loss": 0.0017,
"step": 990
},
{
"epoch": 12.54,
"learning_rate": 2.0064791050633526e-08,
"loss": 0.0018,
"step": 991
},
{
"epoch": 12.56,
"learning_rate": 1.5853884623195925e-08,
"loss": 0.0018,
"step": 992
},
{
"epoch": 12.57,
"learning_rate": 1.2138280765944254e-08,
"loss": 0.0015,
"step": 993
},
{
"epoch": 12.58,
"learning_rate": 8.918016297515541e-09,
"loss": 0.0031,
"step": 994
},
{
"epoch": 12.59,
"learning_rate": 6.193123128134248e-09,
"loss": 0.0049,
"step": 995
},
{
"epoch": 12.61,
"learning_rate": 3.963628259290308e-09,
"loss": 0.006,
"step": 996
},
{
"epoch": 12.62,
"learning_rate": 2.229553783478222e-09,
"loss": 0.0022,
"step": 997
},
{
"epoch": 12.63,
"learning_rate": 9.90916883986115e-10,
"loss": 0.0028,
"step": 998
},
{
"epoch": 12.65,
"learning_rate": 2.477298346958978e-10,
"loss": 0.0022,
"step": 999
},
{
"epoch": 12.66,
"learning_rate": 0.0,
"loss": 0.0029,
"step": 1000
}
],
"max_steps": 1000,
"num_train_epochs": 13,
"total_flos": 7.311457550558822e+16,
"trial_name": null,
"trial_params": null
}