vicuna / checkpoint-1500 /trainer_state.json
chavinlo's picture
Training in progress, step 1500
02e2057
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9802319882372161,
"global_step": 1500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.4492753623188408e-07,
"loss": 2.5958,
"step": 1
},
{
"epoch": 0.0,
"learning_rate": 2.8985507246376816e-07,
"loss": 2.4792,
"step": 2
},
{
"epoch": 0.0,
"learning_rate": 4.347826086956522e-07,
"loss": 1.9376,
"step": 3
},
{
"epoch": 0.0,
"learning_rate": 5.797101449275363e-07,
"loss": 2.0731,
"step": 4
},
{
"epoch": 0.0,
"learning_rate": 7.246376811594204e-07,
"loss": 1.7538,
"step": 5
},
{
"epoch": 0.0,
"learning_rate": 8.695652173913044e-07,
"loss": 1.8296,
"step": 6
},
{
"epoch": 0.0,
"learning_rate": 1.0144927536231885e-06,
"loss": 1.9283,
"step": 7
},
{
"epoch": 0.01,
"learning_rate": 1.1594202898550726e-06,
"loss": 1.8517,
"step": 8
},
{
"epoch": 0.01,
"learning_rate": 1.3043478260869566e-06,
"loss": 1.7672,
"step": 9
},
{
"epoch": 0.01,
"learning_rate": 1.4492753623188408e-06,
"loss": 1.7437,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 1.5942028985507246e-06,
"loss": 1.7038,
"step": 11
},
{
"epoch": 0.01,
"learning_rate": 1.7391304347826088e-06,
"loss": 2.0155,
"step": 12
},
{
"epoch": 0.01,
"learning_rate": 1.884057971014493e-06,
"loss": 2.0078,
"step": 13
},
{
"epoch": 0.01,
"learning_rate": 2.028985507246377e-06,
"loss": 1.9901,
"step": 14
},
{
"epoch": 0.01,
"learning_rate": 2.173913043478261e-06,
"loss": 1.7954,
"step": 15
},
{
"epoch": 0.01,
"learning_rate": 2.3188405797101453e-06,
"loss": 1.7183,
"step": 16
},
{
"epoch": 0.01,
"learning_rate": 2.4637681159420295e-06,
"loss": 1.842,
"step": 17
},
{
"epoch": 0.01,
"learning_rate": 2.6086956521739132e-06,
"loss": 1.7957,
"step": 18
},
{
"epoch": 0.01,
"learning_rate": 2.7536231884057974e-06,
"loss": 1.7775,
"step": 19
},
{
"epoch": 0.01,
"learning_rate": 2.8985507246376816e-06,
"loss": 1.8612,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 3.043478260869566e-06,
"loss": 1.68,
"step": 21
},
{
"epoch": 0.01,
"learning_rate": 3.188405797101449e-06,
"loss": 1.928,
"step": 22
},
{
"epoch": 0.02,
"learning_rate": 3.3333333333333333e-06,
"loss": 1.7262,
"step": 23
},
{
"epoch": 0.02,
"learning_rate": 3.4782608695652175e-06,
"loss": 1.9312,
"step": 24
},
{
"epoch": 0.02,
"learning_rate": 3.6231884057971017e-06,
"loss": 1.9374,
"step": 25
},
{
"epoch": 0.02,
"learning_rate": 3.768115942028986e-06,
"loss": 1.5449,
"step": 26
},
{
"epoch": 0.02,
"learning_rate": 3.91304347826087e-06,
"loss": 1.7894,
"step": 27
},
{
"epoch": 0.02,
"learning_rate": 4.057971014492754e-06,
"loss": 2.0061,
"step": 28
},
{
"epoch": 0.02,
"learning_rate": 4.202898550724638e-06,
"loss": 1.775,
"step": 29
},
{
"epoch": 0.02,
"learning_rate": 4.347826086956522e-06,
"loss": 1.856,
"step": 30
},
{
"epoch": 0.02,
"learning_rate": 4.492753623188406e-06,
"loss": 1.7222,
"step": 31
},
{
"epoch": 0.02,
"learning_rate": 4.637681159420291e-06,
"loss": 2.0042,
"step": 32
},
{
"epoch": 0.02,
"learning_rate": 4.782608695652174e-06,
"loss": 1.7216,
"step": 33
},
{
"epoch": 0.02,
"learning_rate": 4.927536231884059e-06,
"loss": 1.6443,
"step": 34
},
{
"epoch": 0.02,
"learning_rate": 5.072463768115943e-06,
"loss": 1.6971,
"step": 35
},
{
"epoch": 0.02,
"learning_rate": 5.2173913043478265e-06,
"loss": 1.7971,
"step": 36
},
{
"epoch": 0.02,
"learning_rate": 5.362318840579711e-06,
"loss": 1.771,
"step": 37
},
{
"epoch": 0.02,
"learning_rate": 5.507246376811595e-06,
"loss": 1.7365,
"step": 38
},
{
"epoch": 0.03,
"learning_rate": 5.652173913043479e-06,
"loss": 1.9443,
"step": 39
},
{
"epoch": 0.03,
"learning_rate": 5.797101449275363e-06,
"loss": 1.8931,
"step": 40
},
{
"epoch": 0.03,
"learning_rate": 5.942028985507247e-06,
"loss": 1.7748,
"step": 41
},
{
"epoch": 0.03,
"learning_rate": 6.086956521739132e-06,
"loss": 1.8616,
"step": 42
},
{
"epoch": 0.03,
"learning_rate": 6.2318840579710145e-06,
"loss": 1.8224,
"step": 43
},
{
"epoch": 0.03,
"learning_rate": 6.376811594202898e-06,
"loss": 1.5656,
"step": 44
},
{
"epoch": 0.03,
"learning_rate": 6.521739130434783e-06,
"loss": 1.6967,
"step": 45
},
{
"epoch": 0.03,
"learning_rate": 6.666666666666667e-06,
"loss": 1.7999,
"step": 46
},
{
"epoch": 0.03,
"learning_rate": 6.811594202898551e-06,
"loss": 1.7706,
"step": 47
},
{
"epoch": 0.03,
"learning_rate": 6.956521739130435e-06,
"loss": 1.732,
"step": 48
},
{
"epoch": 0.03,
"learning_rate": 7.10144927536232e-06,
"loss": 1.9167,
"step": 49
},
{
"epoch": 0.03,
"learning_rate": 7.246376811594203e-06,
"loss": 1.7641,
"step": 50
},
{
"epoch": 0.03,
"learning_rate": 7.391304347826087e-06,
"loss": 1.5833,
"step": 51
},
{
"epoch": 0.03,
"learning_rate": 7.536231884057972e-06,
"loss": 1.7085,
"step": 52
},
{
"epoch": 0.03,
"learning_rate": 7.681159420289856e-06,
"loss": 1.7761,
"step": 53
},
{
"epoch": 0.04,
"learning_rate": 7.82608695652174e-06,
"loss": 1.7624,
"step": 54
},
{
"epoch": 0.04,
"learning_rate": 7.971014492753623e-06,
"loss": 1.7813,
"step": 55
},
{
"epoch": 0.04,
"learning_rate": 8.115942028985508e-06,
"loss": 1.6884,
"step": 56
},
{
"epoch": 0.04,
"learning_rate": 8.260869565217392e-06,
"loss": 1.7478,
"step": 57
},
{
"epoch": 0.04,
"learning_rate": 8.405797101449275e-06,
"loss": 1.6749,
"step": 58
},
{
"epoch": 0.04,
"learning_rate": 8.55072463768116e-06,
"loss": 1.5301,
"step": 59
},
{
"epoch": 0.04,
"learning_rate": 8.695652173913044e-06,
"loss": 1.6166,
"step": 60
},
{
"epoch": 0.04,
"learning_rate": 8.840579710144929e-06,
"loss": 1.5818,
"step": 61
},
{
"epoch": 0.04,
"learning_rate": 8.985507246376812e-06,
"loss": 1.7006,
"step": 62
},
{
"epoch": 0.04,
"learning_rate": 9.130434782608697e-06,
"loss": 1.7545,
"step": 63
},
{
"epoch": 0.04,
"learning_rate": 9.275362318840581e-06,
"loss": 1.7942,
"step": 64
},
{
"epoch": 0.04,
"learning_rate": 9.420289855072464e-06,
"loss": 1.8001,
"step": 65
},
{
"epoch": 0.04,
"learning_rate": 9.565217391304349e-06,
"loss": 1.8964,
"step": 66
},
{
"epoch": 0.04,
"learning_rate": 9.710144927536233e-06,
"loss": 1.8787,
"step": 67
},
{
"epoch": 0.04,
"learning_rate": 9.855072463768118e-06,
"loss": 1.6214,
"step": 68
},
{
"epoch": 0.05,
"learning_rate": 1e-05,
"loss": 1.7235,
"step": 69
},
{
"epoch": 0.05,
"learning_rate": 1.0144927536231885e-05,
"loss": 1.6727,
"step": 70
},
{
"epoch": 0.05,
"learning_rate": 1.0289855072463768e-05,
"loss": 1.5962,
"step": 71
},
{
"epoch": 0.05,
"learning_rate": 1.0434782608695653e-05,
"loss": 1.5443,
"step": 72
},
{
"epoch": 0.05,
"learning_rate": 1.0579710144927538e-05,
"loss": 1.8079,
"step": 73
},
{
"epoch": 0.05,
"learning_rate": 1.0724637681159422e-05,
"loss": 1.7635,
"step": 74
},
{
"epoch": 0.05,
"learning_rate": 1.0869565217391305e-05,
"loss": 1.6006,
"step": 75
},
{
"epoch": 0.05,
"learning_rate": 1.101449275362319e-05,
"loss": 1.8047,
"step": 76
},
{
"epoch": 0.05,
"learning_rate": 1.1159420289855074e-05,
"loss": 1.7687,
"step": 77
},
{
"epoch": 0.05,
"learning_rate": 1.1304347826086957e-05,
"loss": 1.6998,
"step": 78
},
{
"epoch": 0.05,
"learning_rate": 1.1449275362318842e-05,
"loss": 1.717,
"step": 79
},
{
"epoch": 0.05,
"learning_rate": 1.1594202898550726e-05,
"loss": 1.6178,
"step": 80
},
{
"epoch": 0.05,
"learning_rate": 1.1739130434782611e-05,
"loss": 1.9585,
"step": 81
},
{
"epoch": 0.05,
"learning_rate": 1.1884057971014494e-05,
"loss": 1.6892,
"step": 82
},
{
"epoch": 0.05,
"learning_rate": 1.2028985507246379e-05,
"loss": 1.6169,
"step": 83
},
{
"epoch": 0.05,
"learning_rate": 1.2173913043478263e-05,
"loss": 2.0966,
"step": 84
},
{
"epoch": 0.06,
"learning_rate": 1.2318840579710144e-05,
"loss": 2.0971,
"step": 85
},
{
"epoch": 0.06,
"learning_rate": 1.2463768115942029e-05,
"loss": 1.6939,
"step": 86
},
{
"epoch": 0.06,
"learning_rate": 1.2608695652173915e-05,
"loss": 1.7684,
"step": 87
},
{
"epoch": 0.06,
"learning_rate": 1.2753623188405797e-05,
"loss": 1.5717,
"step": 88
},
{
"epoch": 0.06,
"learning_rate": 1.2898550724637681e-05,
"loss": 1.7117,
"step": 89
},
{
"epoch": 0.06,
"learning_rate": 1.3043478260869566e-05,
"loss": 1.9092,
"step": 90
},
{
"epoch": 0.06,
"learning_rate": 1.318840579710145e-05,
"loss": 1.9193,
"step": 91
},
{
"epoch": 0.06,
"learning_rate": 1.3333333333333333e-05,
"loss": 1.8541,
"step": 92
},
{
"epoch": 0.06,
"learning_rate": 1.3478260869565218e-05,
"loss": 2.1758,
"step": 93
},
{
"epoch": 0.06,
"learning_rate": 1.3623188405797103e-05,
"loss": 1.897,
"step": 94
},
{
"epoch": 0.06,
"learning_rate": 1.3768115942028985e-05,
"loss": 1.6832,
"step": 95
},
{
"epoch": 0.06,
"learning_rate": 1.391304347826087e-05,
"loss": 1.7025,
"step": 96
},
{
"epoch": 0.06,
"learning_rate": 1.4057971014492755e-05,
"loss": 1.6876,
"step": 97
},
{
"epoch": 0.06,
"learning_rate": 1.420289855072464e-05,
"loss": 1.7254,
"step": 98
},
{
"epoch": 0.06,
"learning_rate": 1.4347826086956522e-05,
"loss": 1.7035,
"step": 99
},
{
"epoch": 0.07,
"learning_rate": 1.4492753623188407e-05,
"loss": 1.6701,
"step": 100
},
{
"epoch": 0.07,
"learning_rate": 1.4637681159420291e-05,
"loss": 1.755,
"step": 101
},
{
"epoch": 0.07,
"learning_rate": 1.4782608695652174e-05,
"loss": 1.8233,
"step": 102
},
{
"epoch": 0.07,
"learning_rate": 1.4927536231884059e-05,
"loss": 1.5721,
"step": 103
},
{
"epoch": 0.07,
"learning_rate": 1.5072463768115944e-05,
"loss": 1.92,
"step": 104
},
{
"epoch": 0.07,
"learning_rate": 1.5217391304347828e-05,
"loss": 1.7907,
"step": 105
},
{
"epoch": 0.07,
"learning_rate": 1.536231884057971e-05,
"loss": 1.7155,
"step": 106
},
{
"epoch": 0.07,
"learning_rate": 1.5507246376811594e-05,
"loss": 1.5529,
"step": 107
},
{
"epoch": 0.07,
"learning_rate": 1.565217391304348e-05,
"loss": 1.6133,
"step": 108
},
{
"epoch": 0.07,
"learning_rate": 1.5797101449275363e-05,
"loss": 1.6704,
"step": 109
},
{
"epoch": 0.07,
"learning_rate": 1.5942028985507246e-05,
"loss": 1.5658,
"step": 110
},
{
"epoch": 0.07,
"learning_rate": 1.6086956521739132e-05,
"loss": 1.638,
"step": 111
},
{
"epoch": 0.07,
"learning_rate": 1.6231884057971015e-05,
"loss": 1.9667,
"step": 112
},
{
"epoch": 0.07,
"learning_rate": 1.6376811594202898e-05,
"loss": 1.5857,
"step": 113
},
{
"epoch": 0.07,
"learning_rate": 1.6521739130434785e-05,
"loss": 1.8468,
"step": 114
},
{
"epoch": 0.08,
"learning_rate": 1.6666666666666667e-05,
"loss": 1.8011,
"step": 115
},
{
"epoch": 0.08,
"learning_rate": 1.681159420289855e-05,
"loss": 1.5584,
"step": 116
},
{
"epoch": 0.08,
"learning_rate": 1.6956521739130437e-05,
"loss": 1.5325,
"step": 117
},
{
"epoch": 0.08,
"learning_rate": 1.710144927536232e-05,
"loss": 1.6412,
"step": 118
},
{
"epoch": 0.08,
"learning_rate": 1.7246376811594206e-05,
"loss": 1.7962,
"step": 119
},
{
"epoch": 0.08,
"learning_rate": 1.739130434782609e-05,
"loss": 1.771,
"step": 120
},
{
"epoch": 0.08,
"learning_rate": 1.7536231884057972e-05,
"loss": 1.619,
"step": 121
},
{
"epoch": 0.08,
"learning_rate": 1.7681159420289858e-05,
"loss": 1.7047,
"step": 122
},
{
"epoch": 0.08,
"learning_rate": 1.782608695652174e-05,
"loss": 1.7217,
"step": 123
},
{
"epoch": 0.08,
"learning_rate": 1.7971014492753624e-05,
"loss": 1.819,
"step": 124
},
{
"epoch": 0.08,
"learning_rate": 1.811594202898551e-05,
"loss": 1.5605,
"step": 125
},
{
"epoch": 0.08,
"learning_rate": 1.8260869565217393e-05,
"loss": 1.7051,
"step": 126
},
{
"epoch": 0.08,
"learning_rate": 1.8405797101449276e-05,
"loss": 1.6241,
"step": 127
},
{
"epoch": 0.08,
"learning_rate": 1.8550724637681162e-05,
"loss": 1.7919,
"step": 128
},
{
"epoch": 0.08,
"learning_rate": 1.8695652173913045e-05,
"loss": 1.971,
"step": 129
},
{
"epoch": 0.08,
"learning_rate": 1.8840579710144928e-05,
"loss": 1.8275,
"step": 130
},
{
"epoch": 0.09,
"learning_rate": 1.8985507246376814e-05,
"loss": 1.6933,
"step": 131
},
{
"epoch": 0.09,
"learning_rate": 1.9130434782608697e-05,
"loss": 1.7722,
"step": 132
},
{
"epoch": 0.09,
"learning_rate": 1.927536231884058e-05,
"loss": 1.7086,
"step": 133
},
{
"epoch": 0.09,
"learning_rate": 1.9420289855072467e-05,
"loss": 1.6655,
"step": 134
},
{
"epoch": 0.09,
"learning_rate": 1.956521739130435e-05,
"loss": 1.7234,
"step": 135
},
{
"epoch": 0.09,
"learning_rate": 1.9710144927536236e-05,
"loss": 1.729,
"step": 136
},
{
"epoch": 0.09,
"learning_rate": 1.9855072463768115e-05,
"loss": 1.6154,
"step": 137
},
{
"epoch": 0.09,
"learning_rate": 2e-05,
"loss": 1.8506,
"step": 138
},
{
"epoch": 0.09,
"learning_rate": 1.9999997510228907e-05,
"loss": 1.886,
"step": 139
},
{
"epoch": 0.09,
"learning_rate": 1.999999004091687e-05,
"loss": 1.768,
"step": 140
},
{
"epoch": 0.09,
"learning_rate": 1.9999977592067603e-05,
"loss": 1.6536,
"step": 141
},
{
"epoch": 0.09,
"learning_rate": 1.9999960163687307e-05,
"loss": 1.8171,
"step": 142
},
{
"epoch": 0.09,
"learning_rate": 1.999993775578466e-05,
"loss": 1.62,
"step": 143
},
{
"epoch": 0.09,
"learning_rate": 1.9999910368370826e-05,
"loss": 1.9765,
"step": 144
},
{
"epoch": 0.09,
"learning_rate": 1.9999878001459436e-05,
"loss": 1.7381,
"step": 145
},
{
"epoch": 0.1,
"learning_rate": 1.9999840655066608e-05,
"loss": 1.8361,
"step": 146
},
{
"epoch": 0.1,
"learning_rate": 1.9999798329210938e-05,
"loss": 1.7863,
"step": 147
},
{
"epoch": 0.1,
"learning_rate": 1.9999751023913506e-05,
"loss": 1.8941,
"step": 148
},
{
"epoch": 0.1,
"learning_rate": 1.999969873919787e-05,
"loss": 1.8171,
"step": 149
},
{
"epoch": 0.1,
"learning_rate": 1.999964147509006e-05,
"loss": 1.5924,
"step": 150
},
{
"epoch": 0.1,
"learning_rate": 1.9999579231618588e-05,
"loss": 1.6363,
"step": 151
},
{
"epoch": 0.1,
"learning_rate": 1.999951200881446e-05,
"loss": 1.6188,
"step": 152
},
{
"epoch": 0.1,
"learning_rate": 1.9999439806711137e-05,
"loss": 1.6399,
"step": 153
},
{
"epoch": 0.1,
"learning_rate": 1.9999362625344584e-05,
"loss": 1.7759,
"step": 154
},
{
"epoch": 0.1,
"learning_rate": 1.999928046475322e-05,
"loss": 1.7508,
"step": 155
},
{
"epoch": 0.1,
"learning_rate": 1.9999193324977974e-05,
"loss": 1.7955,
"step": 156
},
{
"epoch": 0.1,
"learning_rate": 1.9999101206062224e-05,
"loss": 1.9154,
"step": 157
},
{
"epoch": 0.1,
"learning_rate": 1.9999004108051846e-05,
"loss": 1.7574,
"step": 158
},
{
"epoch": 0.1,
"learning_rate": 1.999890203099519e-05,
"loss": 1.6867,
"step": 159
},
{
"epoch": 0.1,
"learning_rate": 1.9998794974943087e-05,
"loss": 1.8837,
"step": 160
},
{
"epoch": 0.11,
"learning_rate": 1.9998682939948843e-05,
"loss": 1.7995,
"step": 161
},
{
"epoch": 0.11,
"learning_rate": 1.9998565926068253e-05,
"loss": 1.7261,
"step": 162
},
{
"epoch": 0.11,
"learning_rate": 1.9998443933359576e-05,
"loss": 1.7059,
"step": 163
},
{
"epoch": 0.11,
"learning_rate": 1.9998316961883563e-05,
"loss": 1.8152,
"step": 164
},
{
"epoch": 0.11,
"learning_rate": 1.999818501170344e-05,
"loss": 1.7966,
"step": 165
},
{
"epoch": 0.11,
"learning_rate": 1.999804808288491e-05,
"loss": 1.4982,
"step": 166
},
{
"epoch": 0.11,
"learning_rate": 1.999790617549616e-05,
"loss": 1.9083,
"step": 167
},
{
"epoch": 0.11,
"learning_rate": 1.9997759289607854e-05,
"loss": 1.6917,
"step": 168
},
{
"epoch": 0.11,
"learning_rate": 1.999760742529313e-05,
"loss": 1.7672,
"step": 169
},
{
"epoch": 0.11,
"learning_rate": 1.9997450582627614e-05,
"loss": 1.8419,
"step": 170
},
{
"epoch": 0.11,
"learning_rate": 1.9997288761689404e-05,
"loss": 1.7393,
"step": 171
},
{
"epoch": 0.11,
"learning_rate": 1.999712196255908e-05,
"loss": 1.9489,
"step": 172
},
{
"epoch": 0.11,
"learning_rate": 1.99969501853197e-05,
"loss": 1.6927,
"step": 173
},
{
"epoch": 0.11,
"learning_rate": 1.9996773430056806e-05,
"loss": 1.729,
"step": 174
},
{
"epoch": 0.11,
"learning_rate": 1.9996591696858407e-05,
"loss": 1.8146,
"step": 175
},
{
"epoch": 0.12,
"learning_rate": 1.9996404985814998e-05,
"loss": 1.5406,
"step": 176
},
{
"epoch": 0.12,
"learning_rate": 1.9996213297019556e-05,
"loss": 1.834,
"step": 177
},
{
"epoch": 0.12,
"learning_rate": 1.9996016630567535e-05,
"loss": 1.8505,
"step": 178
},
{
"epoch": 0.12,
"learning_rate": 1.999581498655686e-05,
"loss": 1.5933,
"step": 179
},
{
"epoch": 0.12,
"learning_rate": 1.9995608365087945e-05,
"loss": 1.7946,
"step": 180
},
{
"epoch": 0.12,
"learning_rate": 1.9995396766263677e-05,
"loss": 1.6388,
"step": 181
},
{
"epoch": 0.12,
"learning_rate": 1.9995180190189424e-05,
"loss": 1.9924,
"step": 182
},
{
"epoch": 0.12,
"learning_rate": 1.9994958636973024e-05,
"loss": 1.5948,
"step": 183
},
{
"epoch": 0.12,
"learning_rate": 1.999473210672481e-05,
"loss": 1.5846,
"step": 184
},
{
"epoch": 0.12,
"learning_rate": 1.9994500599557577e-05,
"loss": 1.6471,
"step": 185
},
{
"epoch": 0.12,
"learning_rate": 1.999426411558661e-05,
"loss": 1.9565,
"step": 186
},
{
"epoch": 0.12,
"learning_rate": 1.9994022654929665e-05,
"loss": 1.6896,
"step": 187
},
{
"epoch": 0.12,
"learning_rate": 1.9993776217706972e-05,
"loss": 1.9445,
"step": 188
},
{
"epoch": 0.12,
"learning_rate": 1.9993524804041255e-05,
"loss": 1.714,
"step": 189
},
{
"epoch": 0.12,
"learning_rate": 1.9993268414057704e-05,
"loss": 1.7137,
"step": 190
},
{
"epoch": 0.12,
"learning_rate": 1.9993007047883988e-05,
"loss": 1.6729,
"step": 191
},
{
"epoch": 0.13,
"learning_rate": 1.9992740705650252e-05,
"loss": 1.691,
"step": 192
},
{
"epoch": 0.13,
"learning_rate": 1.9992469387489127e-05,
"loss": 1.7324,
"step": 193
},
{
"epoch": 0.13,
"learning_rate": 1.999219309353572e-05,
"loss": 1.57,
"step": 194
},
{
"epoch": 0.13,
"learning_rate": 1.9991911823927607e-05,
"loss": 2.0345,
"step": 195
},
{
"epoch": 0.13,
"learning_rate": 1.999162557880485e-05,
"loss": 1.5059,
"step": 196
},
{
"epoch": 0.13,
"learning_rate": 1.999133435830998e-05,
"loss": 1.8392,
"step": 197
},
{
"epoch": 0.13,
"learning_rate": 1.9991038162588018e-05,
"loss": 1.7923,
"step": 198
},
{
"epoch": 0.13,
"learning_rate": 1.9990736991786453e-05,
"loss": 1.78,
"step": 199
},
{
"epoch": 0.13,
"learning_rate": 1.999043084605526e-05,
"loss": 1.8779,
"step": 200
},
{
"epoch": 0.13,
"learning_rate": 1.999011972554688e-05,
"loss": 1.6544,
"step": 201
},
{
"epoch": 0.13,
"learning_rate": 1.998980363041624e-05,
"loss": 1.8014,
"step": 202
},
{
"epoch": 0.13,
"learning_rate": 1.9989482560820734e-05,
"loss": 1.6421,
"step": 203
},
{
"epoch": 0.13,
"learning_rate": 1.9989156516920248e-05,
"loss": 1.8874,
"step": 204
},
{
"epoch": 0.13,
"learning_rate": 1.998882549887713e-05,
"loss": 1.7511,
"step": 205
},
{
"epoch": 0.13,
"learning_rate": 1.9988489506856218e-05,
"loss": 1.7904,
"step": 206
},
{
"epoch": 0.14,
"learning_rate": 1.998814854102482e-05,
"loss": 1.7859,
"step": 207
},
{
"epoch": 0.14,
"learning_rate": 1.9987802601552717e-05,
"loss": 1.7957,
"step": 208
},
{
"epoch": 0.14,
"learning_rate": 1.9987451688612176e-05,
"loss": 1.6068,
"step": 209
},
{
"epoch": 0.14,
"learning_rate": 1.9987095802377933e-05,
"loss": 1.7852,
"step": 210
},
{
"epoch": 0.14,
"learning_rate": 1.99867349430272e-05,
"loss": 1.7431,
"step": 211
},
{
"epoch": 0.14,
"learning_rate": 1.9986369110739675e-05,
"loss": 1.8569,
"step": 212
},
{
"epoch": 0.14,
"learning_rate": 1.998599830569752e-05,
"loss": 1.6872,
"step": 213
},
{
"epoch": 0.14,
"learning_rate": 1.9985622528085382e-05,
"loss": 1.7792,
"step": 214
},
{
"epoch": 0.14,
"learning_rate": 1.998524177809038e-05,
"loss": 1.9195,
"step": 215
},
{
"epoch": 0.14,
"learning_rate": 1.998485605590211e-05,
"loss": 1.6664,
"step": 216
},
{
"epoch": 0.14,
"learning_rate": 1.998446536171265e-05,
"loss": 1.6921,
"step": 217
},
{
"epoch": 0.14,
"learning_rate": 1.9984069695716534e-05,
"loss": 1.7893,
"step": 218
},
{
"epoch": 0.14,
"learning_rate": 1.99836690581108e-05,
"loss": 1.8192,
"step": 219
},
{
"epoch": 0.14,
"learning_rate": 1.998326344909494e-05,
"loss": 1.885,
"step": 220
},
{
"epoch": 0.14,
"learning_rate": 1.9982852868870927e-05,
"loss": 1.556,
"step": 221
},
{
"epoch": 0.15,
"learning_rate": 1.9982437317643218e-05,
"loss": 1.7575,
"step": 222
},
{
"epoch": 0.15,
"learning_rate": 1.998201679561873e-05,
"loss": 1.7023,
"step": 223
},
{
"epoch": 0.15,
"learning_rate": 1.998159130300687e-05,
"loss": 1.6913,
"step": 224
},
{
"epoch": 0.15,
"learning_rate": 1.9981160840019513e-05,
"loss": 1.7819,
"step": 225
},
{
"epoch": 0.15,
"learning_rate": 1.9980725406871007e-05,
"loss": 1.5825,
"step": 226
},
{
"epoch": 0.15,
"learning_rate": 1.998028500377818e-05,
"loss": 1.9634,
"step": 227
},
{
"epoch": 0.15,
"learning_rate": 1.9979839630960333e-05,
"loss": 1.8114,
"step": 228
},
{
"epoch": 0.15,
"learning_rate": 1.9979389288639237e-05,
"loss": 1.6686,
"step": 229
},
{
"epoch": 0.15,
"learning_rate": 1.997893397703915e-05,
"loss": 1.8282,
"step": 230
},
{
"epoch": 0.15,
"learning_rate": 1.9978473696386787e-05,
"loss": 1.7462,
"step": 231
},
{
"epoch": 0.15,
"learning_rate": 1.9978008446911354e-05,
"loss": 1.7526,
"step": 232
},
{
"epoch": 0.15,
"learning_rate": 1.997753822884452e-05,
"loss": 1.6126,
"step": 233
},
{
"epoch": 0.15,
"learning_rate": 1.9977063042420438e-05,
"loss": 1.8654,
"step": 234
},
{
"epoch": 0.15,
"learning_rate": 1.997658288787572e-05,
"loss": 1.5399,
"step": 235
},
{
"epoch": 0.15,
"learning_rate": 1.9976097765449463e-05,
"loss": 1.8716,
"step": 236
},
{
"epoch": 0.15,
"learning_rate": 1.9975607675383243e-05,
"loss": 1.9406,
"step": 237
},
{
"epoch": 0.16,
"learning_rate": 1.9975112617921097e-05,
"loss": 1.7728,
"step": 238
},
{
"epoch": 0.16,
"learning_rate": 1.997461259330954e-05,
"loss": 1.8756,
"step": 239
},
{
"epoch": 0.16,
"learning_rate": 1.997410760179756e-05,
"loss": 1.8006,
"step": 240
},
{
"epoch": 0.16,
"learning_rate": 1.997359764363663e-05,
"loss": 1.7244,
"step": 241
},
{
"epoch": 0.16,
"learning_rate": 1.9973082719080673e-05,
"loss": 1.8612,
"step": 242
},
{
"epoch": 0.16,
"learning_rate": 1.99725628283861e-05,
"loss": 1.7037,
"step": 243
},
{
"epoch": 0.16,
"learning_rate": 1.9972037971811802e-05,
"loss": 1.939,
"step": 244
},
{
"epoch": 0.16,
"learning_rate": 1.9971508149619126e-05,
"loss": 1.5057,
"step": 245
},
{
"epoch": 0.16,
"learning_rate": 1.99709733620719e-05,
"loss": 1.9402,
"step": 246
},
{
"epoch": 0.16,
"learning_rate": 1.9970433609436426e-05,
"loss": 1.6762,
"step": 247
},
{
"epoch": 0.16,
"learning_rate": 1.9969888891981473e-05,
"loss": 1.8236,
"step": 248
},
{
"epoch": 0.16,
"learning_rate": 1.9969339209978287e-05,
"loss": 1.7346,
"step": 249
},
{
"epoch": 0.16,
"learning_rate": 1.9968784563700586e-05,
"loss": 1.5757,
"step": 250
},
{
"epoch": 0.16,
"learning_rate": 1.9968224953424553e-05,
"loss": 1.7573,
"step": 251
},
{
"epoch": 0.16,
"learning_rate": 1.9967660379428855e-05,
"loss": 1.5817,
"step": 252
},
{
"epoch": 0.17,
"learning_rate": 1.996709084199462e-05,
"loss": 1.5974,
"step": 253
},
{
"epoch": 0.17,
"learning_rate": 1.9966516341405452e-05,
"loss": 1.8537,
"step": 254
},
{
"epoch": 0.17,
"learning_rate": 1.996593687794743e-05,
"loss": 1.71,
"step": 255
},
{
"epoch": 0.17,
"learning_rate": 1.996535245190909e-05,
"loss": 1.7969,
"step": 256
},
{
"epoch": 0.17,
"learning_rate": 1.9964763063581465e-05,
"loss": 1.8761,
"step": 257
},
{
"epoch": 0.17,
"learning_rate": 1.996416871325803e-05,
"loss": 1.8265,
"step": 258
},
{
"epoch": 0.17,
"learning_rate": 1.996356940123475e-05,
"loss": 1.5745,
"step": 259
},
{
"epoch": 0.17,
"learning_rate": 1.996296512781005e-05,
"loss": 1.575,
"step": 260
},
{
"epoch": 0.17,
"learning_rate": 1.996235589328484e-05,
"loss": 1.537,
"step": 261
},
{
"epoch": 0.17,
"learning_rate": 1.996174169796248e-05,
"loss": 1.6117,
"step": 262
},
{
"epoch": 0.17,
"learning_rate": 1.9961122542148823e-05,
"loss": 1.7155,
"step": 263
},
{
"epoch": 0.17,
"learning_rate": 1.996049842615217e-05,
"loss": 1.6558,
"step": 264
},
{
"epoch": 0.17,
"learning_rate": 1.9959869350283306e-05,
"loss": 1.6215,
"step": 265
},
{
"epoch": 0.17,
"learning_rate": 1.9959235314855485e-05,
"loss": 1.877,
"step": 266
},
{
"epoch": 0.17,
"learning_rate": 1.995859632018442e-05,
"loss": 1.7136,
"step": 267
},
{
"epoch": 0.18,
"learning_rate": 1.9957952366588307e-05,
"loss": 1.6772,
"step": 268
},
{
"epoch": 0.18,
"learning_rate": 1.9957303454387803e-05,
"loss": 1.6511,
"step": 269
},
{
"epoch": 0.18,
"learning_rate": 1.995664958390604e-05,
"loss": 1.717,
"step": 270
},
{
"epoch": 0.18,
"learning_rate": 1.9955990755468614e-05,
"loss": 1.9313,
"step": 271
},
{
"epoch": 0.18,
"learning_rate": 1.9955326969403587e-05,
"loss": 1.8296,
"step": 272
},
{
"epoch": 0.18,
"learning_rate": 1.99546582260415e-05,
"loss": 1.7657,
"step": 273
},
{
"epoch": 0.18,
"learning_rate": 1.9953984525715354e-05,
"loss": 1.6089,
"step": 274
},
{
"epoch": 0.18,
"learning_rate": 1.995330586876062e-05,
"loss": 1.6183,
"step": 275
},
{
"epoch": 0.18,
"learning_rate": 1.995262225551524e-05,
"loss": 1.8083,
"step": 276
},
{
"epoch": 0.18,
"learning_rate": 1.9951933686319624e-05,
"loss": 1.7245,
"step": 277
},
{
"epoch": 0.18,
"learning_rate": 1.9951240161516643e-05,
"loss": 1.9381,
"step": 278
},
{
"epoch": 0.18,
"learning_rate": 1.9950541681451644e-05,
"loss": 1.6028,
"step": 279
},
{
"epoch": 0.18,
"learning_rate": 1.9949838246472436e-05,
"loss": 1.6434,
"step": 280
},
{
"epoch": 0.18,
"learning_rate": 1.9949129856929295e-05,
"loss": 1.7555,
"step": 281
},
{
"epoch": 0.18,
"learning_rate": 1.9948416513174976e-05,
"loss": 1.7311,
"step": 282
},
{
"epoch": 0.18,
"learning_rate": 1.994769821556468e-05,
"loss": 1.6387,
"step": 283
},
{
"epoch": 0.19,
"learning_rate": 1.9946974964456094e-05,
"loss": 1.8235,
"step": 284
},
{
"epoch": 0.19,
"learning_rate": 1.994624676020936e-05,
"loss": 1.9122,
"step": 285
},
{
"epoch": 0.19,
"learning_rate": 1.9945513603187096e-05,
"loss": 1.838,
"step": 286
},
{
"epoch": 0.19,
"learning_rate": 1.9944775493754374e-05,
"loss": 1.6418,
"step": 287
},
{
"epoch": 0.19,
"learning_rate": 1.9944032432278743e-05,
"loss": 1.5999,
"step": 288
},
{
"epoch": 0.19,
"learning_rate": 1.994328441913021e-05,
"loss": 1.8707,
"step": 289
},
{
"epoch": 0.19,
"learning_rate": 1.9942531454681254e-05,
"loss": 1.7181,
"step": 290
},
{
"epoch": 0.19,
"learning_rate": 1.9941773539306818e-05,
"loss": 1.999,
"step": 291
},
{
"epoch": 0.19,
"learning_rate": 1.9941010673384307e-05,
"loss": 1.7784,
"step": 292
},
{
"epoch": 0.19,
"learning_rate": 1.9940242857293594e-05,
"loss": 1.6533,
"step": 293
},
{
"epoch": 0.19,
"learning_rate": 1.9939470091417012e-05,
"loss": 1.7313,
"step": 294
},
{
"epoch": 0.19,
"learning_rate": 1.993869237613937e-05,
"loss": 1.6005,
"step": 295
},
{
"epoch": 0.19,
"learning_rate": 1.993790971184793e-05,
"loss": 1.8541,
"step": 296
},
{
"epoch": 0.19,
"learning_rate": 1.9937122098932428e-05,
"loss": 1.7487,
"step": 297
},
{
"epoch": 0.19,
"learning_rate": 1.9936329537785054e-05,
"loss": 1.9028,
"step": 298
},
{
"epoch": 0.2,
"learning_rate": 1.9935532028800465e-05,
"loss": 1.7109,
"step": 299
},
{
"epoch": 0.2,
"learning_rate": 1.9934729572375792e-05,
"loss": 1.5248,
"step": 300
},
{
"epoch": 0.2,
"learning_rate": 1.9933922168910617e-05,
"loss": 1.7893,
"step": 301
},
{
"epoch": 0.2,
"learning_rate": 1.993310981880699e-05,
"loss": 1.6346,
"step": 302
},
{
"epoch": 0.2,
"learning_rate": 1.9932292522469424e-05,
"loss": 1.6256,
"step": 303
},
{
"epoch": 0.2,
"learning_rate": 1.9931470280304895e-05,
"loss": 1.8888,
"step": 304
},
{
"epoch": 0.2,
"learning_rate": 1.9930643092722843e-05,
"loss": 1.6717,
"step": 305
},
{
"epoch": 0.2,
"learning_rate": 1.992981096013517e-05,
"loss": 1.9557,
"step": 306
},
{
"epoch": 0.2,
"learning_rate": 1.992897388295624e-05,
"loss": 1.9722,
"step": 307
},
{
"epoch": 0.2,
"learning_rate": 1.9928131861602878e-05,
"loss": 1.7194,
"step": 308
},
{
"epoch": 0.2,
"learning_rate": 1.992728489649437e-05,
"loss": 1.7614,
"step": 309
},
{
"epoch": 0.2,
"learning_rate": 1.992643298805247e-05,
"loss": 1.7441,
"step": 310
},
{
"epoch": 0.2,
"learning_rate": 1.9925576136701386e-05,
"loss": 1.728,
"step": 311
},
{
"epoch": 0.2,
"learning_rate": 1.992471434286779e-05,
"loss": 1.7892,
"step": 312
},
{
"epoch": 0.2,
"learning_rate": 1.9923847606980824e-05,
"loss": 1.6874,
"step": 313
},
{
"epoch": 0.21,
"learning_rate": 1.9922975929472076e-05,
"loss": 1.8102,
"step": 314
},
{
"epoch": 0.21,
"learning_rate": 1.99220993107756e-05,
"loss": 1.682,
"step": 315
},
{
"epoch": 0.21,
"learning_rate": 1.9921217751327916e-05,
"loss": 1.9336,
"step": 316
},
{
"epoch": 0.21,
"learning_rate": 1.9920331251568e-05,
"loss": 1.6113,
"step": 317
},
{
"epoch": 0.21,
"learning_rate": 1.9919439811937283e-05,
"loss": 1.9143,
"step": 318
},
{
"epoch": 0.21,
"learning_rate": 1.9918543432879667e-05,
"loss": 1.7181,
"step": 319
},
{
"epoch": 0.21,
"learning_rate": 1.9917642114841505e-05,
"loss": 1.6744,
"step": 320
},
{
"epoch": 0.21,
"learning_rate": 1.9916735858271615e-05,
"loss": 1.7523,
"step": 321
},
{
"epoch": 0.21,
"learning_rate": 1.9915824663621267e-05,
"loss": 1.78,
"step": 322
},
{
"epoch": 0.21,
"learning_rate": 1.9914908531344198e-05,
"loss": 1.9025,
"step": 323
},
{
"epoch": 0.21,
"learning_rate": 1.9913987461896597e-05,
"loss": 1.7448,
"step": 324
},
{
"epoch": 0.21,
"learning_rate": 1.9913061455737116e-05,
"loss": 1.8273,
"step": 325
},
{
"epoch": 0.21,
"learning_rate": 1.9912130513326863e-05,
"loss": 1.6921,
"step": 326
},
{
"epoch": 0.21,
"learning_rate": 1.991119463512941e-05,
"loss": 1.7164,
"step": 327
},
{
"epoch": 0.21,
"learning_rate": 1.991025382161077e-05,
"loss": 1.6556,
"step": 328
},
{
"epoch": 0.21,
"learning_rate": 1.9909308073239433e-05,
"loss": 1.6554,
"step": 329
},
{
"epoch": 0.22,
"learning_rate": 1.9908357390486342e-05,
"loss": 1.7639,
"step": 330
},
{
"epoch": 0.22,
"learning_rate": 1.9907401773824887e-05,
"loss": 1.7496,
"step": 331
},
{
"epoch": 0.22,
"learning_rate": 1.990644122373092e-05,
"loss": 1.8797,
"step": 332
},
{
"epoch": 0.22,
"learning_rate": 1.9905475740682756e-05,
"loss": 1.5464,
"step": 333
},
{
"epoch": 0.22,
"learning_rate": 1.990450532516116e-05,
"loss": 1.7168,
"step": 334
},
{
"epoch": 0.22,
"learning_rate": 1.990352997764935e-05,
"loss": 1.7238,
"step": 335
},
{
"epoch": 0.22,
"learning_rate": 1.990254969863301e-05,
"loss": 1.983,
"step": 336
},
{
"epoch": 0.22,
"learning_rate": 1.9901564488600274e-05,
"loss": 1.6583,
"step": 337
},
{
"epoch": 0.22,
"learning_rate": 1.9900574348041728e-05,
"loss": 1.9771,
"step": 338
},
{
"epoch": 0.22,
"learning_rate": 1.9899579277450417e-05,
"loss": 1.7111,
"step": 339
},
{
"epoch": 0.22,
"learning_rate": 1.9898579277321845e-05,
"loss": 1.8824,
"step": 340
},
{
"epoch": 0.22,
"learning_rate": 1.9897574348153963e-05,
"loss": 1.7126,
"step": 341
},
{
"epoch": 0.22,
"learning_rate": 1.989656449044718e-05,
"loss": 1.6109,
"step": 342
},
{
"epoch": 0.22,
"learning_rate": 1.9895549704704358e-05,
"loss": 1.6111,
"step": 343
},
{
"epoch": 0.22,
"learning_rate": 1.9894529991430814e-05,
"loss": 1.5684,
"step": 344
},
{
"epoch": 0.23,
"learning_rate": 1.9893505351134322e-05,
"loss": 1.7286,
"step": 345
},
{
"epoch": 0.23,
"learning_rate": 1.98924757843251e-05,
"loss": 1.6518,
"step": 346
},
{
"epoch": 0.23,
"learning_rate": 1.989144129151583e-05,
"loss": 1.6504,
"step": 347
},
{
"epoch": 0.23,
"learning_rate": 1.9890401873221642e-05,
"loss": 1.4915,
"step": 348
},
{
"epoch": 0.23,
"learning_rate": 1.9889357529960114e-05,
"loss": 1.7182,
"step": 349
},
{
"epoch": 0.23,
"learning_rate": 1.9888308262251286e-05,
"loss": 1.6,
"step": 350
},
{
"epoch": 0.23,
"learning_rate": 1.9887254070617644e-05,
"loss": 1.6773,
"step": 351
},
{
"epoch": 0.23,
"learning_rate": 1.9886194955584126e-05,
"loss": 1.7222,
"step": 352
},
{
"epoch": 0.23,
"learning_rate": 1.9885130917678123e-05,
"loss": 1.799,
"step": 353
},
{
"epoch": 0.23,
"learning_rate": 1.988406195742948e-05,
"loss": 1.9064,
"step": 354
},
{
"epoch": 0.23,
"learning_rate": 1.988298807537049e-05,
"loss": 1.652,
"step": 355
},
{
"epoch": 0.23,
"learning_rate": 1.9881909272035888e-05,
"loss": 1.8517,
"step": 356
},
{
"epoch": 0.23,
"learning_rate": 1.9880825547962882e-05,
"loss": 1.7441,
"step": 357
},
{
"epoch": 0.23,
"learning_rate": 1.9879736903691107e-05,
"loss": 1.6021,
"step": 358
},
{
"epoch": 0.23,
"learning_rate": 1.9878643339762663e-05,
"loss": 1.7011,
"step": 359
},
{
"epoch": 0.24,
"learning_rate": 1.9877544856722094e-05,
"loss": 1.6778,
"step": 360
},
{
"epoch": 0.24,
"learning_rate": 1.9876441455116393e-05,
"loss": 1.7144,
"step": 361
},
{
"epoch": 0.24,
"learning_rate": 1.9875333135495e-05,
"loss": 2.0308,
"step": 362
},
{
"epoch": 0.24,
"learning_rate": 1.987421989840982e-05,
"loss": 1.7471,
"step": 363
},
{
"epoch": 0.24,
"learning_rate": 1.987310174441518e-05,
"loss": 1.5851,
"step": 364
},
{
"epoch": 0.24,
"learning_rate": 1.987197867406788e-05,
"loss": 1.873,
"step": 365
},
{
"epoch": 0.24,
"learning_rate": 1.987085068792715e-05,
"loss": 1.7713,
"step": 366
},
{
"epoch": 0.24,
"learning_rate": 1.986971778655468e-05,
"loss": 1.6572,
"step": 367
},
{
"epoch": 0.24,
"learning_rate": 1.98685799705146e-05,
"loss": 1.7438,
"step": 368
},
{
"epoch": 0.24,
"learning_rate": 1.986743724037349e-05,
"loss": 1.7512,
"step": 369
},
{
"epoch": 0.24,
"learning_rate": 1.9866289596700383e-05,
"loss": 1.7058,
"step": 370
},
{
"epoch": 0.24,
"learning_rate": 1.9865137040066744e-05,
"loss": 1.7243,
"step": 371
},
{
"epoch": 0.24,
"learning_rate": 1.98639795710465e-05,
"loss": 1.5387,
"step": 372
},
{
"epoch": 0.24,
"learning_rate": 1.986281719021602e-05,
"loss": 1.8603,
"step": 373
},
{
"epoch": 0.24,
"learning_rate": 1.9861649898154107e-05,
"loss": 1.7283,
"step": 374
},
{
"epoch": 0.25,
"learning_rate": 1.986047769544203e-05,
"loss": 1.6644,
"step": 375
},
{
"epoch": 0.25,
"learning_rate": 1.985930058266348e-05,
"loss": 1.455,
"step": 376
},
{
"epoch": 0.25,
"learning_rate": 1.985811856040462e-05,
"loss": 1.6884,
"step": 377
},
{
"epoch": 0.25,
"learning_rate": 1.9856931629254032e-05,
"loss": 1.7179,
"step": 378
},
{
"epoch": 0.25,
"learning_rate": 1.9855739789802753e-05,
"loss": 1.9469,
"step": 379
},
{
"epoch": 0.25,
"learning_rate": 1.985454304264427e-05,
"loss": 1.6094,
"step": 380
},
{
"epoch": 0.25,
"learning_rate": 1.9853341388374504e-05,
"loss": 1.6679,
"step": 381
},
{
"epoch": 0.25,
"learning_rate": 1.985213482759183e-05,
"loss": 1.5478,
"step": 382
},
{
"epoch": 0.25,
"learning_rate": 1.9850923360897055e-05,
"loss": 1.6236,
"step": 383
},
{
"epoch": 0.25,
"learning_rate": 1.9849706988893433e-05,
"loss": 1.8009,
"step": 384
},
{
"epoch": 0.25,
"learning_rate": 1.9848485712186665e-05,
"loss": 1.4946,
"step": 385
},
{
"epoch": 0.25,
"learning_rate": 1.984725953138489e-05,
"loss": 1.6641,
"step": 386
},
{
"epoch": 0.25,
"learning_rate": 1.9846028447098687e-05,
"loss": 1.6904,
"step": 387
},
{
"epoch": 0.25,
"learning_rate": 1.984479245994108e-05,
"loss": 1.6803,
"step": 388
},
{
"epoch": 0.25,
"learning_rate": 1.9843551570527543e-05,
"loss": 1.8788,
"step": 389
},
{
"epoch": 0.25,
"learning_rate": 1.984230577947597e-05,
"loss": 1.8476,
"step": 390
},
{
"epoch": 0.26,
"learning_rate": 1.9841055087406715e-05,
"loss": 1.5684,
"step": 391
},
{
"epoch": 0.26,
"learning_rate": 1.983979949494256e-05,
"loss": 1.4454,
"step": 392
},
{
"epoch": 0.26,
"learning_rate": 1.983853900270874e-05,
"loss": 1.6985,
"step": 393
},
{
"epoch": 0.26,
"learning_rate": 1.9837273611332918e-05,
"loss": 1.9378,
"step": 394
},
{
"epoch": 0.26,
"learning_rate": 1.9836003321445198e-05,
"loss": 1.5318,
"step": 395
},
{
"epoch": 0.26,
"learning_rate": 1.983472813367813e-05,
"loss": 1.7142,
"step": 396
},
{
"epoch": 0.26,
"learning_rate": 1.98334480486667e-05,
"loss": 1.7121,
"step": 397
},
{
"epoch": 0.26,
"learning_rate": 1.9832163067048335e-05,
"loss": 1.7841,
"step": 398
},
{
"epoch": 0.26,
"learning_rate": 1.9830873189462886e-05,
"loss": 1.6868,
"step": 399
},
{
"epoch": 0.26,
"learning_rate": 1.982957841655266e-05,
"loss": 1.9112,
"step": 400
},
{
"epoch": 0.26,
"learning_rate": 1.9828278748962396e-05,
"loss": 1.5438,
"step": 401
},
{
"epoch": 0.26,
"learning_rate": 1.9826974187339267e-05,
"loss": 1.8109,
"step": 402
},
{
"epoch": 0.26,
"learning_rate": 1.9825664732332886e-05,
"loss": 1.784,
"step": 403
},
{
"epoch": 0.26,
"learning_rate": 1.9824350384595295e-05,
"loss": 1.5111,
"step": 404
},
{
"epoch": 0.26,
"learning_rate": 1.982303114478099e-05,
"loss": 1.5256,
"step": 405
},
{
"epoch": 0.27,
"learning_rate": 1.9821707013546885e-05,
"loss": 1.6807,
"step": 406
},
{
"epoch": 0.27,
"learning_rate": 1.9820377991552338e-05,
"loss": 1.606,
"step": 407
},
{
"epoch": 0.27,
"learning_rate": 1.9819044079459142e-05,
"loss": 1.5803,
"step": 408
},
{
"epoch": 0.27,
"learning_rate": 1.9817705277931526e-05,
"loss": 1.7415,
"step": 409
},
{
"epoch": 0.27,
"learning_rate": 1.9816361587636143e-05,
"loss": 1.7781,
"step": 410
},
{
"epoch": 0.27,
"learning_rate": 1.9815013009242103e-05,
"loss": 1.679,
"step": 411
},
{
"epoch": 0.27,
"learning_rate": 1.9813659543420924e-05,
"loss": 1.7402,
"step": 412
},
{
"epoch": 0.27,
"learning_rate": 1.981230119084658e-05,
"loss": 1.6561,
"step": 413
},
{
"epoch": 0.27,
"learning_rate": 1.981093795219546e-05,
"loss": 1.4753,
"step": 414
},
{
"epoch": 0.27,
"learning_rate": 1.9809569828146396e-05,
"loss": 1.8676,
"step": 415
},
{
"epoch": 0.27,
"learning_rate": 1.9808196819380656e-05,
"loss": 1.5367,
"step": 416
},
{
"epoch": 0.27,
"learning_rate": 1.980681892658193e-05,
"loss": 1.7324,
"step": 417
},
{
"epoch": 0.27,
"learning_rate": 1.9805436150436352e-05,
"loss": 1.6161,
"step": 418
},
{
"epoch": 0.27,
"learning_rate": 1.9804048491632475e-05,
"loss": 1.5056,
"step": 419
},
{
"epoch": 0.27,
"learning_rate": 1.980265595086129e-05,
"loss": 1.7321,
"step": 420
},
{
"epoch": 0.28,
"learning_rate": 1.9801258528816223e-05,
"loss": 1.5752,
"step": 421
},
{
"epoch": 0.28,
"learning_rate": 1.9799856226193125e-05,
"loss": 1.8505,
"step": 422
},
{
"epoch": 0.28,
"learning_rate": 1.9798449043690272e-05,
"loss": 1.7899,
"step": 423
},
{
"epoch": 0.28,
"learning_rate": 1.9797036982008385e-05,
"loss": 1.8264,
"step": 424
},
{
"epoch": 0.28,
"learning_rate": 1.9795620041850602e-05,
"loss": 1.9919,
"step": 425
},
{
"epoch": 0.28,
"learning_rate": 1.9794198223922496e-05,
"loss": 1.6524,
"step": 426
},
{
"epoch": 0.28,
"learning_rate": 1.9792771528932064e-05,
"loss": 1.7086,
"step": 427
},
{
"epoch": 0.28,
"learning_rate": 1.979133995758974e-05,
"loss": 1.8144,
"step": 428
},
{
"epoch": 0.28,
"learning_rate": 1.9789903510608374e-05,
"loss": 1.5938,
"step": 429
},
{
"epoch": 0.28,
"learning_rate": 1.978846218870326e-05,
"loss": 1.8841,
"step": 430
},
{
"epoch": 0.28,
"learning_rate": 1.97870159925921e-05,
"loss": 1.6414,
"step": 431
},
{
"epoch": 0.28,
"learning_rate": 1.9785564922995042e-05,
"loss": 1.6442,
"step": 432
},
{
"epoch": 0.28,
"learning_rate": 1.978410898063465e-05,
"loss": 1.6781,
"step": 433
},
{
"epoch": 0.28,
"learning_rate": 1.978264816623591e-05,
"loss": 1.6857,
"step": 434
},
{
"epoch": 0.28,
"learning_rate": 1.978118248052625e-05,
"loss": 1.6302,
"step": 435
},
{
"epoch": 0.28,
"learning_rate": 1.977971192423551e-05,
"loss": 1.7229,
"step": 436
},
{
"epoch": 0.29,
"learning_rate": 1.977823649809596e-05,
"loss": 1.8492,
"step": 437
},
{
"epoch": 0.29,
"learning_rate": 1.9776756202842297e-05,
"loss": 1.7095,
"step": 438
},
{
"epoch": 0.29,
"learning_rate": 1.9775271039211633e-05,
"loss": 1.8962,
"step": 439
},
{
"epoch": 0.29,
"learning_rate": 1.977378100794352e-05,
"loss": 1.809,
"step": 440
},
{
"epoch": 0.29,
"learning_rate": 1.977228610977992e-05,
"loss": 1.5157,
"step": 441
},
{
"epoch": 0.29,
"learning_rate": 1.977078634546523e-05,
"loss": 1.8447,
"step": 442
},
{
"epoch": 0.29,
"learning_rate": 1.9769281715746258e-05,
"loss": 1.5596,
"step": 443
},
{
"epoch": 0.29,
"learning_rate": 1.976777222137224e-05,
"loss": 1.6913,
"step": 444
},
{
"epoch": 0.29,
"learning_rate": 1.9766257863094843e-05,
"loss": 1.5567,
"step": 445
},
{
"epoch": 0.29,
"learning_rate": 1.9764738641668137e-05,
"loss": 1.7594,
"step": 446
},
{
"epoch": 0.29,
"learning_rate": 1.9763214557848634e-05,
"loss": 1.8343,
"step": 447
},
{
"epoch": 0.29,
"learning_rate": 1.9761685612395253e-05,
"loss": 1.8573,
"step": 448
},
{
"epoch": 0.29,
"learning_rate": 1.976015180606934e-05,
"loss": 1.6969,
"step": 449
},
{
"epoch": 0.29,
"learning_rate": 1.9758613139634662e-05,
"loss": 1.6318,
"step": 450
},
{
"epoch": 0.29,
"learning_rate": 1.9757069613857404e-05,
"loss": 1.7388,
"step": 451
},
{
"epoch": 0.3,
"learning_rate": 1.9755521229506164e-05,
"loss": 1.7768,
"step": 452
},
{
"epoch": 0.3,
"learning_rate": 1.975396798735198e-05,
"loss": 1.684,
"step": 453
},
{
"epoch": 0.3,
"learning_rate": 1.9752409888168285e-05,
"loss": 1.6109,
"step": 454
},
{
"epoch": 0.3,
"learning_rate": 1.9750846932730947e-05,
"loss": 1.8364,
"step": 455
},
{
"epoch": 0.3,
"learning_rate": 1.9749279121818235e-05,
"loss": 1.8766,
"step": 456
},
{
"epoch": 0.3,
"learning_rate": 1.9747706456210865e-05,
"loss": 1.5277,
"step": 457
},
{
"epoch": 0.3,
"learning_rate": 1.974612893669194e-05,
"loss": 1.8831,
"step": 458
},
{
"epoch": 0.3,
"learning_rate": 1.9744546564046995e-05,
"loss": 1.8072,
"step": 459
},
{
"epoch": 0.3,
"learning_rate": 1.9742959339063977e-05,
"loss": 1.7829,
"step": 460
},
{
"epoch": 0.3,
"learning_rate": 1.974136726253326e-05,
"loss": 1.708,
"step": 461
},
{
"epoch": 0.3,
"learning_rate": 1.9739770335247616e-05,
"loss": 1.6817,
"step": 462
},
{
"epoch": 0.3,
"learning_rate": 1.9738168558002245e-05,
"loss": 1.8286,
"step": 463
},
{
"epoch": 0.3,
"learning_rate": 1.973656193159476e-05,
"loss": 1.8612,
"step": 464
},
{
"epoch": 0.3,
"learning_rate": 1.9734950456825187e-05,
"loss": 1.7412,
"step": 465
},
{
"epoch": 0.3,
"learning_rate": 1.9733334134495963e-05,
"loss": 1.6253,
"step": 466
},
{
"epoch": 0.31,
"learning_rate": 1.9731712965411947e-05,
"loss": 1.8272,
"step": 467
},
{
"epoch": 0.31,
"learning_rate": 1.9730086950380404e-05,
"loss": 1.6659,
"step": 468
},
{
"epoch": 0.31,
"learning_rate": 1.9728456090211017e-05,
"loss": 1.7135,
"step": 469
},
{
"epoch": 0.31,
"learning_rate": 1.9726820385715877e-05,
"loss": 1.9411,
"step": 470
},
{
"epoch": 0.31,
"learning_rate": 1.9725179837709494e-05,
"loss": 1.7032,
"step": 471
},
{
"epoch": 0.31,
"learning_rate": 1.9723534447008785e-05,
"loss": 1.7129,
"step": 472
},
{
"epoch": 0.31,
"learning_rate": 1.9721884214433077e-05,
"loss": 1.8704,
"step": 473
},
{
"epoch": 0.31,
"learning_rate": 1.972022914080411e-05,
"loss": 1.667,
"step": 474
},
{
"epoch": 0.31,
"learning_rate": 1.9718569226946035e-05,
"loss": 1.5918,
"step": 475
},
{
"epoch": 0.31,
"learning_rate": 1.9716904473685417e-05,
"loss": 1.5348,
"step": 476
},
{
"epoch": 0.31,
"learning_rate": 1.9715234881851226e-05,
"loss": 1.7724,
"step": 477
},
{
"epoch": 0.31,
"learning_rate": 1.971356045227484e-05,
"loss": 1.927,
"step": 478
},
{
"epoch": 0.31,
"learning_rate": 1.971188118579004e-05,
"loss": 1.8668,
"step": 479
},
{
"epoch": 0.31,
"learning_rate": 1.9710197083233044e-05,
"loss": 1.8976,
"step": 480
},
{
"epoch": 0.31,
"learning_rate": 1.9708508145442443e-05,
"loss": 1.7757,
"step": 481
},
{
"epoch": 0.31,
"learning_rate": 1.970681437325925e-05,
"loss": 1.8585,
"step": 482
},
{
"epoch": 0.32,
"learning_rate": 1.9705115767526894e-05,
"loss": 1.7761,
"step": 483
},
{
"epoch": 0.32,
"learning_rate": 1.97034123290912e-05,
"loss": 1.7709,
"step": 484
},
{
"epoch": 0.32,
"learning_rate": 1.97017040588004e-05,
"loss": 1.6896,
"step": 485
},
{
"epoch": 0.32,
"learning_rate": 1.9699990957505136e-05,
"loss": 1.7458,
"step": 486
},
{
"epoch": 0.32,
"learning_rate": 1.9698273026058454e-05,
"loss": 1.7141,
"step": 487
},
{
"epoch": 0.32,
"learning_rate": 1.9696550265315805e-05,
"loss": 1.6869,
"step": 488
},
{
"epoch": 0.32,
"learning_rate": 1.9694822676135045e-05,
"loss": 1.7167,
"step": 489
},
{
"epoch": 0.32,
"learning_rate": 1.9693090259376436e-05,
"loss": 1.8046,
"step": 490
},
{
"epoch": 0.32,
"learning_rate": 1.9691353015902638e-05,
"loss": 1.9046,
"step": 491
},
{
"epoch": 0.32,
"learning_rate": 1.9689610946578725e-05,
"loss": 1.5962,
"step": 492
},
{
"epoch": 0.32,
"learning_rate": 1.968786405227216e-05,
"loss": 1.7309,
"step": 493
},
{
"epoch": 0.32,
"learning_rate": 1.9686112333852826e-05,
"loss": 1.6692,
"step": 494
},
{
"epoch": 0.32,
"learning_rate": 1.968435579219299e-05,
"loss": 1.6645,
"step": 495
},
{
"epoch": 0.32,
"learning_rate": 1.968259442816733e-05,
"loss": 1.9673,
"step": 496
},
{
"epoch": 0.32,
"learning_rate": 1.968082824265293e-05,
"loss": 1.8449,
"step": 497
},
{
"epoch": 0.33,
"learning_rate": 1.9679057236529266e-05,
"loss": 1.4464,
"step": 498
},
{
"epoch": 0.33,
"learning_rate": 1.9677281410678216e-05,
"loss": 1.6464,
"step": 499
},
{
"epoch": 0.33,
"learning_rate": 1.9675500765984066e-05,
"loss": 1.6744,
"step": 500
},
{
"epoch": 0.33,
"learning_rate": 1.9673715303333495e-05,
"loss": 1.4672,
"step": 501
},
{
"epoch": 0.33,
"learning_rate": 1.9671925023615572e-05,
"loss": 1.6775,
"step": 502
},
{
"epoch": 0.33,
"learning_rate": 1.9670129927721783e-05,
"loss": 1.7813,
"step": 503
},
{
"epoch": 0.33,
"learning_rate": 1.9668330016546004e-05,
"loss": 1.6196,
"step": 504
},
{
"epoch": 0.33,
"learning_rate": 1.9666525290984505e-05,
"loss": 1.6561,
"step": 505
},
{
"epoch": 0.33,
"learning_rate": 1.9664715751935958e-05,
"loss": 1.7315,
"step": 506
},
{
"epoch": 0.33,
"learning_rate": 1.9662901400301432e-05,
"loss": 1.8063,
"step": 507
},
{
"epoch": 0.33,
"learning_rate": 1.9661082236984387e-05,
"loss": 1.4929,
"step": 508
},
{
"epoch": 0.33,
"learning_rate": 1.9659258262890683e-05,
"loss": 1.8631,
"step": 509
},
{
"epoch": 0.33,
"learning_rate": 1.965742947892858e-05,
"loss": 1.6901,
"step": 510
},
{
"epoch": 0.33,
"learning_rate": 1.965559588600873e-05,
"loss": 1.7438,
"step": 511
},
{
"epoch": 0.33,
"learning_rate": 1.965375748504417e-05,
"loss": 1.6929,
"step": 512
},
{
"epoch": 0.34,
"learning_rate": 1.965191427695035e-05,
"loss": 1.6071,
"step": 513
},
{
"epoch": 0.34,
"learning_rate": 1.9650066262645097e-05,
"loss": 1.5969,
"step": 514
},
{
"epoch": 0.34,
"learning_rate": 1.9648213443048635e-05,
"loss": 1.634,
"step": 515
},
{
"epoch": 0.34,
"learning_rate": 1.964635581908359e-05,
"loss": 1.8549,
"step": 516
},
{
"epoch": 0.34,
"learning_rate": 1.9644493391674966e-05,
"loss": 1.6202,
"step": 517
},
{
"epoch": 0.34,
"learning_rate": 1.9642626161750176e-05,
"loss": 1.8871,
"step": 518
},
{
"epoch": 0.34,
"learning_rate": 1.9640754130239004e-05,
"loss": 1.6687,
"step": 519
},
{
"epoch": 0.34,
"learning_rate": 1.9638877298073645e-05,
"loss": 1.612,
"step": 520
},
{
"epoch": 0.34,
"learning_rate": 1.9636995666188677e-05,
"loss": 1.785,
"step": 521
},
{
"epoch": 0.34,
"learning_rate": 1.9635109235521057e-05,
"loss": 1.5052,
"step": 522
},
{
"epoch": 0.34,
"learning_rate": 1.9633218007010146e-05,
"loss": 1.7157,
"step": 523
},
{
"epoch": 0.34,
"learning_rate": 1.963132198159769e-05,
"loss": 1.5638,
"step": 524
},
{
"epoch": 0.34,
"learning_rate": 1.9629421160227825e-05,
"loss": 1.5479,
"step": 525
},
{
"epoch": 0.34,
"learning_rate": 1.9627515543847068e-05,
"loss": 1.5164,
"step": 526
},
{
"epoch": 0.34,
"learning_rate": 1.962560513340433e-05,
"loss": 1.648,
"step": 527
},
{
"epoch": 0.35,
"learning_rate": 1.962368992985091e-05,
"loss": 1.7224,
"step": 528
},
{
"epoch": 0.35,
"learning_rate": 1.962176993414049e-05,
"loss": 1.7266,
"step": 529
},
{
"epoch": 0.35,
"learning_rate": 1.961984514722914e-05,
"loss": 1.5928,
"step": 530
},
{
"epoch": 0.35,
"learning_rate": 1.9617915570075317e-05,
"loss": 1.5717,
"step": 531
},
{
"epoch": 0.35,
"learning_rate": 1.961598120363986e-05,
"loss": 1.6539,
"step": 532
},
{
"epoch": 0.35,
"learning_rate": 1.9614042048886e-05,
"loss": 1.7196,
"step": 533
},
{
"epoch": 0.35,
"learning_rate": 1.961209810677934e-05,
"loss": 1.6336,
"step": 534
},
{
"epoch": 0.35,
"learning_rate": 1.9610149378287877e-05,
"loss": 1.6449,
"step": 535
},
{
"epoch": 0.35,
"learning_rate": 1.9608195864381994e-05,
"loss": 1.7273,
"step": 536
},
{
"epoch": 0.35,
"learning_rate": 1.9606237566034443e-05,
"loss": 1.9775,
"step": 537
},
{
"epoch": 0.35,
"learning_rate": 1.960427448422037e-05,
"loss": 1.6844,
"step": 538
},
{
"epoch": 0.35,
"learning_rate": 1.9602306619917305e-05,
"loss": 1.4817,
"step": 539
},
{
"epoch": 0.35,
"learning_rate": 1.9600333974105145e-05,
"loss": 1.6722,
"step": 540
},
{
"epoch": 0.35,
"learning_rate": 1.9598356547766183e-05,
"loss": 1.7554,
"step": 541
},
{
"epoch": 0.35,
"learning_rate": 1.9596374341885093e-05,
"loss": 1.6304,
"step": 542
},
{
"epoch": 0.35,
"learning_rate": 1.9594387357448908e-05,
"loss": 1.922,
"step": 543
},
{
"epoch": 0.36,
"learning_rate": 1.9592395595447064e-05,
"loss": 1.7577,
"step": 544
},
{
"epoch": 0.36,
"learning_rate": 1.9590399056871366e-05,
"loss": 1.764,
"step": 545
},
{
"epoch": 0.36,
"learning_rate": 1.9588397742716004e-05,
"loss": 1.794,
"step": 546
},
{
"epoch": 0.36,
"learning_rate": 1.958639165397753e-05,
"loss": 1.846,
"step": 547
},
{
"epoch": 0.36,
"learning_rate": 1.9584380791654896e-05,
"loss": 1.7184,
"step": 548
},
{
"epoch": 0.36,
"learning_rate": 1.958236515674941e-05,
"loss": 1.7393,
"step": 549
},
{
"epoch": 0.36,
"learning_rate": 1.958034475026477e-05,
"loss": 1.6775,
"step": 550
},
{
"epoch": 0.36,
"learning_rate": 1.9578319573207047e-05,
"loss": 1.8334,
"step": 551
},
{
"epoch": 0.36,
"learning_rate": 1.9576289626584685e-05,
"loss": 1.4963,
"step": 552
},
{
"epoch": 0.36,
"learning_rate": 1.9574254911408502e-05,
"loss": 1.5934,
"step": 553
},
{
"epoch": 0.36,
"learning_rate": 1.95722154286917e-05,
"loss": 1.8733,
"step": 554
},
{
"epoch": 0.36,
"learning_rate": 1.957017117944984e-05,
"loss": 1.6129,
"step": 555
},
{
"epoch": 0.36,
"learning_rate": 1.9568122164700867e-05,
"loss": 1.5116,
"step": 556
},
{
"epoch": 0.36,
"learning_rate": 1.95660683854651e-05,
"loss": 1.7035,
"step": 557
},
{
"epoch": 0.36,
"learning_rate": 1.9564009842765225e-05,
"loss": 1.4937,
"step": 558
},
{
"epoch": 0.37,
"learning_rate": 1.95619465376263e-05,
"loss": 1.7986,
"step": 559
},
{
"epoch": 0.37,
"learning_rate": 1.9559878471075763e-05,
"loss": 1.7467,
"step": 560
},
{
"epoch": 0.37,
"learning_rate": 1.9557805644143408e-05,
"loss": 1.4965,
"step": 561
},
{
"epoch": 0.37,
"learning_rate": 1.955572805786141e-05,
"loss": 1.7962,
"step": 562
},
{
"epoch": 0.37,
"learning_rate": 1.9553645713264312e-05,
"loss": 1.6441,
"step": 563
},
{
"epoch": 0.37,
"learning_rate": 1.955155861138903e-05,
"loss": 1.7992,
"step": 564
},
{
"epoch": 0.37,
"learning_rate": 1.9549466753274843e-05,
"loss": 1.7547,
"step": 565
},
{
"epoch": 0.37,
"learning_rate": 1.9547370139963406e-05,
"loss": 1.6163,
"step": 566
},
{
"epoch": 0.37,
"learning_rate": 1.9545268772498723e-05,
"loss": 1.6678,
"step": 567
},
{
"epoch": 0.37,
"learning_rate": 1.954316265192719e-05,
"loss": 1.7166,
"step": 568
},
{
"epoch": 0.37,
"learning_rate": 1.9541051779297553e-05,
"loss": 1.8128,
"step": 569
},
{
"epoch": 0.37,
"learning_rate": 1.9538936155660934e-05,
"loss": 2.1209,
"step": 570
},
{
"epoch": 0.37,
"learning_rate": 1.9536815782070816e-05,
"loss": 1.5158,
"step": 571
},
{
"epoch": 0.37,
"learning_rate": 1.9534690659583045e-05,
"loss": 1.7709,
"step": 572
},
{
"epoch": 0.37,
"learning_rate": 1.9532560789255838e-05,
"loss": 1.5841,
"step": 573
},
{
"epoch": 0.38,
"learning_rate": 1.953042617214977e-05,
"loss": 1.5374,
"step": 574
},
{
"epoch": 0.38,
"learning_rate": 1.9528286809327786e-05,
"loss": 1.5786,
"step": 575
},
{
"epoch": 0.38,
"learning_rate": 1.9526142701855187e-05,
"loss": 1.688,
"step": 576
},
{
"epoch": 0.38,
"learning_rate": 1.9523993850799644e-05,
"loss": 1.5085,
"step": 577
},
{
"epoch": 0.38,
"learning_rate": 1.9521840257231183e-05,
"loss": 1.6474,
"step": 578
},
{
"epoch": 0.38,
"learning_rate": 1.9519681922222195e-05,
"loss": 1.9484,
"step": 579
},
{
"epoch": 0.38,
"learning_rate": 1.9517518846847437e-05,
"loss": 1.7364,
"step": 580
},
{
"epoch": 0.38,
"learning_rate": 1.951535103218402e-05,
"loss": 1.7895,
"step": 581
},
{
"epoch": 0.38,
"learning_rate": 1.951317847931141e-05,
"loss": 1.8405,
"step": 582
},
{
"epoch": 0.38,
"learning_rate": 1.9511001189311446e-05,
"loss": 1.6786,
"step": 583
},
{
"epoch": 0.38,
"learning_rate": 1.9508819163268315e-05,
"loss": 1.8383,
"step": 584
},
{
"epoch": 0.38,
"learning_rate": 1.950663240226857e-05,
"loss": 1.732,
"step": 585
},
{
"epoch": 0.38,
"learning_rate": 1.9504440907401113e-05,
"loss": 1.6186,
"step": 586
},
{
"epoch": 0.38,
"learning_rate": 1.950224467975721e-05,
"loss": 1.6698,
"step": 587
},
{
"epoch": 0.38,
"learning_rate": 1.9500043720430484e-05,
"loss": 1.5525,
"step": 588
},
{
"epoch": 0.38,
"learning_rate": 1.9497838030516905e-05,
"loss": 1.5394,
"step": 589
},
{
"epoch": 0.39,
"learning_rate": 1.9495627611114817e-05,
"loss": 1.7961,
"step": 590
},
{
"epoch": 0.39,
"learning_rate": 1.9493412463324903e-05,
"loss": 1.7375,
"step": 591
},
{
"epoch": 0.39,
"learning_rate": 1.9491192588250198e-05,
"loss": 1.6577,
"step": 592
},
{
"epoch": 0.39,
"learning_rate": 1.9488967986996105e-05,
"loss": 1.6901,
"step": 593
},
{
"epoch": 0.39,
"learning_rate": 1.9486738660670373e-05,
"loss": 1.6328,
"step": 594
},
{
"epoch": 0.39,
"learning_rate": 1.9484504610383103e-05,
"loss": 1.7063,
"step": 595
},
{
"epoch": 0.39,
"learning_rate": 1.948226583724675e-05,
"loss": 1.7627,
"step": 596
},
{
"epoch": 0.39,
"learning_rate": 1.9480022342376125e-05,
"loss": 1.8574,
"step": 597
},
{
"epoch": 0.39,
"learning_rate": 1.947777412688838e-05,
"loss": 1.5624,
"step": 598
},
{
"epoch": 0.39,
"learning_rate": 1.9475521191903023e-05,
"loss": 1.5426,
"step": 599
},
{
"epoch": 0.39,
"learning_rate": 1.9473263538541916e-05,
"loss": 1.5822,
"step": 600
},
{
"epoch": 0.39,
"learning_rate": 1.9471001167929266e-05,
"loss": 1.7343,
"step": 601
},
{
"epoch": 0.39,
"learning_rate": 1.9468734081191627e-05,
"loss": 1.9196,
"step": 602
},
{
"epoch": 0.39,
"learning_rate": 1.946646227945791e-05,
"loss": 1.6072,
"step": 603
},
{
"epoch": 0.39,
"learning_rate": 1.9464185763859365e-05,
"loss": 1.7154,
"step": 604
},
{
"epoch": 0.4,
"learning_rate": 1.9461904535529593e-05,
"loss": 1.6671,
"step": 605
},
{
"epoch": 0.4,
"learning_rate": 1.945961859560454e-05,
"loss": 1.5347,
"step": 606
},
{
"epoch": 0.4,
"learning_rate": 1.9457327945222505e-05,
"loss": 1.6004,
"step": 607
},
{
"epoch": 0.4,
"learning_rate": 1.9455032585524115e-05,
"loss": 1.6925,
"step": 608
},
{
"epoch": 0.4,
"learning_rate": 1.9452732517652366e-05,
"loss": 1.801,
"step": 609
},
{
"epoch": 0.4,
"learning_rate": 1.9450427742752583e-05,
"loss": 1.534,
"step": 610
},
{
"epoch": 0.4,
"learning_rate": 1.9448118261972437e-05,
"loss": 1.6647,
"step": 611
},
{
"epoch": 0.4,
"learning_rate": 1.9445804076461946e-05,
"loss": 1.8082,
"step": 612
},
{
"epoch": 0.4,
"learning_rate": 1.9443485187373463e-05,
"loss": 1.6061,
"step": 613
},
{
"epoch": 0.4,
"learning_rate": 1.944116159586169e-05,
"loss": 1.535,
"step": 614
},
{
"epoch": 0.4,
"learning_rate": 1.9438833303083677e-05,
"loss": 1.7386,
"step": 615
},
{
"epoch": 0.4,
"learning_rate": 1.94365003101988e-05,
"loss": 1.7329,
"step": 616
},
{
"epoch": 0.4,
"learning_rate": 1.9434162618368784e-05,
"loss": 1.9582,
"step": 617
},
{
"epoch": 0.4,
"learning_rate": 1.943182022875769e-05,
"loss": 1.6357,
"step": 618
},
{
"epoch": 0.4,
"learning_rate": 1.942947314253193e-05,
"loss": 1.6327,
"step": 619
},
{
"epoch": 0.41,
"learning_rate": 1.9427121360860233e-05,
"loss": 1.9452,
"step": 620
},
{
"epoch": 0.41,
"learning_rate": 1.942476488491369e-05,
"loss": 1.5513,
"step": 621
},
{
"epoch": 0.41,
"learning_rate": 1.9422403715865708e-05,
"loss": 1.5723,
"step": 622
},
{
"epoch": 0.41,
"learning_rate": 1.9420037854892053e-05,
"loss": 1.6096,
"step": 623
},
{
"epoch": 0.41,
"learning_rate": 1.9417667303170803e-05,
"loss": 1.693,
"step": 624
},
{
"epoch": 0.41,
"learning_rate": 1.9415292061882386e-05,
"loss": 1.7972,
"step": 625
},
{
"epoch": 0.41,
"learning_rate": 1.9412912132209573e-05,
"loss": 1.6269,
"step": 626
},
{
"epoch": 0.41,
"learning_rate": 1.941052751533745e-05,
"loss": 1.6658,
"step": 627
},
{
"epoch": 0.41,
"learning_rate": 1.9408138212453456e-05,
"loss": 1.6587,
"step": 628
},
{
"epoch": 0.41,
"learning_rate": 1.9405744224747348e-05,
"loss": 1.6423,
"step": 629
},
{
"epoch": 0.41,
"learning_rate": 1.940334555341122e-05,
"loss": 1.7249,
"step": 630
},
{
"epoch": 0.41,
"learning_rate": 1.940094219963951e-05,
"loss": 1.5317,
"step": 631
},
{
"epoch": 0.41,
"learning_rate": 1.9398534164628968e-05,
"loss": 1.762,
"step": 632
},
{
"epoch": 0.41,
"learning_rate": 1.9396121449578694e-05,
"loss": 1.7897,
"step": 633
},
{
"epoch": 0.41,
"learning_rate": 1.93937040556901e-05,
"loss": 1.7555,
"step": 634
},
{
"epoch": 0.41,
"learning_rate": 1.9391281984166944e-05,
"loss": 1.3809,
"step": 635
},
{
"epoch": 0.42,
"learning_rate": 1.938885523621531e-05,
"loss": 1.7534,
"step": 636
},
{
"epoch": 0.42,
"learning_rate": 1.93864238130436e-05,
"loss": 1.881,
"step": 637
},
{
"epoch": 0.42,
"learning_rate": 1.9383987715862554e-05,
"loss": 1.5816,
"step": 638
},
{
"epoch": 0.42,
"learning_rate": 1.9381546945885236e-05,
"loss": 1.8615,
"step": 639
},
{
"epoch": 0.42,
"learning_rate": 1.9379101504327044e-05,
"loss": 1.6472,
"step": 640
},
{
"epoch": 0.42,
"learning_rate": 1.9376651392405685e-05,
"loss": 1.8553,
"step": 641
},
{
"epoch": 0.42,
"learning_rate": 1.9374196611341212e-05,
"loss": 1.5499,
"step": 642
},
{
"epoch": 0.42,
"learning_rate": 1.9371737162355986e-05,
"loss": 1.5872,
"step": 643
},
{
"epoch": 0.42,
"learning_rate": 1.9369273046674708e-05,
"loss": 1.4952,
"step": 644
},
{
"epoch": 0.42,
"learning_rate": 1.9366804265524388e-05,
"loss": 1.7629,
"step": 645
},
{
"epoch": 0.42,
"learning_rate": 1.936433082013437e-05,
"loss": 1.5493,
"step": 646
},
{
"epoch": 0.42,
"learning_rate": 1.9361852711736317e-05,
"loss": 1.5898,
"step": 647
},
{
"epoch": 0.42,
"learning_rate": 1.935936994156421e-05,
"loss": 1.6559,
"step": 648
},
{
"epoch": 0.42,
"learning_rate": 1.935688251085436e-05,
"loss": 1.6534,
"step": 649
},
{
"epoch": 0.42,
"learning_rate": 1.9354390420845387e-05,
"loss": 1.8454,
"step": 650
},
{
"epoch": 0.43,
"learning_rate": 1.935189367277824e-05,
"loss": 1.661,
"step": 651
},
{
"epoch": 0.43,
"learning_rate": 1.934939226789619e-05,
"loss": 1.6613,
"step": 652
},
{
"epoch": 0.43,
"learning_rate": 1.9346886207444817e-05,
"loss": 1.4516,
"step": 653
},
{
"epoch": 0.43,
"learning_rate": 1.9344375492672024e-05,
"loss": 1.6293,
"step": 654
},
{
"epoch": 0.43,
"learning_rate": 1.9341860124828037e-05,
"loss": 1.5812,
"step": 655
},
{
"epoch": 0.43,
"learning_rate": 1.933934010516539e-05,
"loss": 1.6494,
"step": 656
},
{
"epoch": 0.43,
"learning_rate": 1.9336815434938935e-05,
"loss": 1.8236,
"step": 657
},
{
"epoch": 0.43,
"learning_rate": 1.933428611540585e-05,
"loss": 1.4262,
"step": 658
},
{
"epoch": 0.43,
"learning_rate": 1.9331752147825615e-05,
"loss": 1.8037,
"step": 659
},
{
"epoch": 0.43,
"learning_rate": 1.932921353346003e-05,
"loss": 1.5748,
"step": 660
},
{
"epoch": 0.43,
"learning_rate": 1.9326670273573208e-05,
"loss": 1.7351,
"step": 661
},
{
"epoch": 0.43,
"learning_rate": 1.932412236943158e-05,
"loss": 1.6881,
"step": 662
},
{
"epoch": 0.43,
"learning_rate": 1.932156982230388e-05,
"loss": 1.7435,
"step": 663
},
{
"epoch": 0.43,
"learning_rate": 1.9319012633461164e-05,
"loss": 1.8803,
"step": 664
},
{
"epoch": 0.43,
"learning_rate": 1.9316450804176794e-05,
"loss": 2.0769,
"step": 665
},
{
"epoch": 0.44,
"learning_rate": 1.9313884335726443e-05,
"loss": 1.6239,
"step": 666
},
{
"epoch": 0.44,
"learning_rate": 1.9311313229388094e-05,
"loss": 1.6221,
"step": 667
},
{
"epoch": 0.44,
"learning_rate": 1.9308737486442045e-05,
"loss": 1.6181,
"step": 668
},
{
"epoch": 0.44,
"learning_rate": 1.9306157108170892e-05,
"loss": 2.0024,
"step": 669
},
{
"epoch": 0.44,
"learning_rate": 1.9303572095859545e-05,
"loss": 1.6205,
"step": 670
},
{
"epoch": 0.44,
"learning_rate": 1.930098245079523e-05,
"loss": 1.4555,
"step": 671
},
{
"epoch": 0.44,
"learning_rate": 1.9298388174267463e-05,
"loss": 1.7335,
"step": 672
},
{
"epoch": 0.44,
"learning_rate": 1.9295789267568078e-05,
"loss": 1.6883,
"step": 673
},
{
"epoch": 0.44,
"learning_rate": 1.9293185731991212e-05,
"loss": 1.6186,
"step": 674
},
{
"epoch": 0.44,
"learning_rate": 1.9290577568833305e-05,
"loss": 1.6508,
"step": 675
},
{
"epoch": 0.44,
"learning_rate": 1.9287964779393105e-05,
"loss": 1.4298,
"step": 676
},
{
"epoch": 0.44,
"learning_rate": 1.9285347364971662e-05,
"loss": 1.868,
"step": 677
},
{
"epoch": 0.44,
"learning_rate": 1.9282725326872324e-05,
"loss": 1.5846,
"step": 678
},
{
"epoch": 0.44,
"learning_rate": 1.9280098666400752e-05,
"loss": 1.733,
"step": 679
},
{
"epoch": 0.44,
"learning_rate": 1.92774673848649e-05,
"loss": 1.5303,
"step": 680
},
{
"epoch": 0.45,
"learning_rate": 1.927483148357502e-05,
"loss": 1.4126,
"step": 681
},
{
"epoch": 0.45,
"learning_rate": 1.927219096384368e-05,
"loss": 1.5526,
"step": 682
},
{
"epoch": 0.45,
"learning_rate": 1.9269545826985733e-05,
"loss": 1.7014,
"step": 683
},
{
"epoch": 0.45,
"learning_rate": 1.9266896074318335e-05,
"loss": 1.7935,
"step": 684
},
{
"epoch": 0.45,
"learning_rate": 1.9264241707160946e-05,
"loss": 1.7879,
"step": 685
},
{
"epoch": 0.45,
"learning_rate": 1.9261582726835316e-05,
"loss": 1.4795,
"step": 686
},
{
"epoch": 0.45,
"learning_rate": 1.9258919134665494e-05,
"loss": 1.73,
"step": 687
},
{
"epoch": 0.45,
"learning_rate": 1.925625093197783e-05,
"loss": 1.6534,
"step": 688
},
{
"epoch": 0.45,
"learning_rate": 1.9253578120100962e-05,
"loss": 1.6437,
"step": 689
},
{
"epoch": 0.45,
"learning_rate": 1.9250900700365837e-05,
"loss": 1.6342,
"step": 690
},
{
"epoch": 0.45,
"learning_rate": 1.9248218674105678e-05,
"loss": 1.3695,
"step": 691
},
{
"epoch": 0.45,
"learning_rate": 1.924553204265602e-05,
"loss": 1.8255,
"step": 692
},
{
"epoch": 0.45,
"learning_rate": 1.9242840807354676e-05,
"loss": 1.9887,
"step": 693
},
{
"epoch": 0.45,
"learning_rate": 1.9240144969541754e-05,
"loss": 1.7133,
"step": 694
},
{
"epoch": 0.45,
"learning_rate": 1.923744453055967e-05,
"loss": 1.5942,
"step": 695
},
{
"epoch": 0.45,
"learning_rate": 1.923473949175311e-05,
"loss": 1.5758,
"step": 696
},
{
"epoch": 0.46,
"learning_rate": 1.9232029854469064e-05,
"loss": 1.8235,
"step": 697
},
{
"epoch": 0.46,
"learning_rate": 1.9229315620056805e-05,
"loss": 1.6101,
"step": 698
},
{
"epoch": 0.46,
"learning_rate": 1.9226596789867895e-05,
"loss": 1.6482,
"step": 699
},
{
"epoch": 0.46,
"learning_rate": 1.9223873365256192e-05,
"loss": 1.6308,
"step": 700
},
{
"epoch": 0.46,
"learning_rate": 1.9221145347577833e-05,
"loss": 1.9061,
"step": 701
},
{
"epoch": 0.46,
"learning_rate": 1.921841273819125e-05,
"loss": 1.7377,
"step": 702
},
{
"epoch": 0.46,
"learning_rate": 1.9215675538457155e-05,
"loss": 1.7004,
"step": 703
},
{
"epoch": 0.46,
"learning_rate": 1.9212933749738546e-05,
"loss": 1.5572,
"step": 704
},
{
"epoch": 0.46,
"learning_rate": 1.921018737340071e-05,
"loss": 1.5642,
"step": 705
},
{
"epoch": 0.46,
"learning_rate": 1.920743641081122e-05,
"loss": 1.476,
"step": 706
},
{
"epoch": 0.46,
"learning_rate": 1.9204680863339922e-05,
"loss": 1.6446,
"step": 707
},
{
"epoch": 0.46,
"learning_rate": 1.920192073235896e-05,
"loss": 1.7077,
"step": 708
},
{
"epoch": 0.46,
"learning_rate": 1.919915601924275e-05,
"loss": 1.7782,
"step": 709
},
{
"epoch": 0.46,
"learning_rate": 1.919638672536799e-05,
"loss": 1.8962,
"step": 710
},
{
"epoch": 0.46,
"learning_rate": 1.9193612852113666e-05,
"loss": 1.7171,
"step": 711
},
{
"epoch": 0.47,
"learning_rate": 1.9190834400861035e-05,
"loss": 1.6728,
"step": 712
},
{
"epoch": 0.47,
"learning_rate": 1.9188051372993645e-05,
"loss": 1.5485,
"step": 713
},
{
"epoch": 0.47,
"learning_rate": 1.918526376989731e-05,
"loss": 1.7059,
"step": 714
},
{
"epoch": 0.47,
"learning_rate": 1.918247159296013e-05,
"loss": 1.6561,
"step": 715
},
{
"epoch": 0.47,
"learning_rate": 1.9179674843572487e-05,
"loss": 1.6245,
"step": 716
},
{
"epoch": 0.47,
"learning_rate": 1.9176873523127025e-05,
"loss": 1.449,
"step": 717
},
{
"epoch": 0.47,
"learning_rate": 1.9174067633018682e-05,
"loss": 1.553,
"step": 718
},
{
"epoch": 0.47,
"learning_rate": 1.9171257174644658e-05,
"loss": 1.7433,
"step": 719
},
{
"epoch": 0.47,
"learning_rate": 1.9168442149404438e-05,
"loss": 1.4599,
"step": 720
},
{
"epoch": 0.47,
"learning_rate": 1.9165622558699763e-05,
"loss": 1.7968,
"step": 721
},
{
"epoch": 0.47,
"learning_rate": 1.916279840393467e-05,
"loss": 1.761,
"step": 722
},
{
"epoch": 0.47,
"learning_rate": 1.9159969686515462e-05,
"loss": 1.764,
"step": 723
},
{
"epoch": 0.47,
"learning_rate": 1.9157136407850706e-05,
"loss": 1.8362,
"step": 724
},
{
"epoch": 0.47,
"learning_rate": 1.915429856935124e-05,
"loss": 1.7414,
"step": 725
},
{
"epoch": 0.47,
"learning_rate": 1.9151456172430186e-05,
"loss": 1.6869,
"step": 726
},
{
"epoch": 0.48,
"learning_rate": 1.9148609218502923e-05,
"loss": 1.7816,
"step": 727
},
{
"epoch": 0.48,
"learning_rate": 1.9145757708987103e-05,
"loss": 1.7204,
"step": 728
},
{
"epoch": 0.48,
"learning_rate": 1.9142901645302654e-05,
"loss": 1.7609,
"step": 729
},
{
"epoch": 0.48,
"learning_rate": 1.914004102887176e-05,
"loss": 1.7315,
"step": 730
},
{
"epoch": 0.48,
"learning_rate": 1.9137175861118873e-05,
"loss": 1.6165,
"step": 731
},
{
"epoch": 0.48,
"learning_rate": 1.9134306143470722e-05,
"loss": 1.3642,
"step": 732
},
{
"epoch": 0.48,
"learning_rate": 1.9131431877356292e-05,
"loss": 1.5392,
"step": 733
},
{
"epoch": 0.48,
"learning_rate": 1.9128553064206835e-05,
"loss": 1.854,
"step": 734
},
{
"epoch": 0.48,
"learning_rate": 1.912566970545587e-05,
"loss": 1.8508,
"step": 735
},
{
"epoch": 0.48,
"learning_rate": 1.912278180253918e-05,
"loss": 1.7225,
"step": 736
},
{
"epoch": 0.48,
"learning_rate": 1.9119889356894804e-05,
"loss": 1.841,
"step": 737
},
{
"epoch": 0.48,
"learning_rate": 1.911699236996305e-05,
"loss": 1.622,
"step": 738
},
{
"epoch": 0.48,
"learning_rate": 1.9114090843186482e-05,
"loss": 1.8323,
"step": 739
},
{
"epoch": 0.48,
"learning_rate": 1.9111184778009934e-05,
"loss": 1.7356,
"step": 740
},
{
"epoch": 0.48,
"learning_rate": 1.9108274175880483e-05,
"loss": 1.6885,
"step": 741
},
{
"epoch": 0.48,
"learning_rate": 1.9105359038247484e-05,
"loss": 1.7559,
"step": 742
},
{
"epoch": 0.49,
"learning_rate": 1.910243936656254e-05,
"loss": 1.6461,
"step": 743
},
{
"epoch": 0.49,
"learning_rate": 1.9099515162279515e-05,
"loss": 1.4499,
"step": 744
},
{
"epoch": 0.49,
"learning_rate": 1.9096586426854523e-05,
"loss": 1.6665,
"step": 745
},
{
"epoch": 0.49,
"learning_rate": 1.909365316174595e-05,
"loss": 2.0526,
"step": 746
},
{
"epoch": 0.49,
"learning_rate": 1.909071536841442e-05,
"loss": 1.4342,
"step": 747
},
{
"epoch": 0.49,
"learning_rate": 1.908777304832282e-05,
"loss": 1.5707,
"step": 748
},
{
"epoch": 0.49,
"learning_rate": 1.9084826202936292e-05,
"loss": 1.5996,
"step": 749
},
{
"epoch": 0.49,
"learning_rate": 1.9081874833722234e-05,
"loss": 1.6187,
"step": 750
},
{
"epoch": 0.49,
"learning_rate": 1.9078918942150292e-05,
"loss": 1.6321,
"step": 751
},
{
"epoch": 0.49,
"learning_rate": 1.9075958529692355e-05,
"loss": 1.7505,
"step": 752
},
{
"epoch": 0.49,
"learning_rate": 1.9072993597822586e-05,
"loss": 1.7084,
"step": 753
},
{
"epoch": 0.49,
"learning_rate": 1.9070024148017375e-05,
"loss": 1.8183,
"step": 754
},
{
"epoch": 0.49,
"learning_rate": 1.9067050181755377e-05,
"loss": 1.4917,
"step": 755
},
{
"epoch": 0.49,
"learning_rate": 1.906407170051749e-05,
"loss": 1.6107,
"step": 756
},
{
"epoch": 0.49,
"learning_rate": 1.9061088705786863e-05,
"loss": 1.7112,
"step": 757
},
{
"epoch": 0.5,
"learning_rate": 1.905810119904889e-05,
"loss": 1.7943,
"step": 758
},
{
"epoch": 0.5,
"learning_rate": 1.905510918179121e-05,
"loss": 1.6517,
"step": 759
},
{
"epoch": 0.5,
"learning_rate": 1.9052112655503713e-05,
"loss": 1.5343,
"step": 760
},
{
"epoch": 0.5,
"learning_rate": 1.9049111621678534e-05,
"loss": 1.6211,
"step": 761
},
{
"epoch": 0.5,
"learning_rate": 1.9046106081810047e-05,
"loss": 1.5274,
"step": 762
},
{
"epoch": 0.5,
"learning_rate": 1.904309603739487e-05,
"loss": 1.6842,
"step": 763
},
{
"epoch": 0.5,
"learning_rate": 1.904008148993188e-05,
"loss": 1.4002,
"step": 764
},
{
"epoch": 0.5,
"learning_rate": 1.903706244092217e-05,
"loss": 1.5549,
"step": 765
},
{
"epoch": 0.5,
"learning_rate": 1.903403889186909e-05,
"loss": 1.7794,
"step": 766
},
{
"epoch": 0.5,
"learning_rate": 1.903101084427824e-05,
"loss": 1.6031,
"step": 767
},
{
"epoch": 0.5,
"learning_rate": 1.9027978299657436e-05,
"loss": 1.8203,
"step": 768
},
{
"epoch": 0.5,
"learning_rate": 1.9024941259516753e-05,
"loss": 1.6061,
"step": 769
},
{
"epoch": 0.5,
"learning_rate": 1.9021899725368498e-05,
"loss": 1.535,
"step": 770
},
{
"epoch": 0.5,
"learning_rate": 1.9018853698727216e-05,
"loss": 1.6099,
"step": 771
},
{
"epoch": 0.5,
"learning_rate": 1.9015803181109685e-05,
"loss": 1.5597,
"step": 772
},
{
"epoch": 0.51,
"learning_rate": 1.9012748174034926e-05,
"loss": 1.54,
"step": 773
},
{
"epoch": 0.51,
"learning_rate": 1.900968867902419e-05,
"loss": 1.6177,
"step": 774
},
{
"epoch": 0.51,
"learning_rate": 1.900662469760097e-05,
"loss": 1.6051,
"step": 775
},
{
"epoch": 0.51,
"learning_rate": 1.9003556231290988e-05,
"loss": 1.5715,
"step": 776
},
{
"epoch": 0.51,
"learning_rate": 1.9000483281622198e-05,
"loss": 1.528,
"step": 777
},
{
"epoch": 0.51,
"learning_rate": 1.8997405850124786e-05,
"loss": 1.4962,
"step": 778
},
{
"epoch": 0.51,
"learning_rate": 1.8994323938331174e-05,
"loss": 1.5675,
"step": 779
},
{
"epoch": 0.51,
"learning_rate": 1.8991237547776014e-05,
"loss": 1.7189,
"step": 780
},
{
"epoch": 0.51,
"learning_rate": 1.8988146679996184e-05,
"loss": 1.5303,
"step": 781
},
{
"epoch": 0.51,
"learning_rate": 1.89850513365308e-05,
"loss": 1.581,
"step": 782
},
{
"epoch": 0.51,
"learning_rate": 1.8981951518921194e-05,
"loss": 1.4948,
"step": 783
},
{
"epoch": 0.51,
"learning_rate": 1.897884722871094e-05,
"loss": 1.5518,
"step": 784
},
{
"epoch": 0.51,
"learning_rate": 1.897573846744583e-05,
"loss": 1.637,
"step": 785
},
{
"epoch": 0.51,
"learning_rate": 1.8972625236673887e-05,
"loss": 1.6985,
"step": 786
},
{
"epoch": 0.51,
"learning_rate": 1.8969507537945354e-05,
"loss": 1.7558,
"step": 787
},
{
"epoch": 0.51,
"learning_rate": 1.8966385372812703e-05,
"loss": 1.5349,
"step": 788
},
{
"epoch": 0.52,
"learning_rate": 1.896325874283063e-05,
"loss": 1.6001,
"step": 789
},
{
"epoch": 0.52,
"learning_rate": 1.896012764955605e-05,
"loss": 1.5696,
"step": 790
},
{
"epoch": 0.52,
"learning_rate": 1.895699209454811e-05,
"loss": 1.5245,
"step": 791
},
{
"epoch": 0.52,
"learning_rate": 1.895385207936817e-05,
"loss": 1.8194,
"step": 792
},
{
"epoch": 0.52,
"learning_rate": 1.8950707605579816e-05,
"loss": 1.6387,
"step": 793
},
{
"epoch": 0.52,
"learning_rate": 1.8947558674748844e-05,
"loss": 1.685,
"step": 794
},
{
"epoch": 0.52,
"learning_rate": 1.894440528844329e-05,
"loss": 1.5585,
"step": 795
},
{
"epoch": 0.52,
"learning_rate": 1.8941247448233386e-05,
"loss": 1.5872,
"step": 796
},
{
"epoch": 0.52,
"learning_rate": 1.8938085155691598e-05,
"loss": 1.5518,
"step": 797
},
{
"epoch": 0.52,
"learning_rate": 1.8934918412392596e-05,
"loss": 1.601,
"step": 798
},
{
"epoch": 0.52,
"learning_rate": 1.893174721991328e-05,
"loss": 1.4921,
"step": 799
},
{
"epoch": 0.52,
"learning_rate": 1.8928571579832756e-05,
"loss": 1.5792,
"step": 800
},
{
"epoch": 0.52,
"learning_rate": 1.8925391493732346e-05,
"loss": 1.6429,
"step": 801
},
{
"epoch": 0.52,
"learning_rate": 1.892220696319559e-05,
"loss": 1.5469,
"step": 802
},
{
"epoch": 0.52,
"learning_rate": 1.8919017989808238e-05,
"loss": 1.8114,
"step": 803
},
{
"epoch": 0.53,
"learning_rate": 1.891582457515825e-05,
"loss": 1.8066,
"step": 804
},
{
"epoch": 0.53,
"learning_rate": 1.89126267208358e-05,
"loss": 1.4935,
"step": 805
},
{
"epoch": 0.53,
"learning_rate": 1.8909424428433278e-05,
"loss": 1.9584,
"step": 806
},
{
"epoch": 0.53,
"learning_rate": 1.890621769954528e-05,
"loss": 1.49,
"step": 807
},
{
"epoch": 0.53,
"learning_rate": 1.89030065357686e-05,
"loss": 1.6055,
"step": 808
},
{
"epoch": 0.53,
"learning_rate": 1.889979093870226e-05,
"loss": 1.4847,
"step": 809
},
{
"epoch": 0.53,
"learning_rate": 1.8896570909947477e-05,
"loss": 1.8265,
"step": 810
},
{
"epoch": 0.53,
"learning_rate": 1.8893346451107674e-05,
"loss": 1.6387,
"step": 811
},
{
"epoch": 0.53,
"learning_rate": 1.889011756378849e-05,
"loss": 1.9597,
"step": 812
},
{
"epoch": 0.53,
"learning_rate": 1.8886884249597763e-05,
"loss": 1.5448,
"step": 813
},
{
"epoch": 0.53,
"learning_rate": 1.888364651014553e-05,
"loss": 1.5469,
"step": 814
},
{
"epoch": 0.53,
"learning_rate": 1.8880404347044046e-05,
"loss": 1.7137,
"step": 815
},
{
"epoch": 0.53,
"learning_rate": 1.887715776190775e-05,
"loss": 1.5354,
"step": 816
},
{
"epoch": 0.53,
"learning_rate": 1.88739067563533e-05,
"loss": 1.6245,
"step": 817
},
{
"epoch": 0.53,
"learning_rate": 1.8870651331999542e-05,
"loss": 1.7649,
"step": 818
},
{
"epoch": 0.54,
"learning_rate": 1.8867391490467534e-05,
"loss": 1.886,
"step": 819
},
{
"epoch": 0.54,
"learning_rate": 1.886412723338052e-05,
"loss": 1.7526,
"step": 820
},
{
"epoch": 0.54,
"learning_rate": 1.8860858562363957e-05,
"loss": 1.7467,
"step": 821
},
{
"epoch": 0.54,
"learning_rate": 1.8857585479045493e-05,
"loss": 1.5234,
"step": 822
},
{
"epoch": 0.54,
"learning_rate": 1.8854307985054973e-05,
"loss": 1.5892,
"step": 823
},
{
"epoch": 0.54,
"learning_rate": 1.885102608202444e-05,
"loss": 1.4823,
"step": 824
},
{
"epoch": 0.54,
"learning_rate": 1.884773977158813e-05,
"loss": 1.8852,
"step": 825
},
{
"epoch": 0.54,
"learning_rate": 1.8844449055382473e-05,
"loss": 1.8052,
"step": 826
},
{
"epoch": 0.54,
"learning_rate": 1.8841153935046098e-05,
"loss": 1.6245,
"step": 827
},
{
"epoch": 0.54,
"learning_rate": 1.8837854412219828e-05,
"loss": 1.5381,
"step": 828
},
{
"epoch": 0.54,
"learning_rate": 1.8834550488546663e-05,
"loss": 1.6746,
"step": 829
},
{
"epoch": 0.54,
"learning_rate": 1.8831242165671816e-05,
"loss": 1.6597,
"step": 830
},
{
"epoch": 0.54,
"learning_rate": 1.8827929445242678e-05,
"loss": 1.658,
"step": 831
},
{
"epoch": 0.54,
"learning_rate": 1.8824612328908828e-05,
"loss": 1.6956,
"step": 832
},
{
"epoch": 0.54,
"learning_rate": 1.882129081832204e-05,
"loss": 1.6015,
"step": 833
},
{
"epoch": 0.55,
"learning_rate": 1.8817964915136277e-05,
"loss": 1.7438,
"step": 834
},
{
"epoch": 0.55,
"learning_rate": 1.8814634621007685e-05,
"loss": 2.0059,
"step": 835
},
{
"epoch": 0.55,
"learning_rate": 1.8811299937594598e-05,
"loss": 1.7808,
"step": 836
},
{
"epoch": 0.55,
"learning_rate": 1.8807960866557535e-05,
"loss": 1.7095,
"step": 837
},
{
"epoch": 0.55,
"learning_rate": 1.88046174095592e-05,
"loss": 1.4648,
"step": 838
},
{
"epoch": 0.55,
"learning_rate": 1.880126956826448e-05,
"loss": 1.5142,
"step": 839
},
{
"epoch": 0.55,
"learning_rate": 1.8797917344340453e-05,
"loss": 1.7125,
"step": 840
},
{
"epoch": 0.55,
"learning_rate": 1.879456073945637e-05,
"loss": 1.5195,
"step": 841
},
{
"epoch": 0.55,
"learning_rate": 1.8791199755283664e-05,
"loss": 1.777,
"step": 842
},
{
"epoch": 0.55,
"learning_rate": 1.8787834393495952e-05,
"loss": 1.6173,
"step": 843
},
{
"epoch": 0.55,
"learning_rate": 1.8784464655769033e-05,
"loss": 1.6622,
"step": 844
},
{
"epoch": 0.55,
"learning_rate": 1.8781090543780876e-05,
"loss": 1.5369,
"step": 845
},
{
"epoch": 0.55,
"learning_rate": 1.8777712059211643e-05,
"loss": 1.5513,
"step": 846
},
{
"epoch": 0.55,
"learning_rate": 1.877432920374366e-05,
"loss": 1.5864,
"step": 847
},
{
"epoch": 0.55,
"learning_rate": 1.8770941979061432e-05,
"loss": 1.5691,
"step": 848
},
{
"epoch": 0.55,
"learning_rate": 1.8767550386851646e-05,
"loss": 1.5007,
"step": 849
},
{
"epoch": 0.56,
"learning_rate": 1.8764154428803155e-05,
"loss": 1.5827,
"step": 850
},
{
"epoch": 0.56,
"learning_rate": 1.8760754106606997e-05,
"loss": 1.5528,
"step": 851
},
{
"epoch": 0.56,
"learning_rate": 1.875734942195637e-05,
"loss": 1.7157,
"step": 852
},
{
"epoch": 0.56,
"learning_rate": 1.8753940376546656e-05,
"loss": 1.5487,
"step": 853
},
{
"epoch": 0.56,
"learning_rate": 1.87505269720754e-05,
"loss": 1.6451,
"step": 854
},
{
"epoch": 0.56,
"learning_rate": 1.8747109210242326e-05,
"loss": 1.8242,
"step": 855
},
{
"epoch": 0.56,
"learning_rate": 1.8743687092749318e-05,
"loss": 1.7555,
"step": 856
},
{
"epoch": 0.56,
"learning_rate": 1.8740260621300433e-05,
"loss": 1.6325,
"step": 857
},
{
"epoch": 0.56,
"learning_rate": 1.8736829797601903e-05,
"loss": 1.6707,
"step": 858
},
{
"epoch": 0.56,
"learning_rate": 1.8733394623362116e-05,
"loss": 1.7867,
"step": 859
},
{
"epoch": 0.56,
"learning_rate": 1.8729955100291634e-05,
"loss": 1.5048,
"step": 860
},
{
"epoch": 0.56,
"learning_rate": 1.8726511230103183e-05,
"loss": 1.6509,
"step": 861
},
{
"epoch": 0.56,
"learning_rate": 1.872306301451165e-05,
"loss": 1.6407,
"step": 862
},
{
"epoch": 0.56,
"learning_rate": 1.871961045523409e-05,
"loss": 1.6166,
"step": 863
},
{
"epoch": 0.56,
"learning_rate": 1.8716153553989716e-05,
"loss": 1.508,
"step": 864
},
{
"epoch": 0.57,
"learning_rate": 1.871269231249991e-05,
"loss": 1.6792,
"step": 865
},
{
"epoch": 0.57,
"learning_rate": 1.8709226732488216e-05,
"loss": 1.6021,
"step": 866
},
{
"epoch": 0.57,
"learning_rate": 1.8705756815680324e-05,
"loss": 1.6887,
"step": 867
},
{
"epoch": 0.57,
"learning_rate": 1.87022825638041e-05,
"loss": 1.6528,
"step": 868
},
{
"epoch": 0.57,
"learning_rate": 1.8698803978589564e-05,
"loss": 1.6668,
"step": 869
},
{
"epoch": 0.57,
"learning_rate": 1.8695321061768886e-05,
"loss": 1.4839,
"step": 870
},
{
"epoch": 0.57,
"learning_rate": 1.8691833815076406e-05,
"loss": 1.6164,
"step": 871
},
{
"epoch": 0.57,
"learning_rate": 1.868834224024861e-05,
"loss": 1.8147,
"step": 872
},
{
"epoch": 0.57,
"learning_rate": 1.8684846339024145e-05,
"loss": 1.8088,
"step": 873
},
{
"epoch": 0.57,
"learning_rate": 1.86813461131438e-05,
"loss": 1.5114,
"step": 874
},
{
"epoch": 0.57,
"learning_rate": 1.867784156435054e-05,
"loss": 1.4502,
"step": 875
},
{
"epoch": 0.57,
"learning_rate": 1.8674332694389464e-05,
"loss": 1.5312,
"step": 876
},
{
"epoch": 0.57,
"learning_rate": 1.8670819505007826e-05,
"loss": 1.6685,
"step": 877
},
{
"epoch": 0.57,
"learning_rate": 1.8667301997955038e-05,
"loss": 1.9059,
"step": 878
},
{
"epoch": 0.57,
"learning_rate": 1.8663780174982656e-05,
"loss": 1.5351,
"step": 879
},
{
"epoch": 0.58,
"learning_rate": 1.866025403784439e-05,
"loss": 1.673,
"step": 880
},
{
"epoch": 0.58,
"learning_rate": 1.8656723588296085e-05,
"loss": 1.7394,
"step": 881
},
{
"epoch": 0.58,
"learning_rate": 1.8653188828095754e-05,
"loss": 1.8356,
"step": 882
},
{
"epoch": 0.58,
"learning_rate": 1.864964975900354e-05,
"loss": 1.5036,
"step": 883
},
{
"epoch": 0.58,
"learning_rate": 1.8646106382781738e-05,
"loss": 1.6354,
"step": 884
},
{
"epoch": 0.58,
"learning_rate": 1.8642558701194787e-05,
"loss": 1.6454,
"step": 885
},
{
"epoch": 0.58,
"learning_rate": 1.8639006716009275e-05,
"loss": 1.7203,
"step": 886
},
{
"epoch": 0.58,
"learning_rate": 1.863545042899392e-05,
"loss": 1.7076,
"step": 887
},
{
"epoch": 0.58,
"learning_rate": 1.8631889841919596e-05,
"loss": 1.4176,
"step": 888
},
{
"epoch": 0.58,
"learning_rate": 1.8628324956559313e-05,
"loss": 1.5047,
"step": 889
},
{
"epoch": 0.58,
"learning_rate": 1.862475577468821e-05,
"loss": 1.9395,
"step": 890
},
{
"epoch": 0.58,
"learning_rate": 1.862118229808359e-05,
"loss": 1.4564,
"step": 891
},
{
"epoch": 0.58,
"learning_rate": 1.8617604528524876e-05,
"loss": 1.5646,
"step": 892
},
{
"epoch": 0.58,
"learning_rate": 1.861402246779363e-05,
"loss": 1.7551,
"step": 893
},
{
"epoch": 0.58,
"learning_rate": 1.8610436117673557e-05,
"loss": 1.5105,
"step": 894
},
{
"epoch": 0.58,
"learning_rate": 1.8606845479950494e-05,
"loss": 1.6701,
"step": 895
},
{
"epoch": 0.59,
"learning_rate": 1.8603250556412418e-05,
"loss": 1.6202,
"step": 896
},
{
"epoch": 0.59,
"learning_rate": 1.859965134884943e-05,
"loss": 1.5629,
"step": 897
},
{
"epoch": 0.59,
"learning_rate": 1.8596047859053776e-05,
"loss": 1.6441,
"step": 898
},
{
"epoch": 0.59,
"learning_rate": 1.859244008881983e-05,
"loss": 1.6198,
"step": 899
},
{
"epoch": 0.59,
"learning_rate": 1.8588828039944086e-05,
"loss": 1.5501,
"step": 900
},
{
"epoch": 0.59,
"learning_rate": 1.858521171422519e-05,
"loss": 1.5689,
"step": 901
},
{
"epoch": 0.59,
"learning_rate": 1.8581591113463903e-05,
"loss": 1.9153,
"step": 902
},
{
"epoch": 0.59,
"learning_rate": 1.857796623946312e-05,
"loss": 1.6043,
"step": 903
},
{
"epoch": 0.59,
"learning_rate": 1.8574337094027858e-05,
"loss": 1.6687,
"step": 904
},
{
"epoch": 0.59,
"learning_rate": 1.857070367896527e-05,
"loss": 1.7897,
"step": 905
},
{
"epoch": 0.59,
"learning_rate": 1.8567065996084628e-05,
"loss": 1.5528,
"step": 906
},
{
"epoch": 0.59,
"learning_rate": 1.856342404719733e-05,
"loss": 1.6756,
"step": 907
},
{
"epoch": 0.59,
"learning_rate": 1.8559777834116906e-05,
"loss": 1.6897,
"step": 908
},
{
"epoch": 0.59,
"learning_rate": 1.8556127358658995e-05,
"loss": 1.6393,
"step": 909
},
{
"epoch": 0.59,
"learning_rate": 1.8552472622641372e-05,
"loss": 1.8571,
"step": 910
},
{
"epoch": 0.6,
"learning_rate": 1.8548813627883924e-05,
"loss": 1.922,
"step": 911
},
{
"epoch": 0.6,
"learning_rate": 1.854515037620867e-05,
"loss": 1.3198,
"step": 912
},
{
"epoch": 0.6,
"learning_rate": 1.8541482869439736e-05,
"loss": 1.6532,
"step": 913
},
{
"epoch": 0.6,
"learning_rate": 1.8537811109403372e-05,
"loss": 1.6219,
"step": 914
},
{
"epoch": 0.6,
"learning_rate": 1.853413509792795e-05,
"loss": 1.6325,
"step": 915
},
{
"epoch": 0.6,
"learning_rate": 1.8530454836843953e-05,
"loss": 1.5912,
"step": 916
},
{
"epoch": 0.6,
"learning_rate": 1.852677032798398e-05,
"loss": 1.7899,
"step": 917
},
{
"epoch": 0.6,
"learning_rate": 1.8523081573182754e-05,
"loss": 1.6063,
"step": 918
},
{
"epoch": 0.6,
"learning_rate": 1.8519388574277105e-05,
"loss": 1.763,
"step": 919
},
{
"epoch": 0.6,
"learning_rate": 1.851569133310597e-05,
"loss": 1.4422,
"step": 920
},
{
"epoch": 0.6,
"learning_rate": 1.8511989851510416e-05,
"loss": 1.6333,
"step": 921
},
{
"epoch": 0.6,
"learning_rate": 1.8508284131333604e-05,
"loss": 1.6701,
"step": 922
},
{
"epoch": 0.6,
"learning_rate": 1.8504574174420812e-05,
"loss": 1.4986,
"step": 923
},
{
"epoch": 0.6,
"learning_rate": 1.8500859982619438e-05,
"loss": 1.809,
"step": 924
},
{
"epoch": 0.6,
"learning_rate": 1.849714155777897e-05,
"loss": 1.5901,
"step": 925
},
{
"epoch": 0.61,
"learning_rate": 1.8493418901751016e-05,
"loss": 1.7828,
"step": 926
},
{
"epoch": 0.61,
"learning_rate": 1.848969201638929e-05,
"loss": 1.5578,
"step": 927
},
{
"epoch": 0.61,
"learning_rate": 1.8485960903549614e-05,
"loss": 1.593,
"step": 928
},
{
"epoch": 0.61,
"learning_rate": 1.8482225565089905e-05,
"loss": 1.7663,
"step": 929
},
{
"epoch": 0.61,
"learning_rate": 1.847848600287019e-05,
"loss": 1.7714,
"step": 930
},
{
"epoch": 0.61,
"learning_rate": 1.84747422187526e-05,
"loss": 1.5569,
"step": 931
},
{
"epoch": 0.61,
"learning_rate": 1.8470994214601378e-05,
"loss": 1.6464,
"step": 932
},
{
"epoch": 0.61,
"learning_rate": 1.8467241992282842e-05,
"loss": 1.5306,
"step": 933
},
{
"epoch": 0.61,
"learning_rate": 1.846348555366544e-05,
"loss": 1.5146,
"step": 934
},
{
"epoch": 0.61,
"learning_rate": 1.8459724900619705e-05,
"loss": 1.6297,
"step": 935
},
{
"epoch": 0.61,
"learning_rate": 1.845596003501826e-05,
"loss": 1.4991,
"step": 936
},
{
"epoch": 0.61,
"learning_rate": 1.845219095873585e-05,
"loss": 1.6013,
"step": 937
},
{
"epoch": 0.61,
"learning_rate": 1.8448417673649292e-05,
"loss": 1.6493,
"step": 938
},
{
"epoch": 0.61,
"learning_rate": 1.8444640181637514e-05,
"loss": 1.6812,
"step": 939
},
{
"epoch": 0.61,
"learning_rate": 1.844085848458153e-05,
"loss": 1.4978,
"step": 940
},
{
"epoch": 0.61,
"learning_rate": 1.8437072584364456e-05,
"loss": 1.3872,
"step": 941
},
{
"epoch": 0.62,
"learning_rate": 1.8433282482871497e-05,
"loss": 1.6746,
"step": 942
},
{
"epoch": 0.62,
"learning_rate": 1.8429488181989946e-05,
"loss": 1.6504,
"step": 943
},
{
"epoch": 0.62,
"learning_rate": 1.8425689683609198e-05,
"loss": 1.6379,
"step": 944
},
{
"epoch": 0.62,
"learning_rate": 1.8421886989620724e-05,
"loss": 1.5317,
"step": 945
},
{
"epoch": 0.62,
"learning_rate": 1.8418080101918095e-05,
"loss": 1.9125,
"step": 946
},
{
"epoch": 0.62,
"learning_rate": 1.8414269022396967e-05,
"loss": 1.5284,
"step": 947
},
{
"epoch": 0.62,
"learning_rate": 1.841045375295508e-05,
"loss": 1.7975,
"step": 948
},
{
"epoch": 0.62,
"learning_rate": 1.8406634295492266e-05,
"loss": 1.6434,
"step": 949
},
{
"epoch": 0.62,
"learning_rate": 1.8402810651910444e-05,
"loss": 1.457,
"step": 950
},
{
"epoch": 0.62,
"learning_rate": 1.839898282411361e-05,
"loss": 1.6691,
"step": 951
},
{
"epoch": 0.62,
"learning_rate": 1.839515081400784e-05,
"loss": 1.8307,
"step": 952
},
{
"epoch": 0.62,
"learning_rate": 1.8391314623501315e-05,
"loss": 1.5053,
"step": 953
},
{
"epoch": 0.62,
"learning_rate": 1.8387474254504265e-05,
"loss": 1.5939,
"step": 954
},
{
"epoch": 0.62,
"learning_rate": 1.838362970892903e-05,
"loss": 1.6342,
"step": 955
},
{
"epoch": 0.62,
"learning_rate": 1.8379780988690014e-05,
"loss": 1.4749,
"step": 956
},
{
"epoch": 0.63,
"learning_rate": 1.8375928095703703e-05,
"loss": 1.7633,
"step": 957
},
{
"epoch": 0.63,
"learning_rate": 1.837207103188866e-05,
"loss": 1.7029,
"step": 958
},
{
"epoch": 0.63,
"learning_rate": 1.836820979916553e-05,
"loss": 1.4715,
"step": 959
},
{
"epoch": 0.63,
"learning_rate": 1.8364344399457028e-05,
"loss": 1.4499,
"step": 960
},
{
"epoch": 0.63,
"learning_rate": 1.8360474834687944e-05,
"loss": 1.5667,
"step": 961
},
{
"epoch": 0.63,
"learning_rate": 1.8356601106785148e-05,
"loss": 1.6502,
"step": 962
},
{
"epoch": 0.63,
"learning_rate": 1.835272321767758e-05,
"loss": 1.6614,
"step": 963
},
{
"epoch": 0.63,
"learning_rate": 1.8348841169296247e-05,
"loss": 1.7366,
"step": 964
},
{
"epoch": 0.63,
"learning_rate": 1.8344954963574227e-05,
"loss": 1.5804,
"step": 965
},
{
"epoch": 0.63,
"learning_rate": 1.8341064602446686e-05,
"loss": 1.5135,
"step": 966
},
{
"epoch": 0.63,
"learning_rate": 1.8337170087850834e-05,
"loss": 1.5833,
"step": 967
},
{
"epoch": 0.63,
"learning_rate": 1.8333271421725968e-05,
"loss": 1.6302,
"step": 968
},
{
"epoch": 0.63,
"learning_rate": 1.832936860601344e-05,
"loss": 1.4175,
"step": 969
},
{
"epoch": 0.63,
"learning_rate": 1.8325461642656676e-05,
"loss": 1.6856,
"step": 970
},
{
"epoch": 0.63,
"learning_rate": 1.8321550533601166e-05,
"loss": 1.4986,
"step": 971
},
{
"epoch": 0.64,
"learning_rate": 1.8317635280794466e-05,
"loss": 1.7103,
"step": 972
},
{
"epoch": 0.64,
"learning_rate": 1.8313715886186183e-05,
"loss": 1.6611,
"step": 973
},
{
"epoch": 0.64,
"learning_rate": 1.8309792351728006e-05,
"loss": 1.7768,
"step": 974
},
{
"epoch": 0.64,
"learning_rate": 1.8305864679373667e-05,
"loss": 1.4449,
"step": 975
},
{
"epoch": 0.64,
"learning_rate": 1.8301932871078975e-05,
"loss": 1.6403,
"step": 976
},
{
"epoch": 0.64,
"learning_rate": 1.8297996928801787e-05,
"loss": 1.8645,
"step": 977
},
{
"epoch": 0.64,
"learning_rate": 1.829405685450202e-05,
"loss": 1.9234,
"step": 978
},
{
"epoch": 0.64,
"learning_rate": 1.8290112650141652e-05,
"loss": 1.6654,
"step": 979
},
{
"epoch": 0.64,
"learning_rate": 1.828616431768472e-05,
"loss": 1.4603,
"step": 980
},
{
"epoch": 0.64,
"learning_rate": 1.8282211859097308e-05,
"loss": 1.2843,
"step": 981
},
{
"epoch": 0.64,
"learning_rate": 1.8278255276347563e-05,
"loss": 1.7198,
"step": 982
},
{
"epoch": 0.64,
"learning_rate": 1.8274294571405678e-05,
"loss": 1.6672,
"step": 983
},
{
"epoch": 0.64,
"learning_rate": 1.8270329746243903e-05,
"loss": 1.5261,
"step": 984
},
{
"epoch": 0.64,
"learning_rate": 1.8266360802836542e-05,
"loss": 1.6983,
"step": 985
},
{
"epoch": 0.64,
"learning_rate": 1.826238774315995e-05,
"loss": 1.4945,
"step": 986
},
{
"epoch": 0.64,
"learning_rate": 1.8258410569192526e-05,
"loss": 1.4757,
"step": 987
},
{
"epoch": 0.65,
"learning_rate": 1.8254429282914715e-05,
"loss": 1.61,
"step": 988
},
{
"epoch": 0.65,
"learning_rate": 1.8250443886309023e-05,
"loss": 1.6111,
"step": 989
},
{
"epoch": 0.65,
"learning_rate": 1.824645438135999e-05,
"loss": 1.7097,
"step": 990
},
{
"epoch": 0.65,
"learning_rate": 1.824246077005421e-05,
"loss": 1.5042,
"step": 991
},
{
"epoch": 0.65,
"learning_rate": 1.823846305438032e-05,
"loss": 1.4909,
"step": 992
},
{
"epoch": 0.65,
"learning_rate": 1.8234461236328992e-05,
"loss": 1.5205,
"step": 993
},
{
"epoch": 0.65,
"learning_rate": 1.8230455317892957e-05,
"loss": 1.6378,
"step": 994
},
{
"epoch": 0.65,
"learning_rate": 1.8226445301066974e-05,
"loss": 1.6407,
"step": 995
},
{
"epoch": 0.65,
"learning_rate": 1.822243118784785e-05,
"loss": 1.7994,
"step": 996
},
{
"epoch": 0.65,
"learning_rate": 1.8218412980234426e-05,
"loss": 1.6106,
"step": 997
},
{
"epoch": 0.65,
"learning_rate": 1.8214390680227588e-05,
"loss": 1.8341,
"step": 998
},
{
"epoch": 0.65,
"learning_rate": 1.821036428983026e-05,
"loss": 1.5347,
"step": 999
},
{
"epoch": 0.65,
"learning_rate": 1.82063338110474e-05,
"loss": 1.5895,
"step": 1000
},
{
"epoch": 0.65,
"learning_rate": 1.820229924588599e-05,
"loss": 1.5102,
"step": 1001
},
{
"epoch": 0.65,
"learning_rate": 1.8198260596355077e-05,
"loss": 1.5667,
"step": 1002
},
{
"epoch": 0.66,
"learning_rate": 1.8194217864465708e-05,
"loss": 1.5489,
"step": 1003
},
{
"epoch": 0.66,
"learning_rate": 1.819017105223099e-05,
"loss": 1.6404,
"step": 1004
},
{
"epoch": 0.66,
"learning_rate": 1.818612016166604e-05,
"loss": 1.5251,
"step": 1005
},
{
"epoch": 0.66,
"learning_rate": 1.8182065194788024e-05,
"loss": 1.67,
"step": 1006
},
{
"epoch": 0.66,
"learning_rate": 1.817800615361613e-05,
"loss": 1.4905,
"step": 1007
},
{
"epoch": 0.66,
"learning_rate": 1.8173943040171567e-05,
"loss": 1.6519,
"step": 1008
},
{
"epoch": 0.66,
"learning_rate": 1.8169875856477587e-05,
"loss": 1.5537,
"step": 1009
},
{
"epoch": 0.66,
"learning_rate": 1.8165804604559455e-05,
"loss": 1.4941,
"step": 1010
},
{
"epoch": 0.66,
"learning_rate": 1.8161729286444474e-05,
"loss": 1.7506,
"step": 1011
},
{
"epoch": 0.66,
"learning_rate": 1.8157649904161963e-05,
"loss": 1.4533,
"step": 1012
},
{
"epoch": 0.66,
"learning_rate": 1.8153566459743272e-05,
"loss": 1.9132,
"step": 1013
},
{
"epoch": 0.66,
"learning_rate": 1.814947895522176e-05,
"loss": 1.9375,
"step": 1014
},
{
"epoch": 0.66,
"learning_rate": 1.8145387392632824e-05,
"loss": 1.7396,
"step": 1015
},
{
"epoch": 0.66,
"learning_rate": 1.8141291774013876e-05,
"loss": 1.3647,
"step": 1016
},
{
"epoch": 0.66,
"learning_rate": 1.8137192101404343e-05,
"loss": 1.5785,
"step": 1017
},
{
"epoch": 0.67,
"learning_rate": 1.8133088376845675e-05,
"loss": 1.7008,
"step": 1018
},
{
"epoch": 0.67,
"learning_rate": 1.8128980602381335e-05,
"loss": 1.585,
"step": 1019
},
{
"epoch": 0.67,
"learning_rate": 1.8124868780056814e-05,
"loss": 1.4954,
"step": 1020
},
{
"epoch": 0.67,
"learning_rate": 1.812075291191961e-05,
"loss": 1.6717,
"step": 1021
},
{
"epoch": 0.67,
"learning_rate": 1.8116633000019233e-05,
"loss": 1.6371,
"step": 1022
},
{
"epoch": 0.67,
"learning_rate": 1.811250904640721e-05,
"loss": 1.4633,
"step": 1023
},
{
"epoch": 0.67,
"learning_rate": 1.8108381053137086e-05,
"loss": 1.7185,
"step": 1024
},
{
"epoch": 0.67,
"learning_rate": 1.810424902226441e-05,
"loss": 1.673,
"step": 1025
},
{
"epoch": 0.67,
"learning_rate": 1.8100112955846746e-05,
"loss": 1.6419,
"step": 1026
},
{
"epoch": 0.67,
"learning_rate": 1.8095972855943664e-05,
"loss": 1.6119,
"step": 1027
},
{
"epoch": 0.67,
"learning_rate": 1.8091828724616742e-05,
"loss": 1.4914,
"step": 1028
},
{
"epoch": 0.67,
"learning_rate": 1.8087680563929574e-05,
"loss": 1.5202,
"step": 1029
},
{
"epoch": 0.67,
"learning_rate": 1.8083528375947744e-05,
"loss": 1.7046,
"step": 1030
},
{
"epoch": 0.67,
"learning_rate": 1.8079372162738863e-05,
"loss": 1.543,
"step": 1031
},
{
"epoch": 0.67,
"learning_rate": 1.807521192637253e-05,
"loss": 1.5611,
"step": 1032
},
{
"epoch": 0.68,
"learning_rate": 1.807104766892035e-05,
"loss": 1.5267,
"step": 1033
},
{
"epoch": 0.68,
"learning_rate": 1.8066879392455932e-05,
"loss": 1.6693,
"step": 1034
},
{
"epoch": 0.68,
"learning_rate": 1.806270709905489e-05,
"loss": 1.6162,
"step": 1035
},
{
"epoch": 0.68,
"learning_rate": 1.8058530790794837e-05,
"loss": 1.7124,
"step": 1036
},
{
"epoch": 0.68,
"learning_rate": 1.805435046975538e-05,
"loss": 1.8221,
"step": 1037
},
{
"epoch": 0.68,
"learning_rate": 1.805016613801813e-05,
"loss": 1.6226,
"step": 1038
},
{
"epoch": 0.68,
"learning_rate": 1.8045977797666685e-05,
"loss": 1.5912,
"step": 1039
},
{
"epoch": 0.68,
"learning_rate": 1.8041785450786655e-05,
"loss": 1.5264,
"step": 1040
},
{
"epoch": 0.68,
"learning_rate": 1.8037589099465637e-05,
"loss": 1.6275,
"step": 1041
},
{
"epoch": 0.68,
"learning_rate": 1.8033388745793218e-05,
"loss": 1.5983,
"step": 1042
},
{
"epoch": 0.68,
"learning_rate": 1.8029184391860982e-05,
"loss": 1.6715,
"step": 1043
},
{
"epoch": 0.68,
"learning_rate": 1.8024976039762507e-05,
"loss": 1.635,
"step": 1044
},
{
"epoch": 0.68,
"learning_rate": 1.8020763691593356e-05,
"loss": 1.4405,
"step": 1045
},
{
"epoch": 0.68,
"learning_rate": 1.801654734945109e-05,
"loss": 1.69,
"step": 1046
},
{
"epoch": 0.68,
"learning_rate": 1.8012327015435255e-05,
"loss": 1.3915,
"step": 1047
},
{
"epoch": 0.68,
"learning_rate": 1.800810269164738e-05,
"loss": 1.4274,
"step": 1048
},
{
"epoch": 0.69,
"learning_rate": 1.8003874380190983e-05,
"loss": 1.5034,
"step": 1049
},
{
"epoch": 0.69,
"learning_rate": 1.7999642083171576e-05,
"loss": 1.8018,
"step": 1050
},
{
"epoch": 0.69,
"learning_rate": 1.7995405802696645e-05,
"loss": 1.6397,
"step": 1051
},
{
"epoch": 0.69,
"learning_rate": 1.7991165540875662e-05,
"loss": 1.6003,
"step": 1052
},
{
"epoch": 0.69,
"learning_rate": 1.798692129982009e-05,
"loss": 1.7233,
"step": 1053
},
{
"epoch": 0.69,
"learning_rate": 1.7982673081643364e-05,
"loss": 1.5548,
"step": 1054
},
{
"epoch": 0.69,
"learning_rate": 1.79784208884609e-05,
"loss": 1.7301,
"step": 1055
},
{
"epoch": 0.69,
"learning_rate": 1.7974164722390094e-05,
"loss": 1.4906,
"step": 1056
},
{
"epoch": 0.69,
"learning_rate": 1.7969904585550325e-05,
"loss": 1.8082,
"step": 1057
},
{
"epoch": 0.69,
"learning_rate": 1.7965640480062945e-05,
"loss": 1.4621,
"step": 1058
},
{
"epoch": 0.69,
"learning_rate": 1.796137240805129e-05,
"loss": 1.4862,
"step": 1059
},
{
"epoch": 0.69,
"learning_rate": 1.795710037164065e-05,
"loss": 1.713,
"step": 1060
},
{
"epoch": 0.69,
"learning_rate": 1.7952824372958315e-05,
"loss": 1.6419,
"step": 1061
},
{
"epoch": 0.69,
"learning_rate": 1.7948544414133534e-05,
"loss": 1.4154,
"step": 1062
},
{
"epoch": 0.69,
"learning_rate": 1.794426049729753e-05,
"loss": 1.8449,
"step": 1063
},
{
"epoch": 0.7,
"learning_rate": 1.7939972624583496e-05,
"loss": 1.4536,
"step": 1064
},
{
"epoch": 0.7,
"learning_rate": 1.7935680798126597e-05,
"loss": 1.5207,
"step": 1065
},
{
"epoch": 0.7,
"learning_rate": 1.793138502006397e-05,
"loss": 1.5854,
"step": 1066
},
{
"epoch": 0.7,
"learning_rate": 1.7927085292534713e-05,
"loss": 1.7638,
"step": 1067
},
{
"epoch": 0.7,
"learning_rate": 1.792278161767989e-05,
"loss": 1.5954,
"step": 1068
},
{
"epoch": 0.7,
"learning_rate": 1.7918473997642535e-05,
"loss": 1.4516,
"step": 1069
},
{
"epoch": 0.7,
"learning_rate": 1.7914162434567653e-05,
"loss": 1.4543,
"step": 1070
},
{
"epoch": 0.7,
"learning_rate": 1.79098469306022e-05,
"loss": 1.7491,
"step": 1071
},
{
"epoch": 0.7,
"learning_rate": 1.7905527487895095e-05,
"loss": 1.5245,
"step": 1072
},
{
"epoch": 0.7,
"learning_rate": 1.790120410859723e-05,
"loss": 1.6609,
"step": 1073
},
{
"epoch": 0.7,
"learning_rate": 1.7896876794861443e-05,
"loss": 1.9546,
"step": 1074
},
{
"epoch": 0.7,
"learning_rate": 1.7892545548842545e-05,
"loss": 1.5431,
"step": 1075
},
{
"epoch": 0.7,
"learning_rate": 1.7888210372697292e-05,
"loss": 1.6567,
"step": 1076
},
{
"epoch": 0.7,
"learning_rate": 1.788387126858441e-05,
"loss": 1.4355,
"step": 1077
},
{
"epoch": 0.7,
"learning_rate": 1.7879528238664567e-05,
"loss": 1.4631,
"step": 1078
},
{
"epoch": 0.71,
"learning_rate": 1.78751812851004e-05,
"loss": 1.7689,
"step": 1079
},
{
"epoch": 0.71,
"learning_rate": 1.7870830410056487e-05,
"loss": 1.5755,
"step": 1080
},
{
"epoch": 0.71,
"learning_rate": 1.786647561569937e-05,
"loss": 1.8112,
"step": 1081
},
{
"epoch": 0.71,
"learning_rate": 1.7862116904197534e-05,
"loss": 1.677,
"step": 1082
},
{
"epoch": 0.71,
"learning_rate": 1.7857754277721417e-05,
"loss": 1.4163,
"step": 1083
},
{
"epoch": 0.71,
"learning_rate": 1.7853387738443407e-05,
"loss": 1.698,
"step": 1084
},
{
"epoch": 0.71,
"learning_rate": 1.7849017288537844e-05,
"loss": 1.5766,
"step": 1085
},
{
"epoch": 0.71,
"learning_rate": 1.7844642930181008e-05,
"loss": 1.5706,
"step": 1086
},
{
"epoch": 0.71,
"learning_rate": 1.784026466555113e-05,
"loss": 1.563,
"step": 1087
},
{
"epoch": 0.71,
"learning_rate": 1.7835882496828392e-05,
"loss": 1.5243,
"step": 1088
},
{
"epoch": 0.71,
"learning_rate": 1.7831496426194906e-05,
"loss": 1.4538,
"step": 1089
},
{
"epoch": 0.71,
"learning_rate": 1.782710645583473e-05,
"loss": 1.6889,
"step": 1090
},
{
"epoch": 0.71,
"learning_rate": 1.7822712587933884e-05,
"loss": 1.4803,
"step": 1091
},
{
"epoch": 0.71,
"learning_rate": 1.78183148246803e-05,
"loss": 1.6867,
"step": 1092
},
{
"epoch": 0.71,
"learning_rate": 1.7813913168263865e-05,
"loss": 1.4595,
"step": 1093
},
{
"epoch": 0.71,
"learning_rate": 1.7809507620876406e-05,
"loss": 1.7997,
"step": 1094
},
{
"epoch": 0.72,
"learning_rate": 1.780509818471168e-05,
"loss": 1.4327,
"step": 1095
},
{
"epoch": 0.72,
"learning_rate": 1.7800684861965386e-05,
"loss": 1.6829,
"step": 1096
},
{
"epoch": 0.72,
"learning_rate": 1.779626765483516e-05,
"loss": 1.4585,
"step": 1097
},
{
"epoch": 0.72,
"learning_rate": 1.779184656552056e-05,
"loss": 1.4819,
"step": 1098
},
{
"epoch": 0.72,
"learning_rate": 1.7787421596223093e-05,
"loss": 1.6057,
"step": 1099
},
{
"epoch": 0.72,
"learning_rate": 1.778299274914619e-05,
"loss": 1.6136,
"step": 1100
},
{
"epoch": 0.72,
"learning_rate": 1.7778560026495218e-05,
"loss": 1.4229,
"step": 1101
},
{
"epoch": 0.72,
"learning_rate": 1.7774123430477464e-05,
"loss": 1.6269,
"step": 1102
},
{
"epoch": 0.72,
"learning_rate": 1.776968296330215e-05,
"loss": 1.5571,
"step": 1103
},
{
"epoch": 0.72,
"learning_rate": 1.7765238627180424e-05,
"loss": 1.3867,
"step": 1104
},
{
"epoch": 0.72,
"learning_rate": 1.776079042432537e-05,
"loss": 2.1153,
"step": 1105
},
{
"epoch": 0.72,
"learning_rate": 1.775633835695198e-05,
"loss": 1.6678,
"step": 1106
},
{
"epoch": 0.72,
"learning_rate": 1.775188242727719e-05,
"loss": 1.4696,
"step": 1107
},
{
"epoch": 0.72,
"learning_rate": 1.7747422637519838e-05,
"loss": 1.5321,
"step": 1108
},
{
"epoch": 0.72,
"learning_rate": 1.77429589899007e-05,
"loss": 1.6162,
"step": 1109
},
{
"epoch": 0.73,
"learning_rate": 1.773849148664247e-05,
"loss": 1.9606,
"step": 1110
},
{
"epoch": 0.73,
"learning_rate": 1.773402012996976e-05,
"loss": 1.4,
"step": 1111
},
{
"epoch": 0.73,
"learning_rate": 1.7729544922109097e-05,
"loss": 1.3941,
"step": 1112
},
{
"epoch": 0.73,
"learning_rate": 1.772506586528893e-05,
"loss": 1.5109,
"step": 1113
},
{
"epoch": 0.73,
"learning_rate": 1.7720582961739628e-05,
"loss": 1.6409,
"step": 1114
},
{
"epoch": 0.73,
"learning_rate": 1.7716096213693474e-05,
"loss": 1.9652,
"step": 1115
},
{
"epoch": 0.73,
"learning_rate": 1.7711605623384653e-05,
"loss": 1.4223,
"step": 1116
},
{
"epoch": 0.73,
"learning_rate": 1.7707111193049283e-05,
"loss": 1.5568,
"step": 1117
},
{
"epoch": 0.73,
"learning_rate": 1.7702612924925377e-05,
"loss": 1.5349,
"step": 1118
},
{
"epoch": 0.73,
"learning_rate": 1.7698110821252873e-05,
"loss": 1.4372,
"step": 1119
},
{
"epoch": 0.73,
"learning_rate": 1.769360488427361e-05,
"loss": 1.5536,
"step": 1120
},
{
"epoch": 0.73,
"learning_rate": 1.7689095116231338e-05,
"loss": 1.4213,
"step": 1121
},
{
"epoch": 0.73,
"learning_rate": 1.7684581519371714e-05,
"loss": 1.6457,
"step": 1122
},
{
"epoch": 0.73,
"learning_rate": 1.7680064095942306e-05,
"loss": 1.4718,
"step": 1123
},
{
"epoch": 0.73,
"learning_rate": 1.767554284819258e-05,
"loss": 1.7224,
"step": 1124
},
{
"epoch": 0.74,
"learning_rate": 1.7671017778373914e-05,
"loss": 1.6295,
"step": 1125
},
{
"epoch": 0.74,
"learning_rate": 1.7666488888739587e-05,
"loss": 1.8031,
"step": 1126
},
{
"epoch": 0.74,
"learning_rate": 1.766195618154477e-05,
"loss": 1.6379,
"step": 1127
},
{
"epoch": 0.74,
"learning_rate": 1.7657419659046555e-05,
"loss": 1.419,
"step": 1128
},
{
"epoch": 0.74,
"learning_rate": 1.765287932350391e-05,
"loss": 1.5535,
"step": 1129
},
{
"epoch": 0.74,
"learning_rate": 1.7648335177177725e-05,
"loss": 1.4204,
"step": 1130
},
{
"epoch": 0.74,
"learning_rate": 1.7643787222330773e-05,
"loss": 1.7073,
"step": 1131
},
{
"epoch": 0.74,
"learning_rate": 1.7639235461227727e-05,
"loss": 1.5901,
"step": 1132
},
{
"epoch": 0.74,
"learning_rate": 1.7634679896135157e-05,
"loss": 1.4866,
"step": 1133
},
{
"epoch": 0.74,
"learning_rate": 1.7630120529321518e-05,
"loss": 1.491,
"step": 1134
},
{
"epoch": 0.74,
"learning_rate": 1.762555736305718e-05,
"loss": 1.8014,
"step": 1135
},
{
"epoch": 0.74,
"learning_rate": 1.7620990399614385e-05,
"loss": 1.7054,
"step": 1136
},
{
"epoch": 0.74,
"learning_rate": 1.7616419641267267e-05,
"loss": 1.4904,
"step": 1137
},
{
"epoch": 0.74,
"learning_rate": 1.7611845090291858e-05,
"loss": 1.6492,
"step": 1138
},
{
"epoch": 0.74,
"learning_rate": 1.760726674896607e-05,
"loss": 1.5781,
"step": 1139
},
{
"epoch": 0.74,
"learning_rate": 1.760268461956972e-05,
"loss": 1.5678,
"step": 1140
},
{
"epoch": 0.75,
"learning_rate": 1.759809870438449e-05,
"loss": 1.6218,
"step": 1141
},
{
"epoch": 0.75,
"learning_rate": 1.759350900569395e-05,
"loss": 1.4394,
"step": 1142
},
{
"epoch": 0.75,
"learning_rate": 1.758891552578357e-05,
"loss": 1.4857,
"step": 1143
},
{
"epoch": 0.75,
"learning_rate": 1.758431826694069e-05,
"loss": 1.8382,
"step": 1144
},
{
"epoch": 0.75,
"learning_rate": 1.757971723145453e-05,
"loss": 1.6818,
"step": 1145
},
{
"epoch": 0.75,
"learning_rate": 1.7575112421616203e-05,
"loss": 1.5985,
"step": 1146
},
{
"epoch": 0.75,
"learning_rate": 1.7570503839718688e-05,
"loss": 1.6139,
"step": 1147
},
{
"epoch": 0.75,
"learning_rate": 1.7565891488056846e-05,
"loss": 1.5467,
"step": 1148
},
{
"epoch": 0.75,
"learning_rate": 1.7561275368927422e-05,
"loss": 1.6411,
"step": 1149
},
{
"epoch": 0.75,
"learning_rate": 1.7556655484629028e-05,
"loss": 1.4484,
"step": 1150
},
{
"epoch": 0.75,
"learning_rate": 1.755203183746216e-05,
"loss": 1.6862,
"step": 1151
},
{
"epoch": 0.75,
"learning_rate": 1.7547404429729174e-05,
"loss": 1.8155,
"step": 1152
},
{
"epoch": 0.75,
"learning_rate": 1.7542773263734317e-05,
"loss": 1.489,
"step": 1153
},
{
"epoch": 0.75,
"learning_rate": 1.753813834178369e-05,
"loss": 1.6902,
"step": 1154
},
{
"epoch": 0.75,
"learning_rate": 1.753349966618528e-05,
"loss": 1.8875,
"step": 1155
},
{
"epoch": 0.76,
"learning_rate": 1.7528857239248925e-05,
"loss": 1.3996,
"step": 1156
},
{
"epoch": 0.76,
"learning_rate": 1.752421106328635e-05,
"loss": 1.5227,
"step": 1157
},
{
"epoch": 0.76,
"learning_rate": 1.751956114061113e-05,
"loss": 1.8296,
"step": 1158
},
{
"epoch": 0.76,
"learning_rate": 1.751490747353872e-05,
"loss": 1.4018,
"step": 1159
},
{
"epoch": 0.76,
"learning_rate": 1.751025006438643e-05,
"loss": 1.6367,
"step": 1160
},
{
"epoch": 0.76,
"learning_rate": 1.7505588915473437e-05,
"loss": 2.0103,
"step": 1161
},
{
"epoch": 0.76,
"learning_rate": 1.7500924029120782e-05,
"loss": 1.7178,
"step": 1162
},
{
"epoch": 0.76,
"learning_rate": 1.7496255407651364e-05,
"loss": 1.4717,
"step": 1163
},
{
"epoch": 0.76,
"learning_rate": 1.7491583053389937e-05,
"loss": 1.4377,
"step": 1164
},
{
"epoch": 0.76,
"learning_rate": 1.7486906968663128e-05,
"loss": 1.7,
"step": 1165
},
{
"epoch": 0.76,
"learning_rate": 1.748222715579941e-05,
"loss": 1.4921,
"step": 1166
},
{
"epoch": 0.76,
"learning_rate": 1.747754361712911e-05,
"loss": 1.6197,
"step": 1167
},
{
"epoch": 0.76,
"learning_rate": 1.7472856354984427e-05,
"loss": 1.4526,
"step": 1168
},
{
"epoch": 0.76,
"learning_rate": 1.7468165371699393e-05,
"loss": 1.5418,
"step": 1169
},
{
"epoch": 0.76,
"learning_rate": 1.7463470669609907e-05,
"loss": 1.4521,
"step": 1170
},
{
"epoch": 0.77,
"learning_rate": 1.7458772251053714e-05,
"loss": 1.4436,
"step": 1171
},
{
"epoch": 0.77,
"learning_rate": 1.7454070118370416e-05,
"loss": 1.5855,
"step": 1172
},
{
"epoch": 0.77,
"learning_rate": 1.7449364273901457e-05,
"loss": 1.5573,
"step": 1173
},
{
"epoch": 0.77,
"learning_rate": 1.7444654719990128e-05,
"loss": 1.4148,
"step": 1174
},
{
"epoch": 0.77,
"learning_rate": 1.7439941458981578e-05,
"loss": 1.4835,
"step": 1175
},
{
"epoch": 0.77,
"learning_rate": 1.743522449322279e-05,
"loss": 1.4503,
"step": 1176
},
{
"epoch": 0.77,
"learning_rate": 1.74305038250626e-05,
"loss": 1.4578,
"step": 1177
},
{
"epoch": 0.77,
"learning_rate": 1.7425779456851683e-05,
"loss": 1.6703,
"step": 1178
},
{
"epoch": 0.77,
"learning_rate": 1.742105139094256e-05,
"loss": 1.7049,
"step": 1179
},
{
"epoch": 0.77,
"learning_rate": 1.741631962968959e-05,
"loss": 1.4083,
"step": 1180
},
{
"epoch": 0.77,
"learning_rate": 1.741158417544897e-05,
"loss": 1.7199,
"step": 1181
},
{
"epoch": 0.77,
"learning_rate": 1.7406845030578747e-05,
"loss": 1.5893,
"step": 1182
},
{
"epoch": 0.77,
"learning_rate": 1.7402102197438795e-05,
"loss": 1.6181,
"step": 1183
},
{
"epoch": 0.77,
"learning_rate": 1.7397355678390823e-05,
"loss": 1.5717,
"step": 1184
},
{
"epoch": 0.77,
"learning_rate": 1.7392605475798387e-05,
"loss": 1.4847,
"step": 1185
},
{
"epoch": 0.78,
"learning_rate": 1.7387851592026868e-05,
"loss": 1.4206,
"step": 1186
},
{
"epoch": 0.78,
"learning_rate": 1.738309402944348e-05,
"loss": 1.4546,
"step": 1187
},
{
"epoch": 0.78,
"learning_rate": 1.7378332790417275e-05,
"loss": 1.6486,
"step": 1188
},
{
"epoch": 0.78,
"learning_rate": 1.7373567877319128e-05,
"loss": 1.606,
"step": 1189
},
{
"epoch": 0.78,
"learning_rate": 1.7368799292521754e-05,
"loss": 1.6465,
"step": 1190
},
{
"epoch": 0.78,
"learning_rate": 1.7364027038399682e-05,
"loss": 1.5372,
"step": 1191
},
{
"epoch": 0.78,
"learning_rate": 1.7359251117329284e-05,
"loss": 1.5218,
"step": 1192
},
{
"epoch": 0.78,
"learning_rate": 1.7354471531688743e-05,
"loss": 1.4834,
"step": 1193
},
{
"epoch": 0.78,
"learning_rate": 1.734968828385808e-05,
"loss": 1.6427,
"step": 1194
},
{
"epoch": 0.78,
"learning_rate": 1.7344901376219126e-05,
"loss": 1.5489,
"step": 1195
},
{
"epoch": 0.78,
"learning_rate": 1.734011081115555e-05,
"loss": 1.7161,
"step": 1196
},
{
"epoch": 0.78,
"learning_rate": 1.7335316591052828e-05,
"loss": 1.5205,
"step": 1197
},
{
"epoch": 0.78,
"learning_rate": 1.7330518718298263e-05,
"loss": 1.4087,
"step": 1198
},
{
"epoch": 0.78,
"learning_rate": 1.7325717195280982e-05,
"loss": 1.5175,
"step": 1199
},
{
"epoch": 0.78,
"learning_rate": 1.7320912024391915e-05,
"loss": 1.8338,
"step": 1200
},
{
"epoch": 0.78,
"learning_rate": 1.7316103208023824e-05,
"loss": 1.7972,
"step": 1201
},
{
"epoch": 0.79,
"learning_rate": 1.7311290748571273e-05,
"loss": 1.6374,
"step": 1202
},
{
"epoch": 0.79,
"learning_rate": 1.7306474648430653e-05,
"loss": 1.6317,
"step": 1203
},
{
"epoch": 0.79,
"learning_rate": 1.730165491000016e-05,
"loss": 1.4618,
"step": 1204
},
{
"epoch": 0.79,
"learning_rate": 1.72968315356798e-05,
"loss": 1.515,
"step": 1205
},
{
"epoch": 0.79,
"learning_rate": 1.729200452787139e-05,
"loss": 1.4267,
"step": 1206
},
{
"epoch": 0.79,
"learning_rate": 1.7287173888978567e-05,
"loss": 1.395,
"step": 1207
},
{
"epoch": 0.79,
"learning_rate": 1.7282339621406764e-05,
"loss": 1.6709,
"step": 1208
},
{
"epoch": 0.79,
"learning_rate": 1.7277501727563227e-05,
"loss": 1.6366,
"step": 1209
},
{
"epoch": 0.79,
"learning_rate": 1.7272660209857e-05,
"loss": 1.4587,
"step": 1210
},
{
"epoch": 0.79,
"learning_rate": 1.7267815070698942e-05,
"loss": 1.3186,
"step": 1211
},
{
"epoch": 0.79,
"learning_rate": 1.726296631250171e-05,
"loss": 1.5449,
"step": 1212
},
{
"epoch": 0.79,
"learning_rate": 1.725811393767976e-05,
"loss": 1.4588,
"step": 1213
},
{
"epoch": 0.79,
"learning_rate": 1.7253257948649357e-05,
"loss": 1.5899,
"step": 1214
},
{
"epoch": 0.79,
"learning_rate": 1.724839834782856e-05,
"loss": 1.5331,
"step": 1215
},
{
"epoch": 0.79,
"learning_rate": 1.7243535137637227e-05,
"loss": 1.5731,
"step": 1216
},
{
"epoch": 0.8,
"learning_rate": 1.7238668320497014e-05,
"loss": 1.7107,
"step": 1217
},
{
"epoch": 0.8,
"learning_rate": 1.7233797898831376e-05,
"loss": 1.3423,
"step": 1218
},
{
"epoch": 0.8,
"learning_rate": 1.7228923875065553e-05,
"loss": 1.5851,
"step": 1219
},
{
"epoch": 0.8,
"learning_rate": 1.7224046251626596e-05,
"loss": 1.8504,
"step": 1220
},
{
"epoch": 0.8,
"learning_rate": 1.7219165030943327e-05,
"loss": 1.5308,
"step": 1221
},
{
"epoch": 0.8,
"learning_rate": 1.721428021544638e-05,
"loss": 1.6922,
"step": 1222
},
{
"epoch": 0.8,
"learning_rate": 1.720939180756816e-05,
"loss": 1.9478,
"step": 1223
},
{
"epoch": 0.8,
"learning_rate": 1.720449980974288e-05,
"loss": 1.5656,
"step": 1224
},
{
"epoch": 0.8,
"learning_rate": 1.7199604224406524e-05,
"loss": 1.6334,
"step": 1225
},
{
"epoch": 0.8,
"learning_rate": 1.7194705053996873e-05,
"loss": 1.4287,
"step": 1226
},
{
"epoch": 0.8,
"learning_rate": 1.7189802300953488e-05,
"loss": 1.4779,
"step": 1227
},
{
"epoch": 0.8,
"learning_rate": 1.7184895967717715e-05,
"loss": 1.4817,
"step": 1228
},
{
"epoch": 0.8,
"learning_rate": 1.7179986056732688e-05,
"loss": 1.6298,
"step": 1229
},
{
"epoch": 0.8,
"learning_rate": 1.717507257044331e-05,
"loss": 1.6829,
"step": 1230
},
{
"epoch": 0.8,
"learning_rate": 1.7170155511296284e-05,
"loss": 1.6387,
"step": 1231
},
{
"epoch": 0.81,
"learning_rate": 1.7165234881740064e-05,
"loss": 1.6279,
"step": 1232
},
{
"epoch": 0.81,
"learning_rate": 1.716031068422491e-05,
"loss": 1.555,
"step": 1233
},
{
"epoch": 0.81,
"learning_rate": 1.7155382921202844e-05,
"loss": 1.5474,
"step": 1234
},
{
"epoch": 0.81,
"learning_rate": 1.715045159512767e-05,
"loss": 1.5371,
"step": 1235
},
{
"epoch": 0.81,
"learning_rate": 1.7145516708454953e-05,
"loss": 1.5676,
"step": 1236
},
{
"epoch": 0.81,
"learning_rate": 1.714057826364205e-05,
"loss": 1.5857,
"step": 1237
},
{
"epoch": 0.81,
"learning_rate": 1.713563626314808e-05,
"loss": 1.5049,
"step": 1238
},
{
"epoch": 0.81,
"learning_rate": 1.7130690709433927e-05,
"loss": 1.4421,
"step": 1239
},
{
"epoch": 0.81,
"learning_rate": 1.7125741604962254e-05,
"loss": 1.5904,
"step": 1240
},
{
"epoch": 0.81,
"learning_rate": 1.7120788952197486e-05,
"loss": 1.4129,
"step": 1241
},
{
"epoch": 0.81,
"learning_rate": 1.711583275360582e-05,
"loss": 1.6143,
"step": 1242
},
{
"epoch": 0.81,
"learning_rate": 1.7110873011655216e-05,
"loss": 1.3685,
"step": 1243
},
{
"epoch": 0.81,
"learning_rate": 1.7105909728815398e-05,
"loss": 1.4485,
"step": 1244
},
{
"epoch": 0.81,
"learning_rate": 1.710094290755785e-05,
"loss": 1.6888,
"step": 1245
},
{
"epoch": 0.81,
"learning_rate": 1.709597255035583e-05,
"loss": 1.6177,
"step": 1246
},
{
"epoch": 0.81,
"learning_rate": 1.709099865968434e-05,
"loss": 1.6119,
"step": 1247
},
{
"epoch": 0.82,
"learning_rate": 1.7086021238020154e-05,
"loss": 1.6045,
"step": 1248
},
{
"epoch": 0.82,
"learning_rate": 1.7081040287841796e-05,
"loss": 1.4358,
"step": 1249
},
{
"epoch": 0.82,
"learning_rate": 1.7076055811629556e-05,
"loss": 1.7393,
"step": 1250
},
{
"epoch": 0.82,
"learning_rate": 1.7071067811865477e-05,
"loss": 1.5706,
"step": 1251
},
{
"epoch": 0.82,
"learning_rate": 1.7066076291033348e-05,
"loss": 1.3629,
"step": 1252
},
{
"epoch": 0.82,
"learning_rate": 1.706108125161872e-05,
"loss": 1.632,
"step": 1253
},
{
"epoch": 0.82,
"learning_rate": 1.7056082696108896e-05,
"loss": 1.6904,
"step": 1254
},
{
"epoch": 0.82,
"learning_rate": 1.7051080626992926e-05,
"loss": 1.7947,
"step": 1255
},
{
"epoch": 0.82,
"learning_rate": 1.7046075046761614e-05,
"loss": 1.5797,
"step": 1256
},
{
"epoch": 0.82,
"learning_rate": 1.70410659579075e-05,
"loss": 1.5823,
"step": 1257
},
{
"epoch": 0.82,
"learning_rate": 1.7036053362924896e-05,
"loss": 1.4622,
"step": 1258
},
{
"epoch": 0.82,
"learning_rate": 1.7031037264309836e-05,
"loss": 1.2984,
"step": 1259
},
{
"epoch": 0.82,
"learning_rate": 1.7026017664560108e-05,
"loss": 1.3848,
"step": 1260
},
{
"epoch": 0.82,
"learning_rate": 1.7020994566175245e-05,
"loss": 1.6528,
"step": 1261
},
{
"epoch": 0.82,
"learning_rate": 1.7015967971656513e-05,
"loss": 1.5819,
"step": 1262
},
{
"epoch": 0.83,
"learning_rate": 1.7010937883506936e-05,
"loss": 1.6634,
"step": 1263
},
{
"epoch": 0.83,
"learning_rate": 1.7005904304231263e-05,
"loss": 1.5576,
"step": 1264
},
{
"epoch": 0.83,
"learning_rate": 1.7000867236335987e-05,
"loss": 1.534,
"step": 1265
},
{
"epoch": 0.83,
"learning_rate": 1.699582668232934e-05,
"loss": 1.4886,
"step": 1266
},
{
"epoch": 0.83,
"learning_rate": 1.6990782644721277e-05,
"loss": 1.6178,
"step": 1267
},
{
"epoch": 0.83,
"learning_rate": 1.6985735126023505e-05,
"loss": 1.6469,
"step": 1268
},
{
"epoch": 0.83,
"learning_rate": 1.698068412874946e-05,
"loss": 1.4569,
"step": 1269
},
{
"epoch": 0.83,
"learning_rate": 1.6975629655414304e-05,
"loss": 1.6704,
"step": 1270
},
{
"epoch": 0.83,
"learning_rate": 1.6970571708534933e-05,
"loss": 1.6969,
"step": 1271
},
{
"epoch": 0.83,
"learning_rate": 1.6965510290629973e-05,
"loss": 1.6062,
"step": 1272
},
{
"epoch": 0.83,
"learning_rate": 1.696044540421978e-05,
"loss": 1.6301,
"step": 1273
},
{
"epoch": 0.83,
"learning_rate": 1.695537705182643e-05,
"loss": 1.7837,
"step": 1274
},
{
"epoch": 0.83,
"learning_rate": 1.695030523597374e-05,
"loss": 1.4173,
"step": 1275
},
{
"epoch": 0.83,
"learning_rate": 1.6945229959187232e-05,
"loss": 1.5892,
"step": 1276
},
{
"epoch": 0.83,
"learning_rate": 1.694015122399417e-05,
"loss": 1.6154,
"step": 1277
},
{
"epoch": 0.84,
"learning_rate": 1.6935069032923525e-05,
"loss": 1.3911,
"step": 1278
},
{
"epoch": 0.84,
"learning_rate": 1.6929983388506002e-05,
"loss": 1.7103,
"step": 1279
},
{
"epoch": 0.84,
"learning_rate": 1.6924894293274016e-05,
"loss": 1.5427,
"step": 1280
},
{
"epoch": 0.84,
"learning_rate": 1.6919801749761696e-05,
"loss": 1.5578,
"step": 1281
},
{
"epoch": 0.84,
"learning_rate": 1.6914705760504913e-05,
"loss": 1.5861,
"step": 1282
},
{
"epoch": 0.84,
"learning_rate": 1.6909606328041218e-05,
"loss": 1.3745,
"step": 1283
},
{
"epoch": 0.84,
"learning_rate": 1.6904503454909905e-05,
"loss": 1.5076,
"step": 1284
},
{
"epoch": 0.84,
"learning_rate": 1.689939714365197e-05,
"loss": 1.4516,
"step": 1285
},
{
"epoch": 0.84,
"learning_rate": 1.689428739681012e-05,
"loss": 1.6988,
"step": 1286
},
{
"epoch": 0.84,
"learning_rate": 1.6889174216928776e-05,
"loss": 1.4721,
"step": 1287
},
{
"epoch": 0.84,
"learning_rate": 1.688405760655407e-05,
"loss": 1.7272,
"step": 1288
},
{
"epoch": 0.84,
"learning_rate": 1.6878937568233838e-05,
"loss": 1.6072,
"step": 1289
},
{
"epoch": 0.84,
"learning_rate": 1.6873814104517617e-05,
"loss": 1.5224,
"step": 1290
},
{
"epoch": 0.84,
"learning_rate": 1.6868687217956673e-05,
"loss": 1.7234,
"step": 1291
},
{
"epoch": 0.84,
"learning_rate": 1.6863556911103948e-05,
"loss": 1.5219,
"step": 1292
},
{
"epoch": 0.84,
"learning_rate": 1.6858423186514108e-05,
"loss": 1.3494,
"step": 1293
},
{
"epoch": 0.85,
"learning_rate": 1.6853286046743505e-05,
"loss": 1.8708,
"step": 1294
},
{
"epoch": 0.85,
"learning_rate": 1.6848145494350206e-05,
"loss": 1.6964,
"step": 1295
},
{
"epoch": 0.85,
"learning_rate": 1.6843001531893967e-05,
"loss": 1.7243,
"step": 1296
},
{
"epoch": 0.85,
"learning_rate": 1.6837854161936244e-05,
"loss": 1.4143,
"step": 1297
},
{
"epoch": 0.85,
"learning_rate": 1.68327033870402e-05,
"loss": 1.3846,
"step": 1298
},
{
"epoch": 0.85,
"learning_rate": 1.6827549209770676e-05,
"loss": 1.8837,
"step": 1299
},
{
"epoch": 0.85,
"learning_rate": 1.682239163269422e-05,
"loss": 1.3264,
"step": 1300
},
{
"epoch": 0.85,
"learning_rate": 1.681723065837907e-05,
"loss": 1.8319,
"step": 1301
},
{
"epoch": 0.85,
"learning_rate": 1.6812066289395157e-05,
"loss": 1.4266,
"step": 1302
},
{
"epoch": 0.85,
"learning_rate": 1.6806898528314095e-05,
"loss": 1.4893,
"step": 1303
},
{
"epoch": 0.85,
"learning_rate": 1.6801727377709195e-05,
"loss": 1.4113,
"step": 1304
},
{
"epoch": 0.85,
"learning_rate": 1.6796552840155453e-05,
"loss": 1.6108,
"step": 1305
},
{
"epoch": 0.85,
"learning_rate": 1.679137491822955e-05,
"loss": 1.3327,
"step": 1306
},
{
"epoch": 0.85,
"learning_rate": 1.6786193614509863e-05,
"loss": 1.5565,
"step": 1307
},
{
"epoch": 0.85,
"learning_rate": 1.6781008931576433e-05,
"loss": 1.586,
"step": 1308
},
{
"epoch": 0.86,
"learning_rate": 1.6775820872010998e-05,
"loss": 1.4242,
"step": 1309
},
{
"epoch": 0.86,
"learning_rate": 1.6770629438396973e-05,
"loss": 1.3707,
"step": 1310
},
{
"epoch": 0.86,
"learning_rate": 1.676543463331946e-05,
"loss": 1.2963,
"step": 1311
},
{
"epoch": 0.86,
"learning_rate": 1.676023645936523e-05,
"loss": 1.4525,
"step": 1312
},
{
"epoch": 0.86,
"learning_rate": 1.6755034919122737e-05,
"loss": 1.6639,
"step": 1313
},
{
"epoch": 0.86,
"learning_rate": 1.6749830015182106e-05,
"loss": 1.7687,
"step": 1314
},
{
"epoch": 0.86,
"learning_rate": 1.6744621750135146e-05,
"loss": 1.3493,
"step": 1315
},
{
"epoch": 0.86,
"learning_rate": 1.673941012657533e-05,
"loss": 1.5223,
"step": 1316
},
{
"epoch": 0.86,
"learning_rate": 1.6734195147097815e-05,
"loss": 1.6162,
"step": 1317
},
{
"epoch": 0.86,
"learning_rate": 1.6728976814299413e-05,
"loss": 1.4223,
"step": 1318
},
{
"epoch": 0.86,
"learning_rate": 1.6723755130778622e-05,
"loss": 1.7573,
"step": 1319
},
{
"epoch": 0.86,
"learning_rate": 1.6718530099135596e-05,
"loss": 1.6299,
"step": 1320
},
{
"epoch": 0.86,
"learning_rate": 1.6713301721972165e-05,
"loss": 1.6002,
"step": 1321
},
{
"epoch": 0.86,
"learning_rate": 1.670807000189182e-05,
"loss": 1.5642,
"step": 1322
},
{
"epoch": 0.86,
"learning_rate": 1.6702834941499718e-05,
"loss": 1.6645,
"step": 1323
},
{
"epoch": 0.87,
"learning_rate": 1.6697596543402684e-05,
"loss": 1.5904,
"step": 1324
},
{
"epoch": 0.87,
"learning_rate": 1.6692354810209193e-05,
"loss": 1.6357,
"step": 1325
},
{
"epoch": 0.87,
"learning_rate": 1.6687109744529394e-05,
"loss": 1.5074,
"step": 1326
},
{
"epoch": 0.87,
"learning_rate": 1.6681861348975084e-05,
"loss": 1.613,
"step": 1327
},
{
"epoch": 0.87,
"learning_rate": 1.667660962615973e-05,
"loss": 1.5526,
"step": 1328
},
{
"epoch": 0.87,
"learning_rate": 1.6671354578698444e-05,
"loss": 1.7324,
"step": 1329
},
{
"epoch": 0.87,
"learning_rate": 1.6666096209208e-05,
"loss": 1.3244,
"step": 1330
},
{
"epoch": 0.87,
"learning_rate": 1.666083452030683e-05,
"loss": 1.6168,
"step": 1331
},
{
"epoch": 0.87,
"learning_rate": 1.665556951461501e-05,
"loss": 1.5872,
"step": 1332
},
{
"epoch": 0.87,
"learning_rate": 1.6650301194754276e-05,
"loss": 1.3228,
"step": 1333
},
{
"epoch": 0.87,
"learning_rate": 1.6645029563348e-05,
"loss": 1.5,
"step": 1334
},
{
"epoch": 0.87,
"learning_rate": 1.6639754623021227e-05,
"loss": 1.5924,
"step": 1335
},
{
"epoch": 0.87,
"learning_rate": 1.6634476376400624e-05,
"loss": 1.7951,
"step": 1336
},
{
"epoch": 0.87,
"learning_rate": 1.6629194826114524e-05,
"loss": 1.4789,
"step": 1337
},
{
"epoch": 0.87,
"learning_rate": 1.6623909974792888e-05,
"loss": 1.7149,
"step": 1338
},
{
"epoch": 0.88,
"learning_rate": 1.661862182506734e-05,
"loss": 1.4997,
"step": 1339
},
{
"epoch": 0.88,
"learning_rate": 1.6613330379571135e-05,
"loss": 1.3992,
"step": 1340
},
{
"epoch": 0.88,
"learning_rate": 1.6608035640939165e-05,
"loss": 1.6216,
"step": 1341
},
{
"epoch": 0.88,
"learning_rate": 1.6602737611807975e-05,
"loss": 1.6128,
"step": 1342
},
{
"epoch": 0.88,
"learning_rate": 1.6597436294815733e-05,
"loss": 1.6165,
"step": 1343
},
{
"epoch": 0.88,
"learning_rate": 1.6592131692602257e-05,
"loss": 1.3647,
"step": 1344
},
{
"epoch": 0.88,
"learning_rate": 1.6586823807808998e-05,
"loss": 1.4116,
"step": 1345
},
{
"epoch": 0.88,
"learning_rate": 1.6581512643079028e-05,
"loss": 1.735,
"step": 1346
},
{
"epoch": 0.88,
"learning_rate": 1.657619820105708e-05,
"loss": 1.4182,
"step": 1347
},
{
"epoch": 0.88,
"learning_rate": 1.657088048438949e-05,
"loss": 1.7027,
"step": 1348
},
{
"epoch": 0.88,
"learning_rate": 1.6565559495724244e-05,
"loss": 1.7515,
"step": 1349
},
{
"epoch": 0.88,
"learning_rate": 1.656023523771095e-05,
"loss": 1.6471,
"step": 1350
},
{
"epoch": 0.88,
"learning_rate": 1.6554907713000845e-05,
"loss": 1.3941,
"step": 1351
},
{
"epoch": 0.88,
"learning_rate": 1.654957692424679e-05,
"loss": 1.5245,
"step": 1352
},
{
"epoch": 0.88,
"learning_rate": 1.6544242874103278e-05,
"loss": 1.4564,
"step": 1353
},
{
"epoch": 0.88,
"learning_rate": 1.6538905565226416e-05,
"loss": 1.4734,
"step": 1354
},
{
"epoch": 0.89,
"learning_rate": 1.6533565000273942e-05,
"loss": 1.834,
"step": 1355
},
{
"epoch": 0.89,
"learning_rate": 1.6528221181905217e-05,
"loss": 1.6713,
"step": 1356
},
{
"epoch": 0.89,
"learning_rate": 1.6522874112781213e-05,
"loss": 1.2969,
"step": 1357
},
{
"epoch": 0.89,
"learning_rate": 1.6517523795564527e-05,
"loss": 1.3919,
"step": 1358
},
{
"epoch": 0.89,
"learning_rate": 1.6512170232919372e-05,
"loss": 1.4925,
"step": 1359
},
{
"epoch": 0.89,
"learning_rate": 1.6506813427511574e-05,
"loss": 1.5832,
"step": 1360
},
{
"epoch": 0.89,
"learning_rate": 1.6501453382008582e-05,
"loss": 1.6789,
"step": 1361
},
{
"epoch": 0.89,
"learning_rate": 1.6496090099079452e-05,
"loss": 1.5286,
"step": 1362
},
{
"epoch": 0.89,
"learning_rate": 1.649072358139485e-05,
"loss": 1.5833,
"step": 1363
},
{
"epoch": 0.89,
"learning_rate": 1.648535383162706e-05,
"loss": 1.5594,
"step": 1364
},
{
"epoch": 0.89,
"learning_rate": 1.6479980852449974e-05,
"loss": 1.5787,
"step": 1365
},
{
"epoch": 0.89,
"learning_rate": 1.647460464653908e-05,
"loss": 1.6188,
"step": 1366
},
{
"epoch": 0.89,
"learning_rate": 1.6469225216571488e-05,
"loss": 1.5153,
"step": 1367
},
{
"epoch": 0.89,
"learning_rate": 1.6463842565225914e-05,
"loss": 1.4809,
"step": 1368
},
{
"epoch": 0.89,
"learning_rate": 1.6458456695182662e-05,
"loss": 1.9198,
"step": 1369
},
{
"epoch": 0.9,
"learning_rate": 1.6453067609123656e-05,
"loss": 1.3828,
"step": 1370
},
{
"epoch": 0.9,
"learning_rate": 1.6447675309732403e-05,
"loss": 1.6919,
"step": 1371
},
{
"epoch": 0.9,
"learning_rate": 1.6442279799694037e-05,
"loss": 1.6289,
"step": 1372
},
{
"epoch": 0.9,
"learning_rate": 1.6436881081695264e-05,
"loss": 1.5149,
"step": 1373
},
{
"epoch": 0.9,
"learning_rate": 1.64314791584244e-05,
"loss": 1.4283,
"step": 1374
},
{
"epoch": 0.9,
"learning_rate": 1.642607403257136e-05,
"loss": 1.5012,
"step": 1375
},
{
"epoch": 0.9,
"learning_rate": 1.6420665706827647e-05,
"loss": 1.6472,
"step": 1376
},
{
"epoch": 0.9,
"learning_rate": 1.6415254183886356e-05,
"loss": 1.5665,
"step": 1377
},
{
"epoch": 0.9,
"learning_rate": 1.640983946644218e-05,
"loss": 1.4238,
"step": 1378
},
{
"epoch": 0.9,
"learning_rate": 1.64044215571914e-05,
"loss": 1.362,
"step": 1379
},
{
"epoch": 0.9,
"learning_rate": 1.6399000458831893e-05,
"loss": 1.5561,
"step": 1380
},
{
"epoch": 0.9,
"learning_rate": 1.6393576174063105e-05,
"loss": 1.4644,
"step": 1381
},
{
"epoch": 0.9,
"learning_rate": 1.6388148705586097e-05,
"loss": 1.3424,
"step": 1382
},
{
"epoch": 0.9,
"learning_rate": 1.6382718056103486e-05,
"loss": 1.6558,
"step": 1383
},
{
"epoch": 0.9,
"learning_rate": 1.6377284228319496e-05,
"loss": 1.516,
"step": 1384
},
{
"epoch": 0.91,
"learning_rate": 1.637184722493992e-05,
"loss": 1.5104,
"step": 1385
},
{
"epoch": 0.91,
"learning_rate": 1.6366407048672135e-05,
"loss": 1.5578,
"step": 1386
},
{
"epoch": 0.91,
"learning_rate": 1.6360963702225106e-05,
"loss": 1.7138,
"step": 1387
},
{
"epoch": 0.91,
"learning_rate": 1.6355517188309366e-05,
"loss": 1.4916,
"step": 1388
},
{
"epoch": 0.91,
"learning_rate": 1.6350067509637028e-05,
"loss": 1.5132,
"step": 1389
},
{
"epoch": 0.91,
"learning_rate": 1.6344614668921787e-05,
"loss": 1.5638,
"step": 1390
},
{
"epoch": 0.91,
"learning_rate": 1.6339158668878906e-05,
"loss": 1.3036,
"step": 1391
},
{
"epoch": 0.91,
"learning_rate": 1.6333699512225224e-05,
"loss": 1.6657,
"step": 1392
},
{
"epoch": 0.91,
"learning_rate": 1.632823720167915e-05,
"loss": 1.3613,
"step": 1393
},
{
"epoch": 0.91,
"learning_rate": 1.6322771739960664e-05,
"loss": 1.4236,
"step": 1394
},
{
"epoch": 0.91,
"learning_rate": 1.6317303129791315e-05,
"loss": 1.5803,
"step": 1395
},
{
"epoch": 0.91,
"learning_rate": 1.6311831373894228e-05,
"loss": 1.3319,
"step": 1396
},
{
"epoch": 0.91,
"learning_rate": 1.6306356474994074e-05,
"loss": 1.5267,
"step": 1397
},
{
"epoch": 0.91,
"learning_rate": 1.6300878435817115e-05,
"loss": 1.4642,
"step": 1398
},
{
"epoch": 0.91,
"learning_rate": 1.629539725909115e-05,
"loss": 1.6824,
"step": 1399
},
{
"epoch": 0.91,
"learning_rate": 1.628991294754557e-05,
"loss": 1.6095,
"step": 1400
},
{
"epoch": 0.92,
"learning_rate": 1.62844255039113e-05,
"loss": 1.5767,
"step": 1401
},
{
"epoch": 0.92,
"learning_rate": 1.6278934930920834e-05,
"loss": 1.6414,
"step": 1402
},
{
"epoch": 0.92,
"learning_rate": 1.627344123130824e-05,
"loss": 1.6902,
"step": 1403
},
{
"epoch": 0.92,
"learning_rate": 1.626794440780911e-05,
"loss": 1.6202,
"step": 1404
},
{
"epoch": 0.92,
"learning_rate": 1.6262444463160627e-05,
"loss": 1.4388,
"step": 1405
},
{
"epoch": 0.92,
"learning_rate": 1.62569414001015e-05,
"loss": 1.4056,
"step": 1406
},
{
"epoch": 0.92,
"learning_rate": 1.6251435221372007e-05,
"loss": 1.5921,
"step": 1407
},
{
"epoch": 0.92,
"learning_rate": 1.6245925929713976e-05,
"loss": 1.4402,
"step": 1408
},
{
"epoch": 0.92,
"learning_rate": 1.6240413527870777e-05,
"loss": 1.2565,
"step": 1409
},
{
"epoch": 0.92,
"learning_rate": 1.6234898018587336e-05,
"loss": 1.3355,
"step": 1410
},
{
"epoch": 0.92,
"learning_rate": 1.6229379404610126e-05,
"loss": 1.5435,
"step": 1411
},
{
"epoch": 0.92,
"learning_rate": 1.622385768868716e-05,
"loss": 1.4971,
"step": 1412
},
{
"epoch": 0.92,
"learning_rate": 1.6218332873568e-05,
"loss": 1.5156,
"step": 1413
},
{
"epoch": 0.92,
"learning_rate": 1.6212804962003757e-05,
"loss": 1.6963,
"step": 1414
},
{
"epoch": 0.92,
"learning_rate": 1.620727395674707e-05,
"loss": 1.5227,
"step": 1415
},
{
"epoch": 0.93,
"learning_rate": 1.620173986055213e-05,
"loss": 1.7506,
"step": 1416
},
{
"epoch": 0.93,
"learning_rate": 1.6196202676174667e-05,
"loss": 1.7131,
"step": 1417
},
{
"epoch": 0.93,
"learning_rate": 1.6190662406371937e-05,
"loss": 1.5139,
"step": 1418
},
{
"epoch": 0.93,
"learning_rate": 1.6185119053902748e-05,
"loss": 1.6643,
"step": 1419
},
{
"epoch": 0.93,
"learning_rate": 1.6179572621527434e-05,
"loss": 1.4894,
"step": 1420
},
{
"epoch": 0.93,
"learning_rate": 1.6174023112007863e-05,
"loss": 1.5739,
"step": 1421
},
{
"epoch": 0.93,
"learning_rate": 1.616847052810744e-05,
"loss": 1.5332,
"step": 1422
},
{
"epoch": 0.93,
"learning_rate": 1.616291487259109e-05,
"loss": 1.517,
"step": 1423
},
{
"epoch": 0.93,
"learning_rate": 1.6157356148225282e-05,
"loss": 1.3684,
"step": 1424
},
{
"epoch": 0.93,
"learning_rate": 1.6151794357778006e-05,
"loss": 1.487,
"step": 1425
},
{
"epoch": 0.93,
"learning_rate": 1.6146229504018777e-05,
"loss": 1.601,
"step": 1426
},
{
"epoch": 0.93,
"learning_rate": 1.6140661589718635e-05,
"loss": 1.6847,
"step": 1427
},
{
"epoch": 0.93,
"learning_rate": 1.613509061765015e-05,
"loss": 1.3258,
"step": 1428
},
{
"epoch": 0.93,
"learning_rate": 1.6129516590587413e-05,
"loss": 1.3365,
"step": 1429
},
{
"epoch": 0.93,
"learning_rate": 1.6123939511306028e-05,
"loss": 1.5961,
"step": 1430
},
{
"epoch": 0.94,
"learning_rate": 1.611835938258313e-05,
"loss": 1.5094,
"step": 1431
},
{
"epoch": 0.94,
"learning_rate": 1.6112776207197368e-05,
"loss": 1.3497,
"step": 1432
},
{
"epoch": 0.94,
"learning_rate": 1.6107189987928906e-05,
"loss": 1.5475,
"step": 1433
},
{
"epoch": 0.94,
"learning_rate": 1.6101600727559423e-05,
"loss": 1.4358,
"step": 1434
},
{
"epoch": 0.94,
"learning_rate": 1.609600842887212e-05,
"loss": 1.4539,
"step": 1435
},
{
"epoch": 0.94,
"learning_rate": 1.60904130946517e-05,
"loss": 1.5833,
"step": 1436
},
{
"epoch": 0.94,
"learning_rate": 1.6084814727684386e-05,
"loss": 1.6307,
"step": 1437
},
{
"epoch": 0.94,
"learning_rate": 1.607921333075791e-05,
"loss": 1.3293,
"step": 1438
},
{
"epoch": 0.94,
"learning_rate": 1.6073608906661512e-05,
"loss": 1.6082,
"step": 1439
},
{
"epoch": 0.94,
"learning_rate": 1.6068001458185934e-05,
"loss": 1.5496,
"step": 1440
},
{
"epoch": 0.94,
"learning_rate": 1.6062390988123432e-05,
"loss": 1.3785,
"step": 1441
},
{
"epoch": 0.94,
"learning_rate": 1.6056777499267764e-05,
"loss": 1.4928,
"step": 1442
},
{
"epoch": 0.94,
"learning_rate": 1.6051160994414187e-05,
"loss": 1.6122,
"step": 1443
},
{
"epoch": 0.94,
"learning_rate": 1.6045541476359464e-05,
"loss": 1.7787,
"step": 1444
},
{
"epoch": 0.94,
"learning_rate": 1.6039918947901864e-05,
"loss": 1.5258,
"step": 1445
},
{
"epoch": 0.94,
"learning_rate": 1.603429341184114e-05,
"loss": 1.606,
"step": 1446
},
{
"epoch": 0.95,
"learning_rate": 1.602866487097855e-05,
"loss": 1.6026,
"step": 1447
},
{
"epoch": 0.95,
"learning_rate": 1.6023033328116863e-05,
"loss": 1.2689,
"step": 1448
},
{
"epoch": 0.95,
"learning_rate": 1.601739878606032e-05,
"loss": 1.845,
"step": 1449
},
{
"epoch": 0.95,
"learning_rate": 1.6011761247614664e-05,
"loss": 1.615,
"step": 1450
},
{
"epoch": 0.95,
"learning_rate": 1.6006120715587133e-05,
"loss": 1.5094,
"step": 1451
},
{
"epoch": 0.95,
"learning_rate": 1.600047719278645e-05,
"loss": 1.5424,
"step": 1452
},
{
"epoch": 0.95,
"learning_rate": 1.5994830682022842e-05,
"loss": 1.4819,
"step": 1453
},
{
"epoch": 0.95,
"learning_rate": 1.5989181186108003e-05,
"loss": 1.4587,
"step": 1454
},
{
"epoch": 0.95,
"learning_rate": 1.5983528707855128e-05,
"loss": 1.6719,
"step": 1455
},
{
"epoch": 0.95,
"learning_rate": 1.597787325007889e-05,
"loss": 1.4875,
"step": 1456
},
{
"epoch": 0.95,
"learning_rate": 1.5972214815595446e-05,
"loss": 1.7498,
"step": 1457
},
{
"epoch": 0.95,
"learning_rate": 1.596655340722244e-05,
"loss": 1.6009,
"step": 1458
},
{
"epoch": 0.95,
"learning_rate": 1.5960889027779e-05,
"loss": 1.6307,
"step": 1459
},
{
"epoch": 0.95,
"learning_rate": 1.5955221680085715e-05,
"loss": 1.5637,
"step": 1460
},
{
"epoch": 0.95,
"learning_rate": 1.5949551366964675e-05,
"loss": 1.6304,
"step": 1461
},
{
"epoch": 0.96,
"learning_rate": 1.594387809123943e-05,
"loss": 1.7701,
"step": 1462
},
{
"epoch": 0.96,
"learning_rate": 1.5938201855735017e-05,
"loss": 1.3166,
"step": 1463
},
{
"epoch": 0.96,
"learning_rate": 1.593252266327794e-05,
"loss": 1.5759,
"step": 1464
},
{
"epoch": 0.96,
"learning_rate": 1.5926840516696174e-05,
"loss": 1.6532,
"step": 1465
},
{
"epoch": 0.96,
"learning_rate": 1.592115541881917e-05,
"loss": 1.626,
"step": 1466
},
{
"epoch": 0.96,
"learning_rate": 1.5915467372477843e-05,
"loss": 1.6911,
"step": 1467
},
{
"epoch": 0.96,
"learning_rate": 1.5909776380504583e-05,
"loss": 1.5162,
"step": 1468
},
{
"epoch": 0.96,
"learning_rate": 1.5904082445733246e-05,
"loss": 1.4394,
"step": 1469
},
{
"epoch": 0.96,
"learning_rate": 1.5898385570999146e-05,
"loss": 1.676,
"step": 1470
},
{
"epoch": 0.96,
"learning_rate": 1.589268575913907e-05,
"loss": 1.437,
"step": 1471
},
{
"epoch": 0.96,
"learning_rate": 1.588698301299126e-05,
"loss": 1.3831,
"step": 1472
},
{
"epoch": 0.96,
"learning_rate": 1.5881277335395425e-05,
"loss": 1.3165,
"step": 1473
},
{
"epoch": 0.96,
"learning_rate": 1.5875568729192728e-05,
"loss": 1.3961,
"step": 1474
},
{
"epoch": 0.96,
"learning_rate": 1.5869857197225792e-05,
"loss": 1.438,
"step": 1475
},
{
"epoch": 0.96,
"learning_rate": 1.5864142742338707e-05,
"loss": 1.6099,
"step": 1476
},
{
"epoch": 0.97,
"learning_rate": 1.5858425367377002e-05,
"loss": 1.4836,
"step": 1477
},
{
"epoch": 0.97,
"learning_rate": 1.5852705075187674e-05,
"loss": 1.4785,
"step": 1478
},
{
"epoch": 0.97,
"learning_rate": 1.584698186861916e-05,
"loss": 1.6087,
"step": 1479
},
{
"epoch": 0.97,
"learning_rate": 1.584125575052136e-05,
"loss": 1.4407,
"step": 1480
},
{
"epoch": 0.97,
"learning_rate": 1.5835526723745617e-05,
"loss": 1.5133,
"step": 1481
},
{
"epoch": 0.97,
"learning_rate": 1.5829794791144723e-05,
"loss": 1.3278,
"step": 1482
},
{
"epoch": 0.97,
"learning_rate": 1.582405995557292e-05,
"loss": 1.5904,
"step": 1483
},
{
"epoch": 0.97,
"learning_rate": 1.5818322219885888e-05,
"loss": 1.5659,
"step": 1484
},
{
"epoch": 0.97,
"learning_rate": 1.5812581586940767e-05,
"loss": 1.53,
"step": 1485
},
{
"epoch": 0.97,
"learning_rate": 1.580683805959612e-05,
"loss": 1.555,
"step": 1486
},
{
"epoch": 0.97,
"learning_rate": 1.5801091640711966e-05,
"loss": 1.5431,
"step": 1487
},
{
"epoch": 0.97,
"learning_rate": 1.5795342333149757e-05,
"loss": 1.5262,
"step": 1488
},
{
"epoch": 0.97,
"learning_rate": 1.5789590139772384e-05,
"loss": 1.4655,
"step": 1489
},
{
"epoch": 0.97,
"learning_rate": 1.5783835063444176e-05,
"loss": 1.5653,
"step": 1490
},
{
"epoch": 0.97,
"learning_rate": 1.57780771070309e-05,
"loss": 1.3519,
"step": 1491
},
{
"epoch": 0.98,
"learning_rate": 1.577231627339975e-05,
"loss": 1.3815,
"step": 1492
},
{
"epoch": 0.98,
"learning_rate": 1.5766552565419366e-05,
"loss": 1.4251,
"step": 1493
},
{
"epoch": 0.98,
"learning_rate": 1.57607859859598e-05,
"loss": 1.3859,
"step": 1494
},
{
"epoch": 0.98,
"learning_rate": 1.5755016537892553e-05,
"loss": 1.5416,
"step": 1495
},
{
"epoch": 0.98,
"learning_rate": 1.5749244224090537e-05,
"loss": 1.4639,
"step": 1496
},
{
"epoch": 0.98,
"learning_rate": 1.5743469047428113e-05,
"loss": 1.4823,
"step": 1497
},
{
"epoch": 0.98,
"learning_rate": 1.573769101078104e-05,
"loss": 1.6074,
"step": 1498
},
{
"epoch": 0.98,
"learning_rate": 1.5731910117026528e-05,
"loss": 1.4769,
"step": 1499
},
{
"epoch": 0.98,
"learning_rate": 1.572612636904319e-05,
"loss": 1.3847,
"step": 1500
}
],
"max_steps": 4590,
"num_train_epochs": 3,
"total_flos": 1.7487141458149376e+17,
"trial_name": null,
"trial_params": null
}