UserGPT / trainer_state.json
furau's picture
Upload 22 files
deb437e verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.986072423398329,
"global_step": 402,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 1.5384615384615387e-06,
"loss": 3.6566,
"step": 1
},
{
"epoch": 0.01,
"learning_rate": 3.0769230769230774e-06,
"loss": 3.7319,
"step": 2
},
{
"epoch": 0.02,
"learning_rate": 4.615384615384616e-06,
"loss": 2.9617,
"step": 3
},
{
"epoch": 0.03,
"learning_rate": 6.153846153846155e-06,
"loss": 2.7649,
"step": 4
},
{
"epoch": 0.04,
"learning_rate": 7.692307692307694e-06,
"loss": 2.8078,
"step": 5
},
{
"epoch": 0.04,
"learning_rate": 9.230769230769232e-06,
"loss": 2.5837,
"step": 6
},
{
"epoch": 0.05,
"learning_rate": 1.076923076923077e-05,
"loss": 2.5011,
"step": 7
},
{
"epoch": 0.06,
"learning_rate": 1.230769230769231e-05,
"loss": 2.3675,
"step": 8
},
{
"epoch": 0.07,
"learning_rate": 1.3846153846153847e-05,
"loss": 2.1685,
"step": 9
},
{
"epoch": 0.07,
"learning_rate": 1.5384615384615387e-05,
"loss": 2.1721,
"step": 10
},
{
"epoch": 0.08,
"learning_rate": 1.6923076923076924e-05,
"loss": 2.1973,
"step": 11
},
{
"epoch": 0.09,
"learning_rate": 1.8461538461538465e-05,
"loss": 2.1513,
"step": 12
},
{
"epoch": 0.1,
"learning_rate": 2e-05,
"loss": 2.2307,
"step": 13
},
{
"epoch": 0.1,
"learning_rate": 1.9999673886943734e-05,
"loss": 2.0475,
"step": 14
},
{
"epoch": 0.11,
"learning_rate": 1.999869556904488e-05,
"loss": 2.2153,
"step": 15
},
{
"epoch": 0.12,
"learning_rate": 1.9997065110111884e-05,
"loss": 1.8805,
"step": 16
},
{
"epoch": 0.13,
"learning_rate": 1.9994782616487538e-05,
"loss": 1.9966,
"step": 17
},
{
"epoch": 0.13,
"learning_rate": 1.9991848237042037e-05,
"loss": 2.0823,
"step": 18
},
{
"epoch": 0.14,
"learning_rate": 1.9988262163163265e-05,
"loss": 2.098,
"step": 19
},
{
"epoch": 0.15,
"learning_rate": 1.998402462874433e-05,
"loss": 2.0602,
"step": 20
},
{
"epoch": 0.16,
"learning_rate": 1.9979135910168292e-05,
"loss": 1.9297,
"step": 21
},
{
"epoch": 0.16,
"learning_rate": 1.9973596326290136e-05,
"loss": 1.9211,
"step": 22
},
{
"epoch": 0.17,
"learning_rate": 1.9967406238415998e-05,
"loss": 2.0546,
"step": 23
},
{
"epoch": 0.18,
"learning_rate": 1.9960566050279568e-05,
"loss": 1.9672,
"step": 24
},
{
"epoch": 0.19,
"learning_rate": 1.9953076208015772e-05,
"loss": 2.0002,
"step": 25
},
{
"epoch": 0.19,
"learning_rate": 1.994493720013169e-05,
"loss": 2.0499,
"step": 26
},
{
"epoch": 0.2,
"learning_rate": 1.9936149557474668e-05,
"loss": 2.0324,
"step": 27
},
{
"epoch": 0.21,
"learning_rate": 1.9926713853197696e-05,
"loss": 1.9641,
"step": 28
},
{
"epoch": 0.22,
"learning_rate": 1.991663070272206e-05,
"loss": 1.8782,
"step": 29
},
{
"epoch": 0.22,
"learning_rate": 1.9905900763697152e-05,
"loss": 2.0222,
"step": 30
},
{
"epoch": 0.23,
"learning_rate": 1.9894524735957624e-05,
"loss": 1.927,
"step": 31
},
{
"epoch": 0.24,
"learning_rate": 1.9882503361477707e-05,
"loss": 1.9989,
"step": 32
},
{
"epoch": 0.25,
"learning_rate": 1.9869837424322827e-05,
"loss": 1.9593,
"step": 33
},
{
"epoch": 0.25,
"learning_rate": 1.9856527750598493e-05,
"loss": 1.96,
"step": 34
},
{
"epoch": 0.26,
"learning_rate": 1.9842575208396374e-05,
"loss": 1.9815,
"step": 35
},
{
"epoch": 0.27,
"learning_rate": 1.9827980707737704e-05,
"loss": 2.001,
"step": 36
},
{
"epoch": 0.27,
"learning_rate": 1.9812745200513926e-05,
"loss": 1.9909,
"step": 37
},
{
"epoch": 0.28,
"learning_rate": 1.979686968042461e-05,
"loss": 1.9635,
"step": 38
},
{
"epoch": 0.29,
"learning_rate": 1.9780355182912626e-05,
"loss": 2.0265,
"step": 39
},
{
"epoch": 0.3,
"learning_rate": 1.976320278509663e-05,
"loss": 1.7656,
"step": 40
},
{
"epoch": 0.3,
"learning_rate": 1.974541360570079e-05,
"loss": 1.9266,
"step": 41
},
{
"epoch": 0.31,
"learning_rate": 1.9726988804981847e-05,
"loss": 1.9298,
"step": 42
},
{
"epoch": 0.32,
"learning_rate": 1.970792958465341e-05,
"loss": 2.0026,
"step": 43
},
{
"epoch": 0.33,
"learning_rate": 1.9688237187807594e-05,
"loss": 2.0285,
"step": 44
},
{
"epoch": 0.33,
"learning_rate": 1.9667912898833953e-05,
"loss": 1.9038,
"step": 45
},
{
"epoch": 0.34,
"learning_rate": 1.9646958043335678e-05,
"loss": 1.8253,
"step": 46
},
{
"epoch": 0.35,
"learning_rate": 1.9625373988043167e-05,
"loss": 1.8563,
"step": 47
},
{
"epoch": 0.36,
"learning_rate": 1.9603162140724863e-05,
"loss": 1.9414,
"step": 48
},
{
"epoch": 0.36,
"learning_rate": 1.958032395009545e-05,
"loss": 1.8428,
"step": 49
},
{
"epoch": 0.37,
"learning_rate": 1.9556860905721363e-05,
"loss": 1.9282,
"step": 50
},
{
"epoch": 0.38,
"learning_rate": 1.9532774537923617e-05,
"loss": 2.091,
"step": 51
},
{
"epoch": 0.39,
"learning_rate": 1.950806641767802e-05,
"loss": 1.925,
"step": 52
},
{
"epoch": 0.39,
"learning_rate": 1.9482738156512694e-05,
"loss": 1.9726,
"step": 53
},
{
"epoch": 0.4,
"learning_rate": 1.9456791406402964e-05,
"loss": 1.9473,
"step": 54
},
{
"epoch": 0.41,
"learning_rate": 1.9430227859663634e-05,
"loss": 1.9427,
"step": 55
},
{
"epoch": 0.42,
"learning_rate": 1.940304924883858e-05,
"loss": 1.835,
"step": 56
},
{
"epoch": 0.42,
"learning_rate": 1.9375257346587776e-05,
"loss": 1.9231,
"step": 57
},
{
"epoch": 0.43,
"learning_rate": 1.934685396557165e-05,
"loss": 1.9197,
"step": 58
},
{
"epoch": 0.44,
"learning_rate": 1.931784095833289e-05,
"loss": 1.9501,
"step": 59
},
{
"epoch": 0.45,
"learning_rate": 1.9288220217175583e-05,
"loss": 1.9518,
"step": 60
},
{
"epoch": 0.45,
"learning_rate": 1.9257993674041814e-05,
"loss": 1.9073,
"step": 61
},
{
"epoch": 0.46,
"learning_rate": 1.9227163300385662e-05,
"loss": 1.8463,
"step": 62
},
{
"epoch": 0.47,
"learning_rate": 1.9195731107044596e-05,
"loss": 1.8878,
"step": 63
},
{
"epoch": 0.48,
"learning_rate": 1.9163699144108343e-05,
"loss": 1.8452,
"step": 64
},
{
"epoch": 0.48,
"learning_rate": 1.9131069500785173e-05,
"loss": 1.9069,
"step": 65
},
{
"epoch": 0.49,
"learning_rate": 1.9097844305265625e-05,
"loss": 1.9706,
"step": 66
},
{
"epoch": 0.5,
"learning_rate": 1.906402572458371e-05,
"loss": 1.8499,
"step": 67
},
{
"epoch": 0.51,
"learning_rate": 1.9029615964475572e-05,
"loss": 1.8589,
"step": 68
},
{
"epoch": 0.51,
"learning_rate": 1.8994617269235615e-05,
"loss": 1.7905,
"step": 69
},
{
"epoch": 0.52,
"learning_rate": 1.8959031921570136e-05,
"loss": 1.8836,
"step": 70
},
{
"epoch": 0.53,
"learning_rate": 1.892286224244843e-05,
"loss": 1.8668,
"step": 71
},
{
"epoch": 0.53,
"learning_rate": 1.8886110590951417e-05,
"loss": 2.0072,
"step": 72
},
{
"epoch": 0.54,
"learning_rate": 1.8848779364117774e-05,
"loss": 1.9274,
"step": 73
},
{
"epoch": 0.55,
"learning_rate": 1.88108709967876e-05,
"loss": 1.9034,
"step": 74
},
{
"epoch": 0.56,
"learning_rate": 1.87723879614436e-05,
"loss": 1.8714,
"step": 75
},
{
"epoch": 0.56,
"learning_rate": 1.873333276804983e-05,
"loss": 1.8106,
"step": 76
},
{
"epoch": 0.57,
"learning_rate": 1.869370796388798e-05,
"loss": 1.9745,
"step": 77
},
{
"epoch": 0.58,
"learning_rate": 1.865351613339125e-05,
"loss": 1.8142,
"step": 78
},
{
"epoch": 0.59,
"learning_rate": 1.861275989797578e-05,
"loss": 1.906,
"step": 79
},
{
"epoch": 0.59,
"learning_rate": 1.8571441915869663e-05,
"loss": 1.7411,
"step": 80
},
{
"epoch": 0.6,
"learning_rate": 1.852956488193959e-05,
"loss": 1.9871,
"step": 81
},
{
"epoch": 0.61,
"learning_rate": 1.848713152751506e-05,
"loss": 1.8511,
"step": 82
},
{
"epoch": 0.62,
"learning_rate": 1.8444144620210255e-05,
"loss": 2.0191,
"step": 83
},
{
"epoch": 0.62,
"learning_rate": 1.8400606963743517e-05,
"loss": 1.9095,
"step": 84
},
{
"epoch": 0.63,
"learning_rate": 1.8356521397754495e-05,
"loss": 1.7602,
"step": 85
},
{
"epoch": 0.64,
"learning_rate": 1.8311890797618918e-05,
"loss": 1.9675,
"step": 86
},
{
"epoch": 0.65,
"learning_rate": 1.8266718074261064e-05,
"loss": 1.8566,
"step": 87
},
{
"epoch": 0.65,
"learning_rate": 1.822100617396391e-05,
"loss": 1.7544,
"step": 88
},
{
"epoch": 0.66,
"learning_rate": 1.8174758078176963e-05,
"loss": 1.8744,
"step": 89
},
{
"epoch": 0.67,
"learning_rate": 1.8127976803321793e-05,
"loss": 1.9445,
"step": 90
},
{
"epoch": 0.68,
"learning_rate": 1.8080665400595303e-05,
"loss": 1.9952,
"step": 91
},
{
"epoch": 0.68,
"learning_rate": 1.8032826955770723e-05,
"loss": 1.8672,
"step": 92
},
{
"epoch": 0.69,
"learning_rate": 1.7984464588996342e-05,
"loss": 1.8689,
"step": 93
},
{
"epoch": 0.7,
"learning_rate": 1.7935581454592005e-05,
"loss": 1.7157,
"step": 94
},
{
"epoch": 0.71,
"learning_rate": 1.7886180740843385e-05,
"loss": 1.9338,
"step": 95
},
{
"epoch": 0.71,
"learning_rate": 1.7836265669794032e-05,
"loss": 1.8889,
"step": 96
},
{
"epoch": 0.72,
"learning_rate": 1.7785839497035222e-05,
"loss": 1.7935,
"step": 97
},
{
"epoch": 0.73,
"learning_rate": 1.7734905511493614e-05,
"loss": 1.955,
"step": 98
},
{
"epoch": 0.74,
"learning_rate": 1.768346703521675e-05,
"loss": 1.8675,
"step": 99
},
{
"epoch": 0.74,
"learning_rate": 1.763152742315637e-05,
"loss": 1.891,
"step": 100
},
{
"epoch": 0.75,
"learning_rate": 1.75790900629496e-05,
"loss": 1.921,
"step": 101
},
{
"epoch": 0.76,
"learning_rate": 1.7526158374697997e-05,
"loss": 1.9667,
"step": 102
},
{
"epoch": 0.77,
"learning_rate": 1.7472735810744496e-05,
"loss": 1.8718,
"step": 103
},
{
"epoch": 0.77,
"learning_rate": 1.7418825855448208e-05,
"loss": 1.8478,
"step": 104
},
{
"epoch": 0.78,
"learning_rate": 1.7364432024957192e-05,
"loss": 1.8649,
"step": 105
},
{
"epoch": 0.79,
"learning_rate": 1.7309557866979113e-05,
"loss": 1.8813,
"step": 106
},
{
"epoch": 0.79,
"learning_rate": 1.7254206960549844e-05,
"loss": 1.954,
"step": 107
},
{
"epoch": 0.8,
"learning_rate": 1.7198382915800034e-05,
"loss": 1.814,
"step": 108
},
{
"epoch": 0.81,
"learning_rate": 1.7142089373719654e-05,
"loss": 1.8448,
"step": 109
},
{
"epoch": 0.82,
"learning_rate": 1.7085330005920516e-05,
"loss": 1.8477,
"step": 110
},
{
"epoch": 0.82,
"learning_rate": 1.70281085143968e-05,
"loss": 1.7807,
"step": 111
},
{
"epoch": 0.83,
"learning_rate": 1.6970428631283602e-05,
"loss": 1.8641,
"step": 112
},
{
"epoch": 0.84,
"learning_rate": 1.6912294118613518e-05,
"loss": 1.906,
"step": 113
},
{
"epoch": 0.85,
"learning_rate": 1.6853708768071265e-05,
"loss": 1.8747,
"step": 114
},
{
"epoch": 0.85,
"learning_rate": 1.679467640074639e-05,
"loss": 1.9149,
"step": 115
},
{
"epoch": 0.86,
"learning_rate": 1.6735200866884037e-05,
"loss": 1.8116,
"step": 116
},
{
"epoch": 0.87,
"learning_rate": 1.667528604563383e-05,
"loss": 2.0109,
"step": 117
},
{
"epoch": 0.88,
"learning_rate": 1.6614935844796863e-05,
"loss": 1.8141,
"step": 118
},
{
"epoch": 0.88,
"learning_rate": 1.6554154200570828e-05,
"loss": 1.9185,
"step": 119
},
{
"epoch": 0.89,
"learning_rate": 1.649294507729327e-05,
"loss": 1.892,
"step": 120
},
{
"epoch": 0.9,
"learning_rate": 1.643131246718305e-05,
"loss": 1.8075,
"step": 121
},
{
"epoch": 0.91,
"learning_rate": 1.6369260390079933e-05,
"loss": 1.8891,
"step": 122
},
{
"epoch": 0.91,
"learning_rate": 1.6306792893182422e-05,
"loss": 1.8502,
"step": 123
},
{
"epoch": 0.92,
"learning_rate": 1.6243914050783783e-05,
"loss": 1.9021,
"step": 124
},
{
"epoch": 0.93,
"learning_rate": 1.6180627964006313e-05,
"loss": 1.694,
"step": 125
},
{
"epoch": 0.94,
"learning_rate": 1.6116938760533843e-05,
"loss": 1.8065,
"step": 126
},
{
"epoch": 0.94,
"learning_rate": 1.6052850594342534e-05,
"loss": 1.8479,
"step": 127
},
{
"epoch": 0.95,
"learning_rate": 1.5988367645429938e-05,
"loss": 1.9265,
"step": 128
},
{
"epoch": 0.96,
"learning_rate": 1.592349411954236e-05,
"loss": 1.8542,
"step": 129
},
{
"epoch": 0.97,
"learning_rate": 1.585823424790056e-05,
"loss": 1.7848,
"step": 130
},
{
"epoch": 0.97,
"learning_rate": 1.579259228692378e-05,
"loss": 1.9324,
"step": 131
},
{
"epoch": 0.98,
"learning_rate": 1.5726572517952122e-05,
"loss": 1.773,
"step": 132
},
{
"epoch": 0.99,
"learning_rate": 1.566017924696731e-05,
"loss": 1.8328,
"step": 133
},
{
"epoch": 1.0,
"learning_rate": 1.559341680431185e-05,
"loss": 1.8331,
"step": 134
},
{
"epoch": 1.0,
"learning_rate": 1.5526289544406585e-05,
"loss": 1.8482,
"step": 135
},
{
"epoch": 1.01,
"learning_rate": 1.545880184546669e-05,
"loss": 1.5427,
"step": 136
},
{
"epoch": 1.02,
"learning_rate": 1.539095810921612e-05,
"loss": 1.5302,
"step": 137
},
{
"epoch": 1.03,
"learning_rate": 1.532276276060051e-05,
"loss": 1.6252,
"step": 138
},
{
"epoch": 1.03,
"learning_rate": 1.5254220247498572e-05,
"loss": 1.5662,
"step": 139
},
{
"epoch": 1.04,
"learning_rate": 1.518533504043199e-05,
"loss": 1.5451,
"step": 140
},
{
"epoch": 1.05,
"learning_rate": 1.5116111632273848e-05,
"loss": 1.6063,
"step": 141
},
{
"epoch": 1.05,
"learning_rate": 1.5046554537955587e-05,
"loss": 1.5512,
"step": 142
},
{
"epoch": 1.06,
"learning_rate": 1.4976668294172528e-05,
"loss": 1.5608,
"step": 143
},
{
"epoch": 1.07,
"learning_rate": 1.4906457459087977e-05,
"loss": 1.5362,
"step": 144
},
{
"epoch": 1.08,
"learning_rate": 1.4835926612035944e-05,
"loss": 1.6489,
"step": 145
},
{
"epoch": 1.08,
"learning_rate": 1.4765080353222447e-05,
"loss": 1.6442,
"step": 146
},
{
"epoch": 1.09,
"learning_rate": 1.4693923303425479e-05,
"loss": 1.5944,
"step": 147
},
{
"epoch": 1.1,
"learning_rate": 1.462246010369364e-05,
"loss": 1.5571,
"step": 148
},
{
"epoch": 1.11,
"learning_rate": 1.4550695415043421e-05,
"loss": 1.5987,
"step": 149
},
{
"epoch": 1.11,
"learning_rate": 1.4478633918155216e-05,
"loss": 1.5843,
"step": 150
},
{
"epoch": 1.12,
"learning_rate": 1.4406280313068019e-05,
"loss": 1.51,
"step": 151
},
{
"epoch": 1.13,
"learning_rate": 1.4333639318872891e-05,
"loss": 1.5727,
"step": 152
},
{
"epoch": 1.14,
"learning_rate": 1.4260715673405157e-05,
"loss": 1.6347,
"step": 153
},
{
"epoch": 1.14,
"learning_rate": 1.4187514132935393e-05,
"loss": 1.5709,
"step": 154
},
{
"epoch": 1.15,
"learning_rate": 1.4114039471859221e-05,
"loss": 1.6186,
"step": 155
},
{
"epoch": 1.16,
"learning_rate": 1.4040296482385893e-05,
"loss": 1.5457,
"step": 156
},
{
"epoch": 1.17,
"learning_rate": 1.3966289974225751e-05,
"loss": 1.4672,
"step": 157
},
{
"epoch": 1.17,
"learning_rate": 1.3892024774276496e-05,
"loss": 1.5874,
"step": 158
},
{
"epoch": 1.18,
"learning_rate": 1.3817505726308402e-05,
"loss": 1.5051,
"step": 159
},
{
"epoch": 1.19,
"learning_rate": 1.3742737690648362e-05,
"loss": 1.5036,
"step": 160
},
{
"epoch": 1.2,
"learning_rate": 1.3667725543862906e-05,
"loss": 1.5277,
"step": 161
},
{
"epoch": 1.2,
"learning_rate": 1.3592474178440116e-05,
"loss": 1.4715,
"step": 162
},
{
"epoch": 1.21,
"learning_rate": 1.351698850247055e-05,
"loss": 1.5668,
"step": 163
},
{
"epoch": 1.22,
"learning_rate": 1.34412734393271e-05,
"loss": 1.578,
"step": 164
},
{
"epoch": 1.23,
"learning_rate": 1.3365333927343906e-05,
"loss": 1.5374,
"step": 165
},
{
"epoch": 1.23,
"learning_rate": 1.3289174919494228e-05,
"loss": 1.5562,
"step": 166
},
{
"epoch": 1.24,
"learning_rate": 1.3212801383067431e-05,
"loss": 1.5418,
"step": 167
},
{
"epoch": 1.25,
"learning_rate": 1.3136218299344993e-05,
"loss": 1.4952,
"step": 168
},
{
"epoch": 1.26,
"learning_rate": 1.305943066327561e-05,
"loss": 1.5251,
"step": 169
},
{
"epoch": 1.26,
"learning_rate": 1.2982443483149423e-05,
"loss": 1.5562,
"step": 170
},
{
"epoch": 1.27,
"learning_rate": 1.2905261780271345e-05,
"loss": 1.535,
"step": 171
},
{
"epoch": 1.28,
"learning_rate": 1.2827890588633589e-05,
"loss": 1.474,
"step": 172
},
{
"epoch": 1.29,
"learning_rate": 1.2750334954587297e-05,
"loss": 1.5725,
"step": 173
},
{
"epoch": 1.29,
"learning_rate": 1.267259993651345e-05,
"loss": 1.4509,
"step": 174
},
{
"epoch": 1.3,
"learning_rate": 1.2594690604492906e-05,
"loss": 1.5969,
"step": 175
},
{
"epoch": 1.31,
"learning_rate": 1.2516612039975745e-05,
"loss": 1.477,
"step": 176
},
{
"epoch": 1.31,
"learning_rate": 1.2438369335449822e-05,
"loss": 1.4507,
"step": 177
},
{
"epoch": 1.32,
"learning_rate": 1.2359967594108643e-05,
"loss": 1.5572,
"step": 178
},
{
"epoch": 1.33,
"learning_rate": 1.22814119295185e-05,
"loss": 1.5147,
"step": 179
},
{
"epoch": 1.34,
"learning_rate": 1.2202707465284973e-05,
"loss": 1.5563,
"step": 180
},
{
"epoch": 1.34,
"learning_rate": 1.2123859334718732e-05,
"loss": 1.5903,
"step": 181
},
{
"epoch": 1.35,
"learning_rate": 1.2044872680500743e-05,
"loss": 1.5443,
"step": 182
},
{
"epoch": 1.36,
"learning_rate": 1.1965752654346853e-05,
"loss": 1.6039,
"step": 183
},
{
"epoch": 1.37,
"learning_rate": 1.188650441667177e-05,
"loss": 1.48,
"step": 184
},
{
"epoch": 1.37,
"learning_rate": 1.1807133136252491e-05,
"loss": 1.5275,
"step": 185
},
{
"epoch": 1.38,
"learning_rate": 1.172764398989118e-05,
"loss": 1.6416,
"step": 186
},
{
"epoch": 1.39,
"learning_rate": 1.1648042162077536e-05,
"loss": 1.6704,
"step": 187
},
{
"epoch": 1.4,
"learning_rate": 1.1568332844650623e-05,
"loss": 1.4922,
"step": 188
},
{
"epoch": 1.4,
"learning_rate": 1.1488521236460266e-05,
"loss": 1.5443,
"step": 189
},
{
"epoch": 1.41,
"learning_rate": 1.1408612543027963e-05,
"loss": 1.5277,
"step": 190
},
{
"epoch": 1.42,
"learning_rate": 1.1328611976207358e-05,
"loss": 1.5198,
"step": 191
},
{
"epoch": 1.43,
"learning_rate": 1.1248524753844325e-05,
"loss": 1.4403,
"step": 192
},
{
"epoch": 1.43,
"learning_rate": 1.116835609943663e-05,
"loss": 1.4591,
"step": 193
},
{
"epoch": 1.44,
"learning_rate": 1.1088111241793258e-05,
"loss": 1.5585,
"step": 194
},
{
"epoch": 1.45,
"learning_rate": 1.1007795414693361e-05,
"loss": 1.6089,
"step": 195
},
{
"epoch": 1.46,
"learning_rate": 1.0927413856544906e-05,
"loss": 1.6047,
"step": 196
},
{
"epoch": 1.46,
"learning_rate": 1.0846971810043017e-05,
"loss": 1.5849,
"step": 197
},
{
"epoch": 1.47,
"learning_rate": 1.0766474521828022e-05,
"loss": 1.495,
"step": 198
},
{
"epoch": 1.48,
"learning_rate": 1.0685927242143248e-05,
"loss": 1.5816,
"step": 199
},
{
"epoch": 1.49,
"learning_rate": 1.0605335224492617e-05,
"loss": 1.4354,
"step": 200
},
{
"epoch": 1.49,
"learning_rate": 1.0524703725297957e-05,
"loss": 1.6279,
"step": 201
},
{
"epoch": 1.5,
"learning_rate": 1.0444038003556201e-05,
"loss": 1.4828,
"step": 202
},
{
"epoch": 1.51,
"learning_rate": 1.0363343320496358e-05,
"loss": 1.4755,
"step": 203
},
{
"epoch": 1.52,
"learning_rate": 1.0282624939236367e-05,
"loss": 1.5889,
"step": 204
},
{
"epoch": 1.52,
"learning_rate": 1.0201888124439836e-05,
"loss": 1.583,
"step": 205
},
{
"epoch": 1.53,
"learning_rate": 1.0121138141972649e-05,
"loss": 1.6678,
"step": 206
},
{
"epoch": 1.54,
"learning_rate": 1.004038025855952e-05,
"loss": 1.467,
"step": 207
},
{
"epoch": 1.55,
"learning_rate": 9.959619741440486e-06,
"loss": 1.4655,
"step": 208
},
{
"epoch": 1.55,
"learning_rate": 9.878861858027355e-06,
"loss": 1.4602,
"step": 209
},
{
"epoch": 1.56,
"learning_rate": 9.798111875560167e-06,
"loss": 1.6014,
"step": 210
},
{
"epoch": 1.57,
"learning_rate": 9.717375060763636e-06,
"loss": 1.5798,
"step": 211
},
{
"epoch": 1.57,
"learning_rate": 9.636656679503647e-06,
"loss": 1.6139,
"step": 212
},
{
"epoch": 1.58,
"learning_rate": 9.555961996443802e-06,
"loss": 1.6,
"step": 213
},
{
"epoch": 1.59,
"learning_rate": 9.475296274702044e-06,
"loss": 1.4852,
"step": 214
},
{
"epoch": 1.6,
"learning_rate": 9.394664775507386e-06,
"loss": 1.568,
"step": 215
},
{
"epoch": 1.6,
"learning_rate": 9.314072757856752e-06,
"loss": 1.6171,
"step": 216
},
{
"epoch": 1.61,
"learning_rate": 9.233525478171985e-06,
"loss": 1.5977,
"step": 217
},
{
"epoch": 1.62,
"learning_rate": 9.153028189956986e-06,
"loss": 1.5454,
"step": 218
},
{
"epoch": 1.63,
"learning_rate": 9.072586143455096e-06,
"loss": 1.6758,
"step": 219
},
{
"epoch": 1.63,
"learning_rate": 8.99220458530664e-06,
"loss": 1.5476,
"step": 220
},
{
"epoch": 1.64,
"learning_rate": 8.911888758206747e-06,
"loss": 1.569,
"step": 221
},
{
"epoch": 1.65,
"learning_rate": 8.831643900563372e-06,
"loss": 1.5424,
"step": 222
},
{
"epoch": 1.66,
"learning_rate": 8.751475246155678e-06,
"loss": 1.5137,
"step": 223
},
{
"epoch": 1.66,
"learning_rate": 8.671388023792642e-06,
"loss": 1.4392,
"step": 224
},
{
"epoch": 1.67,
"learning_rate": 8.59138745697204e-06,
"loss": 1.5653,
"step": 225
},
{
"epoch": 1.68,
"learning_rate": 8.511478763539737e-06,
"loss": 1.4838,
"step": 226
},
{
"epoch": 1.69,
"learning_rate": 8.43166715534938e-06,
"loss": 1.5887,
"step": 227
},
{
"epoch": 1.69,
"learning_rate": 8.351957837922467e-06,
"loss": 1.5384,
"step": 228
},
{
"epoch": 1.7,
"learning_rate": 8.27235601010882e-06,
"loss": 1.4996,
"step": 229
},
{
"epoch": 1.71,
"learning_rate": 8.192866863747516e-06,
"loss": 1.6276,
"step": 230
},
{
"epoch": 1.72,
"learning_rate": 8.113495583328234e-06,
"loss": 1.5748,
"step": 231
},
{
"epoch": 1.72,
"learning_rate": 8.034247345653148e-06,
"loss": 1.4351,
"step": 232
},
{
"epoch": 1.73,
"learning_rate": 7.955127319499259e-06,
"loss": 1.4449,
"step": 233
},
{
"epoch": 1.74,
"learning_rate": 7.876140665281273e-06,
"loss": 1.3847,
"step": 234
},
{
"epoch": 1.75,
"learning_rate": 7.79729253471503e-06,
"loss": 1.5532,
"step": 235
},
{
"epoch": 1.75,
"learning_rate": 7.718588070481501e-06,
"loss": 1.48,
"step": 236
},
{
"epoch": 1.76,
"learning_rate": 7.64003240589136e-06,
"loss": 1.4335,
"step": 237
},
{
"epoch": 1.77,
"learning_rate": 7.561630664550179e-06,
"loss": 1.5568,
"step": 238
},
{
"epoch": 1.78,
"learning_rate": 7.483387960024261e-06,
"loss": 1.577,
"step": 239
},
{
"epoch": 1.78,
"learning_rate": 7.405309395507098e-06,
"loss": 1.4819,
"step": 240
},
{
"epoch": 1.79,
"learning_rate": 7.327400063486554e-06,
"loss": 1.4613,
"step": 241
},
{
"epoch": 1.8,
"learning_rate": 7.249665045412704e-06,
"loss": 1.585,
"step": 242
},
{
"epoch": 1.81,
"learning_rate": 7.172109411366417e-06,
"loss": 1.5929,
"step": 243
},
{
"epoch": 1.81,
"learning_rate": 7.0947382197286566e-06,
"loss": 1.4685,
"step": 244
},
{
"epoch": 1.82,
"learning_rate": 7.017556516850581e-06,
"loss": 1.5956,
"step": 245
},
{
"epoch": 1.83,
"learning_rate": 6.94056933672439e-06,
"loss": 1.5801,
"step": 246
},
{
"epoch": 1.83,
"learning_rate": 6.863781700655013e-06,
"loss": 1.5173,
"step": 247
},
{
"epoch": 1.84,
"learning_rate": 6.787198616932571e-06,
"loss": 1.6037,
"step": 248
},
{
"epoch": 1.85,
"learning_rate": 6.710825080505775e-06,
"loss": 1.536,
"step": 249
},
{
"epoch": 1.86,
"learning_rate": 6.634666072656097e-06,
"loss": 1.5086,
"step": 250
},
{
"epoch": 1.86,
"learning_rate": 6.558726560672899e-06,
"loss": 1.4448,
"step": 251
},
{
"epoch": 1.87,
"learning_rate": 6.483011497529457e-06,
"loss": 1.4841,
"step": 252
},
{
"epoch": 1.88,
"learning_rate": 6.4075258215598875e-06,
"loss": 1.418,
"step": 253
},
{
"epoch": 1.89,
"learning_rate": 6.332274456137097e-06,
"loss": 1.5594,
"step": 254
},
{
"epoch": 1.89,
"learning_rate": 6.257262309351637e-06,
"loss": 1.5807,
"step": 255
},
{
"epoch": 1.9,
"learning_rate": 6.182494273691602e-06,
"loss": 1.5188,
"step": 256
},
{
"epoch": 1.91,
"learning_rate": 6.107975225723506e-06,
"loss": 1.4614,
"step": 257
},
{
"epoch": 1.92,
"learning_rate": 6.033710025774253e-06,
"loss": 1.5837,
"step": 258
},
{
"epoch": 1.92,
"learning_rate": 5.959703517614107e-06,
"loss": 1.5286,
"step": 259
},
{
"epoch": 1.93,
"learning_rate": 5.885960528140784e-06,
"loss": 1.4598,
"step": 260
},
{
"epoch": 1.94,
"learning_rate": 5.812485867064608e-06,
"loss": 1.5025,
"step": 261
},
{
"epoch": 1.95,
"learning_rate": 5.739284326594845e-06,
"loss": 1.5862,
"step": 262
},
{
"epoch": 1.95,
"learning_rate": 5.666360681127109e-06,
"loss": 1.418,
"step": 263
},
{
"epoch": 1.96,
"learning_rate": 5.59371968693198e-06,
"loss": 1.4598,
"step": 264
},
{
"epoch": 1.97,
"learning_rate": 5.521366081844788e-06,
"loss": 1.4986,
"step": 265
},
{
"epoch": 1.98,
"learning_rate": 5.449304584956582e-06,
"loss": 1.4822,
"step": 266
},
{
"epoch": 1.98,
"learning_rate": 5.377539896306363e-06,
"loss": 1.5696,
"step": 267
},
{
"epoch": 1.99,
"learning_rate": 5.306076696574522e-06,
"loss": 1.5305,
"step": 268
},
{
"epoch": 2.0,
"learning_rate": 5.234919646777557e-06,
"loss": 1.5481,
"step": 269
},
{
"epoch": 2.01,
"learning_rate": 5.164073387964057e-06,
"loss": 1.3277,
"step": 270
},
{
"epoch": 2.01,
"learning_rate": 5.093542540912024e-06,
"loss": 1.2237,
"step": 271
},
{
"epoch": 2.02,
"learning_rate": 5.023331705827477e-06,
"loss": 1.3044,
"step": 272
},
{
"epoch": 2.03,
"learning_rate": 4.953445462044415e-06,
"loss": 1.3071,
"step": 273
},
{
"epoch": 2.04,
"learning_rate": 4.883888367726153e-06,
"loss": 1.273,
"step": 274
},
{
"epoch": 2.04,
"learning_rate": 4.8146649595680104e-06,
"loss": 1.2474,
"step": 275
},
{
"epoch": 2.05,
"learning_rate": 4.74577975250143e-06,
"loss": 1.1685,
"step": 276
},
{
"epoch": 2.06,
"learning_rate": 4.6772372393994895e-06,
"loss": 1.2536,
"step": 277
},
{
"epoch": 2.06,
"learning_rate": 4.609041890783882e-06,
"loss": 1.3136,
"step": 278
},
{
"epoch": 2.07,
"learning_rate": 4.541198154533312e-06,
"loss": 1.2524,
"step": 279
},
{
"epoch": 2.08,
"learning_rate": 4.473710455593416e-06,
"loss": 1.4121,
"step": 280
},
{
"epoch": 2.09,
"learning_rate": 4.40658319568815e-06,
"loss": 1.2505,
"step": 281
},
{
"epoch": 2.09,
"learning_rate": 4.339820753032692e-06,
"loss": 1.1917,
"step": 282
},
{
"epoch": 2.1,
"learning_rate": 4.27342748204788e-06,
"loss": 1.1752,
"step": 283
},
{
"epoch": 2.11,
"learning_rate": 4.207407713076221e-06,
"loss": 1.3431,
"step": 284
},
{
"epoch": 2.12,
"learning_rate": 4.14176575209944e-06,
"loss": 1.2406,
"step": 285
},
{
"epoch": 2.12,
"learning_rate": 4.076505880457642e-06,
"loss": 1.1902,
"step": 286
},
{
"epoch": 2.13,
"learning_rate": 4.011632354570068e-06,
"loss": 1.388,
"step": 287
},
{
"epoch": 2.14,
"learning_rate": 3.947149405657469e-06,
"loss": 1.1345,
"step": 288
},
{
"epoch": 2.15,
"learning_rate": 3.88306123946616e-06,
"loss": 1.2937,
"step": 289
},
{
"epoch": 2.15,
"learning_rate": 3.8193720359936905e-06,
"loss": 1.2492,
"step": 290
},
{
"epoch": 2.16,
"learning_rate": 3.7560859492162184e-06,
"loss": 1.4007,
"step": 291
},
{
"epoch": 2.17,
"learning_rate": 3.69320710681758e-06,
"loss": 1.32,
"step": 292
},
{
"epoch": 2.18,
"learning_rate": 3.6307396099200686e-06,
"loss": 1.2589,
"step": 293
},
{
"epoch": 2.18,
"learning_rate": 3.5686875328169513e-06,
"loss": 1.2508,
"step": 294
},
{
"epoch": 2.19,
"learning_rate": 3.507054922706733e-06,
"loss": 1.2725,
"step": 295
},
{
"epoch": 2.2,
"learning_rate": 3.4458457994291763e-06,
"loss": 1.275,
"step": 296
},
{
"epoch": 2.21,
"learning_rate": 3.385064155203138e-06,
"loss": 1.2923,
"step": 297
},
{
"epoch": 2.21,
"learning_rate": 3.324713954366171e-06,
"loss": 1.2513,
"step": 298
},
{
"epoch": 2.22,
"learning_rate": 3.2647991331159632e-06,
"loss": 1.2202,
"step": 299
},
{
"epoch": 2.23,
"learning_rate": 3.2053235992536137e-06,
"loss": 1.1305,
"step": 300
},
{
"epoch": 2.24,
"learning_rate": 3.1462912319287376e-06,
"loss": 1.3371,
"step": 301
},
{
"epoch": 2.24,
"learning_rate": 3.0877058813864856e-06,
"loss": 1.257,
"step": 302
},
{
"epoch": 2.25,
"learning_rate": 3.0295713687164006e-06,
"loss": 1.1057,
"step": 303
},
{
"epoch": 2.26,
"learning_rate": 2.9718914856032033e-06,
"loss": 1.3259,
"step": 304
},
{
"epoch": 2.27,
"learning_rate": 2.9146699940794864e-06,
"loss": 1.3001,
"step": 305
},
{
"epoch": 2.27,
"learning_rate": 2.8579106262803467e-06,
"loss": 1.2887,
"step": 306
},
{
"epoch": 2.28,
"learning_rate": 2.801617084199967e-06,
"loss": 1.2804,
"step": 307
},
{
"epoch": 2.29,
"learning_rate": 2.7457930394501564e-06,
"loss": 1.3318,
"step": 308
},
{
"epoch": 2.3,
"learning_rate": 2.6904421330208886e-06,
"loss": 1.2538,
"step": 309
},
{
"epoch": 2.3,
"learning_rate": 2.635567975042809e-06,
"loss": 1.2637,
"step": 310
},
{
"epoch": 2.31,
"learning_rate": 2.5811741445517947e-06,
"loss": 1.251,
"step": 311
},
{
"epoch": 2.32,
"learning_rate": 2.527264189255507e-06,
"loss": 1.3331,
"step": 312
},
{
"epoch": 2.32,
"learning_rate": 2.473841625302006e-06,
"loss": 1.3239,
"step": 313
},
{
"epoch": 2.33,
"learning_rate": 2.420909937050405e-06,
"loss": 1.385,
"step": 314
},
{
"epoch": 2.34,
"learning_rate": 2.3684725768436333e-06,
"loss": 1.2563,
"step": 315
},
{
"epoch": 2.35,
"learning_rate": 2.3165329647832525e-06,
"loss": 1.2487,
"step": 316
},
{
"epoch": 2.35,
"learning_rate": 2.26509448850639e-06,
"loss": 1.2652,
"step": 317
},
{
"epoch": 2.36,
"learning_rate": 2.214160502964783e-06,
"loss": 1.3718,
"step": 318
},
{
"epoch": 2.37,
"learning_rate": 2.163734330205971e-06,
"loss": 1.1692,
"step": 319
},
{
"epoch": 2.38,
"learning_rate": 2.1138192591566177e-06,
"loss": 1.2578,
"step": 320
},
{
"epoch": 2.38,
"learning_rate": 2.064418545407998e-06,
"loss": 1.1949,
"step": 321
},
{
"epoch": 2.39,
"learning_rate": 2.0155354110036607e-06,
"loss": 1.2732,
"step": 322
},
{
"epoch": 2.4,
"learning_rate": 1.967173044229278e-06,
"loss": 1.2938,
"step": 323
},
{
"epoch": 2.41,
"learning_rate": 1.9193345994046965e-06,
"loss": 1.2665,
"step": 324
},
{
"epoch": 2.41,
"learning_rate": 1.8720231966782065e-06,
"loss": 1.3421,
"step": 325
},
{
"epoch": 2.42,
"learning_rate": 1.8252419218230389e-06,
"loss": 1.3351,
"step": 326
},
{
"epoch": 2.43,
"learning_rate": 1.7789938260360907e-06,
"loss": 1.2473,
"step": 327
},
{
"epoch": 2.44,
"learning_rate": 1.7332819257389388e-06,
"loss": 1.2964,
"step": 328
},
{
"epoch": 2.44,
"learning_rate": 1.6881092023810853e-06,
"loss": 1.1983,
"step": 329
},
{
"epoch": 2.45,
"learning_rate": 1.6434786022455073e-06,
"loss": 1.2549,
"step": 330
},
{
"epoch": 2.46,
"learning_rate": 1.5993930362564835e-06,
"loss": 1.2283,
"step": 331
},
{
"epoch": 2.47,
"learning_rate": 1.5558553797897469e-06,
"loss": 1.1693,
"step": 332
},
{
"epoch": 2.47,
"learning_rate": 1.512868472484943e-06,
"loss": 1.2312,
"step": 333
},
{
"epoch": 2.48,
"learning_rate": 1.4704351180604126e-06,
"loss": 1.253,
"step": 334
},
{
"epoch": 2.49,
"learning_rate": 1.4285580841303382e-06,
"loss": 1.2178,
"step": 335
},
{
"epoch": 2.5,
"learning_rate": 1.3872401020242222e-06,
"loss": 1.2663,
"step": 336
},
{
"epoch": 2.5,
"learning_rate": 1.346483866608751e-06,
"loss": 1.2452,
"step": 337
},
{
"epoch": 2.51,
"learning_rate": 1.3062920361120224e-06,
"loss": 1.2539,
"step": 338
},
{
"epoch": 2.52,
"learning_rate": 1.2666672319501737e-06,
"loss": 1.1804,
"step": 339
},
{
"epoch": 2.53,
"learning_rate": 1.2276120385564006e-06,
"loss": 1.1672,
"step": 340
},
{
"epoch": 2.53,
"learning_rate": 1.1891290032124003e-06,
"loss": 1.2853,
"step": 341
},
{
"epoch": 2.54,
"learning_rate": 1.1512206358822264e-06,
"loss": 1.2424,
"step": 342
},
{
"epoch": 2.55,
"learning_rate": 1.1138894090485863e-06,
"loss": 1.2441,
"step": 343
},
{
"epoch": 2.56,
"learning_rate": 1.077137757551573e-06,
"loss": 1.2049,
"step": 344
},
{
"epoch": 2.56,
"learning_rate": 1.040968078429866e-06,
"loss": 1.2567,
"step": 345
},
{
"epoch": 2.57,
"learning_rate": 1.005382730764386e-06,
"loss": 1.2805,
"step": 346
},
{
"epoch": 2.58,
"learning_rate": 9.703840355244287e-07,
"loss": 1.2225,
"step": 347
},
{
"epoch": 2.58,
"learning_rate": 9.359742754162926e-07,
"loss": 1.3789,
"step": 348
},
{
"epoch": 2.59,
"learning_rate": 9.021556947343791e-07,
"loss": 1.2516,
"step": 349
},
{
"epoch": 2.6,
"learning_rate": 8.689304992148285e-07,
"loss": 1.3251,
"step": 350
},
{
"epoch": 2.61,
"learning_rate": 8.363008558916575e-07,
"loss": 1.263,
"step": 351
},
{
"epoch": 2.61,
"learning_rate": 8.042688929554076e-07,
"loss": 1.3264,
"step": 352
},
{
"epoch": 2.62,
"learning_rate": 7.728366996143399e-07,
"loss": 1.3477,
"step": 353
},
{
"epoch": 2.63,
"learning_rate": 7.420063259581856e-07,
"loss": 1.2613,
"step": 354
},
{
"epoch": 2.64,
"learning_rate": 7.117797828244177e-07,
"loss": 1.2239,
"step": 355
},
{
"epoch": 2.64,
"learning_rate": 6.821590416671108e-07,
"loss": 1.2255,
"step": 356
},
{
"epoch": 2.65,
"learning_rate": 6.531460344283513e-07,
"loss": 1.2252,
"step": 357
},
{
"epoch": 2.66,
"learning_rate": 6.247426534122292e-07,
"loss": 1.3576,
"step": 358
},
{
"epoch": 2.67,
"learning_rate": 5.969507511614225e-07,
"loss": 1.2192,
"step": 359
},
{
"epoch": 2.67,
"learning_rate": 5.697721403363699e-07,
"loss": 1.3762,
"step": 360
},
{
"epoch": 2.68,
"learning_rate": 5.432085935970388e-07,
"loss": 1.301,
"step": 361
},
{
"epoch": 2.69,
"learning_rate": 5.172618434873112e-07,
"loss": 1.319,
"step": 362
},
{
"epoch": 2.7,
"learning_rate": 4.919335823219817e-07,
"loss": 1.1958,
"step": 363
},
{
"epoch": 2.7,
"learning_rate": 4.672254620763839e-07,
"loss": 1.1333,
"step": 364
},
{
"epoch": 2.71,
"learning_rate": 4.4313909427863957e-07,
"loss": 1.3401,
"step": 365
},
{
"epoch": 2.72,
"learning_rate": 4.196760499045505e-07,
"loss": 1.1234,
"step": 366
},
{
"epoch": 2.73,
"learning_rate": 3.968378592751399e-07,
"loss": 1.2775,
"step": 367
},
{
"epoch": 2.73,
"learning_rate": 3.746260119568368e-07,
"loss": 1.3351,
"step": 368
},
{
"epoch": 2.74,
"learning_rate": 3.5304195666432396e-07,
"loss": 1.1144,
"step": 369
},
{
"epoch": 2.75,
"learning_rate": 3.320871011660498e-07,
"loss": 1.1784,
"step": 370
},
{
"epoch": 2.76,
"learning_rate": 3.1176281219240657e-07,
"loss": 1.2739,
"step": 371
},
{
"epoch": 2.76,
"learning_rate": 2.920704153465936e-07,
"loss": 1.1408,
"step": 372
},
{
"epoch": 2.77,
"learning_rate": 2.7301119501815464e-07,
"loss": 1.2877,
"step": 373
},
{
"epoch": 2.78,
"learning_rate": 2.5458639429921105e-07,
"loss": 1.2526,
"step": 374
},
{
"epoch": 2.79,
"learning_rate": 2.3679721490337258e-07,
"loss": 1.2956,
"step": 375
},
{
"epoch": 2.79,
"learning_rate": 2.196448170873755e-07,
"loss": 1.2416,
"step": 376
},
{
"epoch": 2.8,
"learning_rate": 2.0313031957539198e-07,
"loss": 1.2028,
"step": 377
},
{
"epoch": 2.81,
"learning_rate": 1.8725479948607515e-07,
"loss": 1.2249,
"step": 378
},
{
"epoch": 2.82,
"learning_rate": 1.7201929226229873e-07,
"loss": 1.2774,
"step": 379
},
{
"epoch": 2.82,
"learning_rate": 1.5742479160362978e-07,
"loss": 1.3433,
"step": 380
},
{
"epoch": 2.83,
"learning_rate": 1.434722494015084e-07,
"loss": 1.4227,
"step": 381
},
{
"epoch": 2.84,
"learning_rate": 1.3016257567717295e-07,
"loss": 1.2546,
"step": 382
},
{
"epoch": 2.84,
"learning_rate": 1.1749663852229864e-07,
"loss": 1.3226,
"step": 383
},
{
"epoch": 2.85,
"learning_rate": 1.054752640423784e-07,
"loss": 1.3051,
"step": 384
},
{
"epoch": 2.86,
"learning_rate": 9.409923630284812e-08,
"loss": 1.353,
"step": 385
},
{
"epoch": 2.87,
"learning_rate": 8.336929727794318e-08,
"loss": 1.2531,
"step": 386
},
{
"epoch": 2.87,
"learning_rate": 7.328614680230495e-08,
"loss": 1.2814,
"step": 387
},
{
"epoch": 2.88,
"learning_rate": 6.385044252533723e-08,
"loss": 1.2351,
"step": 388
},
{
"epoch": 2.89,
"learning_rate": 5.506279986831065e-08,
"loss": 1.3189,
"step": 389
},
{
"epoch": 2.9,
"learning_rate": 4.692379198422803e-08,
"loss": 1.2508,
"step": 390
},
{
"epoch": 2.9,
"learning_rate": 3.9433949720435464e-08,
"loss": 1.3068,
"step": 391
},
{
"epoch": 2.91,
"learning_rate": 3.259376158400329e-08,
"loss": 1.3143,
"step": 392
},
{
"epoch": 2.92,
"learning_rate": 2.6403673709863854e-08,
"loss": 1.3058,
"step": 393
},
{
"epoch": 2.93,
"learning_rate": 2.0864089831711398e-08,
"loss": 1.2946,
"step": 394
},
{
"epoch": 2.93,
"learning_rate": 1.5975371255672056e-08,
"loss": 1.3232,
"step": 395
},
{
"epoch": 2.94,
"learning_rate": 1.1737836836737126e-08,
"loss": 1.2446,
"step": 396
},
{
"epoch": 2.95,
"learning_rate": 8.15176295796638e-09,
"loss": 1.2853,
"step": 397
},
{
"epoch": 2.96,
"learning_rate": 5.217383512463592e-09,
"loss": 1.2427,
"step": 398
},
{
"epoch": 2.96,
"learning_rate": 2.934889888116521e-09,
"loss": 1.3966,
"step": 399
},
{
"epoch": 2.97,
"learning_rate": 1.3044309551213385e-09,
"loss": 1.2288,
"step": 400
},
{
"epoch": 2.98,
"learning_rate": 3.2611305626706336e-10,
"loss": 1.2406,
"step": 401
},
{
"epoch": 2.99,
"learning_rate": 0.0,
"loss": 1.2031,
"step": 402
}
],
"max_steps": 402,
"num_train_epochs": 3,
"total_flos": 1.0404875099533476e+18,
"trial_name": null,
"trial_params": null
}