tiny-roberta-indonesia / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"global_step": 269140,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04,
"learning_rate": 9.981422308092442e-05,
"loss": 6.7888,
"step": 500
},
{
"epoch": 0.07,
"learning_rate": 9.962844616184886e-05,
"loss": 6.2621,
"step": 1000
},
{
"epoch": 0.11,
"learning_rate": 9.944266924277327e-05,
"loss": 6.2021,
"step": 1500
},
{
"epoch": 0.15,
"learning_rate": 9.92568923236977e-05,
"loss": 6.1445,
"step": 2000
},
{
"epoch": 0.19,
"learning_rate": 9.907111540462214e-05,
"loss": 6.0937,
"step": 2500
},
{
"epoch": 0.22,
"learning_rate": 9.888533848554656e-05,
"loss": 6.0322,
"step": 3000
},
{
"epoch": 0.26,
"learning_rate": 9.869956156647099e-05,
"loss": 5.9909,
"step": 3500
},
{
"epoch": 0.3,
"learning_rate": 9.851378464739541e-05,
"loss": 5.965,
"step": 4000
},
{
"epoch": 0.33,
"learning_rate": 9.832800772831984e-05,
"loss": 5.9108,
"step": 4500
},
{
"epoch": 0.37,
"learning_rate": 9.814223080924426e-05,
"loss": 5.877,
"step": 5000
},
{
"epoch": 0.41,
"learning_rate": 9.795645389016869e-05,
"loss": 5.8644,
"step": 5500
},
{
"epoch": 0.45,
"learning_rate": 9.777067697109311e-05,
"loss": 5.8114,
"step": 6000
},
{
"epoch": 0.48,
"learning_rate": 9.758490005201754e-05,
"loss": 5.803,
"step": 6500
},
{
"epoch": 0.52,
"learning_rate": 9.739912313294197e-05,
"loss": 5.7799,
"step": 7000
},
{
"epoch": 0.56,
"learning_rate": 9.721334621386639e-05,
"loss": 5.7654,
"step": 7500
},
{
"epoch": 0.59,
"learning_rate": 9.702756929479082e-05,
"loss": 5.7631,
"step": 8000
},
{
"epoch": 0.63,
"learning_rate": 9.684179237571524e-05,
"loss": 5.7225,
"step": 8500
},
{
"epoch": 0.67,
"learning_rate": 9.665601545663968e-05,
"loss": 5.7058,
"step": 9000
},
{
"epoch": 0.71,
"learning_rate": 9.64702385375641e-05,
"loss": 5.688,
"step": 9500
},
{
"epoch": 0.74,
"learning_rate": 9.628446161848853e-05,
"loss": 5.677,
"step": 10000
},
{
"epoch": 0.78,
"learning_rate": 9.609868469941294e-05,
"loss": 5.6553,
"step": 10500
},
{
"epoch": 0.82,
"learning_rate": 9.591290778033738e-05,
"loss": 5.6418,
"step": 11000
},
{
"epoch": 0.85,
"learning_rate": 9.572713086126181e-05,
"loss": 5.6214,
"step": 11500
},
{
"epoch": 0.89,
"learning_rate": 9.554135394218623e-05,
"loss": 5.6269,
"step": 12000
},
{
"epoch": 0.93,
"learning_rate": 9.535557702311066e-05,
"loss": 5.5986,
"step": 12500
},
{
"epoch": 0.97,
"learning_rate": 9.516980010403508e-05,
"loss": 5.5702,
"step": 13000
},
{
"epoch": 1.0,
"learning_rate": 9.49840231849595e-05,
"loss": 5.5384,
"step": 13500
},
{
"epoch": 1.04,
"learning_rate": 9.479824626588393e-05,
"loss": 5.5147,
"step": 14000
},
{
"epoch": 1.08,
"learning_rate": 9.461246934680835e-05,
"loss": 5.4813,
"step": 14500
},
{
"epoch": 1.11,
"learning_rate": 9.44266924277328e-05,
"loss": 5.4493,
"step": 15000
},
{
"epoch": 1.15,
"learning_rate": 9.424091550865721e-05,
"loss": 5.4311,
"step": 15500
},
{
"epoch": 1.19,
"learning_rate": 9.405513858958163e-05,
"loss": 5.384,
"step": 16000
},
{
"epoch": 1.23,
"learning_rate": 9.386936167050606e-05,
"loss": 5.3435,
"step": 16500
},
{
"epoch": 1.26,
"learning_rate": 9.368358475143048e-05,
"loss": 5.2896,
"step": 17000
},
{
"epoch": 1.3,
"learning_rate": 9.349780783235491e-05,
"loss": 5.2532,
"step": 17500
},
{
"epoch": 1.34,
"learning_rate": 9.331203091327933e-05,
"loss": 5.1974,
"step": 18000
},
{
"epoch": 1.37,
"learning_rate": 9.312625399420376e-05,
"loss": 5.1504,
"step": 18500
},
{
"epoch": 1.41,
"learning_rate": 9.294047707512818e-05,
"loss": 5.131,
"step": 19000
},
{
"epoch": 1.45,
"learning_rate": 9.275470015605262e-05,
"loss": 5.0999,
"step": 19500
},
{
"epoch": 1.49,
"learning_rate": 9.256892323697705e-05,
"loss": 5.0754,
"step": 20000
},
{
"epoch": 1.52,
"learning_rate": 9.238314631790147e-05,
"loss": 5.0338,
"step": 20500
},
{
"epoch": 1.56,
"learning_rate": 9.21973693988259e-05,
"loss": 5.0154,
"step": 21000
},
{
"epoch": 1.6,
"learning_rate": 9.201159247975032e-05,
"loss": 4.966,
"step": 21500
},
{
"epoch": 1.63,
"learning_rate": 9.182581556067475e-05,
"loss": 4.9607,
"step": 22000
},
{
"epoch": 1.67,
"learning_rate": 9.164003864159917e-05,
"loss": 4.9289,
"step": 22500
},
{
"epoch": 1.71,
"learning_rate": 9.14542617225236e-05,
"loss": 4.8969,
"step": 23000
},
{
"epoch": 1.75,
"learning_rate": 9.126848480344803e-05,
"loss": 4.8953,
"step": 23500
},
{
"epoch": 1.78,
"learning_rate": 9.108270788437245e-05,
"loss": 4.8567,
"step": 24000
},
{
"epoch": 1.82,
"learning_rate": 9.089693096529688e-05,
"loss": 4.8592,
"step": 24500
},
{
"epoch": 1.86,
"learning_rate": 9.07111540462213e-05,
"loss": 4.8639,
"step": 25000
},
{
"epoch": 1.89,
"learning_rate": 9.052537712714573e-05,
"loss": 4.8299,
"step": 25500
},
{
"epoch": 1.93,
"learning_rate": 9.033960020807015e-05,
"loss": 4.8168,
"step": 26000
},
{
"epoch": 1.97,
"learning_rate": 9.015382328899457e-05,
"loss": 4.7802,
"step": 26500
},
{
"epoch": 2.01,
"learning_rate": 8.9968046369919e-05,
"loss": 4.7703,
"step": 27000
},
{
"epoch": 2.04,
"learning_rate": 8.978226945084343e-05,
"loss": 4.7495,
"step": 27500
},
{
"epoch": 2.08,
"learning_rate": 8.959649253176787e-05,
"loss": 4.742,
"step": 28000
},
{
"epoch": 2.12,
"learning_rate": 8.941071561269229e-05,
"loss": 4.7124,
"step": 28500
},
{
"epoch": 2.16,
"learning_rate": 8.92249386936167e-05,
"loss": 4.723,
"step": 29000
},
{
"epoch": 2.19,
"learning_rate": 8.903916177454114e-05,
"loss": 4.6895,
"step": 29500
},
{
"epoch": 2.23,
"learning_rate": 8.885338485546555e-05,
"loss": 4.6883,
"step": 30000
},
{
"epoch": 2.27,
"learning_rate": 8.866760793638999e-05,
"loss": 4.6492,
"step": 30500
},
{
"epoch": 2.3,
"learning_rate": 8.84818310173144e-05,
"loss": 4.6279,
"step": 31000
},
{
"epoch": 2.34,
"learning_rate": 8.829605409823884e-05,
"loss": 4.6412,
"step": 31500
},
{
"epoch": 2.38,
"learning_rate": 8.811027717916327e-05,
"loss": 4.609,
"step": 32000
},
{
"epoch": 2.42,
"learning_rate": 8.792450026008769e-05,
"loss": 4.6149,
"step": 32500
},
{
"epoch": 2.45,
"learning_rate": 8.773872334101212e-05,
"loss": 4.5923,
"step": 33000
},
{
"epoch": 2.49,
"learning_rate": 8.755294642193654e-05,
"loss": 4.5997,
"step": 33500
},
{
"epoch": 2.53,
"learning_rate": 8.736716950286097e-05,
"loss": 4.5669,
"step": 34000
},
{
"epoch": 2.56,
"learning_rate": 8.718139258378539e-05,
"loss": 4.5359,
"step": 34500
},
{
"epoch": 2.6,
"learning_rate": 8.699561566470982e-05,
"loss": 4.5578,
"step": 35000
},
{
"epoch": 2.64,
"learning_rate": 8.680983874563424e-05,
"loss": 4.5222,
"step": 35500
},
{
"epoch": 2.68,
"learning_rate": 8.662406182655867e-05,
"loss": 4.5189,
"step": 36000
},
{
"epoch": 2.71,
"learning_rate": 8.64382849074831e-05,
"loss": 4.5025,
"step": 36500
},
{
"epoch": 2.75,
"learning_rate": 8.625250798840752e-05,
"loss": 4.4724,
"step": 37000
},
{
"epoch": 2.79,
"learning_rate": 8.606673106933196e-05,
"loss": 4.4692,
"step": 37500
},
{
"epoch": 2.82,
"learning_rate": 8.588095415025637e-05,
"loss": 4.4595,
"step": 38000
},
{
"epoch": 2.86,
"learning_rate": 8.569517723118081e-05,
"loss": 4.4474,
"step": 38500
},
{
"epoch": 2.9,
"learning_rate": 8.550940031210523e-05,
"loss": 4.4585,
"step": 39000
},
{
"epoch": 2.94,
"learning_rate": 8.532362339302964e-05,
"loss": 4.4306,
"step": 39500
},
{
"epoch": 2.97,
"learning_rate": 8.513784647395408e-05,
"loss": 4.4443,
"step": 40000
},
{
"epoch": 3.01,
"learning_rate": 8.495206955487851e-05,
"loss": 4.4032,
"step": 40500
},
{
"epoch": 3.05,
"learning_rate": 8.476629263580294e-05,
"loss": 4.4121,
"step": 41000
},
{
"epoch": 3.08,
"learning_rate": 8.458051571672736e-05,
"loss": 4.394,
"step": 41500
},
{
"epoch": 3.12,
"learning_rate": 8.439473879765179e-05,
"loss": 4.3491,
"step": 42000
},
{
"epoch": 3.16,
"learning_rate": 8.420896187857621e-05,
"loss": 4.3639,
"step": 42500
},
{
"epoch": 3.2,
"learning_rate": 8.402318495950063e-05,
"loss": 4.3754,
"step": 43000
},
{
"epoch": 3.23,
"learning_rate": 8.383740804042506e-05,
"loss": 4.3606,
"step": 43500
},
{
"epoch": 3.27,
"learning_rate": 8.365163112134948e-05,
"loss": 4.3412,
"step": 44000
},
{
"epoch": 3.31,
"learning_rate": 8.346585420227392e-05,
"loss": 4.3307,
"step": 44500
},
{
"epoch": 3.34,
"learning_rate": 8.328007728319834e-05,
"loss": 4.3254,
"step": 45000
},
{
"epoch": 3.38,
"learning_rate": 8.309430036412276e-05,
"loss": 4.3204,
"step": 45500
},
{
"epoch": 3.42,
"learning_rate": 8.29085234450472e-05,
"loss": 4.318,
"step": 46000
},
{
"epoch": 3.46,
"learning_rate": 8.272274652597161e-05,
"loss": 4.3174,
"step": 46500
},
{
"epoch": 3.49,
"learning_rate": 8.253696960689605e-05,
"loss": 4.2962,
"step": 47000
},
{
"epoch": 3.53,
"learning_rate": 8.235119268782046e-05,
"loss": 4.287,
"step": 47500
},
{
"epoch": 3.57,
"learning_rate": 8.21654157687449e-05,
"loss": 4.2903,
"step": 48000
},
{
"epoch": 3.6,
"learning_rate": 8.197963884966931e-05,
"loss": 4.2733,
"step": 48500
},
{
"epoch": 3.64,
"learning_rate": 8.179386193059375e-05,
"loss": 4.2686,
"step": 49000
},
{
"epoch": 3.68,
"learning_rate": 8.160808501151818e-05,
"loss": 4.26,
"step": 49500
},
{
"epoch": 3.72,
"learning_rate": 8.14223080924426e-05,
"loss": 4.2583,
"step": 50000
},
{
"epoch": 3.75,
"learning_rate": 8.123653117336703e-05,
"loss": 4.2561,
"step": 50500
},
{
"epoch": 3.79,
"learning_rate": 8.105075425429145e-05,
"loss": 4.2421,
"step": 51000
},
{
"epoch": 3.83,
"learning_rate": 8.086497733521588e-05,
"loss": 4.2306,
"step": 51500
},
{
"epoch": 3.86,
"learning_rate": 8.06792004161403e-05,
"loss": 4.2164,
"step": 52000
},
{
"epoch": 3.9,
"learning_rate": 8.049342349706472e-05,
"loss": 4.2084,
"step": 52500
},
{
"epoch": 3.94,
"learning_rate": 8.030764657798916e-05,
"loss": 4.2099,
"step": 53000
},
{
"epoch": 3.98,
"learning_rate": 8.012186965891358e-05,
"loss": 4.2162,
"step": 53500
},
{
"epoch": 4.01,
"learning_rate": 7.993609273983801e-05,
"loss": 4.2236,
"step": 54000
},
{
"epoch": 4.05,
"learning_rate": 7.975031582076243e-05,
"loss": 4.1909,
"step": 54500
},
{
"epoch": 4.09,
"learning_rate": 7.956453890168686e-05,
"loss": 4.1765,
"step": 55000
},
{
"epoch": 4.12,
"learning_rate": 7.937876198261128e-05,
"loss": 4.1988,
"step": 55500
},
{
"epoch": 4.16,
"learning_rate": 7.91929850635357e-05,
"loss": 4.1815,
"step": 56000
},
{
"epoch": 4.2,
"learning_rate": 7.900720814446013e-05,
"loss": 4.1768,
"step": 56500
},
{
"epoch": 4.24,
"learning_rate": 7.882143122538455e-05,
"loss": 4.1699,
"step": 57000
},
{
"epoch": 4.27,
"learning_rate": 7.8635654306309e-05,
"loss": 4.1473,
"step": 57500
},
{
"epoch": 4.31,
"learning_rate": 7.844987738723342e-05,
"loss": 4.1826,
"step": 58000
},
{
"epoch": 4.35,
"learning_rate": 7.826410046815784e-05,
"loss": 4.1364,
"step": 58500
},
{
"epoch": 4.38,
"learning_rate": 7.807832354908227e-05,
"loss": 4.1521,
"step": 59000
},
{
"epoch": 4.42,
"learning_rate": 7.789254663000669e-05,
"loss": 4.1614,
"step": 59500
},
{
"epoch": 4.46,
"learning_rate": 7.770676971093112e-05,
"loss": 4.1415,
"step": 60000
},
{
"epoch": 4.5,
"learning_rate": 7.752099279185554e-05,
"loss": 4.118,
"step": 60500
},
{
"epoch": 4.53,
"learning_rate": 7.733521587277997e-05,
"loss": 4.1446,
"step": 61000
},
{
"epoch": 4.57,
"learning_rate": 7.71494389537044e-05,
"loss": 4.1323,
"step": 61500
},
{
"epoch": 4.61,
"learning_rate": 7.696366203462882e-05,
"loss": 4.1072,
"step": 62000
},
{
"epoch": 4.64,
"learning_rate": 7.677788511555325e-05,
"loss": 4.1396,
"step": 62500
},
{
"epoch": 4.68,
"learning_rate": 7.659210819647767e-05,
"loss": 4.0946,
"step": 63000
},
{
"epoch": 4.72,
"learning_rate": 7.64063312774021e-05,
"loss": 4.1253,
"step": 63500
},
{
"epoch": 4.76,
"learning_rate": 7.622055435832652e-05,
"loss": 4.104,
"step": 64000
},
{
"epoch": 4.79,
"learning_rate": 7.603477743925095e-05,
"loss": 4.0819,
"step": 64500
},
{
"epoch": 4.83,
"learning_rate": 7.584900052017537e-05,
"loss": 4.0795,
"step": 65000
},
{
"epoch": 4.87,
"learning_rate": 7.566322360109979e-05,
"loss": 4.0805,
"step": 65500
},
{
"epoch": 4.9,
"learning_rate": 7.547744668202424e-05,
"loss": 4.0792,
"step": 66000
},
{
"epoch": 4.94,
"learning_rate": 7.529166976294866e-05,
"loss": 4.0704,
"step": 66500
},
{
"epoch": 4.98,
"learning_rate": 7.510589284387309e-05,
"loss": 4.0694,
"step": 67000
},
{
"epoch": 5.02,
"learning_rate": 7.49201159247975e-05,
"loss": 4.0499,
"step": 67500
},
{
"epoch": 5.05,
"learning_rate": 7.473433900572194e-05,
"loss": 4.0686,
"step": 68000
},
{
"epoch": 5.09,
"learning_rate": 7.454856208664636e-05,
"loss": 4.0632,
"step": 68500
},
{
"epoch": 5.13,
"learning_rate": 7.436278516757078e-05,
"loss": 4.0458,
"step": 69000
},
{
"epoch": 5.16,
"learning_rate": 7.417700824849521e-05,
"loss": 4.03,
"step": 69500
},
{
"epoch": 5.2,
"learning_rate": 7.399123132941964e-05,
"loss": 4.0443,
"step": 70000
},
{
"epoch": 5.24,
"learning_rate": 7.380545441034407e-05,
"loss": 4.0303,
"step": 70500
},
{
"epoch": 5.28,
"learning_rate": 7.361967749126849e-05,
"loss": 4.0296,
"step": 71000
},
{
"epoch": 5.31,
"learning_rate": 7.343390057219291e-05,
"loss": 4.0206,
"step": 71500
},
{
"epoch": 5.35,
"learning_rate": 7.324812365311734e-05,
"loss": 4.0245,
"step": 72000
},
{
"epoch": 5.39,
"learning_rate": 7.306234673404176e-05,
"loss": 4.0397,
"step": 72500
},
{
"epoch": 5.42,
"learning_rate": 7.287656981496619e-05,
"loss": 4.0174,
"step": 73000
},
{
"epoch": 5.46,
"learning_rate": 7.269079289589061e-05,
"loss": 4.0038,
"step": 73500
},
{
"epoch": 5.5,
"learning_rate": 7.250501597681504e-05,
"loss": 4.0074,
"step": 74000
},
{
"epoch": 5.54,
"learning_rate": 7.231923905773948e-05,
"loss": 4.0191,
"step": 74500
},
{
"epoch": 5.57,
"learning_rate": 7.21334621386639e-05,
"loss": 4.0251,
"step": 75000
},
{
"epoch": 5.61,
"learning_rate": 7.194768521958833e-05,
"loss": 3.9988,
"step": 75500
},
{
"epoch": 5.65,
"learning_rate": 7.176190830051274e-05,
"loss": 4.0091,
"step": 76000
},
{
"epoch": 5.68,
"learning_rate": 7.157613138143718e-05,
"loss": 4.0174,
"step": 76500
},
{
"epoch": 5.72,
"learning_rate": 7.13903544623616e-05,
"loss": 4.0052,
"step": 77000
},
{
"epoch": 5.76,
"learning_rate": 7.120457754328603e-05,
"loss": 3.9793,
"step": 77500
},
{
"epoch": 5.8,
"learning_rate": 7.101880062421045e-05,
"loss": 4.0016,
"step": 78000
},
{
"epoch": 5.83,
"learning_rate": 7.083302370513488e-05,
"loss": 3.997,
"step": 78500
},
{
"epoch": 5.87,
"learning_rate": 7.064724678605931e-05,
"loss": 3.9835,
"step": 79000
},
{
"epoch": 5.91,
"learning_rate": 7.046146986698373e-05,
"loss": 3.985,
"step": 79500
},
{
"epoch": 5.94,
"learning_rate": 7.027569294790816e-05,
"loss": 3.9706,
"step": 80000
},
{
"epoch": 5.98,
"learning_rate": 7.008991602883258e-05,
"loss": 3.9823,
"step": 80500
},
{
"epoch": 6.02,
"learning_rate": 6.990413910975701e-05,
"loss": 3.9795,
"step": 81000
},
{
"epoch": 6.06,
"learning_rate": 6.971836219068143e-05,
"loss": 3.9638,
"step": 81500
},
{
"epoch": 6.09,
"learning_rate": 6.953258527160585e-05,
"loss": 3.9441,
"step": 82000
},
{
"epoch": 6.13,
"learning_rate": 6.93468083525303e-05,
"loss": 3.9639,
"step": 82500
},
{
"epoch": 6.17,
"learning_rate": 6.916103143345471e-05,
"loss": 3.9706,
"step": 83000
},
{
"epoch": 6.2,
"learning_rate": 6.897525451437915e-05,
"loss": 3.9546,
"step": 83500
},
{
"epoch": 6.24,
"learning_rate": 6.878947759530356e-05,
"loss": 3.9641,
"step": 84000
},
{
"epoch": 6.28,
"learning_rate": 6.860370067622798e-05,
"loss": 3.957,
"step": 84500
},
{
"epoch": 6.32,
"learning_rate": 6.841792375715241e-05,
"loss": 3.9565,
"step": 85000
},
{
"epoch": 6.35,
"learning_rate": 6.823214683807683e-05,
"loss": 3.9318,
"step": 85500
},
{
"epoch": 6.39,
"learning_rate": 6.804636991900127e-05,
"loss": 3.9241,
"step": 86000
},
{
"epoch": 6.43,
"learning_rate": 6.786059299992568e-05,
"loss": 3.946,
"step": 86500
},
{
"epoch": 6.47,
"learning_rate": 6.767481608085012e-05,
"loss": 3.9291,
"step": 87000
},
{
"epoch": 6.5,
"learning_rate": 6.748903916177455e-05,
"loss": 3.9367,
"step": 87500
},
{
"epoch": 6.54,
"learning_rate": 6.730326224269897e-05,
"loss": 3.9206,
"step": 88000
},
{
"epoch": 6.58,
"learning_rate": 6.71174853236234e-05,
"loss": 3.9244,
"step": 88500
},
{
"epoch": 6.61,
"learning_rate": 6.693170840454782e-05,
"loss": 3.9265,
"step": 89000
},
{
"epoch": 6.65,
"learning_rate": 6.674593148547225e-05,
"loss": 3.9047,
"step": 89500
},
{
"epoch": 6.69,
"learning_rate": 6.656015456639667e-05,
"loss": 3.9176,
"step": 90000
},
{
"epoch": 6.73,
"learning_rate": 6.63743776473211e-05,
"loss": 3.9342,
"step": 90500
},
{
"epoch": 6.76,
"learning_rate": 6.618860072824553e-05,
"loss": 3.9144,
"step": 91000
},
{
"epoch": 6.8,
"learning_rate": 6.600282380916995e-05,
"loss": 3.9232,
"step": 91500
},
{
"epoch": 6.84,
"learning_rate": 6.581704689009438e-05,
"loss": 3.9213,
"step": 92000
},
{
"epoch": 6.87,
"learning_rate": 6.56312699710188e-05,
"loss": 3.8885,
"step": 92500
},
{
"epoch": 6.91,
"learning_rate": 6.544549305194323e-05,
"loss": 3.8859,
"step": 93000
},
{
"epoch": 6.95,
"learning_rate": 6.525971613286765e-05,
"loss": 3.9033,
"step": 93500
},
{
"epoch": 6.99,
"learning_rate": 6.507393921379209e-05,
"loss": 3.9087,
"step": 94000
},
{
"epoch": 7.02,
"learning_rate": 6.48881622947165e-05,
"loss": 3.9058,
"step": 94500
},
{
"epoch": 7.06,
"learning_rate": 6.470238537564092e-05,
"loss": 3.8973,
"step": 95000
},
{
"epoch": 7.1,
"learning_rate": 6.451660845656537e-05,
"loss": 3.89,
"step": 95500
},
{
"epoch": 7.13,
"learning_rate": 6.433083153748979e-05,
"loss": 3.8861,
"step": 96000
},
{
"epoch": 7.17,
"learning_rate": 6.414505461841422e-05,
"loss": 3.9072,
"step": 96500
},
{
"epoch": 7.21,
"learning_rate": 6.395927769933864e-05,
"loss": 3.8848,
"step": 97000
},
{
"epoch": 7.25,
"learning_rate": 6.377350078026306e-05,
"loss": 3.8844,
"step": 97500
},
{
"epoch": 7.28,
"learning_rate": 6.358772386118749e-05,
"loss": 3.8934,
"step": 98000
},
{
"epoch": 7.32,
"learning_rate": 6.340194694211191e-05,
"loss": 3.8635,
"step": 98500
},
{
"epoch": 7.36,
"learning_rate": 6.321617002303634e-05,
"loss": 3.8807,
"step": 99000
},
{
"epoch": 7.39,
"learning_rate": 6.303039310396077e-05,
"loss": 3.8656,
"step": 99500
},
{
"epoch": 7.43,
"learning_rate": 6.284461618488519e-05,
"loss": 3.8823,
"step": 100000
},
{
"epoch": 7.47,
"learning_rate": 6.265883926580962e-05,
"loss": 3.8634,
"step": 100500
},
{
"epoch": 7.51,
"learning_rate": 6.247306234673404e-05,
"loss": 3.8661,
"step": 101000
},
{
"epoch": 7.54,
"learning_rate": 6.228728542765847e-05,
"loss": 3.8707,
"step": 101500
},
{
"epoch": 7.58,
"learning_rate": 6.210150850858289e-05,
"loss": 3.8768,
"step": 102000
},
{
"epoch": 7.62,
"learning_rate": 6.191573158950732e-05,
"loss": 3.8466,
"step": 102500
},
{
"epoch": 7.65,
"learning_rate": 6.172995467043174e-05,
"loss": 3.8705,
"step": 103000
},
{
"epoch": 7.69,
"learning_rate": 6.154417775135617e-05,
"loss": 3.8378,
"step": 103500
},
{
"epoch": 7.73,
"learning_rate": 6.13584008322806e-05,
"loss": 3.8644,
"step": 104000
},
{
"epoch": 7.77,
"learning_rate": 6.117262391320503e-05,
"loss": 3.8536,
"step": 104500
},
{
"epoch": 7.8,
"learning_rate": 6.098684699412946e-05,
"loss": 3.8729,
"step": 105000
},
{
"epoch": 7.84,
"learning_rate": 6.0801070075053876e-05,
"loss": 3.8516,
"step": 105500
},
{
"epoch": 7.88,
"learning_rate": 6.06152931559783e-05,
"loss": 3.8524,
"step": 106000
},
{
"epoch": 7.91,
"learning_rate": 6.042951623690273e-05,
"loss": 3.8463,
"step": 106500
},
{
"epoch": 7.95,
"learning_rate": 6.024373931782715e-05,
"loss": 3.8415,
"step": 107000
},
{
"epoch": 7.99,
"learning_rate": 6.005796239875158e-05,
"loss": 3.8421,
"step": 107500
},
{
"epoch": 8.03,
"learning_rate": 5.987218547967601e-05,
"loss": 3.8555,
"step": 108000
},
{
"epoch": 8.06,
"learning_rate": 5.9686408560600435e-05,
"loss": 3.8433,
"step": 108500
},
{
"epoch": 8.1,
"learning_rate": 5.950063164152486e-05,
"loss": 3.859,
"step": 109000
},
{
"epoch": 8.14,
"learning_rate": 5.9314854722449286e-05,
"loss": 3.8425,
"step": 109500
},
{
"epoch": 8.17,
"learning_rate": 5.912907780337371e-05,
"loss": 3.8299,
"step": 110000
},
{
"epoch": 8.21,
"learning_rate": 5.894330088429814e-05,
"loss": 3.8344,
"step": 110500
},
{
"epoch": 8.25,
"learning_rate": 5.875752396522256e-05,
"loss": 3.8103,
"step": 111000
},
{
"epoch": 8.29,
"learning_rate": 5.857174704614699e-05,
"loss": 3.8241,
"step": 111500
},
{
"epoch": 8.32,
"learning_rate": 5.838597012707141e-05,
"loss": 3.8318,
"step": 112000
},
{
"epoch": 8.36,
"learning_rate": 5.8200193207995845e-05,
"loss": 3.83,
"step": 112500
},
{
"epoch": 8.4,
"learning_rate": 5.801441628892027e-05,
"loss": 3.8282,
"step": 113000
},
{
"epoch": 8.43,
"learning_rate": 5.7828639369844696e-05,
"loss": 3.8268,
"step": 113500
},
{
"epoch": 8.47,
"learning_rate": 5.764286245076912e-05,
"loss": 3.8282,
"step": 114000
},
{
"epoch": 8.51,
"learning_rate": 5.7457085531693546e-05,
"loss": 3.8148,
"step": 114500
},
{
"epoch": 8.55,
"learning_rate": 5.727130861261797e-05,
"loss": 3.8032,
"step": 115000
},
{
"epoch": 8.58,
"learning_rate": 5.708553169354239e-05,
"loss": 3.8169,
"step": 115500
},
{
"epoch": 8.62,
"learning_rate": 5.6899754774466816e-05,
"loss": 3.7967,
"step": 116000
},
{
"epoch": 8.66,
"learning_rate": 5.6713977855391255e-05,
"loss": 3.8234,
"step": 116500
},
{
"epoch": 8.69,
"learning_rate": 5.652820093631568e-05,
"loss": 3.802,
"step": 117000
},
{
"epoch": 8.73,
"learning_rate": 5.6342424017240106e-05,
"loss": 3.7919,
"step": 117500
},
{
"epoch": 8.77,
"learning_rate": 5.615664709816453e-05,
"loss": 3.7957,
"step": 118000
},
{
"epoch": 8.81,
"learning_rate": 5.597087017908895e-05,
"loss": 3.79,
"step": 118500
},
{
"epoch": 8.84,
"learning_rate": 5.5785093260013375e-05,
"loss": 3.7996,
"step": 119000
},
{
"epoch": 8.88,
"learning_rate": 5.55993163409378e-05,
"loss": 3.8089,
"step": 119500
},
{
"epoch": 8.92,
"learning_rate": 5.5413539421862226e-05,
"loss": 3.8031,
"step": 120000
},
{
"epoch": 8.95,
"learning_rate": 5.5227762502786665e-05,
"loss": 3.7984,
"step": 120500
},
{
"epoch": 8.99,
"learning_rate": 5.504198558371108e-05,
"loss": 3.7881,
"step": 121000
},
{
"epoch": 9.03,
"learning_rate": 5.485620866463551e-05,
"loss": 3.8057,
"step": 121500
},
{
"epoch": 9.07,
"learning_rate": 5.4670431745559934e-05,
"loss": 3.7843,
"step": 122000
},
{
"epoch": 9.1,
"learning_rate": 5.448465482648436e-05,
"loss": 3.7845,
"step": 122500
},
{
"epoch": 9.14,
"learning_rate": 5.4298877907408785e-05,
"loss": 3.8025,
"step": 123000
},
{
"epoch": 9.18,
"learning_rate": 5.411310098833321e-05,
"loss": 3.7746,
"step": 123500
},
{
"epoch": 9.21,
"learning_rate": 5.3927324069257636e-05,
"loss": 3.7869,
"step": 124000
},
{
"epoch": 9.25,
"learning_rate": 5.374154715018206e-05,
"loss": 3.7852,
"step": 124500
},
{
"epoch": 9.29,
"learning_rate": 5.355577023110649e-05,
"loss": 3.7819,
"step": 125000
},
{
"epoch": 9.33,
"learning_rate": 5.336999331203092e-05,
"loss": 3.8059,
"step": 125500
},
{
"epoch": 9.36,
"learning_rate": 5.3184216392955344e-05,
"loss": 3.7748,
"step": 126000
},
{
"epoch": 9.4,
"learning_rate": 5.299843947387977e-05,
"loss": 3.7689,
"step": 126500
},
{
"epoch": 9.44,
"learning_rate": 5.2812662554804195e-05,
"loss": 3.7766,
"step": 127000
},
{
"epoch": 9.47,
"learning_rate": 5.262688563572862e-05,
"loss": 3.7697,
"step": 127500
},
{
"epoch": 9.51,
"learning_rate": 5.2441108716653045e-05,
"loss": 3.7897,
"step": 128000
},
{
"epoch": 9.55,
"learning_rate": 5.2255331797577464e-05,
"loss": 3.7679,
"step": 128500
},
{
"epoch": 9.59,
"learning_rate": 5.20695548785019e-05,
"loss": 3.7808,
"step": 129000
},
{
"epoch": 9.62,
"learning_rate": 5.188377795942633e-05,
"loss": 3.7562,
"step": 129500
},
{
"epoch": 9.66,
"learning_rate": 5.1698001040350754e-05,
"loss": 3.7502,
"step": 130000
},
{
"epoch": 9.7,
"learning_rate": 5.151222412127518e-05,
"loss": 3.7809,
"step": 130500
},
{
"epoch": 9.73,
"learning_rate": 5.1326447202199604e-05,
"loss": 3.7752,
"step": 131000
},
{
"epoch": 9.77,
"learning_rate": 5.114067028312402e-05,
"loss": 3.7532,
"step": 131500
},
{
"epoch": 9.81,
"learning_rate": 5.095489336404845e-05,
"loss": 3.7683,
"step": 132000
},
{
"epoch": 9.85,
"learning_rate": 5.0769116444972874e-05,
"loss": 3.7538,
"step": 132500
},
{
"epoch": 9.88,
"learning_rate": 5.05833395258973e-05,
"loss": 3.7633,
"step": 133000
},
{
"epoch": 9.92,
"learning_rate": 5.039756260682174e-05,
"loss": 3.7457,
"step": 133500
},
{
"epoch": 9.96,
"learning_rate": 5.021178568774616e-05,
"loss": 3.7745,
"step": 134000
},
{
"epoch": 9.99,
"learning_rate": 5.002600876867058e-05,
"loss": 3.7592,
"step": 134500
},
{
"epoch": 10.03,
"learning_rate": 4.984023184959501e-05,
"loss": 3.7506,
"step": 135000
},
{
"epoch": 10.07,
"learning_rate": 4.965445493051943e-05,
"loss": 3.7601,
"step": 135500
},
{
"epoch": 10.11,
"learning_rate": 4.946867801144386e-05,
"loss": 3.7622,
"step": 136000
},
{
"epoch": 10.14,
"learning_rate": 4.928290109236829e-05,
"loss": 3.7059,
"step": 136500
},
{
"epoch": 10.18,
"learning_rate": 4.9097124173292716e-05,
"loss": 3.7491,
"step": 137000
},
{
"epoch": 10.22,
"learning_rate": 4.891134725421714e-05,
"loss": 3.7145,
"step": 137500
},
{
"epoch": 10.25,
"learning_rate": 4.872557033514156e-05,
"loss": 3.7217,
"step": 138000
},
{
"epoch": 10.29,
"learning_rate": 4.853979341606599e-05,
"loss": 3.7461,
"step": 138500
},
{
"epoch": 10.33,
"learning_rate": 4.835401649699042e-05,
"loss": 3.7317,
"step": 139000
},
{
"epoch": 10.37,
"learning_rate": 4.816823957791484e-05,
"loss": 3.7605,
"step": 139500
},
{
"epoch": 10.4,
"learning_rate": 4.798246265883927e-05,
"loss": 3.7422,
"step": 140000
},
{
"epoch": 10.44,
"learning_rate": 4.7796685739763694e-05,
"loss": 3.738,
"step": 140500
},
{
"epoch": 10.48,
"learning_rate": 4.761090882068812e-05,
"loss": 3.7314,
"step": 141000
},
{
"epoch": 10.51,
"learning_rate": 4.7425131901612544e-05,
"loss": 3.7486,
"step": 141500
},
{
"epoch": 10.55,
"learning_rate": 4.723935498253697e-05,
"loss": 3.7335,
"step": 142000
},
{
"epoch": 10.59,
"learning_rate": 4.7053578063461395e-05,
"loss": 3.7377,
"step": 142500
},
{
"epoch": 10.63,
"learning_rate": 4.686780114438583e-05,
"loss": 3.7252,
"step": 143000
},
{
"epoch": 10.66,
"learning_rate": 4.668202422531025e-05,
"loss": 3.7199,
"step": 143500
},
{
"epoch": 10.7,
"learning_rate": 4.649624730623468e-05,
"loss": 3.7202,
"step": 144000
},
{
"epoch": 10.74,
"learning_rate": 4.63104703871591e-05,
"loss": 3.7152,
"step": 144500
},
{
"epoch": 10.78,
"learning_rate": 4.612469346808353e-05,
"loss": 3.6946,
"step": 145000
},
{
"epoch": 10.81,
"learning_rate": 4.5938916549007954e-05,
"loss": 3.7183,
"step": 145500
},
{
"epoch": 10.85,
"learning_rate": 4.575313962993238e-05,
"loss": 3.7272,
"step": 146000
},
{
"epoch": 10.89,
"learning_rate": 4.5567362710856805e-05,
"loss": 3.7211,
"step": 146500
},
{
"epoch": 10.92,
"learning_rate": 4.538158579178124e-05,
"loss": 3.7328,
"step": 147000
},
{
"epoch": 10.96,
"learning_rate": 4.5195808872705656e-05,
"loss": 3.7167,
"step": 147500
},
{
"epoch": 11.0,
"learning_rate": 4.501003195363008e-05,
"loss": 3.7314,
"step": 148000
},
{
"epoch": 11.04,
"learning_rate": 4.4824255034554506e-05,
"loss": 3.707,
"step": 148500
},
{
"epoch": 11.07,
"learning_rate": 4.463847811547894e-05,
"loss": 3.7117,
"step": 149000
},
{
"epoch": 11.11,
"learning_rate": 4.4452701196403364e-05,
"loss": 3.698,
"step": 149500
},
{
"epoch": 11.15,
"learning_rate": 4.426692427732779e-05,
"loss": 3.7057,
"step": 150000
},
{
"epoch": 11.18,
"learning_rate": 4.4081147358252215e-05,
"loss": 3.7069,
"step": 150500
},
{
"epoch": 11.22,
"learning_rate": 4.3895370439176633e-05,
"loss": 3.7068,
"step": 151000
},
{
"epoch": 11.26,
"learning_rate": 4.3709593520101066e-05,
"loss": 3.7025,
"step": 151500
},
{
"epoch": 11.3,
"learning_rate": 4.352381660102549e-05,
"loss": 3.6943,
"step": 152000
},
{
"epoch": 11.33,
"learning_rate": 4.3338039681949916e-05,
"loss": 3.712,
"step": 152500
},
{
"epoch": 11.37,
"learning_rate": 4.315226276287434e-05,
"loss": 3.6925,
"step": 153000
},
{
"epoch": 11.41,
"learning_rate": 4.2966485843798774e-05,
"loss": 3.6832,
"step": 153500
},
{
"epoch": 11.44,
"learning_rate": 4.278070892472319e-05,
"loss": 3.6895,
"step": 154000
},
{
"epoch": 11.48,
"learning_rate": 4.259493200564762e-05,
"loss": 3.6915,
"step": 154500
},
{
"epoch": 11.52,
"learning_rate": 4.240915508657204e-05,
"loss": 3.6923,
"step": 155000
},
{
"epoch": 11.56,
"learning_rate": 4.2223378167496475e-05,
"loss": 3.6957,
"step": 155500
},
{
"epoch": 11.59,
"learning_rate": 4.20376012484209e-05,
"loss": 3.7213,
"step": 156000
},
{
"epoch": 11.63,
"learning_rate": 4.1851824329345326e-05,
"loss": 3.6744,
"step": 156500
},
{
"epoch": 11.67,
"learning_rate": 4.166604741026975e-05,
"loss": 3.6872,
"step": 157000
},
{
"epoch": 11.7,
"learning_rate": 4.148027049119418e-05,
"loss": 3.695,
"step": 157500
},
{
"epoch": 11.74,
"learning_rate": 4.12944935721186e-05,
"loss": 3.6935,
"step": 158000
},
{
"epoch": 11.78,
"learning_rate": 4.110871665304303e-05,
"loss": 3.684,
"step": 158500
},
{
"epoch": 11.82,
"learning_rate": 4.092293973396745e-05,
"loss": 3.6791,
"step": 159000
},
{
"epoch": 11.85,
"learning_rate": 4.073716281489188e-05,
"loss": 3.6711,
"step": 159500
},
{
"epoch": 11.89,
"learning_rate": 4.055138589581631e-05,
"loss": 3.6809,
"step": 160000
},
{
"epoch": 11.93,
"learning_rate": 4.036560897674073e-05,
"loss": 3.6661,
"step": 160500
},
{
"epoch": 11.96,
"learning_rate": 4.0179832057665155e-05,
"loss": 3.6764,
"step": 161000
},
{
"epoch": 12.0,
"learning_rate": 3.999405513858958e-05,
"loss": 3.6734,
"step": 161500
},
{
"epoch": 12.04,
"learning_rate": 3.980827821951401e-05,
"loss": 3.6866,
"step": 162000
},
{
"epoch": 12.08,
"learning_rate": 3.962250130043844e-05,
"loss": 3.6503,
"step": 162500
},
{
"epoch": 12.11,
"learning_rate": 3.943672438136286e-05,
"loss": 3.6859,
"step": 163000
},
{
"epoch": 12.15,
"learning_rate": 3.925094746228729e-05,
"loss": 3.6527,
"step": 163500
},
{
"epoch": 12.19,
"learning_rate": 3.9065170543211714e-05,
"loss": 3.6639,
"step": 164000
},
{
"epoch": 12.22,
"learning_rate": 3.887939362413614e-05,
"loss": 3.6663,
"step": 164500
},
{
"epoch": 12.26,
"learning_rate": 3.8693616705060564e-05,
"loss": 3.6774,
"step": 165000
},
{
"epoch": 12.3,
"learning_rate": 3.850783978598499e-05,
"loss": 3.6705,
"step": 165500
},
{
"epoch": 12.34,
"learning_rate": 3.832206286690942e-05,
"loss": 3.6783,
"step": 166000
},
{
"epoch": 12.37,
"learning_rate": 3.813628594783385e-05,
"loss": 3.6554,
"step": 166500
},
{
"epoch": 12.41,
"learning_rate": 3.7950509028758266e-05,
"loss": 3.6564,
"step": 167000
},
{
"epoch": 12.45,
"learning_rate": 3.776473210968269e-05,
"loss": 3.6713,
"step": 167500
},
{
"epoch": 12.48,
"learning_rate": 3.757895519060712e-05,
"loss": 3.6641,
"step": 168000
},
{
"epoch": 12.52,
"learning_rate": 3.739317827153155e-05,
"loss": 3.6747,
"step": 168500
},
{
"epoch": 12.56,
"learning_rate": 3.7207401352455974e-05,
"loss": 3.6826,
"step": 169000
},
{
"epoch": 12.6,
"learning_rate": 3.70216244333804e-05,
"loss": 3.663,
"step": 169500
},
{
"epoch": 12.63,
"learning_rate": 3.6835847514304825e-05,
"loss": 3.6661,
"step": 170000
},
{
"epoch": 12.67,
"learning_rate": 3.665007059522925e-05,
"loss": 3.6552,
"step": 170500
},
{
"epoch": 12.71,
"learning_rate": 3.6464293676153676e-05,
"loss": 3.666,
"step": 171000
},
{
"epoch": 12.74,
"learning_rate": 3.62785167570781e-05,
"loss": 3.6531,
"step": 171500
},
{
"epoch": 12.78,
"learning_rate": 3.609273983800253e-05,
"loss": 3.6543,
"step": 172000
},
{
"epoch": 12.82,
"learning_rate": 3.590696291892696e-05,
"loss": 3.6503,
"step": 172500
},
{
"epoch": 12.86,
"learning_rate": 3.5721185999851384e-05,
"loss": 3.6605,
"step": 173000
},
{
"epoch": 12.89,
"learning_rate": 3.55354090807758e-05,
"loss": 3.6355,
"step": 173500
},
{
"epoch": 12.93,
"learning_rate": 3.534963216170023e-05,
"loss": 3.6543,
"step": 174000
},
{
"epoch": 12.97,
"learning_rate": 3.516385524262466e-05,
"loss": 3.6494,
"step": 174500
},
{
"epoch": 13.0,
"learning_rate": 3.4978078323549086e-05,
"loss": 3.625,
"step": 175000
},
{
"epoch": 13.04,
"learning_rate": 3.479230140447351e-05,
"loss": 3.6604,
"step": 175500
},
{
"epoch": 13.08,
"learning_rate": 3.4606524485397936e-05,
"loss": 3.6587,
"step": 176000
},
{
"epoch": 13.12,
"learning_rate": 3.442074756632236e-05,
"loss": 3.6422,
"step": 176500
},
{
"epoch": 13.15,
"learning_rate": 3.423497064724679e-05,
"loss": 3.6392,
"step": 177000
},
{
"epoch": 13.19,
"learning_rate": 3.404919372817121e-05,
"loss": 3.6254,
"step": 177500
},
{
"epoch": 13.23,
"learning_rate": 3.386341680909564e-05,
"loss": 3.6426,
"step": 178000
},
{
"epoch": 13.26,
"learning_rate": 3.3677639890020063e-05,
"loss": 3.6778,
"step": 178500
},
{
"epoch": 13.3,
"learning_rate": 3.3491862970944496e-05,
"loss": 3.6645,
"step": 179000
},
{
"epoch": 13.34,
"learning_rate": 3.330608605186892e-05,
"loss": 3.6433,
"step": 179500
},
{
"epoch": 13.38,
"learning_rate": 3.312030913279334e-05,
"loss": 3.6614,
"step": 180000
},
{
"epoch": 13.41,
"learning_rate": 3.2934532213717765e-05,
"loss": 3.6654,
"step": 180500
},
{
"epoch": 13.45,
"learning_rate": 3.27487552946422e-05,
"loss": 3.6309,
"step": 181000
},
{
"epoch": 13.49,
"learning_rate": 3.256297837556662e-05,
"loss": 3.629,
"step": 181500
},
{
"epoch": 13.52,
"learning_rate": 3.237720145649105e-05,
"loss": 3.6235,
"step": 182000
},
{
"epoch": 13.56,
"learning_rate": 3.219142453741547e-05,
"loss": 3.6386,
"step": 182500
},
{
"epoch": 13.6,
"learning_rate": 3.20056476183399e-05,
"loss": 3.6491,
"step": 183000
},
{
"epoch": 13.64,
"learning_rate": 3.1819870699264324e-05,
"loss": 3.632,
"step": 183500
},
{
"epoch": 13.67,
"learning_rate": 3.163409378018875e-05,
"loss": 3.6363,
"step": 184000
},
{
"epoch": 13.71,
"learning_rate": 3.1448316861113175e-05,
"loss": 3.6232,
"step": 184500
},
{
"epoch": 13.75,
"learning_rate": 3.126253994203761e-05,
"loss": 3.6226,
"step": 185000
},
{
"epoch": 13.78,
"learning_rate": 3.107676302296203e-05,
"loss": 3.6455,
"step": 185500
},
{
"epoch": 13.82,
"learning_rate": 3.089098610388646e-05,
"loss": 3.6386,
"step": 186000
},
{
"epoch": 13.86,
"learning_rate": 3.0705209184810876e-05,
"loss": 3.6171,
"step": 186500
},
{
"epoch": 13.9,
"learning_rate": 3.05194322657353e-05,
"loss": 3.6286,
"step": 187000
},
{
"epoch": 13.93,
"learning_rate": 3.0333655346659734e-05,
"loss": 3.6253,
"step": 187500
},
{
"epoch": 13.97,
"learning_rate": 3.014787842758416e-05,
"loss": 3.6207,
"step": 188000
},
{
"epoch": 14.01,
"learning_rate": 2.9962101508508585e-05,
"loss": 3.6429,
"step": 188500
},
{
"epoch": 14.04,
"learning_rate": 2.9776324589433007e-05,
"loss": 3.6227,
"step": 189000
},
{
"epoch": 14.08,
"learning_rate": 2.959054767035744e-05,
"loss": 3.6268,
"step": 189500
},
{
"epoch": 14.12,
"learning_rate": 2.9404770751281864e-05,
"loss": 3.6449,
"step": 190000
},
{
"epoch": 14.16,
"learning_rate": 2.9218993832206286e-05,
"loss": 3.6343,
"step": 190500
},
{
"epoch": 14.19,
"learning_rate": 2.903321691313071e-05,
"loss": 3.6216,
"step": 191000
},
{
"epoch": 14.23,
"learning_rate": 2.8847439994055144e-05,
"loss": 3.6226,
"step": 191500
},
{
"epoch": 14.27,
"learning_rate": 2.8661663074979566e-05,
"loss": 3.6349,
"step": 192000
},
{
"epoch": 14.3,
"learning_rate": 2.847588615590399e-05,
"loss": 3.6263,
"step": 192500
},
{
"epoch": 14.34,
"learning_rate": 2.8290109236828416e-05,
"loss": 3.6124,
"step": 193000
},
{
"epoch": 14.38,
"learning_rate": 2.8104332317752845e-05,
"loss": 3.6135,
"step": 193500
},
{
"epoch": 14.42,
"learning_rate": 2.791855539867727e-05,
"loss": 3.6049,
"step": 194000
},
{
"epoch": 14.45,
"learning_rate": 2.7732778479601696e-05,
"loss": 3.6161,
"step": 194500
},
{
"epoch": 14.49,
"learning_rate": 2.754700156052612e-05,
"loss": 3.6112,
"step": 195000
},
{
"epoch": 14.53,
"learning_rate": 2.736122464145055e-05,
"loss": 3.6308,
"step": 195500
},
{
"epoch": 14.56,
"learning_rate": 2.7175447722374976e-05,
"loss": 3.6182,
"step": 196000
},
{
"epoch": 14.6,
"learning_rate": 2.69896708032994e-05,
"loss": 3.6005,
"step": 196500
},
{
"epoch": 14.64,
"learning_rate": 2.6803893884223823e-05,
"loss": 3.6148,
"step": 197000
},
{
"epoch": 14.68,
"learning_rate": 2.661811696514825e-05,
"loss": 3.6142,
"step": 197500
},
{
"epoch": 14.71,
"learning_rate": 2.643234004607268e-05,
"loss": 3.6146,
"step": 198000
},
{
"epoch": 14.75,
"learning_rate": 2.6246563126997102e-05,
"loss": 3.6134,
"step": 198500
},
{
"epoch": 14.79,
"learning_rate": 2.6060786207921528e-05,
"loss": 3.6265,
"step": 199000
},
{
"epoch": 14.82,
"learning_rate": 2.5875009288845953e-05,
"loss": 3.5949,
"step": 199500
},
{
"epoch": 14.86,
"learning_rate": 2.5689232369770382e-05,
"loss": 3.6383,
"step": 200000
},
{
"epoch": 14.9,
"learning_rate": 2.5503455450694807e-05,
"loss": 3.5996,
"step": 200500
},
{
"epoch": 14.94,
"learning_rate": 2.5317678531619233e-05,
"loss": 3.6005,
"step": 201000
},
{
"epoch": 14.97,
"learning_rate": 2.5131901612543658e-05,
"loss": 3.6066,
"step": 201500
},
{
"epoch": 15.01,
"learning_rate": 2.4946124693468084e-05,
"loss": 3.5974,
"step": 202000
},
{
"epoch": 15.05,
"learning_rate": 2.4760347774392512e-05,
"loss": 3.5954,
"step": 202500
},
{
"epoch": 15.09,
"learning_rate": 2.4574570855316938e-05,
"loss": 3.6086,
"step": 203000
},
{
"epoch": 15.12,
"learning_rate": 2.4388793936241363e-05,
"loss": 3.5961,
"step": 203500
},
{
"epoch": 15.16,
"learning_rate": 2.420301701716579e-05,
"loss": 3.6197,
"step": 204000
},
{
"epoch": 15.2,
"learning_rate": 2.4017240098090217e-05,
"loss": 3.6059,
"step": 204500
},
{
"epoch": 15.23,
"learning_rate": 2.383146317901464e-05,
"loss": 3.5814,
"step": 205000
},
{
"epoch": 15.27,
"learning_rate": 2.3645686259939065e-05,
"loss": 3.6082,
"step": 205500
},
{
"epoch": 15.31,
"learning_rate": 2.3459909340863493e-05,
"loss": 3.6141,
"step": 206000
},
{
"epoch": 15.35,
"learning_rate": 2.3274132421787915e-05,
"loss": 3.6143,
"step": 206500
},
{
"epoch": 15.38,
"learning_rate": 2.3088355502712344e-05,
"loss": 3.6195,
"step": 207000
},
{
"epoch": 15.42,
"learning_rate": 2.290257858363677e-05,
"loss": 3.6097,
"step": 207500
},
{
"epoch": 15.46,
"learning_rate": 2.2716801664561195e-05,
"loss": 3.6006,
"step": 208000
},
{
"epoch": 15.49,
"learning_rate": 2.253102474548562e-05,
"loss": 3.5869,
"step": 208500
},
{
"epoch": 15.53,
"learning_rate": 2.234524782641005e-05,
"loss": 3.6097,
"step": 209000
},
{
"epoch": 15.57,
"learning_rate": 2.2159470907334474e-05,
"loss": 3.6082,
"step": 209500
},
{
"epoch": 15.61,
"learning_rate": 2.19736939882589e-05,
"loss": 3.598,
"step": 210000
},
{
"epoch": 15.64,
"learning_rate": 2.1787917069183325e-05,
"loss": 3.5982,
"step": 210500
},
{
"epoch": 15.68,
"learning_rate": 2.1602140150107754e-05,
"loss": 3.6078,
"step": 211000
},
{
"epoch": 15.72,
"learning_rate": 2.1416363231032176e-05,
"loss": 3.5942,
"step": 211500
},
{
"epoch": 15.75,
"learning_rate": 2.1230586311956605e-05,
"loss": 3.6247,
"step": 212000
},
{
"epoch": 15.79,
"learning_rate": 2.104480939288103e-05,
"loss": 3.5861,
"step": 212500
},
{
"epoch": 15.83,
"learning_rate": 2.0859032473805456e-05,
"loss": 3.614,
"step": 213000
},
{
"epoch": 15.87,
"learning_rate": 2.067325555472988e-05,
"loss": 3.6239,
"step": 213500
},
{
"epoch": 15.9,
"learning_rate": 2.048747863565431e-05,
"loss": 3.596,
"step": 214000
},
{
"epoch": 15.94,
"learning_rate": 2.0301701716578732e-05,
"loss": 3.6064,
"step": 214500
},
{
"epoch": 15.98,
"learning_rate": 2.0115924797503157e-05,
"loss": 3.5961,
"step": 215000
},
{
"epoch": 16.01,
"learning_rate": 1.9930147878427586e-05,
"loss": 3.5828,
"step": 215500
},
{
"epoch": 16.05,
"learning_rate": 1.974437095935201e-05,
"loss": 3.5974,
"step": 216000
},
{
"epoch": 16.09,
"learning_rate": 1.9558594040276437e-05,
"loss": 3.6004,
"step": 216500
},
{
"epoch": 16.13,
"learning_rate": 1.9372817121200862e-05,
"loss": 3.5748,
"step": 217000
},
{
"epoch": 16.16,
"learning_rate": 1.918704020212529e-05,
"loss": 3.6198,
"step": 217500
},
{
"epoch": 16.2,
"learning_rate": 1.9001263283049713e-05,
"loss": 3.5917,
"step": 218000
},
{
"epoch": 16.24,
"learning_rate": 1.881548636397414e-05,
"loss": 3.5839,
"step": 218500
},
{
"epoch": 16.27,
"learning_rate": 1.8629709444898567e-05,
"loss": 3.6037,
"step": 219000
},
{
"epoch": 16.31,
"learning_rate": 1.8443932525822992e-05,
"loss": 3.5726,
"step": 219500
},
{
"epoch": 16.35,
"learning_rate": 1.8258155606747418e-05,
"loss": 3.6034,
"step": 220000
},
{
"epoch": 16.39,
"learning_rate": 1.8072378687671846e-05,
"loss": 3.5972,
"step": 220500
},
{
"epoch": 16.42,
"learning_rate": 1.788660176859627e-05,
"loss": 3.5918,
"step": 221000
},
{
"epoch": 16.46,
"learning_rate": 1.7700824849520697e-05,
"loss": 3.6055,
"step": 221500
},
{
"epoch": 16.5,
"learning_rate": 1.7515047930445123e-05,
"loss": 3.6021,
"step": 222000
},
{
"epoch": 16.53,
"learning_rate": 1.7329271011369548e-05,
"loss": 3.5969,
"step": 222500
},
{
"epoch": 16.57,
"learning_rate": 1.7143494092293973e-05,
"loss": 3.6042,
"step": 223000
},
{
"epoch": 16.61,
"learning_rate": 1.6957717173218402e-05,
"loss": 3.5999,
"step": 223500
},
{
"epoch": 16.65,
"learning_rate": 1.6771940254142828e-05,
"loss": 3.6069,
"step": 224000
},
{
"epoch": 16.68,
"learning_rate": 1.658616333506725e-05,
"loss": 3.5965,
"step": 224500
},
{
"epoch": 16.72,
"learning_rate": 1.640038641599168e-05,
"loss": 3.5873,
"step": 225000
},
{
"epoch": 16.76,
"learning_rate": 1.6214609496916104e-05,
"loss": 3.5832,
"step": 225500
},
{
"epoch": 16.79,
"learning_rate": 1.602883257784053e-05,
"loss": 3.5859,
"step": 226000
},
{
"epoch": 16.83,
"learning_rate": 1.5843055658764954e-05,
"loss": 3.586,
"step": 226500
},
{
"epoch": 16.87,
"learning_rate": 1.5657278739689383e-05,
"loss": 3.5837,
"step": 227000
},
{
"epoch": 16.91,
"learning_rate": 1.5471501820613805e-05,
"loss": 3.5757,
"step": 227500
},
{
"epoch": 16.94,
"learning_rate": 1.5285724901538234e-05,
"loss": 3.5858,
"step": 228000
},
{
"epoch": 16.98,
"learning_rate": 1.509994798246266e-05,
"loss": 3.5886,
"step": 228500
},
{
"epoch": 17.02,
"learning_rate": 1.4914171063387086e-05,
"loss": 3.5836,
"step": 229000
},
{
"epoch": 17.05,
"learning_rate": 1.472839414431151e-05,
"loss": 3.6144,
"step": 229500
},
{
"epoch": 17.09,
"learning_rate": 1.4542617225235939e-05,
"loss": 3.5952,
"step": 230000
},
{
"epoch": 17.13,
"learning_rate": 1.4356840306160363e-05,
"loss": 3.6013,
"step": 230500
},
{
"epoch": 17.17,
"learning_rate": 1.417106338708479e-05,
"loss": 3.5861,
"step": 231000
},
{
"epoch": 17.2,
"learning_rate": 1.3985286468009215e-05,
"loss": 3.583,
"step": 231500
},
{
"epoch": 17.24,
"learning_rate": 1.3799509548933642e-05,
"loss": 3.5808,
"step": 232000
},
{
"epoch": 17.28,
"learning_rate": 1.3613732629858068e-05,
"loss": 3.5968,
"step": 232500
},
{
"epoch": 17.31,
"learning_rate": 1.3427955710782495e-05,
"loss": 3.6015,
"step": 233000
},
{
"epoch": 17.35,
"learning_rate": 1.3242178791706918e-05,
"loss": 3.5969,
"step": 233500
},
{
"epoch": 17.39,
"learning_rate": 1.3056401872631344e-05,
"loss": 3.5887,
"step": 234000
},
{
"epoch": 17.43,
"learning_rate": 1.287062495355577e-05,
"loss": 3.582,
"step": 234500
},
{
"epoch": 17.46,
"learning_rate": 1.2684848034480196e-05,
"loss": 3.5699,
"step": 235000
},
{
"epoch": 17.5,
"learning_rate": 1.2499071115404623e-05,
"loss": 3.5769,
"step": 235500
},
{
"epoch": 17.54,
"learning_rate": 1.2313294196329049e-05,
"loss": 3.5844,
"step": 236000
},
{
"epoch": 17.57,
"learning_rate": 1.2127517277253476e-05,
"loss": 3.5819,
"step": 236500
},
{
"epoch": 17.61,
"learning_rate": 1.1941740358177901e-05,
"loss": 3.5774,
"step": 237000
},
{
"epoch": 17.65,
"learning_rate": 1.1755963439102326e-05,
"loss": 3.5811,
"step": 237500
},
{
"epoch": 17.69,
"learning_rate": 1.1570186520026752e-05,
"loss": 3.5779,
"step": 238000
},
{
"epoch": 17.72,
"learning_rate": 1.1384409600951177e-05,
"loss": 3.6015,
"step": 238500
},
{
"epoch": 17.76,
"learning_rate": 1.1198632681875604e-05,
"loss": 3.5761,
"step": 239000
},
{
"epoch": 17.8,
"learning_rate": 1.101285576280003e-05,
"loss": 3.5738,
"step": 239500
},
{
"epoch": 17.83,
"learning_rate": 1.0827078843724455e-05,
"loss": 3.5726,
"step": 240000
},
{
"epoch": 17.87,
"learning_rate": 1.0641301924648882e-05,
"loss": 3.5745,
"step": 240500
},
{
"epoch": 17.91,
"learning_rate": 1.0455525005573308e-05,
"loss": 3.5656,
"step": 241000
},
{
"epoch": 17.95,
"learning_rate": 1.0269748086497735e-05,
"loss": 3.5615,
"step": 241500
},
{
"epoch": 17.98,
"learning_rate": 1.008397116742216e-05,
"loss": 3.6016,
"step": 242000
},
{
"epoch": 18.02,
"learning_rate": 9.898194248346585e-06,
"loss": 3.5701,
"step": 242500
},
{
"epoch": 18.06,
"learning_rate": 9.712417329271012e-06,
"loss": 3.5849,
"step": 243000
},
{
"epoch": 18.09,
"learning_rate": 9.526640410195438e-06,
"loss": 3.588,
"step": 243500
},
{
"epoch": 18.13,
"learning_rate": 9.340863491119863e-06,
"loss": 3.5743,
"step": 244000
},
{
"epoch": 18.17,
"learning_rate": 9.15508657204429e-06,
"loss": 3.569,
"step": 244500
},
{
"epoch": 18.21,
"learning_rate": 8.969309652968716e-06,
"loss": 3.587,
"step": 245000
},
{
"epoch": 18.24,
"learning_rate": 8.783532733893143e-06,
"loss": 3.569,
"step": 245500
},
{
"epoch": 18.28,
"learning_rate": 8.597755814817568e-06,
"loss": 3.5792,
"step": 246000
},
{
"epoch": 18.32,
"learning_rate": 8.411978895741994e-06,
"loss": 3.5709,
"step": 246500
},
{
"epoch": 18.35,
"learning_rate": 8.22620197666642e-06,
"loss": 3.5869,
"step": 247000
},
{
"epoch": 18.39,
"learning_rate": 8.040425057590844e-06,
"loss": 3.5759,
"step": 247500
},
{
"epoch": 18.43,
"learning_rate": 7.854648138515271e-06,
"loss": 3.5717,
"step": 248000
},
{
"epoch": 18.47,
"learning_rate": 7.668871219439697e-06,
"loss": 3.5706,
"step": 248500
},
{
"epoch": 18.5,
"learning_rate": 7.483094300364123e-06,
"loss": 3.5902,
"step": 249000
},
{
"epoch": 18.54,
"learning_rate": 7.297317381288548e-06,
"loss": 3.5644,
"step": 249500
},
{
"epoch": 18.58,
"learning_rate": 7.111540462212975e-06,
"loss": 3.5794,
"step": 250000
},
{
"epoch": 18.61,
"learning_rate": 6.925763543137401e-06,
"loss": 3.5598,
"step": 250500
},
{
"epoch": 18.65,
"learning_rate": 6.739986624061827e-06,
"loss": 3.5712,
"step": 251000
},
{
"epoch": 18.69,
"learning_rate": 6.5542097049862525e-06,
"loss": 3.5658,
"step": 251500
},
{
"epoch": 18.73,
"learning_rate": 6.368432785910679e-06,
"loss": 3.583,
"step": 252000
},
{
"epoch": 18.76,
"learning_rate": 6.182655866835105e-06,
"loss": 3.5836,
"step": 252500
},
{
"epoch": 18.8,
"learning_rate": 5.996878947759531e-06,
"loss": 3.5804,
"step": 253000
},
{
"epoch": 18.84,
"learning_rate": 5.8111020286839565e-06,
"loss": 3.5619,
"step": 253500
},
{
"epoch": 18.87,
"learning_rate": 5.625325109608383e-06,
"loss": 3.5812,
"step": 254000
},
{
"epoch": 18.91,
"learning_rate": 5.439548190532808e-06,
"loss": 3.5444,
"step": 254500
},
{
"epoch": 18.95,
"learning_rate": 5.253771271457234e-06,
"loss": 3.5842,
"step": 255000
},
{
"epoch": 18.99,
"learning_rate": 5.06799435238166e-06,
"loss": 3.5781,
"step": 255500
},
{
"epoch": 19.02,
"learning_rate": 4.882217433306086e-06,
"loss": 3.5553,
"step": 256000
},
{
"epoch": 19.06,
"learning_rate": 4.696440514230512e-06,
"loss": 3.5715,
"step": 256500
},
{
"epoch": 19.1,
"learning_rate": 4.5106635951549385e-06,
"loss": 3.5596,
"step": 257000
},
{
"epoch": 19.14,
"learning_rate": 4.324886676079364e-06,
"loss": 3.5656,
"step": 257500
},
{
"epoch": 19.17,
"learning_rate": 4.13910975700379e-06,
"loss": 3.5672,
"step": 258000
},
{
"epoch": 19.21,
"learning_rate": 3.953332837928216e-06,
"loss": 3.5775,
"step": 258500
},
{
"epoch": 19.25,
"learning_rate": 3.7675559188526417e-06,
"loss": 3.5739,
"step": 259000
},
{
"epoch": 19.28,
"learning_rate": 3.5817789997770675e-06,
"loss": 3.5585,
"step": 259500
},
{
"epoch": 19.32,
"learning_rate": 3.3960020807014937e-06,
"loss": 3.5669,
"step": 260000
},
{
"epoch": 19.36,
"learning_rate": 3.2102251616259195e-06,
"loss": 3.5983,
"step": 260500
},
{
"epoch": 19.4,
"learning_rate": 3.0244482425503458e-06,
"loss": 3.5677,
"step": 261000
},
{
"epoch": 19.43,
"learning_rate": 2.8386713234747716e-06,
"loss": 3.5637,
"step": 261500
},
{
"epoch": 19.47,
"learning_rate": 2.652894404399198e-06,
"loss": 3.5537,
"step": 262000
},
{
"epoch": 19.51,
"learning_rate": 2.467117485323623e-06,
"loss": 3.5905,
"step": 262500
},
{
"epoch": 19.54,
"learning_rate": 2.2813405662480494e-06,
"loss": 3.5472,
"step": 263000
},
{
"epoch": 19.58,
"learning_rate": 2.0955636471724752e-06,
"loss": 3.5655,
"step": 263500
},
{
"epoch": 19.62,
"learning_rate": 1.9097867280969015e-06,
"loss": 3.5524,
"step": 264000
},
{
"epoch": 19.66,
"learning_rate": 1.7240098090213275e-06,
"loss": 3.5676,
"step": 264500
},
{
"epoch": 19.69,
"learning_rate": 1.5382328899457533e-06,
"loss": 3.573,
"step": 265000
},
{
"epoch": 19.73,
"learning_rate": 1.352455970870179e-06,
"loss": 3.5716,
"step": 265500
},
{
"epoch": 19.77,
"learning_rate": 1.1666790517946051e-06,
"loss": 3.5609,
"step": 266000
},
{
"epoch": 19.8,
"learning_rate": 9.809021327190311e-07,
"loss": 3.5533,
"step": 266500
},
{
"epoch": 19.84,
"learning_rate": 7.951252136434569e-07,
"loss": 3.5868,
"step": 267000
},
{
"epoch": 19.88,
"learning_rate": 6.093482945678829e-07,
"loss": 3.5714,
"step": 267500
},
{
"epoch": 19.92,
"learning_rate": 4.235713754923089e-07,
"loss": 3.582,
"step": 268000
},
{
"epoch": 19.95,
"learning_rate": 2.377944564167348e-07,
"loss": 3.5858,
"step": 268500
},
{
"epoch": 19.99,
"learning_rate": 5.2017537341160735e-08,
"loss": 3.5559,
"step": 269000
},
{
"epoch": 20.0,
"step": 269140,
"total_flos": 510943470337440.0,
"train_loss": 3.981161423732987,
"train_runtime": 4340.1096,
"train_samples_per_second": 992.136,
"train_steps_per_second": 62.012
}
],
"max_steps": 269140,
"num_train_epochs": 20,
"total_flos": 510943470337440.0,
"trial_name": null,
"trial_params": null
}