{ "best_metric": null, "best_model_checkpoint": null, "epoch": 46.32, "global_step": 5790000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.9998e-05, "loss": 177.567, "step": 500 }, { "epoch": 0.01, "learning_rate": 4.999600000000001e-05, "loss": 229.8426, "step": 1000 }, { "epoch": 0.01, "learning_rate": 4.9994e-05, "loss": 206.0798, "step": 1500 }, { "epoch": 0.02, "learning_rate": 4.9992e-05, "loss": 224.9133, "step": 2000 }, { "epoch": 0.02, "learning_rate": 4.999e-05, "loss": 223.0107, "step": 2500 }, { "epoch": 0.02, "learning_rate": 4.9988e-05, "loss": 205.5634, "step": 3000 }, { "epoch": 0.03, "learning_rate": 4.9986000000000006e-05, "loss": 201.7022, "step": 3500 }, { "epoch": 0.03, "learning_rate": 4.9984e-05, "loss": 199.3237, "step": 4000 }, { "epoch": 0.04, "learning_rate": 4.9982000000000004e-05, "loss": 222.8082, "step": 4500 }, { "epoch": 0.04, "learning_rate": 4.9980000000000006e-05, "loss": 222.447, "step": 5000 }, { "epoch": 0.04, "learning_rate": 4.9978e-05, "loss": 213.5942, "step": 5500 }, { "epoch": 0.05, "learning_rate": 4.9976000000000004e-05, "loss": 187.0412, "step": 6000 }, { "epoch": 0.05, "learning_rate": 4.9974000000000006e-05, "loss": 162.1389, "step": 6500 }, { "epoch": 0.06, "learning_rate": 4.9972e-05, "loss": 148.2483, "step": 7000 }, { "epoch": 0.06, "learning_rate": 4.997e-05, "loss": 132.2994, "step": 7500 }, { "epoch": 0.06, "learning_rate": 4.996800000000001e-05, "loss": 117.2319, "step": 8000 }, { "epoch": 0.07, "learning_rate": 4.9966e-05, "loss": 111.3942, "step": 8500 }, { "epoch": 0.07, "learning_rate": 4.9964e-05, "loss": 95.5273, "step": 9000 }, { "epoch": 0.08, "learning_rate": 4.9962e-05, "loss": 86.6571, "step": 9500 }, { "epoch": 0.08, "learning_rate": 4.996e-05, "loss": 79.3726, "step": 10000 }, { "epoch": 0.08, "learning_rate": 4.9958000000000005e-05, "loss": 68.6612, "step": 10500 }, { "epoch": 0.09, "learning_rate": 4.9956e-05, "loss": 70.6297, "step": 11000 }, { "epoch": 0.09, "learning_rate": 4.9954e-05, "loss": 61.6434, "step": 11500 }, { "epoch": 0.1, "learning_rate": 4.9952000000000006e-05, "loss": 53.4336, "step": 12000 }, { "epoch": 0.1, "learning_rate": 4.995e-05, "loss": 48.383, "step": 12500 }, { "epoch": 0.1, "learning_rate": 4.9948000000000004e-05, "loss": 45.511, "step": 13000 }, { "epoch": 0.11, "learning_rate": 4.9946000000000006e-05, "loss": 41.6431, "step": 13500 }, { "epoch": 0.11, "learning_rate": 4.9944e-05, "loss": 38.4439, "step": 14000 }, { "epoch": 0.12, "learning_rate": 4.9942e-05, "loss": 34.6625, "step": 14500 }, { "epoch": 0.12, "learning_rate": 4.9940000000000006e-05, "loss": 29.331, "step": 15000 }, { "epoch": 0.12, "learning_rate": 4.9938e-05, "loss": 28.1292, "step": 15500 }, { "epoch": 0.13, "learning_rate": 4.9936000000000004e-05, "loss": 24.8091, "step": 16000 }, { "epoch": 0.13, "learning_rate": 4.9934e-05, "loss": 22.2228, "step": 16500 }, { "epoch": 0.14, "learning_rate": 4.9932e-05, "loss": 19.3743, "step": 17000 }, { "epoch": 0.14, "learning_rate": 4.9930000000000005e-05, "loss": 16.7677, "step": 17500 }, { "epoch": 0.14, "learning_rate": 4.9928e-05, "loss": 15.3355, "step": 18000 }, { "epoch": 0.15, "learning_rate": 4.9926e-05, "loss": 13.5838, "step": 18500 }, { "epoch": 0.15, "learning_rate": 4.9924000000000005e-05, "loss": 12.3605, "step": 19000 }, { "epoch": 0.16, "learning_rate": 4.9922e-05, "loss": 11.8171, "step": 19500 }, { "epoch": 0.16, "learning_rate": 4.992e-05, "loss": 11.0471, "step": 20000 }, { "epoch": 0.16, "learning_rate": 4.9918000000000006e-05, "loss": 10.5247, "step": 20500 }, { "epoch": 0.17, "learning_rate": 4.9916e-05, "loss": 10.0859, "step": 21000 }, { "epoch": 0.17, "learning_rate": 4.9914e-05, "loss": 9.4425, "step": 21500 }, { "epoch": 0.18, "learning_rate": 4.9912000000000006e-05, "loss": 9.1408, "step": 22000 }, { "epoch": 0.18, "learning_rate": 4.991e-05, "loss": 8.9474, "step": 22500 }, { "epoch": 0.18, "learning_rate": 4.9908000000000004e-05, "loss": 8.8876, "step": 23000 }, { "epoch": 0.19, "learning_rate": 4.9906e-05, "loss": 8.8865, "step": 23500 }, { "epoch": 0.19, "learning_rate": 4.9904e-05, "loss": 8.8362, "step": 24000 }, { "epoch": 0.2, "learning_rate": 4.9902000000000004e-05, "loss": 8.8566, "step": 24500 }, { "epoch": 0.2, "learning_rate": 4.99e-05, "loss": 8.8399, "step": 25000 }, { "epoch": 0.2, "learning_rate": 4.9898e-05, "loss": 8.8445, "step": 25500 }, { "epoch": 0.21, "learning_rate": 4.9896000000000005e-05, "loss": 8.8478, "step": 26000 }, { "epoch": 0.21, "learning_rate": 4.9894e-05, "loss": 8.8548, "step": 26500 }, { "epoch": 0.22, "learning_rate": 4.9892e-05, "loss": 8.8671, "step": 27000 }, { "epoch": 0.22, "learning_rate": 4.9890000000000005e-05, "loss": 8.855, "step": 27500 }, { "epoch": 0.22, "learning_rate": 4.9888e-05, "loss": 8.8546, "step": 28000 }, { "epoch": 0.23, "learning_rate": 4.9886e-05, "loss": 8.8518, "step": 28500 }, { "epoch": 0.23, "learning_rate": 4.9884000000000006e-05, "loss": 8.857, "step": 29000 }, { "epoch": 0.24, "learning_rate": 4.9882e-05, "loss": 8.8369, "step": 29500 }, { "epoch": 0.24, "learning_rate": 4.9880000000000004e-05, "loss": 8.8456, "step": 30000 }, { "epoch": 0.24, "learning_rate": 4.9878e-05, "loss": 8.8698, "step": 30500 }, { "epoch": 0.25, "learning_rate": 4.9876e-05, "loss": 8.8582, "step": 31000 }, { "epoch": 0.25, "learning_rate": 4.9874000000000004e-05, "loss": 8.8224, "step": 31500 }, { "epoch": 0.26, "learning_rate": 4.9872e-05, "loss": 8.8366, "step": 32000 }, { "epoch": 0.26, "learning_rate": 4.987e-05, "loss": 8.8184, "step": 32500 }, { "epoch": 0.26, "learning_rate": 4.9868000000000004e-05, "loss": 8.8429, "step": 33000 }, { "epoch": 0.27, "learning_rate": 4.9866e-05, "loss": 8.8179, "step": 33500 }, { "epoch": 0.27, "learning_rate": 4.9864e-05, "loss": 8.8337, "step": 34000 }, { "epoch": 0.28, "learning_rate": 4.9862000000000005e-05, "loss": 8.8342, "step": 34500 }, { "epoch": 0.28, "learning_rate": 4.986e-05, "loss": 8.8205, "step": 35000 }, { "epoch": 0.28, "learning_rate": 4.9858e-05, "loss": 8.8534, "step": 35500 }, { "epoch": 0.29, "learning_rate": 4.9856000000000005e-05, "loss": 8.8286, "step": 36000 }, { "epoch": 0.29, "learning_rate": 4.9854e-05, "loss": 8.8207, "step": 36500 }, { "epoch": 0.3, "learning_rate": 4.9852e-05, "loss": 8.834, "step": 37000 }, { "epoch": 0.3, "learning_rate": 4.9850000000000006e-05, "loss": 8.8154, "step": 37500 }, { "epoch": 0.3, "learning_rate": 4.9848e-05, "loss": 8.8426, "step": 38000 }, { "epoch": 0.31, "learning_rate": 4.9846000000000004e-05, "loss": 8.7958, "step": 38500 }, { "epoch": 0.31, "learning_rate": 4.9844e-05, "loss": 8.8004, "step": 39000 }, { "epoch": 0.32, "learning_rate": 4.9842e-05, "loss": 8.8354, "step": 39500 }, { "epoch": 0.32, "learning_rate": 4.9840000000000004e-05, "loss": 8.8021, "step": 40000 }, { "epoch": 0.32, "learning_rate": 4.9838e-05, "loss": 8.816, "step": 40500 }, { "epoch": 0.33, "learning_rate": 4.9836e-05, "loss": 8.8235, "step": 41000 }, { "epoch": 0.33, "learning_rate": 4.9834000000000004e-05, "loss": 8.8131, "step": 41500 }, { "epoch": 0.34, "learning_rate": 4.9832e-05, "loss": 8.8051, "step": 42000 }, { "epoch": 0.34, "learning_rate": 4.983e-05, "loss": 8.8245, "step": 42500 }, { "epoch": 0.34, "learning_rate": 4.9828000000000005e-05, "loss": 8.7944, "step": 43000 }, { "epoch": 0.35, "learning_rate": 4.9826e-05, "loss": 8.799, "step": 43500 }, { "epoch": 0.35, "learning_rate": 4.9824e-05, "loss": 8.788, "step": 44000 }, { "epoch": 0.36, "learning_rate": 4.9822000000000005e-05, "loss": 8.8212, "step": 44500 }, { "epoch": 0.36, "learning_rate": 4.982e-05, "loss": 8.7853, "step": 45000 }, { "epoch": 0.36, "learning_rate": 4.9818e-05, "loss": 8.7904, "step": 45500 }, { "epoch": 0.37, "learning_rate": 4.9816e-05, "loss": 8.77, "step": 46000 }, { "epoch": 0.37, "learning_rate": 4.981400000000001e-05, "loss": 8.7851, "step": 46500 }, { "epoch": 0.38, "learning_rate": 4.9812000000000004e-05, "loss": 8.8007, "step": 47000 }, { "epoch": 0.38, "learning_rate": 4.981e-05, "loss": 8.8027, "step": 47500 }, { "epoch": 0.38, "learning_rate": 4.9808e-05, "loss": 8.7927, "step": 48000 }, { "epoch": 0.39, "learning_rate": 4.9806000000000004e-05, "loss": 8.7844, "step": 48500 }, { "epoch": 0.39, "learning_rate": 4.9804e-05, "loss": 8.8055, "step": 49000 }, { "epoch": 0.4, "learning_rate": 4.9802e-05, "loss": 8.8258, "step": 49500 }, { "epoch": 0.4, "learning_rate": 4.9800000000000004e-05, "loss": 8.7737, "step": 50000 }, { "epoch": 0.4, "learning_rate": 4.9798e-05, "loss": 8.7962, "step": 50500 }, { "epoch": 0.41, "learning_rate": 4.9796e-05, "loss": 8.7885, "step": 51000 }, { "epoch": 0.41, "learning_rate": 4.9794000000000005e-05, "loss": 8.7867, "step": 51500 }, { "epoch": 0.42, "learning_rate": 4.9792e-05, "loss": 8.7893, "step": 52000 }, { "epoch": 0.42, "learning_rate": 4.979e-05, "loss": 8.7877, "step": 52500 }, { "epoch": 0.42, "learning_rate": 4.9788e-05, "loss": 8.7684, "step": 53000 }, { "epoch": 0.43, "learning_rate": 4.978600000000001e-05, "loss": 8.7848, "step": 53500 }, { "epoch": 0.43, "learning_rate": 4.9784e-05, "loss": 8.8075, "step": 54000 }, { "epoch": 0.44, "learning_rate": 4.9782e-05, "loss": 8.778, "step": 54500 }, { "epoch": 0.44, "learning_rate": 4.978e-05, "loss": 8.7759, "step": 55000 }, { "epoch": 0.44, "learning_rate": 4.9778000000000004e-05, "loss": 8.7717, "step": 55500 }, { "epoch": 0.45, "learning_rate": 4.9776e-05, "loss": 8.8094, "step": 56000 }, { "epoch": 0.45, "learning_rate": 4.9774e-05, "loss": 8.789, "step": 56500 }, { "epoch": 0.46, "learning_rate": 4.9772000000000004e-05, "loss": 8.8018, "step": 57000 }, { "epoch": 0.46, "learning_rate": 4.977e-05, "loss": 8.7735, "step": 57500 }, { "epoch": 0.46, "learning_rate": 4.9768e-05, "loss": 8.7683, "step": 58000 }, { "epoch": 0.47, "learning_rate": 4.9766000000000004e-05, "loss": 8.7793, "step": 58500 }, { "epoch": 0.47, "learning_rate": 4.976400000000001e-05, "loss": 8.783, "step": 59000 }, { "epoch": 0.48, "learning_rate": 4.9762e-05, "loss": 8.7766, "step": 59500 }, { "epoch": 0.48, "learning_rate": 4.976e-05, "loss": 8.8041, "step": 60000 }, { "epoch": 0.48, "learning_rate": 4.975800000000001e-05, "loss": 8.7769, "step": 60500 }, { "epoch": 0.49, "learning_rate": 4.9756e-05, "loss": 8.7819, "step": 61000 }, { "epoch": 0.49, "learning_rate": 4.9754e-05, "loss": 8.7652, "step": 61500 }, { "epoch": 0.5, "learning_rate": 4.975200000000001e-05, "loss": 8.7845, "step": 62000 }, { "epoch": 0.5, "learning_rate": 4.975e-05, "loss": 8.8006, "step": 62500 }, { "epoch": 0.5, "learning_rate": 4.9748e-05, "loss": 8.781, "step": 63000 }, { "epoch": 0.51, "learning_rate": 4.9746e-05, "loss": 8.7844, "step": 63500 }, { "epoch": 0.51, "learning_rate": 4.9744000000000003e-05, "loss": 8.7727, "step": 64000 }, { "epoch": 0.52, "learning_rate": 4.9742e-05, "loss": 8.8031, "step": 64500 }, { "epoch": 0.52, "learning_rate": 4.974e-05, "loss": 8.7657, "step": 65000 }, { "epoch": 0.52, "learning_rate": 4.9738000000000004e-05, "loss": 8.774, "step": 65500 }, { "epoch": 0.53, "learning_rate": 4.9736000000000006e-05, "loss": 8.767, "step": 66000 }, { "epoch": 0.53, "learning_rate": 4.9734e-05, "loss": 8.7919, "step": 66500 }, { "epoch": 0.54, "learning_rate": 4.9732e-05, "loss": 8.7646, "step": 67000 }, { "epoch": 0.54, "learning_rate": 4.973000000000001e-05, "loss": 8.787, "step": 67500 }, { "epoch": 0.54, "learning_rate": 4.9728e-05, "loss": 8.795, "step": 68000 }, { "epoch": 0.55, "learning_rate": 4.9726e-05, "loss": 8.7709, "step": 68500 }, { "epoch": 0.55, "learning_rate": 4.972400000000001e-05, "loss": 8.7623, "step": 69000 }, { "epoch": 0.56, "learning_rate": 4.9722e-05, "loss": 8.7827, "step": 69500 }, { "epoch": 0.56, "learning_rate": 4.972e-05, "loss": 8.7699, "step": 70000 }, { "epoch": 0.56, "learning_rate": 4.9718e-05, "loss": 8.75, "step": 70500 }, { "epoch": 0.57, "learning_rate": 4.9716e-05, "loss": 8.7845, "step": 71000 }, { "epoch": 0.57, "learning_rate": 4.9714000000000005e-05, "loss": 8.7727, "step": 71500 }, { "epoch": 0.58, "learning_rate": 4.9712e-05, "loss": 8.7799, "step": 72000 }, { "epoch": 0.58, "learning_rate": 4.9710000000000003e-05, "loss": 8.7707, "step": 72500 }, { "epoch": 0.58, "learning_rate": 4.9708000000000006e-05, "loss": 8.7511, "step": 73000 }, { "epoch": 0.59, "learning_rate": 4.9706e-05, "loss": 8.7638, "step": 73500 }, { "epoch": 0.59, "learning_rate": 4.9704000000000004e-05, "loss": 8.7653, "step": 74000 }, { "epoch": 0.6, "learning_rate": 4.9702000000000006e-05, "loss": 8.7515, "step": 74500 }, { "epoch": 0.6, "learning_rate": 4.97e-05, "loss": 8.7547, "step": 75000 }, { "epoch": 0.6, "learning_rate": 4.9698e-05, "loss": 8.7577, "step": 75500 }, { "epoch": 0.61, "learning_rate": 4.969600000000001e-05, "loss": 8.7659, "step": 76000 }, { "epoch": 0.61, "learning_rate": 4.9694e-05, "loss": 8.788, "step": 76500 }, { "epoch": 0.62, "learning_rate": 4.9692e-05, "loss": 8.7567, "step": 77000 }, { "epoch": 0.62, "learning_rate": 4.969e-05, "loss": 8.748, "step": 77500 }, { "epoch": 0.62, "learning_rate": 4.9688e-05, "loss": 8.765, "step": 78000 }, { "epoch": 0.63, "learning_rate": 4.9686000000000005e-05, "loss": 8.7625, "step": 78500 }, { "epoch": 0.63, "learning_rate": 4.9684e-05, "loss": 8.7567, "step": 79000 }, { "epoch": 0.64, "learning_rate": 4.9682e-05, "loss": 8.7806, "step": 79500 }, { "epoch": 0.64, "learning_rate": 4.9680000000000005e-05, "loss": 8.77, "step": 80000 }, { "epoch": 0.64, "learning_rate": 4.9678e-05, "loss": 8.7742, "step": 80500 }, { "epoch": 0.65, "learning_rate": 4.9676000000000003e-05, "loss": 8.7713, "step": 81000 }, { "epoch": 0.65, "learning_rate": 4.9674000000000006e-05, "loss": 8.7685, "step": 81500 }, { "epoch": 0.66, "learning_rate": 4.9672e-05, "loss": 8.7787, "step": 82000 }, { "epoch": 0.66, "learning_rate": 4.967e-05, "loss": 8.7524, "step": 82500 }, { "epoch": 0.66, "learning_rate": 4.9668000000000006e-05, "loss": 8.7598, "step": 83000 }, { "epoch": 0.67, "learning_rate": 4.9666e-05, "loss": 8.7544, "step": 83500 }, { "epoch": 0.67, "learning_rate": 4.9664000000000004e-05, "loss": 8.7418, "step": 84000 }, { "epoch": 0.68, "learning_rate": 4.9662e-05, "loss": 8.7688, "step": 84500 }, { "epoch": 0.68, "learning_rate": 4.966e-05, "loss": 8.7595, "step": 85000 }, { "epoch": 0.68, "learning_rate": 4.9658000000000005e-05, "loss": 8.7711, "step": 85500 }, { "epoch": 0.69, "learning_rate": 4.9656e-05, "loss": 8.783, "step": 86000 }, { "epoch": 0.69, "learning_rate": 4.9654e-05, "loss": 8.7435, "step": 86500 }, { "epoch": 0.7, "learning_rate": 4.9652000000000005e-05, "loss": 8.7599, "step": 87000 }, { "epoch": 0.7, "learning_rate": 4.965e-05, "loss": 8.7768, "step": 87500 }, { "epoch": 0.7, "learning_rate": 4.9648e-05, "loss": 8.7558, "step": 88000 }, { "epoch": 0.71, "learning_rate": 4.9646000000000005e-05, "loss": 8.7475, "step": 88500 }, { "epoch": 0.71, "learning_rate": 4.9644e-05, "loss": 8.7544, "step": 89000 }, { "epoch": 0.72, "learning_rate": 4.9642e-05, "loss": 8.7449, "step": 89500 }, { "epoch": 0.72, "learning_rate": 4.9640000000000006e-05, "loss": 8.777, "step": 90000 }, { "epoch": 0.72, "learning_rate": 4.9638e-05, "loss": 8.7771, "step": 90500 }, { "epoch": 0.73, "learning_rate": 4.9636000000000004e-05, "loss": 8.7372, "step": 91000 }, { "epoch": 0.73, "learning_rate": 4.9634e-05, "loss": 8.7578, "step": 91500 }, { "epoch": 0.74, "learning_rate": 4.9632e-05, "loss": 8.7693, "step": 92000 }, { "epoch": 0.74, "learning_rate": 4.9630000000000004e-05, "loss": 8.767, "step": 92500 }, { "epoch": 0.74, "learning_rate": 4.9628e-05, "loss": 8.7367, "step": 93000 }, { "epoch": 0.75, "learning_rate": 4.9626e-05, "loss": 8.7468, "step": 93500 }, { "epoch": 0.75, "learning_rate": 4.9624000000000005e-05, "loss": 8.7537, "step": 94000 }, { "epoch": 0.76, "learning_rate": 4.9622e-05, "loss": 8.7715, "step": 94500 }, { "epoch": 0.76, "learning_rate": 4.962e-05, "loss": 8.7653, "step": 95000 }, { "epoch": 0.76, "learning_rate": 4.9618000000000005e-05, "loss": 8.7405, "step": 95500 }, { "epoch": 0.77, "learning_rate": 4.9616e-05, "loss": 8.7496, "step": 96000 }, { "epoch": 0.77, "learning_rate": 4.9614e-05, "loss": 8.7796, "step": 96500 }, { "epoch": 0.78, "learning_rate": 4.9612000000000005e-05, "loss": 8.7674, "step": 97000 }, { "epoch": 0.78, "learning_rate": 4.961e-05, "loss": 8.7547, "step": 97500 }, { "epoch": 0.78, "learning_rate": 4.9608000000000003e-05, "loss": 8.7577, "step": 98000 }, { "epoch": 0.79, "learning_rate": 4.9606000000000006e-05, "loss": 8.7654, "step": 98500 }, { "epoch": 0.79, "learning_rate": 4.9604e-05, "loss": 8.7672, "step": 99000 }, { "epoch": 0.8, "learning_rate": 4.9602000000000004e-05, "loss": 8.7523, "step": 99500 }, { "epoch": 0.8, "learning_rate": 4.96e-05, "loss": 8.7607, "step": 100000 }, { "epoch": 0.8, "learning_rate": 4.9598e-05, "loss": 8.7597, "step": 100500 }, { "epoch": 0.81, "learning_rate": 4.9596000000000004e-05, "loss": 8.7691, "step": 101000 }, { "epoch": 0.81, "learning_rate": 4.9594e-05, "loss": 8.7523, "step": 101500 }, { "epoch": 0.82, "learning_rate": 4.9592e-05, "loss": 8.7514, "step": 102000 }, { "epoch": 0.82, "learning_rate": 4.9590000000000005e-05, "loss": 8.7593, "step": 102500 }, { "epoch": 0.82, "learning_rate": 4.9588e-05, "loss": 8.7539, "step": 103000 }, { "epoch": 0.83, "learning_rate": 4.9586e-05, "loss": 8.7361, "step": 103500 }, { "epoch": 0.83, "learning_rate": 4.9584000000000005e-05, "loss": 8.7512, "step": 104000 }, { "epoch": 0.84, "learning_rate": 4.9582e-05, "loss": 8.7695, "step": 104500 }, { "epoch": 0.84, "learning_rate": 4.958e-05, "loss": 8.7659, "step": 105000 }, { "epoch": 0.84, "learning_rate": 4.9578000000000005e-05, "loss": 8.7817, "step": 105500 }, { "epoch": 0.85, "learning_rate": 4.9576e-05, "loss": 8.7696, "step": 106000 }, { "epoch": 0.85, "learning_rate": 4.9574000000000003e-05, "loss": 8.7561, "step": 106500 }, { "epoch": 0.86, "learning_rate": 4.9572e-05, "loss": 8.7494, "step": 107000 }, { "epoch": 0.86, "learning_rate": 4.957e-05, "loss": 8.7502, "step": 107500 }, { "epoch": 0.86, "learning_rate": 4.9568000000000004e-05, "loss": 8.7394, "step": 108000 }, { "epoch": 0.87, "learning_rate": 4.9566e-05, "loss": 8.7482, "step": 108500 }, { "epoch": 0.87, "learning_rate": 4.9564e-05, "loss": 8.7649, "step": 109000 }, { "epoch": 0.88, "learning_rate": 4.9562000000000004e-05, "loss": 8.7671, "step": 109500 }, { "epoch": 0.88, "learning_rate": 4.956e-05, "loss": 8.7667, "step": 110000 }, { "epoch": 0.88, "learning_rate": 4.9558e-05, "loss": 8.7683, "step": 110500 }, { "epoch": 0.89, "learning_rate": 4.9556000000000005e-05, "loss": 8.7653, "step": 111000 }, { "epoch": 0.89, "learning_rate": 4.9554e-05, "loss": 8.7646, "step": 111500 }, { "epoch": 0.9, "learning_rate": 4.9552e-05, "loss": 8.7649, "step": 112000 }, { "epoch": 0.9, "learning_rate": 4.9550000000000005e-05, "loss": 8.75, "step": 112500 }, { "epoch": 0.9, "learning_rate": 4.9548e-05, "loss": 8.7591, "step": 113000 }, { "epoch": 0.91, "learning_rate": 4.9546e-05, "loss": 8.772, "step": 113500 }, { "epoch": 0.91, "learning_rate": 4.9544e-05, "loss": 8.7408, "step": 114000 }, { "epoch": 0.92, "learning_rate": 4.954200000000001e-05, "loss": 8.7661, "step": 114500 }, { "epoch": 0.92, "learning_rate": 4.9540000000000003e-05, "loss": 8.7426, "step": 115000 }, { "epoch": 0.92, "learning_rate": 4.9538e-05, "loss": 8.7735, "step": 115500 }, { "epoch": 0.93, "learning_rate": 4.9536e-05, "loss": 8.7562, "step": 116000 }, { "epoch": 0.93, "learning_rate": 4.9534000000000004e-05, "loss": 8.7609, "step": 116500 }, { "epoch": 0.94, "learning_rate": 4.9532e-05, "loss": 8.7585, "step": 117000 }, { "epoch": 0.94, "learning_rate": 4.953e-05, "loss": 8.7774, "step": 117500 }, { "epoch": 0.94, "learning_rate": 4.9528000000000004e-05, "loss": 8.7782, "step": 118000 }, { "epoch": 0.95, "learning_rate": 4.9526e-05, "loss": 8.7614, "step": 118500 }, { "epoch": 0.95, "learning_rate": 4.9524e-05, "loss": 8.7668, "step": 119000 }, { "epoch": 0.96, "learning_rate": 4.9522000000000005e-05, "loss": 8.7715, "step": 119500 }, { "epoch": 0.96, "learning_rate": 4.952e-05, "loss": 8.737, "step": 120000 }, { "epoch": 0.96, "learning_rate": 4.9518e-05, "loss": 8.7512, "step": 120500 }, { "epoch": 0.97, "learning_rate": 4.9516e-05, "loss": 8.7661, "step": 121000 }, { "epoch": 0.97, "learning_rate": 4.951400000000001e-05, "loss": 8.7657, "step": 121500 }, { "epoch": 0.98, "learning_rate": 4.9512e-05, "loss": 8.7438, "step": 122000 }, { "epoch": 0.98, "learning_rate": 4.951e-05, "loss": 8.7648, "step": 122500 }, { "epoch": 0.98, "learning_rate": 4.9508e-05, "loss": 8.7616, "step": 123000 }, { "epoch": 0.99, "learning_rate": 4.9506000000000003e-05, "loss": 8.7684, "step": 123500 }, { "epoch": 0.99, "learning_rate": 4.9504e-05, "loss": 8.7604, "step": 124000 }, { "epoch": 1.0, "learning_rate": 4.9502e-05, "loss": 8.7628, "step": 124500 }, { "epoch": 1.0, "learning_rate": 4.9500000000000004e-05, "loss": 8.7438, "step": 125000 }, { "epoch": 1.0, "learning_rate": 4.9498e-05, "loss": 8.7738, "step": 125500 }, { "epoch": 1.01, "learning_rate": 4.9496e-05, "loss": 8.7491, "step": 126000 }, { "epoch": 1.01, "learning_rate": 4.9494000000000004e-05, "loss": 8.7613, "step": 126500 }, { "epoch": 1.02, "learning_rate": 4.9492000000000007e-05, "loss": 8.7272, "step": 127000 }, { "epoch": 1.02, "learning_rate": 4.949e-05, "loss": 8.7693, "step": 127500 }, { "epoch": 1.02, "learning_rate": 4.9488e-05, "loss": 8.7588, "step": 128000 }, { "epoch": 1.03, "learning_rate": 4.948600000000001e-05, "loss": 8.7656, "step": 128500 }, { "epoch": 1.03, "learning_rate": 4.9484e-05, "loss": 8.773, "step": 129000 }, { "epoch": 1.04, "learning_rate": 4.9482e-05, "loss": 8.7498, "step": 129500 }, { "epoch": 1.04, "learning_rate": 4.948000000000001e-05, "loss": 8.7595, "step": 130000 }, { "epoch": 1.04, "learning_rate": 4.9478e-05, "loss": 8.7414, "step": 130500 }, { "epoch": 1.05, "learning_rate": 4.9476e-05, "loss": 8.7601, "step": 131000 }, { "epoch": 1.05, "learning_rate": 4.9474e-05, "loss": 8.7601, "step": 131500 }, { "epoch": 1.06, "learning_rate": 4.9472e-05, "loss": 8.7405, "step": 132000 }, { "epoch": 1.06, "learning_rate": 4.947e-05, "loss": 8.7968, "step": 132500 }, { "epoch": 1.06, "learning_rate": 4.9468e-05, "loss": 8.7726, "step": 133000 }, { "epoch": 1.07, "learning_rate": 4.9466000000000004e-05, "loss": 8.7743, "step": 133500 }, { "epoch": 1.07, "learning_rate": 4.9464000000000006e-05, "loss": 8.7779, "step": 134000 }, { "epoch": 1.08, "learning_rate": 4.9462e-05, "loss": 8.774, "step": 134500 }, { "epoch": 1.08, "learning_rate": 4.946e-05, "loss": 8.7412, "step": 135000 }, { "epoch": 1.08, "learning_rate": 4.9458000000000007e-05, "loss": 8.752, "step": 135500 }, { "epoch": 1.09, "learning_rate": 4.9456e-05, "loss": 8.7491, "step": 136000 }, { "epoch": 1.09, "learning_rate": 4.9454e-05, "loss": 8.7658, "step": 136500 }, { "epoch": 1.1, "learning_rate": 4.945200000000001e-05, "loss": 8.7446, "step": 137000 }, { "epoch": 1.1, "learning_rate": 4.945e-05, "loss": 8.7525, "step": 137500 }, { "epoch": 1.1, "learning_rate": 4.9448e-05, "loss": 8.7691, "step": 138000 }, { "epoch": 1.11, "learning_rate": 4.9446e-05, "loss": 8.7592, "step": 138500 }, { "epoch": 1.11, "learning_rate": 4.9444e-05, "loss": 8.7387, "step": 139000 }, { "epoch": 1.12, "learning_rate": 4.9442000000000005e-05, "loss": 8.771, "step": 139500 }, { "epoch": 1.12, "learning_rate": 4.944e-05, "loss": 8.7648, "step": 140000 }, { "epoch": 1.12, "learning_rate": 4.9438e-05, "loss": 8.7485, "step": 140500 }, { "epoch": 1.13, "learning_rate": 4.9436000000000006e-05, "loss": 8.7537, "step": 141000 }, { "epoch": 1.13, "learning_rate": 4.9434e-05, "loss": 8.7626, "step": 141500 }, { "epoch": 1.14, "learning_rate": 4.9432000000000004e-05, "loss": 8.7612, "step": 142000 }, { "epoch": 1.14, "learning_rate": 4.9430000000000006e-05, "loss": 8.7598, "step": 142500 }, { "epoch": 1.14, "learning_rate": 4.9428e-05, "loss": 8.7497, "step": 143000 }, { "epoch": 1.15, "learning_rate": 4.9426e-05, "loss": 8.7836, "step": 143500 }, { "epoch": 1.15, "learning_rate": 4.9424000000000007e-05, "loss": 8.7552, "step": 144000 }, { "epoch": 1.16, "learning_rate": 4.9422e-05, "loss": 8.761, "step": 144500 }, { "epoch": 1.16, "learning_rate": 4.942e-05, "loss": 8.748, "step": 145000 }, { "epoch": 1.16, "learning_rate": 4.9418e-05, "loss": 8.7587, "step": 145500 }, { "epoch": 1.17, "learning_rate": 4.9416e-05, "loss": 8.7661, "step": 146000 }, { "epoch": 1.17, "learning_rate": 4.9414000000000005e-05, "loss": 8.7722, "step": 146500 }, { "epoch": 1.18, "learning_rate": 4.9412e-05, "loss": 8.7828, "step": 147000 }, { "epoch": 1.18, "learning_rate": 4.941e-05, "loss": 8.741, "step": 147500 }, { "epoch": 1.18, "learning_rate": 4.9408000000000005e-05, "loss": 8.7601, "step": 148000 }, { "epoch": 1.19, "learning_rate": 4.9406e-05, "loss": 8.7608, "step": 148500 }, { "epoch": 1.19, "learning_rate": 4.9404e-05, "loss": 8.7538, "step": 149000 }, { "epoch": 1.2, "learning_rate": 4.9402000000000006e-05, "loss": 8.7575, "step": 149500 }, { "epoch": 1.2, "learning_rate": 4.94e-05, "loss": 8.7456, "step": 150000 }, { "epoch": 1.2, "learning_rate": 4.9398e-05, "loss": 8.7378, "step": 150500 }, { "epoch": 1.21, "learning_rate": 4.9396000000000006e-05, "loss": 8.7524, "step": 151000 }, { "epoch": 1.21, "learning_rate": 4.9394e-05, "loss": 8.7755, "step": 151500 }, { "epoch": 1.22, "learning_rate": 4.9392000000000004e-05, "loss": 8.7541, "step": 152000 }, { "epoch": 1.22, "learning_rate": 4.939e-05, "loss": 8.7717, "step": 152500 }, { "epoch": 1.22, "learning_rate": 4.9388e-05, "loss": 8.7632, "step": 153000 }, { "epoch": 1.23, "learning_rate": 4.9386000000000005e-05, "loss": 8.7365, "step": 153500 }, { "epoch": 1.23, "learning_rate": 4.9384e-05, "loss": 8.7359, "step": 154000 }, { "epoch": 1.24, "learning_rate": 4.9382e-05, "loss": 8.7563, "step": 154500 }, { "epoch": 1.24, "learning_rate": 4.9380000000000005e-05, "loss": 8.7517, "step": 155000 }, { "epoch": 1.24, "learning_rate": 4.9378e-05, "loss": 8.7495, "step": 155500 }, { "epoch": 1.25, "learning_rate": 4.9376e-05, "loss": 8.7531, "step": 156000 }, { "epoch": 1.25, "learning_rate": 4.9374000000000005e-05, "loss": 8.7379, "step": 156500 }, { "epoch": 1.26, "learning_rate": 4.9372e-05, "loss": 8.7617, "step": 157000 }, { "epoch": 1.26, "learning_rate": 4.937e-05, "loss": 8.7436, "step": 157500 }, { "epoch": 1.26, "learning_rate": 4.9368000000000006e-05, "loss": 8.7596, "step": 158000 }, { "epoch": 1.27, "learning_rate": 4.9366e-05, "loss": 8.7437, "step": 158500 }, { "epoch": 1.27, "learning_rate": 4.9364000000000004e-05, "loss": 8.7552, "step": 159000 }, { "epoch": 1.28, "learning_rate": 4.9362e-05, "loss": 8.7396, "step": 159500 }, { "epoch": 1.28, "learning_rate": 4.936e-05, "loss": 8.7349, "step": 160000 }, { "epoch": 1.28, "learning_rate": 4.9358000000000004e-05, "loss": 8.761, "step": 160500 }, { "epoch": 1.29, "learning_rate": 4.9356e-05, "loss": 8.7498, "step": 161000 }, { "epoch": 1.29, "learning_rate": 4.9354e-05, "loss": 8.745, "step": 161500 }, { "epoch": 1.3, "learning_rate": 4.9352000000000005e-05, "loss": 8.753, "step": 162000 }, { "epoch": 1.3, "learning_rate": 4.935e-05, "loss": 8.7479, "step": 162500 }, { "epoch": 1.3, "learning_rate": 4.9348e-05, "loss": 8.7598, "step": 163000 }, { "epoch": 1.31, "learning_rate": 4.9346000000000005e-05, "loss": 8.7609, "step": 163500 }, { "epoch": 1.31, "learning_rate": 4.9344e-05, "loss": 8.7376, "step": 164000 }, { "epoch": 1.32, "learning_rate": 4.9342e-05, "loss": 8.7572, "step": 164500 }, { "epoch": 1.32, "learning_rate": 4.9340000000000005e-05, "loss": 8.7565, "step": 165000 }, { "epoch": 1.32, "learning_rate": 4.9338e-05, "loss": 8.7472, "step": 165500 }, { "epoch": 1.33, "learning_rate": 4.9336e-05, "loss": 8.7641, "step": 166000 }, { "epoch": 1.33, "learning_rate": 4.9334000000000006e-05, "loss": 8.7628, "step": 166500 }, { "epoch": 1.34, "learning_rate": 4.9332e-05, "loss": 8.7689, "step": 167000 }, { "epoch": 1.34, "learning_rate": 4.9330000000000004e-05, "loss": 8.7531, "step": 167500 }, { "epoch": 1.34, "learning_rate": 4.9328e-05, "loss": 8.7609, "step": 168000 }, { "epoch": 1.35, "learning_rate": 4.9326e-05, "loss": 8.7289, "step": 168500 }, { "epoch": 1.35, "learning_rate": 4.9324000000000004e-05, "loss": 8.7828, "step": 169000 }, { "epoch": 1.36, "learning_rate": 4.9322e-05, "loss": 8.7592, "step": 169500 }, { "epoch": 1.36, "learning_rate": 4.932e-05, "loss": 8.7391, "step": 170000 }, { "epoch": 1.36, "learning_rate": 4.9318000000000005e-05, "loss": 8.7456, "step": 170500 }, { "epoch": 1.37, "learning_rate": 4.9316e-05, "loss": 8.7431, "step": 171000 }, { "epoch": 1.37, "learning_rate": 4.9314e-05, "loss": 8.7543, "step": 171500 }, { "epoch": 1.38, "learning_rate": 4.9312000000000005e-05, "loss": 8.7546, "step": 172000 }, { "epoch": 1.38, "learning_rate": 4.931e-05, "loss": 8.772, "step": 172500 }, { "epoch": 1.38, "learning_rate": 4.9308e-05, "loss": 8.7523, "step": 173000 }, { "epoch": 1.39, "learning_rate": 4.9306000000000005e-05, "loss": 8.7567, "step": 173500 }, { "epoch": 1.39, "learning_rate": 4.9304e-05, "loss": 8.7623, "step": 174000 }, { "epoch": 1.4, "learning_rate": 4.9302e-05, "loss": 8.7561, "step": 174500 }, { "epoch": 1.4, "learning_rate": 4.93e-05, "loss": 8.7412, "step": 175000 }, { "epoch": 1.4, "learning_rate": 4.9298e-05, "loss": 8.7614, "step": 175500 }, { "epoch": 1.41, "learning_rate": 4.9296000000000004e-05, "loss": 8.7533, "step": 176000 }, { "epoch": 1.41, "learning_rate": 4.9294e-05, "loss": 8.7542, "step": 176500 }, { "epoch": 1.42, "learning_rate": 4.9292e-05, "loss": 8.7757, "step": 177000 }, { "epoch": 1.42, "learning_rate": 4.9290000000000004e-05, "loss": 8.759, "step": 177500 }, { "epoch": 1.42, "learning_rate": 4.9288e-05, "loss": 8.7739, "step": 178000 }, { "epoch": 1.43, "learning_rate": 4.9286e-05, "loss": 8.7569, "step": 178500 }, { "epoch": 1.43, "learning_rate": 4.9284000000000005e-05, "loss": 8.7398, "step": 179000 }, { "epoch": 1.44, "learning_rate": 4.9282e-05, "loss": 8.7399, "step": 179500 }, { "epoch": 1.44, "learning_rate": 4.928e-05, "loss": 8.7764, "step": 180000 }, { "epoch": 1.44, "learning_rate": 4.9278000000000005e-05, "loss": 8.7582, "step": 180500 }, { "epoch": 1.45, "learning_rate": 4.9276e-05, "loss": 8.7579, "step": 181000 }, { "epoch": 1.45, "learning_rate": 4.9274e-05, "loss": 8.7711, "step": 181500 }, { "epoch": 1.46, "learning_rate": 4.9272e-05, "loss": 8.7446, "step": 182000 }, { "epoch": 1.46, "learning_rate": 4.927000000000001e-05, "loss": 8.7687, "step": 182500 }, { "epoch": 1.46, "learning_rate": 4.9268e-05, "loss": 8.7687, "step": 183000 }, { "epoch": 1.47, "learning_rate": 4.9266e-05, "loss": 8.7764, "step": 183500 }, { "epoch": 1.47, "learning_rate": 4.9264e-05, "loss": 8.747, "step": 184000 }, { "epoch": 1.48, "learning_rate": 4.9262000000000004e-05, "loss": 8.751, "step": 184500 }, { "epoch": 1.48, "learning_rate": 4.926e-05, "loss": 8.75, "step": 185000 }, { "epoch": 1.48, "learning_rate": 4.9258e-05, "loss": 8.735, "step": 185500 }, { "epoch": 1.49, "learning_rate": 4.9256000000000004e-05, "loss": 8.7517, "step": 186000 }, { "epoch": 1.49, "learning_rate": 4.9254e-05, "loss": 8.7541, "step": 186500 }, { "epoch": 1.5, "learning_rate": 4.9252e-05, "loss": 8.7445, "step": 187000 }, { "epoch": 1.5, "learning_rate": 4.9250000000000004e-05, "loss": 8.7666, "step": 187500 }, { "epoch": 1.5, "learning_rate": 4.9248e-05, "loss": 8.7593, "step": 188000 }, { "epoch": 1.51, "learning_rate": 4.9246e-05, "loss": 8.7537, "step": 188500 }, { "epoch": 1.51, "learning_rate": 4.9244e-05, "loss": 8.7391, "step": 189000 }, { "epoch": 1.52, "learning_rate": 4.924200000000001e-05, "loss": 8.7571, "step": 189500 }, { "epoch": 1.52, "learning_rate": 4.924e-05, "loss": 8.7576, "step": 190000 }, { "epoch": 1.52, "learning_rate": 4.9238e-05, "loss": 8.7644, "step": 190500 }, { "epoch": 1.53, "learning_rate": 4.923600000000001e-05, "loss": 8.7424, "step": 191000 }, { "epoch": 1.53, "learning_rate": 4.9234e-05, "loss": 8.7493, "step": 191500 }, { "epoch": 1.54, "learning_rate": 4.9232e-05, "loss": 8.7665, "step": 192000 }, { "epoch": 1.54, "learning_rate": 4.923e-05, "loss": 8.7642, "step": 192500 }, { "epoch": 1.54, "learning_rate": 4.9228000000000004e-05, "loss": 8.757, "step": 193000 }, { "epoch": 1.55, "learning_rate": 4.9226e-05, "loss": 8.7479, "step": 193500 }, { "epoch": 1.55, "learning_rate": 4.9224e-05, "loss": 8.7492, "step": 194000 }, { "epoch": 1.56, "learning_rate": 4.9222000000000004e-05, "loss": 8.7674, "step": 194500 }, { "epoch": 1.56, "learning_rate": 4.9220000000000006e-05, "loss": 8.7369, "step": 195000 }, { "epoch": 1.56, "learning_rate": 4.9218e-05, "loss": 8.743, "step": 195500 }, { "epoch": 1.57, "learning_rate": 4.9216e-05, "loss": 8.773, "step": 196000 }, { "epoch": 1.57, "learning_rate": 4.921400000000001e-05, "loss": 8.7442, "step": 196500 }, { "epoch": 1.58, "learning_rate": 4.9212e-05, "loss": 8.7504, "step": 197000 }, { "epoch": 1.58, "learning_rate": 4.921e-05, "loss": 8.7592, "step": 197500 }, { "epoch": 1.58, "learning_rate": 4.920800000000001e-05, "loss": 8.7444, "step": 198000 }, { "epoch": 1.59, "learning_rate": 4.9206e-05, "loss": 8.769, "step": 198500 }, { "epoch": 1.59, "learning_rate": 4.9204e-05, "loss": 8.7435, "step": 199000 }, { "epoch": 1.6, "learning_rate": 4.9202e-05, "loss": 8.7496, "step": 199500 }, { "epoch": 1.6, "learning_rate": 4.92e-05, "loss": 8.7509, "step": 200000 }, { "epoch": 1.6, "learning_rate": 4.9198e-05, "loss": 8.7599, "step": 200500 }, { "epoch": 1.61, "learning_rate": 4.9196e-05, "loss": 8.7636, "step": 201000 }, { "epoch": 1.61, "learning_rate": 4.9194000000000004e-05, "loss": 8.7606, "step": 201500 }, { "epoch": 1.62, "learning_rate": 4.9192000000000006e-05, "loss": 8.7517, "step": 202000 }, { "epoch": 1.62, "learning_rate": 4.919e-05, "loss": 8.7603, "step": 202500 }, { "epoch": 1.62, "learning_rate": 4.9188000000000004e-05, "loss": 8.7777, "step": 203000 }, { "epoch": 1.63, "learning_rate": 4.9186000000000006e-05, "loss": 8.7498, "step": 203500 }, { "epoch": 1.63, "learning_rate": 4.9184e-05, "loss": 8.7501, "step": 204000 }, { "epoch": 1.64, "learning_rate": 4.9182e-05, "loss": 8.7547, "step": 204500 }, { "epoch": 1.64, "learning_rate": 4.918000000000001e-05, "loss": 8.7583, "step": 205000 }, { "epoch": 1.64, "learning_rate": 4.9178e-05, "loss": 8.744, "step": 205500 }, { "epoch": 1.65, "learning_rate": 4.9176e-05, "loss": 8.7548, "step": 206000 }, { "epoch": 1.65, "learning_rate": 4.9174e-05, "loss": 8.7311, "step": 206500 }, { "epoch": 1.66, "learning_rate": 4.9172e-05, "loss": 8.7578, "step": 207000 }, { "epoch": 1.66, "learning_rate": 4.9170000000000005e-05, "loss": 8.7467, "step": 207500 }, { "epoch": 1.66, "learning_rate": 4.9168e-05, "loss": 8.7691, "step": 208000 }, { "epoch": 1.67, "learning_rate": 4.9166e-05, "loss": 8.7586, "step": 208500 }, { "epoch": 1.67, "learning_rate": 4.9164000000000006e-05, "loss": 8.7779, "step": 209000 }, { "epoch": 1.68, "learning_rate": 4.9162e-05, "loss": 8.7369, "step": 209500 }, { "epoch": 1.68, "learning_rate": 4.9160000000000004e-05, "loss": 8.7758, "step": 210000 }, { "epoch": 1.68, "learning_rate": 4.9158000000000006e-05, "loss": 8.7473, "step": 210500 }, { "epoch": 1.69, "learning_rate": 4.9156e-05, "loss": 8.76, "step": 211000 }, { "epoch": 1.69, "learning_rate": 4.9154e-05, "loss": 8.7495, "step": 211500 }, { "epoch": 1.7, "learning_rate": 4.9152000000000006e-05, "loss": 8.7639, "step": 212000 }, { "epoch": 1.7, "learning_rate": 4.915e-05, "loss": 8.7573, "step": 212500 }, { "epoch": 1.7, "learning_rate": 4.9148e-05, "loss": 8.7542, "step": 213000 }, { "epoch": 1.71, "learning_rate": 4.9146e-05, "loss": 8.7811, "step": 213500 }, { "epoch": 1.71, "learning_rate": 4.9144e-05, "loss": 8.7595, "step": 214000 }, { "epoch": 1.72, "learning_rate": 4.9142000000000005e-05, "loss": 8.7631, "step": 214500 }, { "epoch": 1.72, "learning_rate": 4.914e-05, "loss": 8.7503, "step": 215000 }, { "epoch": 1.72, "learning_rate": 4.9138e-05, "loss": 8.7547, "step": 215500 }, { "epoch": 1.73, "learning_rate": 4.9136000000000005e-05, "loss": 8.7493, "step": 216000 }, { "epoch": 1.73, "learning_rate": 4.9134e-05, "loss": 8.7696, "step": 216500 }, { "epoch": 1.74, "learning_rate": 4.9132e-05, "loss": 8.766, "step": 217000 }, { "epoch": 1.74, "learning_rate": 4.9130000000000006e-05, "loss": 8.7614, "step": 217500 }, { "epoch": 1.74, "learning_rate": 4.9128e-05, "loss": 8.7631, "step": 218000 }, { "epoch": 1.75, "learning_rate": 4.9126e-05, "loss": 8.7563, "step": 218500 }, { "epoch": 1.75, "learning_rate": 4.9124000000000006e-05, "loss": 8.7472, "step": 219000 }, { "epoch": 1.76, "learning_rate": 4.9122e-05, "loss": 8.7637, "step": 219500 }, { "epoch": 1.76, "learning_rate": 4.9120000000000004e-05, "loss": 8.7367, "step": 220000 }, { "epoch": 1.76, "learning_rate": 4.9118e-05, "loss": 8.7531, "step": 220500 }, { "epoch": 1.77, "learning_rate": 4.9116e-05, "loss": 8.7551, "step": 221000 }, { "epoch": 1.77, "learning_rate": 4.9114000000000004e-05, "loss": 8.7659, "step": 221500 }, { "epoch": 1.78, "learning_rate": 4.9112e-05, "loss": 8.7526, "step": 222000 }, { "epoch": 1.78, "learning_rate": 4.911e-05, "loss": 8.7477, "step": 222500 }, { "epoch": 1.78, "learning_rate": 4.9108000000000005e-05, "loss": 8.7507, "step": 223000 }, { "epoch": 1.79, "learning_rate": 4.9106e-05, "loss": 8.754, "step": 223500 }, { "epoch": 1.79, "learning_rate": 4.9104e-05, "loss": 8.7761, "step": 224000 }, { "epoch": 1.8, "learning_rate": 4.9102000000000005e-05, "loss": 8.7565, "step": 224500 }, { "epoch": 1.8, "learning_rate": 4.91e-05, "loss": 8.7464, "step": 225000 }, { "epoch": 1.8, "learning_rate": 4.9098e-05, "loss": 8.7579, "step": 225500 }, { "epoch": 1.81, "learning_rate": 4.9096000000000006e-05, "loss": 8.7629, "step": 226000 }, { "epoch": 1.81, "learning_rate": 4.9094e-05, "loss": 8.7703, "step": 226500 }, { "epoch": 1.82, "learning_rate": 4.9092000000000004e-05, "loss": 8.7727, "step": 227000 }, { "epoch": 1.82, "learning_rate": 4.9090000000000006e-05, "loss": 8.7401, "step": 227500 }, { "epoch": 1.82, "learning_rate": 4.9088e-05, "loss": 8.7467, "step": 228000 }, { "epoch": 1.83, "learning_rate": 4.9086000000000004e-05, "loss": 8.7597, "step": 228500 }, { "epoch": 1.83, "learning_rate": 4.9084e-05, "loss": 8.7617, "step": 229000 }, { "epoch": 1.84, "learning_rate": 4.9082e-05, "loss": 8.743, "step": 229500 }, { "epoch": 1.84, "learning_rate": 4.9080000000000004e-05, "loss": 8.7467, "step": 230000 }, { "epoch": 1.84, "learning_rate": 4.9078e-05, "loss": 8.7426, "step": 230500 }, { "epoch": 1.85, "learning_rate": 4.9076e-05, "loss": 8.7525, "step": 231000 }, { "epoch": 1.85, "learning_rate": 4.9074000000000005e-05, "loss": 8.7497, "step": 231500 }, { "epoch": 1.86, "learning_rate": 4.9072e-05, "loss": 8.7298, "step": 232000 }, { "epoch": 1.86, "learning_rate": 4.907e-05, "loss": 8.7624, "step": 232500 }, { "epoch": 1.86, "learning_rate": 4.9068000000000005e-05, "loss": 8.7636, "step": 233000 }, { "epoch": 1.87, "learning_rate": 4.9066e-05, "loss": 8.7614, "step": 233500 }, { "epoch": 1.87, "learning_rate": 4.9064e-05, "loss": 8.7499, "step": 234000 }, { "epoch": 1.88, "learning_rate": 4.9062000000000006e-05, "loss": 8.752, "step": 234500 }, { "epoch": 1.88, "learning_rate": 4.906e-05, "loss": 8.7422, "step": 235000 }, { "epoch": 1.88, "learning_rate": 4.9058000000000004e-05, "loss": 8.7471, "step": 235500 }, { "epoch": 1.89, "learning_rate": 4.9056e-05, "loss": 8.7678, "step": 236000 }, { "epoch": 1.89, "learning_rate": 4.9054e-05, "loss": 8.7641, "step": 236500 }, { "epoch": 1.9, "learning_rate": 4.9052000000000004e-05, "loss": 8.7293, "step": 237000 }, { "epoch": 1.9, "learning_rate": 4.905e-05, "loss": 8.7624, "step": 237500 }, { "epoch": 1.9, "learning_rate": 4.9048e-05, "loss": 8.7414, "step": 238000 }, { "epoch": 1.91, "learning_rate": 4.9046000000000004e-05, "loss": 8.7356, "step": 238500 }, { "epoch": 1.91, "learning_rate": 4.9044e-05, "loss": 8.7699, "step": 239000 }, { "epoch": 1.92, "learning_rate": 4.9042e-05, "loss": 8.738, "step": 239500 }, { "epoch": 1.92, "learning_rate": 4.9040000000000005e-05, "loss": 8.7517, "step": 240000 }, { "epoch": 1.92, "learning_rate": 4.9038e-05, "loss": 8.746, "step": 240500 }, { "epoch": 1.93, "learning_rate": 4.9036e-05, "loss": 8.7661, "step": 241000 }, { "epoch": 1.93, "learning_rate": 4.9034000000000005e-05, "loss": 8.7586, "step": 241500 }, { "epoch": 1.94, "learning_rate": 4.9032e-05, "loss": 8.7386, "step": 242000 }, { "epoch": 1.94, "learning_rate": 4.903e-05, "loss": 8.7252, "step": 242500 }, { "epoch": 1.94, "learning_rate": 4.9028e-05, "loss": 8.7585, "step": 243000 }, { "epoch": 1.95, "learning_rate": 4.9026e-05, "loss": 8.7384, "step": 243500 }, { "epoch": 1.95, "learning_rate": 4.9024000000000004e-05, "loss": 8.7721, "step": 244000 }, { "epoch": 1.96, "learning_rate": 4.9022e-05, "loss": 8.7627, "step": 244500 }, { "epoch": 1.96, "learning_rate": 4.902e-05, "loss": 8.7767, "step": 245000 }, { "epoch": 1.96, "learning_rate": 4.9018000000000004e-05, "loss": 8.743, "step": 245500 }, { "epoch": 1.97, "learning_rate": 4.9016e-05, "loss": 8.7454, "step": 246000 }, { "epoch": 1.97, "learning_rate": 4.9014e-05, "loss": 8.7421, "step": 246500 }, { "epoch": 1.98, "learning_rate": 4.9012000000000004e-05, "loss": 8.7498, "step": 247000 }, { "epoch": 1.98, "learning_rate": 4.901e-05, "loss": 8.7429, "step": 247500 }, { "epoch": 1.98, "learning_rate": 4.9008e-05, "loss": 8.7457, "step": 248000 }, { "epoch": 1.99, "learning_rate": 4.9006000000000005e-05, "loss": 8.7712, "step": 248500 }, { "epoch": 1.99, "learning_rate": 4.9004e-05, "loss": 8.7602, "step": 249000 }, { "epoch": 2.0, "learning_rate": 4.9002e-05, "loss": 8.7554, "step": 249500 }, { "epoch": 2.0, "learning_rate": 4.9e-05, "loss": 8.7461, "step": 250000 }, { "epoch": 2.0, "learning_rate": 4.899800000000001e-05, "loss": 8.7589, "step": 250500 }, { "epoch": 2.01, "learning_rate": 4.8996e-05, "loss": 8.7335, "step": 251000 }, { "epoch": 2.01, "learning_rate": 4.8994e-05, "loss": 8.7581, "step": 251500 }, { "epoch": 2.02, "learning_rate": 4.8992e-05, "loss": 8.7523, "step": 252000 }, { "epoch": 2.02, "learning_rate": 4.8990000000000004e-05, "loss": 8.7653, "step": 252500 }, { "epoch": 2.02, "learning_rate": 4.8988e-05, "loss": 8.7581, "step": 253000 }, { "epoch": 2.03, "learning_rate": 4.8986e-05, "loss": 8.7669, "step": 253500 }, { "epoch": 2.03, "learning_rate": 4.8984000000000004e-05, "loss": 8.7471, "step": 254000 }, { "epoch": 2.04, "learning_rate": 4.8982e-05, "loss": 8.7462, "step": 254500 }, { "epoch": 2.04, "learning_rate": 4.898e-05, "loss": 8.7597, "step": 255000 }, { "epoch": 2.04, "learning_rate": 4.8978000000000004e-05, "loss": 8.752, "step": 255500 }, { "epoch": 2.05, "learning_rate": 4.8976e-05, "loss": 8.7547, "step": 256000 }, { "epoch": 2.05, "learning_rate": 4.8974e-05, "loss": 8.755, "step": 256500 }, { "epoch": 2.06, "learning_rate": 4.8972e-05, "loss": 8.7556, "step": 257000 }, { "epoch": 2.06, "learning_rate": 4.897000000000001e-05, "loss": 8.7394, "step": 257500 }, { "epoch": 2.06, "learning_rate": 4.8968e-05, "loss": 8.7595, "step": 258000 }, { "epoch": 2.07, "learning_rate": 4.8966e-05, "loss": 8.7483, "step": 258500 }, { "epoch": 2.07, "learning_rate": 4.896400000000001e-05, "loss": 8.7489, "step": 259000 }, { "epoch": 2.08, "learning_rate": 4.8962e-05, "loss": 8.7326, "step": 259500 }, { "epoch": 2.08, "learning_rate": 4.896e-05, "loss": 8.7656, "step": 260000 }, { "epoch": 2.08, "learning_rate": 4.8958e-05, "loss": 8.7466, "step": 260500 }, { "epoch": 2.09, "learning_rate": 4.8956000000000004e-05, "loss": 8.7437, "step": 261000 }, { "epoch": 2.09, "learning_rate": 4.8954e-05, "loss": 8.7583, "step": 261500 }, { "epoch": 2.1, "learning_rate": 4.8952e-05, "loss": 8.7367, "step": 262000 }, { "epoch": 2.1, "learning_rate": 4.8950000000000004e-05, "loss": 8.7521, "step": 262500 }, { "epoch": 2.1, "learning_rate": 4.8948000000000006e-05, "loss": 8.7539, "step": 263000 }, { "epoch": 2.11, "learning_rate": 4.8946e-05, "loss": 8.7406, "step": 263500 }, { "epoch": 2.11, "learning_rate": 4.8944e-05, "loss": 8.7448, "step": 264000 }, { "epoch": 2.12, "learning_rate": 4.894200000000001e-05, "loss": 8.7596, "step": 264500 }, { "epoch": 2.12, "learning_rate": 4.894e-05, "loss": 8.7597, "step": 265000 }, { "epoch": 2.12, "learning_rate": 4.8938e-05, "loss": 8.7585, "step": 265500 }, { "epoch": 2.13, "learning_rate": 4.893600000000001e-05, "loss": 8.7481, "step": 266000 }, { "epoch": 2.13, "learning_rate": 4.8934e-05, "loss": 8.764, "step": 266500 }, { "epoch": 2.14, "learning_rate": 4.8932e-05, "loss": 8.7734, "step": 267000 }, { "epoch": 2.14, "learning_rate": 4.893e-05, "loss": 8.7483, "step": 267500 }, { "epoch": 2.14, "learning_rate": 4.8928e-05, "loss": 8.7484, "step": 268000 }, { "epoch": 2.15, "learning_rate": 4.8926e-05, "loss": 8.7656, "step": 268500 }, { "epoch": 2.15, "learning_rate": 4.8924e-05, "loss": 8.7595, "step": 269000 }, { "epoch": 2.16, "learning_rate": 4.8922000000000004e-05, "loss": 8.7658, "step": 269500 }, { "epoch": 2.16, "learning_rate": 4.8920000000000006e-05, "loss": 8.7568, "step": 270000 }, { "epoch": 2.16, "learning_rate": 4.8918e-05, "loss": 8.7572, "step": 270500 }, { "epoch": 2.17, "learning_rate": 4.8916000000000004e-05, "loss": 8.7591, "step": 271000 }, { "epoch": 2.17, "learning_rate": 4.8914000000000006e-05, "loss": 8.7675, "step": 271500 }, { "epoch": 2.18, "learning_rate": 4.8912e-05, "loss": 8.7634, "step": 272000 }, { "epoch": 2.18, "learning_rate": 4.891e-05, "loss": 8.7474, "step": 272500 }, { "epoch": 2.18, "learning_rate": 4.890800000000001e-05, "loss": 8.7466, "step": 273000 }, { "epoch": 2.19, "learning_rate": 4.8906e-05, "loss": 8.7516, "step": 273500 }, { "epoch": 2.19, "learning_rate": 4.8904e-05, "loss": 8.769, "step": 274000 }, { "epoch": 2.2, "learning_rate": 4.8902e-05, "loss": 8.7589, "step": 274500 }, { "epoch": 2.2, "learning_rate": 4.89e-05, "loss": 8.7587, "step": 275000 }, { "epoch": 2.2, "learning_rate": 4.8898000000000005e-05, "loss": 8.7407, "step": 275500 }, { "epoch": 2.21, "learning_rate": 4.8896e-05, "loss": 8.7613, "step": 276000 }, { "epoch": 2.21, "learning_rate": 4.8894e-05, "loss": 8.7432, "step": 276500 }, { "epoch": 2.22, "learning_rate": 4.8892000000000006e-05, "loss": 8.7603, "step": 277000 }, { "epoch": 2.22, "learning_rate": 4.889e-05, "loss": 8.7477, "step": 277500 }, { "epoch": 2.22, "learning_rate": 4.8888000000000004e-05, "loss": 8.7632, "step": 278000 }, { "epoch": 2.23, "learning_rate": 4.8886000000000006e-05, "loss": 8.763, "step": 278500 }, { "epoch": 2.23, "learning_rate": 4.8884e-05, "loss": 8.7469, "step": 279000 }, { "epoch": 2.24, "learning_rate": 4.8882e-05, "loss": 8.7335, "step": 279500 }, { "epoch": 2.24, "learning_rate": 4.8880000000000006e-05, "loss": 8.7536, "step": 280000 }, { "epoch": 2.24, "learning_rate": 4.8878e-05, "loss": 8.7547, "step": 280500 }, { "epoch": 2.25, "learning_rate": 4.8876e-05, "loss": 8.7601, "step": 281000 }, { "epoch": 2.25, "learning_rate": 4.8874e-05, "loss": 8.7617, "step": 281500 }, { "epoch": 2.26, "learning_rate": 4.8872e-05, "loss": 8.7697, "step": 282000 }, { "epoch": 2.26, "learning_rate": 4.8870000000000005e-05, "loss": 8.7687, "step": 282500 }, { "epoch": 2.26, "learning_rate": 4.8868e-05, "loss": 8.7366, "step": 283000 }, { "epoch": 2.27, "learning_rate": 4.8866e-05, "loss": 8.745, "step": 283500 }, { "epoch": 2.27, "learning_rate": 4.8864000000000005e-05, "loss": 8.7363, "step": 284000 }, { "epoch": 2.28, "learning_rate": 4.8862e-05, "loss": 8.7577, "step": 284500 }, { "epoch": 2.28, "learning_rate": 4.886e-05, "loss": 8.7381, "step": 285000 }, { "epoch": 2.28, "learning_rate": 4.8858000000000006e-05, "loss": 8.7508, "step": 285500 }, { "epoch": 2.29, "learning_rate": 4.8856e-05, "loss": 8.7541, "step": 286000 }, { "epoch": 2.29, "learning_rate": 4.8854e-05, "loss": 8.7335, "step": 286500 }, { "epoch": 2.3, "learning_rate": 4.8852000000000006e-05, "loss": 8.7509, "step": 287000 }, { "epoch": 2.3, "learning_rate": 4.885e-05, "loss": 8.7583, "step": 287500 }, { "epoch": 2.3, "learning_rate": 4.8848000000000004e-05, "loss": 8.779, "step": 288000 }, { "epoch": 2.31, "learning_rate": 4.8846e-05, "loss": 8.7575, "step": 288500 }, { "epoch": 2.31, "learning_rate": 4.8844e-05, "loss": 8.7531, "step": 289000 }, { "epoch": 2.32, "learning_rate": 4.8842000000000004e-05, "loss": 8.7606, "step": 289500 }, { "epoch": 2.32, "learning_rate": 4.884e-05, "loss": 8.7465, "step": 290000 }, { "epoch": 2.32, "learning_rate": 4.8838e-05, "loss": 8.7481, "step": 290500 }, { "epoch": 2.33, "learning_rate": 4.8836000000000005e-05, "loss": 8.7403, "step": 291000 }, { "epoch": 2.33, "learning_rate": 4.8834e-05, "loss": 8.7502, "step": 291500 }, { "epoch": 2.34, "learning_rate": 4.8832e-05, "loss": 8.7544, "step": 292000 }, { "epoch": 2.34, "learning_rate": 4.8830000000000005e-05, "loss": 8.763, "step": 292500 }, { "epoch": 2.34, "learning_rate": 4.8828e-05, "loss": 8.7447, "step": 293000 }, { "epoch": 2.35, "learning_rate": 4.8826e-05, "loss": 8.7638, "step": 293500 }, { "epoch": 2.35, "learning_rate": 4.8824000000000006e-05, "loss": 8.7513, "step": 294000 }, { "epoch": 2.36, "learning_rate": 4.8822e-05, "loss": 8.7465, "step": 294500 }, { "epoch": 2.36, "learning_rate": 4.8820000000000004e-05, "loss": 8.7542, "step": 295000 }, { "epoch": 2.36, "learning_rate": 4.8818000000000006e-05, "loss": 8.7704, "step": 295500 }, { "epoch": 2.37, "learning_rate": 4.8816e-05, "loss": 8.7568, "step": 296000 }, { "epoch": 2.37, "learning_rate": 4.8814000000000004e-05, "loss": 8.7451, "step": 296500 }, { "epoch": 2.38, "learning_rate": 4.8812e-05, "loss": 8.7568, "step": 297000 }, { "epoch": 2.38, "learning_rate": 4.881e-05, "loss": 8.7868, "step": 297500 }, { "epoch": 2.38, "learning_rate": 4.8808000000000004e-05, "loss": 8.7408, "step": 298000 }, { "epoch": 2.39, "learning_rate": 4.8806e-05, "loss": 8.7557, "step": 298500 }, { "epoch": 2.39, "learning_rate": 4.8804e-05, "loss": 8.746, "step": 299000 }, { "epoch": 2.4, "learning_rate": 4.8802000000000005e-05, "loss": 8.7709, "step": 299500 }, { "epoch": 2.4, "learning_rate": 4.88e-05, "loss": 8.7522, "step": 300000 }, { "epoch": 2.4, "learning_rate": 4.8798e-05, "loss": 8.7577, "step": 300500 }, { "epoch": 2.41, "learning_rate": 4.8796000000000005e-05, "loss": 8.7724, "step": 301000 }, { "epoch": 2.41, "learning_rate": 4.8794e-05, "loss": 8.7553, "step": 301500 }, { "epoch": 2.42, "learning_rate": 4.8792e-05, "loss": 8.7449, "step": 302000 }, { "epoch": 2.42, "learning_rate": 4.8790000000000006e-05, "loss": 8.7539, "step": 302500 }, { "epoch": 2.42, "learning_rate": 4.8788e-05, "loss": 8.764, "step": 303000 }, { "epoch": 2.43, "learning_rate": 4.8786000000000004e-05, "loss": 8.7532, "step": 303500 }, { "epoch": 2.43, "learning_rate": 4.8784e-05, "loss": 8.7616, "step": 304000 }, { "epoch": 2.44, "learning_rate": 4.8782e-05, "loss": 8.7409, "step": 304500 }, { "epoch": 2.44, "learning_rate": 4.8780000000000004e-05, "loss": 8.747, "step": 305000 }, { "epoch": 2.44, "learning_rate": 4.8778e-05, "loss": 8.75, "step": 305500 }, { "epoch": 2.45, "learning_rate": 4.8776e-05, "loss": 8.7552, "step": 306000 }, { "epoch": 2.45, "learning_rate": 4.8774000000000004e-05, "loss": 8.7579, "step": 306500 }, { "epoch": 2.46, "learning_rate": 4.8772e-05, "loss": 8.7558, "step": 307000 }, { "epoch": 2.46, "learning_rate": 4.877e-05, "loss": 8.757, "step": 307500 }, { "epoch": 2.46, "learning_rate": 4.8768000000000005e-05, "loss": 8.758, "step": 308000 }, { "epoch": 2.47, "learning_rate": 4.8766e-05, "loss": 8.7549, "step": 308500 }, { "epoch": 2.47, "learning_rate": 4.8764e-05, "loss": 8.7463, "step": 309000 }, { "epoch": 2.48, "learning_rate": 4.8762000000000005e-05, "loss": 8.7567, "step": 309500 }, { "epoch": 2.48, "learning_rate": 4.876e-05, "loss": 8.7626, "step": 310000 }, { "epoch": 2.48, "learning_rate": 4.8758e-05, "loss": 8.7625, "step": 310500 }, { "epoch": 2.49, "learning_rate": 4.8756e-05, "loss": 8.7301, "step": 311000 }, { "epoch": 2.49, "learning_rate": 4.8754e-05, "loss": 8.7732, "step": 311500 }, { "epoch": 2.5, "learning_rate": 4.8752000000000004e-05, "loss": 8.743, "step": 312000 }, { "epoch": 2.5, "learning_rate": 4.875e-05, "loss": 8.7503, "step": 312500 }, { "epoch": 2.5, "learning_rate": 4.8748e-05, "loss": 8.7441, "step": 313000 }, { "epoch": 2.51, "learning_rate": 4.8746000000000004e-05, "loss": 8.76, "step": 313500 }, { "epoch": 2.51, "learning_rate": 4.8744e-05, "loss": 8.7544, "step": 314000 }, { "epoch": 2.52, "learning_rate": 4.8742e-05, "loss": 8.7467, "step": 314500 }, { "epoch": 2.52, "learning_rate": 4.8740000000000004e-05, "loss": 8.7664, "step": 315000 }, { "epoch": 2.52, "learning_rate": 4.8738e-05, "loss": 8.7627, "step": 315500 }, { "epoch": 2.53, "learning_rate": 4.8736e-05, "loss": 8.7546, "step": 316000 }, { "epoch": 2.53, "learning_rate": 4.8734000000000005e-05, "loss": 8.7456, "step": 316500 }, { "epoch": 2.54, "learning_rate": 4.8732e-05, "loss": 8.7258, "step": 317000 }, { "epoch": 2.54, "learning_rate": 4.873e-05, "loss": 8.7628, "step": 317500 }, { "epoch": 2.54, "learning_rate": 4.8728e-05, "loss": 8.758, "step": 318000 }, { "epoch": 2.55, "learning_rate": 4.872600000000001e-05, "loss": 8.7654, "step": 318500 }, { "epoch": 2.55, "learning_rate": 4.8724e-05, "loss": 8.7408, "step": 319000 }, { "epoch": 2.56, "learning_rate": 4.8722e-05, "loss": 8.7472, "step": 319500 }, { "epoch": 2.56, "learning_rate": 4.872000000000001e-05, "loss": 8.7644, "step": 320000 }, { "epoch": 2.56, "learning_rate": 4.8718000000000003e-05, "loss": 8.747, "step": 320500 }, { "epoch": 2.57, "learning_rate": 4.8716e-05, "loss": 8.7558, "step": 321000 }, { "epoch": 2.57, "learning_rate": 4.8714e-05, "loss": 8.7469, "step": 321500 }, { "epoch": 2.58, "learning_rate": 4.8712000000000004e-05, "loss": 8.7346, "step": 322000 }, { "epoch": 2.58, "learning_rate": 4.871e-05, "loss": 8.7541, "step": 322500 }, { "epoch": 2.58, "learning_rate": 4.8708e-05, "loss": 8.747, "step": 323000 }, { "epoch": 2.59, "learning_rate": 4.8706000000000004e-05, "loss": 8.7451, "step": 323500 }, { "epoch": 2.59, "learning_rate": 4.8704e-05, "loss": 8.7404, "step": 324000 }, { "epoch": 2.6, "learning_rate": 4.8702e-05, "loss": 8.7639, "step": 324500 }, { "epoch": 2.6, "learning_rate": 4.87e-05, "loss": 8.7282, "step": 325000 }, { "epoch": 2.6, "learning_rate": 4.869800000000001e-05, "loss": 8.7707, "step": 325500 }, { "epoch": 2.61, "learning_rate": 4.8696e-05, "loss": 8.7302, "step": 326000 }, { "epoch": 2.61, "learning_rate": 4.8694e-05, "loss": 8.7531, "step": 326500 }, { "epoch": 2.62, "learning_rate": 4.869200000000001e-05, "loss": 8.7505, "step": 327000 }, { "epoch": 2.62, "learning_rate": 4.869e-05, "loss": 8.7551, "step": 327500 }, { "epoch": 2.62, "learning_rate": 4.8688e-05, "loss": 8.7452, "step": 328000 }, { "epoch": 2.63, "learning_rate": 4.8686e-05, "loss": 8.7512, "step": 328500 }, { "epoch": 2.63, "learning_rate": 4.8684000000000003e-05, "loss": 8.7413, "step": 329000 }, { "epoch": 2.64, "learning_rate": 4.8682e-05, "loss": 8.7533, "step": 329500 }, { "epoch": 2.64, "learning_rate": 4.868e-05, "loss": 8.7691, "step": 330000 }, { "epoch": 2.64, "learning_rate": 4.8678000000000004e-05, "loss": 8.7535, "step": 330500 }, { "epoch": 2.65, "learning_rate": 4.8676000000000006e-05, "loss": 8.7828, "step": 331000 }, { "epoch": 2.65, "learning_rate": 4.8674e-05, "loss": 8.7463, "step": 331500 }, { "epoch": 2.66, "learning_rate": 4.8672000000000004e-05, "loss": 8.7509, "step": 332000 }, { "epoch": 2.66, "learning_rate": 4.867000000000001e-05, "loss": 8.741, "step": 332500 }, { "epoch": 2.66, "learning_rate": 4.8668e-05, "loss": 8.7541, "step": 333000 }, { "epoch": 2.67, "learning_rate": 4.8666e-05, "loss": 8.7665, "step": 333500 }, { "epoch": 2.67, "learning_rate": 4.866400000000001e-05, "loss": 8.7486, "step": 334000 }, { "epoch": 2.68, "learning_rate": 4.8662e-05, "loss": 8.759, "step": 334500 }, { "epoch": 2.68, "learning_rate": 4.866e-05, "loss": 8.7555, "step": 335000 }, { "epoch": 2.68, "learning_rate": 4.8658e-05, "loss": 8.7611, "step": 335500 }, { "epoch": 2.69, "learning_rate": 4.8656e-05, "loss": 8.758, "step": 336000 }, { "epoch": 2.69, "learning_rate": 4.8654e-05, "loss": 8.7653, "step": 336500 }, { "epoch": 2.7, "learning_rate": 4.8652e-05, "loss": 8.7587, "step": 337000 }, { "epoch": 2.7, "learning_rate": 4.8650000000000003e-05, "loss": 8.7457, "step": 337500 }, { "epoch": 2.7, "learning_rate": 4.8648000000000006e-05, "loss": 8.7652, "step": 338000 }, { "epoch": 2.71, "learning_rate": 4.8646e-05, "loss": 8.7497, "step": 338500 }, { "epoch": 2.71, "learning_rate": 4.8644000000000004e-05, "loss": 8.7398, "step": 339000 }, { "epoch": 2.72, "learning_rate": 4.8642000000000006e-05, "loss": 8.7705, "step": 339500 }, { "epoch": 2.72, "learning_rate": 4.864e-05, "loss": 8.7525, "step": 340000 }, { "epoch": 2.72, "learning_rate": 4.8638e-05, "loss": 8.7683, "step": 340500 }, { "epoch": 2.73, "learning_rate": 4.863600000000001e-05, "loss": 8.7422, "step": 341000 }, { "epoch": 2.73, "learning_rate": 4.8634e-05, "loss": 8.7608, "step": 341500 }, { "epoch": 2.74, "learning_rate": 4.8632e-05, "loss": 8.7649, "step": 342000 }, { "epoch": 2.74, "learning_rate": 4.863e-05, "loss": 8.7696, "step": 342500 }, { "epoch": 2.74, "learning_rate": 4.8628e-05, "loss": 8.7783, "step": 343000 }, { "epoch": 2.75, "learning_rate": 4.8626000000000005e-05, "loss": 8.7574, "step": 343500 }, { "epoch": 2.75, "learning_rate": 4.8624e-05, "loss": 8.7427, "step": 344000 }, { "epoch": 2.76, "learning_rate": 4.8622e-05, "loss": 8.7588, "step": 344500 }, { "epoch": 2.76, "learning_rate": 4.8620000000000005e-05, "loss": 8.7543, "step": 345000 }, { "epoch": 2.76, "learning_rate": 4.8618e-05, "loss": 8.7619, "step": 345500 }, { "epoch": 2.77, "learning_rate": 4.8616000000000003e-05, "loss": 8.7535, "step": 346000 }, { "epoch": 2.77, "learning_rate": 4.8614000000000006e-05, "loss": 8.7445, "step": 346500 }, { "epoch": 2.78, "learning_rate": 4.8612e-05, "loss": 8.7377, "step": 347000 }, { "epoch": 2.78, "learning_rate": 4.861e-05, "loss": 8.741, "step": 347500 }, { "epoch": 2.78, "learning_rate": 4.8608000000000006e-05, "loss": 8.7343, "step": 348000 }, { "epoch": 2.79, "learning_rate": 4.8606e-05, "loss": 8.7566, "step": 348500 }, { "epoch": 2.79, "learning_rate": 4.8604000000000004e-05, "loss": 8.7494, "step": 349000 }, { "epoch": 2.8, "learning_rate": 4.8602e-05, "loss": 8.749, "step": 349500 }, { "epoch": 2.8, "learning_rate": 4.86e-05, "loss": 8.7472, "step": 350000 }, { "epoch": 2.8, "learning_rate": 4.8598000000000005e-05, "loss": 8.7571, "step": 350500 }, { "epoch": 2.81, "learning_rate": 4.8596e-05, "loss": 8.7542, "step": 351000 }, { "epoch": 2.81, "learning_rate": 4.8594e-05, "loss": 8.7333, "step": 351500 }, { "epoch": 2.82, "learning_rate": 4.8592000000000005e-05, "loss": 8.7505, "step": 352000 }, { "epoch": 2.82, "learning_rate": 4.859e-05, "loss": 8.7486, "step": 352500 }, { "epoch": 2.82, "learning_rate": 4.8588e-05, "loss": 8.7502, "step": 353000 }, { "epoch": 2.83, "learning_rate": 4.8586000000000005e-05, "loss": 8.7427, "step": 353500 }, { "epoch": 2.83, "learning_rate": 4.8584e-05, "loss": 8.7351, "step": 354000 }, { "epoch": 2.84, "learning_rate": 4.8582e-05, "loss": 8.7624, "step": 354500 }, { "epoch": 2.84, "learning_rate": 4.8580000000000006e-05, "loss": 8.7674, "step": 355000 }, { "epoch": 2.84, "learning_rate": 4.8578e-05, "loss": 8.7455, "step": 355500 }, { "epoch": 2.85, "learning_rate": 4.8576000000000004e-05, "loss": 8.7397, "step": 356000 }, { "epoch": 2.85, "learning_rate": 4.8574000000000006e-05, "loss": 8.7294, "step": 356500 }, { "epoch": 2.86, "learning_rate": 4.8572e-05, "loss": 8.753, "step": 357000 }, { "epoch": 2.86, "learning_rate": 4.8570000000000004e-05, "loss": 8.7578, "step": 357500 }, { "epoch": 2.86, "learning_rate": 4.8568e-05, "loss": 8.7503, "step": 358000 }, { "epoch": 2.87, "learning_rate": 4.8566e-05, "loss": 8.7472, "step": 358500 }, { "epoch": 2.87, "learning_rate": 4.8564000000000005e-05, "loss": 8.7239, "step": 359000 }, { "epoch": 2.88, "learning_rate": 4.8562e-05, "loss": 8.7689, "step": 359500 }, { "epoch": 2.88, "learning_rate": 4.856e-05, "loss": 8.7455, "step": 360000 }, { "epoch": 2.88, "learning_rate": 4.8558000000000005e-05, "loss": 8.7542, "step": 360500 }, { "epoch": 2.89, "learning_rate": 4.8556e-05, "loss": 8.7572, "step": 361000 }, { "epoch": 2.89, "learning_rate": 4.8554e-05, "loss": 8.7473, "step": 361500 }, { "epoch": 2.9, "learning_rate": 4.8552000000000005e-05, "loss": 8.749, "step": 362000 }, { "epoch": 2.9, "learning_rate": 4.855e-05, "loss": 8.7435, "step": 362500 }, { "epoch": 2.9, "learning_rate": 4.8548000000000003e-05, "loss": 8.7645, "step": 363000 }, { "epoch": 2.91, "learning_rate": 4.8546000000000006e-05, "loss": 8.7663, "step": 363500 }, { "epoch": 2.91, "learning_rate": 4.8544e-05, "loss": 8.7385, "step": 364000 }, { "epoch": 2.92, "learning_rate": 4.8542000000000004e-05, "loss": 8.7369, "step": 364500 }, { "epoch": 2.92, "learning_rate": 4.854e-05, "loss": 8.7638, "step": 365000 }, { "epoch": 2.92, "learning_rate": 4.8538e-05, "loss": 8.7661, "step": 365500 }, { "epoch": 2.93, "learning_rate": 4.8536000000000004e-05, "loss": 8.748, "step": 366000 }, { "epoch": 2.93, "learning_rate": 4.8534e-05, "loss": 8.7496, "step": 366500 }, { "epoch": 2.94, "learning_rate": 4.8532e-05, "loss": 8.7528, "step": 367000 }, { "epoch": 2.94, "learning_rate": 4.8530000000000005e-05, "loss": 8.7404, "step": 367500 }, { "epoch": 2.94, "learning_rate": 4.8528e-05, "loss": 8.7451, "step": 368000 }, { "epoch": 2.95, "learning_rate": 4.8526e-05, "loss": 8.7332, "step": 368500 }, { "epoch": 2.95, "learning_rate": 4.8524000000000005e-05, "loss": 8.7486, "step": 369000 }, { "epoch": 2.96, "learning_rate": 4.8522e-05, "loss": 8.7408, "step": 369500 }, { "epoch": 2.96, "learning_rate": 4.852e-05, "loss": 8.7447, "step": 370000 }, { "epoch": 2.96, "learning_rate": 4.8518000000000005e-05, "loss": 8.7465, "step": 370500 }, { "epoch": 2.97, "learning_rate": 4.8516e-05, "loss": 8.7366, "step": 371000 }, { "epoch": 2.97, "learning_rate": 4.8514000000000003e-05, "loss": 8.7491, "step": 371500 }, { "epoch": 2.98, "learning_rate": 4.8512e-05, "loss": 8.7499, "step": 372000 }, { "epoch": 2.98, "learning_rate": 4.851e-05, "loss": 8.7419, "step": 372500 }, { "epoch": 2.98, "learning_rate": 4.8508000000000004e-05, "loss": 8.7568, "step": 373000 }, { "epoch": 2.99, "learning_rate": 4.8506e-05, "loss": 8.7486, "step": 373500 }, { "epoch": 2.99, "learning_rate": 4.8504e-05, "loss": 8.749, "step": 374000 }, { "epoch": 3.0, "learning_rate": 4.8502000000000004e-05, "loss": 8.7433, "step": 374500 }, { "epoch": 3.0, "learning_rate": 4.85e-05, "loss": 8.7609, "step": 375000 }, { "epoch": 3.0, "learning_rate": 4.8498e-05, "loss": 8.7483, "step": 375500 }, { "epoch": 3.01, "learning_rate": 4.8496000000000005e-05, "loss": 8.7678, "step": 376000 }, { "epoch": 3.01, "learning_rate": 4.8494e-05, "loss": 8.7511, "step": 376500 }, { "epoch": 3.02, "learning_rate": 4.8492e-05, "loss": 8.7438, "step": 377000 }, { "epoch": 3.02, "learning_rate": 4.8490000000000005e-05, "loss": 8.7335, "step": 377500 }, { "epoch": 3.02, "learning_rate": 4.8488e-05, "loss": 8.7457, "step": 378000 }, { "epoch": 3.03, "learning_rate": 4.8486e-05, "loss": 8.7448, "step": 378500 }, { "epoch": 3.03, "learning_rate": 4.8484e-05, "loss": 8.7505, "step": 379000 }, { "epoch": 3.04, "learning_rate": 4.8482e-05, "loss": 8.7733, "step": 379500 }, { "epoch": 3.04, "learning_rate": 4.8480000000000003e-05, "loss": 8.7612, "step": 380000 }, { "epoch": 3.04, "learning_rate": 4.8478e-05, "loss": 8.7488, "step": 380500 }, { "epoch": 3.05, "learning_rate": 4.8476e-05, "loss": 8.7633, "step": 381000 }, { "epoch": 3.05, "learning_rate": 4.8474000000000004e-05, "loss": 8.7413, "step": 381500 }, { "epoch": 3.06, "learning_rate": 4.8472e-05, "loss": 8.7372, "step": 382000 }, { "epoch": 3.06, "learning_rate": 4.847e-05, "loss": 8.7601, "step": 382500 }, { "epoch": 3.06, "learning_rate": 4.8468000000000004e-05, "loss": 8.7372, "step": 383000 }, { "epoch": 3.07, "learning_rate": 4.8466e-05, "loss": 8.7428, "step": 383500 }, { "epoch": 3.07, "learning_rate": 4.8464e-05, "loss": 8.7382, "step": 384000 }, { "epoch": 3.08, "learning_rate": 4.8462000000000005e-05, "loss": 8.7367, "step": 384500 }, { "epoch": 3.08, "learning_rate": 4.846e-05, "loss": 8.7374, "step": 385000 }, { "epoch": 3.08, "learning_rate": 4.8458e-05, "loss": 8.7552, "step": 385500 }, { "epoch": 3.09, "learning_rate": 4.8456e-05, "loss": 8.7759, "step": 386000 }, { "epoch": 3.09, "learning_rate": 4.845400000000001e-05, "loss": 8.7605, "step": 386500 }, { "epoch": 3.1, "learning_rate": 4.8452e-05, "loss": 8.7623, "step": 387000 }, { "epoch": 3.1, "learning_rate": 4.845e-05, "loss": 8.7733, "step": 387500 }, { "epoch": 3.1, "learning_rate": 4.844800000000001e-05, "loss": 8.7524, "step": 388000 }, { "epoch": 3.11, "learning_rate": 4.8446e-05, "loss": 8.7407, "step": 388500 }, { "epoch": 3.11, "learning_rate": 4.8444e-05, "loss": 8.7488, "step": 389000 }, { "epoch": 3.12, "learning_rate": 4.8442e-05, "loss": 8.7355, "step": 389500 }, { "epoch": 3.12, "learning_rate": 4.8440000000000004e-05, "loss": 8.7447, "step": 390000 }, { "epoch": 3.12, "learning_rate": 4.8438e-05, "loss": 8.736, "step": 390500 }, { "epoch": 3.13, "learning_rate": 4.8436e-05, "loss": 8.7397, "step": 391000 }, { "epoch": 3.13, "learning_rate": 4.8434000000000004e-05, "loss": 8.7421, "step": 391500 }, { "epoch": 3.14, "learning_rate": 4.8432e-05, "loss": 8.753, "step": 392000 }, { "epoch": 3.14, "learning_rate": 4.843e-05, "loss": 8.7537, "step": 392500 }, { "epoch": 3.14, "learning_rate": 4.8428e-05, "loss": 8.7519, "step": 393000 }, { "epoch": 3.15, "learning_rate": 4.842600000000001e-05, "loss": 8.7677, "step": 393500 }, { "epoch": 3.15, "learning_rate": 4.8424e-05, "loss": 8.7676, "step": 394000 }, { "epoch": 3.16, "learning_rate": 4.8422e-05, "loss": 8.7641, "step": 394500 }, { "epoch": 3.16, "learning_rate": 4.842000000000001e-05, "loss": 8.7302, "step": 395000 }, { "epoch": 3.16, "learning_rate": 4.8418e-05, "loss": 8.7554, "step": 395500 }, { "epoch": 3.17, "learning_rate": 4.8416e-05, "loss": 8.7688, "step": 396000 }, { "epoch": 3.17, "learning_rate": 4.8414e-05, "loss": 8.7454, "step": 396500 }, { "epoch": 3.18, "learning_rate": 4.8412e-05, "loss": 8.7442, "step": 397000 }, { "epoch": 3.18, "learning_rate": 4.841e-05, "loss": 8.7395, "step": 397500 }, { "epoch": 3.18, "learning_rate": 4.8408e-05, "loss": 8.7744, "step": 398000 }, { "epoch": 3.19, "learning_rate": 4.8406000000000004e-05, "loss": 8.7589, "step": 398500 }, { "epoch": 3.19, "learning_rate": 4.8404000000000006e-05, "loss": 8.7535, "step": 399000 }, { "epoch": 3.2, "learning_rate": 4.8402e-05, "loss": 8.7534, "step": 399500 }, { "epoch": 3.2, "learning_rate": 4.8400000000000004e-05, "loss": 8.7473, "step": 400000 }, { "epoch": 3.2, "learning_rate": 4.8398000000000007e-05, "loss": 8.745, "step": 400500 }, { "epoch": 3.21, "learning_rate": 4.8396e-05, "loss": 8.7375, "step": 401000 }, { "epoch": 3.21, "learning_rate": 4.8394e-05, "loss": 8.7549, "step": 401500 }, { "epoch": 3.22, "learning_rate": 4.839200000000001e-05, "loss": 8.7694, "step": 402000 }, { "epoch": 3.22, "learning_rate": 4.839e-05, "loss": 8.7448, "step": 402500 }, { "epoch": 3.22, "learning_rate": 4.8388e-05, "loss": 8.7563, "step": 403000 }, { "epoch": 3.23, "learning_rate": 4.8386e-05, "loss": 8.7666, "step": 403500 }, { "epoch": 3.23, "learning_rate": 4.8384e-05, "loss": 8.7555, "step": 404000 }, { "epoch": 3.24, "learning_rate": 4.8382e-05, "loss": 8.765, "step": 404500 }, { "epoch": 3.24, "learning_rate": 4.838e-05, "loss": 8.7732, "step": 405000 }, { "epoch": 3.24, "learning_rate": 4.8378e-05, "loss": 8.7638, "step": 405500 }, { "epoch": 3.25, "learning_rate": 4.8376000000000006e-05, "loss": 8.754, "step": 406000 }, { "epoch": 3.25, "learning_rate": 4.8374e-05, "loss": 8.7653, "step": 406500 }, { "epoch": 3.26, "learning_rate": 4.8372000000000004e-05, "loss": 8.7407, "step": 407000 }, { "epoch": 3.26, "learning_rate": 4.8370000000000006e-05, "loss": 8.7518, "step": 407500 }, { "epoch": 3.26, "learning_rate": 4.8368e-05, "loss": 8.7565, "step": 408000 }, { "epoch": 3.27, "learning_rate": 4.8366e-05, "loss": 8.7438, "step": 408500 }, { "epoch": 3.27, "learning_rate": 4.8364000000000007e-05, "loss": 8.7645, "step": 409000 }, { "epoch": 3.28, "learning_rate": 4.8362e-05, "loss": 8.7607, "step": 409500 }, { "epoch": 3.28, "learning_rate": 4.836e-05, "loss": 8.7613, "step": 410000 }, { "epoch": 3.28, "learning_rate": 4.8358e-05, "loss": 8.7527, "step": 410500 }, { "epoch": 3.29, "learning_rate": 4.8356e-05, "loss": 8.7566, "step": 411000 }, { "epoch": 3.29, "learning_rate": 4.8354000000000005e-05, "loss": 8.7693, "step": 411500 }, { "epoch": 3.3, "learning_rate": 4.8352e-05, "loss": 8.7591, "step": 412000 }, { "epoch": 3.3, "learning_rate": 4.835e-05, "loss": 8.7666, "step": 412500 }, { "epoch": 3.3, "learning_rate": 4.8348000000000005e-05, "loss": 8.7431, "step": 413000 }, { "epoch": 3.31, "learning_rate": 4.8346e-05, "loss": 8.7582, "step": 413500 }, { "epoch": 3.31, "learning_rate": 4.8344e-05, "loss": 8.7567, "step": 414000 }, { "epoch": 3.32, "learning_rate": 4.8342000000000006e-05, "loss": 8.7523, "step": 414500 }, { "epoch": 3.32, "learning_rate": 4.834e-05, "loss": 8.7608, "step": 415000 }, { "epoch": 3.32, "learning_rate": 4.8338e-05, "loss": 8.7596, "step": 415500 }, { "epoch": 3.33, "learning_rate": 4.8336000000000006e-05, "loss": 8.7422, "step": 416000 }, { "epoch": 3.33, "learning_rate": 4.8334e-05, "loss": 8.7642, "step": 416500 }, { "epoch": 3.34, "learning_rate": 4.8332000000000004e-05, "loss": 8.7512, "step": 417000 }, { "epoch": 3.34, "learning_rate": 4.833e-05, "loss": 8.7579, "step": 417500 }, { "epoch": 3.34, "learning_rate": 4.8328e-05, "loss": 8.7566, "step": 418000 }, { "epoch": 3.35, "learning_rate": 4.8326000000000005e-05, "loss": 8.7634, "step": 418500 }, { "epoch": 3.35, "learning_rate": 4.8324e-05, "loss": 8.7593, "step": 419000 }, { "epoch": 3.36, "learning_rate": 4.8322e-05, "loss": 8.7595, "step": 419500 }, { "epoch": 3.36, "learning_rate": 4.8320000000000005e-05, "loss": 8.7483, "step": 420000 }, { "epoch": 3.36, "learning_rate": 4.8318e-05, "loss": 8.7651, "step": 420500 }, { "epoch": 3.37, "learning_rate": 4.8316e-05, "loss": 8.7552, "step": 421000 }, { "epoch": 3.37, "learning_rate": 4.8314000000000005e-05, "loss": 8.7627, "step": 421500 }, { "epoch": 3.38, "learning_rate": 4.8312e-05, "loss": 8.7704, "step": 422000 }, { "epoch": 3.38, "learning_rate": 4.8309999999999997e-05, "loss": 8.7244, "step": 422500 }, { "epoch": 3.38, "learning_rate": 4.8308000000000006e-05, "loss": 8.762, "step": 423000 }, { "epoch": 3.39, "learning_rate": 4.8306e-05, "loss": 8.7671, "step": 423500 }, { "epoch": 3.39, "learning_rate": 4.8304000000000004e-05, "loss": 8.7522, "step": 424000 }, { "epoch": 3.4, "learning_rate": 4.8302000000000006e-05, "loss": 8.7716, "step": 424500 }, { "epoch": 3.4, "learning_rate": 4.83e-05, "loss": 8.7713, "step": 425000 }, { "epoch": 3.4, "learning_rate": 4.8298000000000004e-05, "loss": 8.7503, "step": 425500 }, { "epoch": 3.41, "learning_rate": 4.8296e-05, "loss": 8.7434, "step": 426000 }, { "epoch": 3.41, "learning_rate": 4.8294e-05, "loss": 8.762, "step": 426500 }, { "epoch": 3.42, "learning_rate": 4.8292000000000005e-05, "loss": 8.7599, "step": 427000 }, { "epoch": 3.42, "learning_rate": 4.829e-05, "loss": 8.7456, "step": 427500 }, { "epoch": 3.42, "learning_rate": 4.8288e-05, "loss": 8.7266, "step": 428000 }, { "epoch": 3.43, "learning_rate": 4.8286000000000005e-05, "loss": 8.7583, "step": 428500 }, { "epoch": 3.43, "learning_rate": 4.8284e-05, "loss": 8.7391, "step": 429000 }, { "epoch": 3.44, "learning_rate": 4.8282e-05, "loss": 8.7589, "step": 429500 }, { "epoch": 3.44, "learning_rate": 4.8280000000000005e-05, "loss": 8.7344, "step": 430000 }, { "epoch": 3.44, "learning_rate": 4.8278e-05, "loss": 8.732, "step": 430500 }, { "epoch": 3.45, "learning_rate": 4.8276e-05, "loss": 8.7441, "step": 431000 }, { "epoch": 3.45, "learning_rate": 4.8274000000000006e-05, "loss": 8.7592, "step": 431500 }, { "epoch": 3.46, "learning_rate": 4.8272e-05, "loss": 8.7632, "step": 432000 }, { "epoch": 3.46, "learning_rate": 4.8270000000000004e-05, "loss": 8.7564, "step": 432500 }, { "epoch": 3.46, "learning_rate": 4.8268e-05, "loss": 8.7439, "step": 433000 }, { "epoch": 3.47, "learning_rate": 4.8266e-05, "loss": 8.7632, "step": 433500 }, { "epoch": 3.47, "learning_rate": 4.8264000000000004e-05, "loss": 8.7458, "step": 434000 }, { "epoch": 3.48, "learning_rate": 4.8262e-05, "loss": 8.7428, "step": 434500 }, { "epoch": 3.48, "learning_rate": 4.826e-05, "loss": 8.7487, "step": 435000 }, { "epoch": 3.48, "learning_rate": 4.8258000000000005e-05, "loss": 8.7695, "step": 435500 }, { "epoch": 3.49, "learning_rate": 4.8256e-05, "loss": 8.7414, "step": 436000 }, { "epoch": 3.49, "learning_rate": 4.8254e-05, "loss": 8.7499, "step": 436500 }, { "epoch": 3.5, "learning_rate": 4.8252000000000005e-05, "loss": 8.7543, "step": 437000 }, { "epoch": 3.5, "learning_rate": 4.825e-05, "loss": 8.7485, "step": 437500 }, { "epoch": 3.5, "learning_rate": 4.8248e-05, "loss": 8.7231, "step": 438000 }, { "epoch": 3.51, "learning_rate": 4.8246000000000005e-05, "loss": 8.7569, "step": 438500 }, { "epoch": 3.51, "learning_rate": 4.8244e-05, "loss": 8.7398, "step": 439000 }, { "epoch": 3.52, "learning_rate": 4.8242e-05, "loss": 8.7481, "step": 439500 }, { "epoch": 3.52, "learning_rate": 4.824e-05, "loss": 8.7254, "step": 440000 }, { "epoch": 3.52, "learning_rate": 4.8238e-05, "loss": 8.7746, "step": 440500 }, { "epoch": 3.53, "learning_rate": 4.8236000000000004e-05, "loss": 8.7653, "step": 441000 }, { "epoch": 3.53, "learning_rate": 4.8234e-05, "loss": 8.748, "step": 441500 }, { "epoch": 3.54, "learning_rate": 4.8232e-05, "loss": 8.7649, "step": 442000 }, { "epoch": 3.54, "learning_rate": 4.8230000000000004e-05, "loss": 8.7435, "step": 442500 }, { "epoch": 3.54, "learning_rate": 4.8228e-05, "loss": 8.7489, "step": 443000 }, { "epoch": 3.55, "learning_rate": 4.8226e-05, "loss": 8.734, "step": 443500 }, { "epoch": 3.55, "learning_rate": 4.8224000000000004e-05, "loss": 8.764, "step": 444000 }, { "epoch": 3.56, "learning_rate": 4.8222e-05, "loss": 8.7512, "step": 444500 }, { "epoch": 3.56, "learning_rate": 4.822e-05, "loss": 8.7518, "step": 445000 }, { "epoch": 3.56, "learning_rate": 4.8218000000000005e-05, "loss": 8.7505, "step": 445500 }, { "epoch": 3.57, "learning_rate": 4.8216e-05, "loss": 8.737, "step": 446000 }, { "epoch": 3.57, "learning_rate": 4.8214e-05, "loss": 8.7717, "step": 446500 }, { "epoch": 3.58, "learning_rate": 4.8212e-05, "loss": 8.742, "step": 447000 }, { "epoch": 3.58, "learning_rate": 4.821e-05, "loss": 8.7505, "step": 447500 }, { "epoch": 3.58, "learning_rate": 4.8208e-05, "loss": 8.7564, "step": 448000 }, { "epoch": 3.59, "learning_rate": 4.8206e-05, "loss": 8.7554, "step": 448500 }, { "epoch": 3.59, "learning_rate": 4.820400000000001e-05, "loss": 8.7518, "step": 449000 }, { "epoch": 3.6, "learning_rate": 4.8202000000000004e-05, "loss": 8.7572, "step": 449500 }, { "epoch": 3.6, "learning_rate": 4.82e-05, "loss": 8.7468, "step": 450000 }, { "epoch": 3.6, "learning_rate": 4.8198e-05, "loss": 8.7485, "step": 450500 }, { "epoch": 3.61, "learning_rate": 4.8196000000000004e-05, "loss": 8.7515, "step": 451000 }, { "epoch": 3.61, "learning_rate": 4.8194e-05, "loss": 8.7323, "step": 451500 }, { "epoch": 3.62, "learning_rate": 4.8192e-05, "loss": 8.7417, "step": 452000 }, { "epoch": 3.62, "learning_rate": 4.8190000000000004e-05, "loss": 8.7571, "step": 452500 }, { "epoch": 3.62, "learning_rate": 4.8188e-05, "loss": 8.7405, "step": 453000 }, { "epoch": 3.63, "learning_rate": 4.8186e-05, "loss": 8.7611, "step": 453500 }, { "epoch": 3.63, "learning_rate": 4.8184e-05, "loss": 8.7584, "step": 454000 }, { "epoch": 3.64, "learning_rate": 4.818200000000001e-05, "loss": 8.7799, "step": 454500 }, { "epoch": 3.64, "learning_rate": 4.818e-05, "loss": 8.7762, "step": 455000 }, { "epoch": 3.64, "learning_rate": 4.8178e-05, "loss": 8.7576, "step": 455500 }, { "epoch": 3.65, "learning_rate": 4.817600000000001e-05, "loss": 8.752, "step": 456000 }, { "epoch": 3.65, "learning_rate": 4.8174e-05, "loss": 8.7423, "step": 456500 }, { "epoch": 3.66, "learning_rate": 4.8172e-05, "loss": 8.7563, "step": 457000 }, { "epoch": 3.66, "learning_rate": 4.817e-05, "loss": 8.7524, "step": 457500 }, { "epoch": 3.66, "learning_rate": 4.8168000000000004e-05, "loss": 8.7515, "step": 458000 }, { "epoch": 3.67, "learning_rate": 4.8166e-05, "loss": 8.7716, "step": 458500 }, { "epoch": 3.67, "learning_rate": 4.8164e-05, "loss": 8.757, "step": 459000 }, { "epoch": 3.68, "learning_rate": 4.8162000000000004e-05, "loss": 8.7615, "step": 459500 }, { "epoch": 3.68, "learning_rate": 4.816e-05, "loss": 8.756, "step": 460000 }, { "epoch": 3.68, "learning_rate": 4.8158e-05, "loss": 8.7342, "step": 460500 }, { "epoch": 3.69, "learning_rate": 4.8156000000000004e-05, "loss": 8.7566, "step": 461000 }, { "epoch": 3.69, "learning_rate": 4.815400000000001e-05, "loss": 8.753, "step": 461500 }, { "epoch": 3.7, "learning_rate": 4.8152e-05, "loss": 8.7547, "step": 462000 }, { "epoch": 3.7, "learning_rate": 4.815e-05, "loss": 8.7692, "step": 462500 }, { "epoch": 3.7, "learning_rate": 4.814800000000001e-05, "loss": 8.7742, "step": 463000 }, { "epoch": 3.71, "learning_rate": 4.8146e-05, "loss": 8.7729, "step": 463500 }, { "epoch": 3.71, "learning_rate": 4.8144e-05, "loss": 8.7394, "step": 464000 }, { "epoch": 3.72, "learning_rate": 4.8142e-05, "loss": 8.7712, "step": 464500 }, { "epoch": 3.72, "learning_rate": 4.814e-05, "loss": 8.7456, "step": 465000 }, { "epoch": 3.72, "learning_rate": 4.8138e-05, "loss": 8.7553, "step": 465500 }, { "epoch": 3.73, "learning_rate": 4.8136e-05, "loss": 8.7539, "step": 466000 }, { "epoch": 3.73, "learning_rate": 4.8134000000000004e-05, "loss": 8.7534, "step": 466500 }, { "epoch": 3.74, "learning_rate": 4.8132000000000006e-05, "loss": 8.7424, "step": 467000 }, { "epoch": 3.74, "learning_rate": 4.813e-05, "loss": 8.7564, "step": 467500 }, { "epoch": 3.74, "learning_rate": 4.8128000000000004e-05, "loss": 8.7528, "step": 468000 }, { "epoch": 3.75, "learning_rate": 4.8126000000000006e-05, "loss": 8.754, "step": 468500 }, { "epoch": 3.75, "learning_rate": 4.8124e-05, "loss": 8.7415, "step": 469000 }, { "epoch": 3.76, "learning_rate": 4.8122e-05, "loss": 8.7619, "step": 469500 }, { "epoch": 3.76, "learning_rate": 4.812000000000001e-05, "loss": 8.7415, "step": 470000 }, { "epoch": 3.76, "learning_rate": 4.8118e-05, "loss": 8.7528, "step": 470500 }, { "epoch": 3.77, "learning_rate": 4.8116e-05, "loss": 8.7538, "step": 471000 }, { "epoch": 3.77, "learning_rate": 4.8114e-05, "loss": 8.7562, "step": 471500 }, { "epoch": 3.78, "learning_rate": 4.8112e-05, "loss": 8.7624, "step": 472000 }, { "epoch": 3.78, "learning_rate": 4.8110000000000005e-05, "loss": 8.7532, "step": 472500 }, { "epoch": 3.78, "learning_rate": 4.8108e-05, "loss": 8.7482, "step": 473000 }, { "epoch": 3.79, "learning_rate": 4.8106e-05, "loss": 8.7588, "step": 473500 }, { "epoch": 3.79, "learning_rate": 4.8104000000000006e-05, "loss": 8.7474, "step": 474000 }, { "epoch": 3.8, "learning_rate": 4.8102e-05, "loss": 8.7726, "step": 474500 }, { "epoch": 3.8, "learning_rate": 4.8100000000000004e-05, "loss": 8.7612, "step": 475000 }, { "epoch": 3.8, "learning_rate": 4.8098000000000006e-05, "loss": 8.7543, "step": 475500 }, { "epoch": 3.81, "learning_rate": 4.8096e-05, "loss": 8.7456, "step": 476000 }, { "epoch": 3.81, "learning_rate": 4.8094e-05, "loss": 8.7445, "step": 476500 }, { "epoch": 3.82, "learning_rate": 4.8092000000000006e-05, "loss": 8.7398, "step": 477000 }, { "epoch": 3.82, "learning_rate": 4.809e-05, "loss": 8.7514, "step": 477500 }, { "epoch": 3.82, "learning_rate": 4.8088e-05, "loss": 8.7657, "step": 478000 }, { "epoch": 3.83, "learning_rate": 4.8086e-05, "loss": 8.7623, "step": 478500 }, { "epoch": 3.83, "learning_rate": 4.8084e-05, "loss": 8.7593, "step": 479000 }, { "epoch": 3.84, "learning_rate": 4.8082000000000005e-05, "loss": 8.7438, "step": 479500 }, { "epoch": 3.84, "learning_rate": 4.808e-05, "loss": 8.7547, "step": 480000 }, { "epoch": 3.84, "learning_rate": 4.8078e-05, "loss": 8.7502, "step": 480500 }, { "epoch": 3.85, "learning_rate": 4.8076000000000005e-05, "loss": 8.7531, "step": 481000 }, { "epoch": 3.85, "learning_rate": 4.8074e-05, "loss": 8.7285, "step": 481500 }, { "epoch": 3.86, "learning_rate": 4.8072e-05, "loss": 8.7228, "step": 482000 }, { "epoch": 3.86, "learning_rate": 4.8070000000000006e-05, "loss": 8.7714, "step": 482500 }, { "epoch": 3.86, "learning_rate": 4.8068e-05, "loss": 8.7372, "step": 483000 }, { "epoch": 3.87, "learning_rate": 4.8066e-05, "loss": 8.7526, "step": 483500 }, { "epoch": 3.87, "learning_rate": 4.8064000000000006e-05, "loss": 8.7578, "step": 484000 }, { "epoch": 3.88, "learning_rate": 4.8062e-05, "loss": 8.74, "step": 484500 }, { "epoch": 3.88, "learning_rate": 4.8060000000000004e-05, "loss": 8.7388, "step": 485000 }, { "epoch": 3.88, "learning_rate": 4.8058e-05, "loss": 8.7712, "step": 485500 }, { "epoch": 3.89, "learning_rate": 4.8056e-05, "loss": 8.7538, "step": 486000 }, { "epoch": 3.89, "learning_rate": 4.8054000000000004e-05, "loss": 8.7563, "step": 486500 }, { "epoch": 3.9, "learning_rate": 4.8052e-05, "loss": 8.7625, "step": 487000 }, { "epoch": 3.9, "learning_rate": 4.805e-05, "loss": 8.7399, "step": 487500 }, { "epoch": 3.9, "learning_rate": 4.8048000000000005e-05, "loss": 8.7366, "step": 488000 }, { "epoch": 3.91, "learning_rate": 4.8046e-05, "loss": 8.7667, "step": 488500 }, { "epoch": 3.91, "learning_rate": 4.8044e-05, "loss": 8.7393, "step": 489000 }, { "epoch": 3.92, "learning_rate": 4.8042000000000005e-05, "loss": 8.7347, "step": 489500 }, { "epoch": 3.92, "learning_rate": 4.804e-05, "loss": 8.7582, "step": 490000 }, { "epoch": 3.92, "learning_rate": 4.8037999999999996e-05, "loss": 8.7331, "step": 490500 }, { "epoch": 3.93, "learning_rate": 4.8036000000000006e-05, "loss": 8.7593, "step": 491000 }, { "epoch": 3.93, "learning_rate": 4.8034e-05, "loss": 8.7537, "step": 491500 }, { "epoch": 3.94, "learning_rate": 4.8032000000000004e-05, "loss": 8.7553, "step": 492000 }, { "epoch": 3.94, "learning_rate": 4.8030000000000006e-05, "loss": 8.7489, "step": 492500 }, { "epoch": 3.94, "learning_rate": 4.8028e-05, "loss": 8.7588, "step": 493000 }, { "epoch": 3.95, "learning_rate": 4.8026000000000004e-05, "loss": 8.7673, "step": 493500 }, { "epoch": 3.95, "learning_rate": 4.8024e-05, "loss": 8.7473, "step": 494000 }, { "epoch": 3.96, "learning_rate": 4.8022e-05, "loss": 8.747, "step": 494500 }, { "epoch": 3.96, "learning_rate": 4.8020000000000004e-05, "loss": 8.7604, "step": 495000 }, { "epoch": 3.96, "learning_rate": 4.8018e-05, "loss": 8.7638, "step": 495500 }, { "epoch": 3.97, "learning_rate": 4.8016e-05, "loss": 8.7797, "step": 496000 }, { "epoch": 3.97, "learning_rate": 4.8014000000000005e-05, "loss": 8.7684, "step": 496500 }, { "epoch": 3.98, "learning_rate": 4.8012e-05, "loss": 8.78, "step": 497000 }, { "epoch": 3.98, "learning_rate": 4.801e-05, "loss": 8.7542, "step": 497500 }, { "epoch": 3.98, "learning_rate": 4.8008000000000005e-05, "loss": 8.7329, "step": 498000 }, { "epoch": 3.99, "learning_rate": 4.8006e-05, "loss": 8.7383, "step": 498500 }, { "epoch": 3.99, "learning_rate": 4.8004e-05, "loss": 8.7597, "step": 499000 }, { "epoch": 4.0, "learning_rate": 4.8002000000000006e-05, "loss": 8.7482, "step": 499500 }, { "epoch": 4.0, "learning_rate": 4.8e-05, "loss": 8.7717, "step": 500000 }, { "epoch": 4.0, "learning_rate": 4.7998000000000004e-05, "loss": 8.7499, "step": 500500 }, { "epoch": 4.01, "learning_rate": 4.7996e-05, "loss": 8.7442, "step": 501000 }, { "epoch": 4.01, "learning_rate": 4.7994e-05, "loss": 8.7406, "step": 501500 }, { "epoch": 4.02, "learning_rate": 4.7992000000000004e-05, "loss": 8.7637, "step": 502000 }, { "epoch": 4.02, "learning_rate": 4.799e-05, "loss": 8.7535, "step": 502500 }, { "epoch": 4.02, "learning_rate": 4.7988e-05, "loss": 8.7651, "step": 503000 }, { "epoch": 4.03, "learning_rate": 4.7986000000000004e-05, "loss": 8.7575, "step": 503500 }, { "epoch": 4.03, "learning_rate": 4.7984e-05, "loss": 8.7659, "step": 504000 }, { "epoch": 4.04, "learning_rate": 4.7982e-05, "loss": 8.7573, "step": 504500 }, { "epoch": 4.04, "learning_rate": 4.7980000000000005e-05, "loss": 8.7588, "step": 505000 }, { "epoch": 4.04, "learning_rate": 4.7978e-05, "loss": 8.762, "step": 505500 }, { "epoch": 4.05, "learning_rate": 4.7976e-05, "loss": 8.7461, "step": 506000 }, { "epoch": 4.05, "learning_rate": 4.7974000000000005e-05, "loss": 8.7404, "step": 506500 }, { "epoch": 4.06, "learning_rate": 4.7972e-05, "loss": 8.7518, "step": 507000 }, { "epoch": 4.06, "learning_rate": 4.797e-05, "loss": 8.7543, "step": 507500 }, { "epoch": 4.06, "learning_rate": 4.7968e-05, "loss": 8.7379, "step": 508000 }, { "epoch": 4.07, "learning_rate": 4.7966e-05, "loss": 8.7569, "step": 508500 }, { "epoch": 4.07, "learning_rate": 4.7964000000000004e-05, "loss": 8.7488, "step": 509000 }, { "epoch": 4.08, "learning_rate": 4.7962e-05, "loss": 8.7705, "step": 509500 }, { "epoch": 4.08, "learning_rate": 4.796e-05, "loss": 8.7362, "step": 510000 }, { "epoch": 4.08, "learning_rate": 4.7958000000000004e-05, "loss": 8.7622, "step": 510500 }, { "epoch": 4.09, "learning_rate": 4.7956e-05, "loss": 8.7517, "step": 511000 }, { "epoch": 4.09, "learning_rate": 4.7954e-05, "loss": 8.7565, "step": 511500 }, { "epoch": 4.1, "learning_rate": 4.7952000000000004e-05, "loss": 8.7623, "step": 512000 }, { "epoch": 4.1, "learning_rate": 4.795e-05, "loss": 8.7412, "step": 512500 }, { "epoch": 4.1, "learning_rate": 4.7948e-05, "loss": 8.7548, "step": 513000 }, { "epoch": 4.11, "learning_rate": 4.7946000000000005e-05, "loss": 8.7539, "step": 513500 }, { "epoch": 4.11, "learning_rate": 4.7944e-05, "loss": 8.7601, "step": 514000 }, { "epoch": 4.12, "learning_rate": 4.7942e-05, "loss": 8.7595, "step": 514500 }, { "epoch": 4.12, "learning_rate": 4.794e-05, "loss": 8.7504, "step": 515000 }, { "epoch": 4.12, "learning_rate": 4.7938e-05, "loss": 8.7748, "step": 515500 }, { "epoch": 4.13, "learning_rate": 4.7936e-05, "loss": 8.7505, "step": 516000 }, { "epoch": 4.13, "learning_rate": 4.7934e-05, "loss": 8.7553, "step": 516500 }, { "epoch": 4.14, "learning_rate": 4.793200000000001e-05, "loss": 8.7645, "step": 517000 }, { "epoch": 4.14, "learning_rate": 4.7930000000000004e-05, "loss": 8.7498, "step": 517500 }, { "epoch": 4.14, "learning_rate": 4.7928e-05, "loss": 8.7556, "step": 518000 }, { "epoch": 4.15, "learning_rate": 4.7926e-05, "loss": 8.7662, "step": 518500 }, { "epoch": 4.15, "learning_rate": 4.7924000000000004e-05, "loss": 8.782, "step": 519000 }, { "epoch": 4.16, "learning_rate": 4.7922e-05, "loss": 8.7636, "step": 519500 }, { "epoch": 4.16, "learning_rate": 4.792e-05, "loss": 8.7625, "step": 520000 }, { "epoch": 4.16, "learning_rate": 4.7918000000000004e-05, "loss": 8.7503, "step": 520500 }, { "epoch": 4.17, "learning_rate": 4.7916e-05, "loss": 8.7521, "step": 521000 }, { "epoch": 4.17, "learning_rate": 4.7914e-05, "loss": 8.7187, "step": 521500 }, { "epoch": 4.18, "learning_rate": 4.7912e-05, "loss": 8.7397, "step": 522000 }, { "epoch": 4.18, "learning_rate": 4.791000000000001e-05, "loss": 8.729, "step": 522500 }, { "epoch": 4.18, "learning_rate": 4.7908e-05, "loss": 8.7526, "step": 523000 }, { "epoch": 4.19, "learning_rate": 4.7906e-05, "loss": 8.7224, "step": 523500 }, { "epoch": 4.19, "learning_rate": 4.790400000000001e-05, "loss": 8.7554, "step": 524000 }, { "epoch": 4.2, "learning_rate": 4.7902e-05, "loss": 8.7756, "step": 524500 }, { "epoch": 4.2, "learning_rate": 4.79e-05, "loss": 8.755, "step": 525000 }, { "epoch": 4.2, "learning_rate": 4.7898e-05, "loss": 8.7724, "step": 525500 }, { "epoch": 4.21, "learning_rate": 4.7896000000000004e-05, "loss": 8.738, "step": 526000 }, { "epoch": 4.21, "learning_rate": 4.7894e-05, "loss": 8.7616, "step": 526500 }, { "epoch": 4.22, "learning_rate": 4.7892e-05, "loss": 8.7653, "step": 527000 }, { "epoch": 4.22, "learning_rate": 4.7890000000000004e-05, "loss": 8.7525, "step": 527500 }, { "epoch": 4.22, "learning_rate": 4.7888e-05, "loss": 8.7342, "step": 528000 }, { "epoch": 4.23, "learning_rate": 4.7886e-05, "loss": 8.7481, "step": 528500 }, { "epoch": 4.23, "learning_rate": 4.7884000000000004e-05, "loss": 8.7531, "step": 529000 }, { "epoch": 4.24, "learning_rate": 4.788200000000001e-05, "loss": 8.7467, "step": 529500 }, { "epoch": 4.24, "learning_rate": 4.788e-05, "loss": 8.7606, "step": 530000 }, { "epoch": 4.24, "learning_rate": 4.7878e-05, "loss": 8.7616, "step": 530500 }, { "epoch": 4.25, "learning_rate": 4.787600000000001e-05, "loss": 8.7543, "step": 531000 }, { "epoch": 4.25, "learning_rate": 4.7874e-05, "loss": 8.7449, "step": 531500 }, { "epoch": 4.26, "learning_rate": 4.7872e-05, "loss": 8.7379, "step": 532000 }, { "epoch": 4.26, "learning_rate": 4.787e-05, "loss": 8.75, "step": 532500 }, { "epoch": 4.26, "learning_rate": 4.7868e-05, "loss": 8.7545, "step": 533000 }, { "epoch": 4.27, "learning_rate": 4.7866e-05, "loss": 8.7516, "step": 533500 }, { "epoch": 4.27, "learning_rate": 4.7864e-05, "loss": 8.7609, "step": 534000 }, { "epoch": 4.28, "learning_rate": 4.7862000000000004e-05, "loss": 8.7567, "step": 534500 }, { "epoch": 4.28, "learning_rate": 4.7860000000000006e-05, "loss": 8.7473, "step": 535000 }, { "epoch": 4.28, "learning_rate": 4.7858e-05, "loss": 8.7593, "step": 535500 }, { "epoch": 4.29, "learning_rate": 4.7856000000000004e-05, "loss": 8.752, "step": 536000 }, { "epoch": 4.29, "learning_rate": 4.7854000000000006e-05, "loss": 8.7428, "step": 536500 }, { "epoch": 4.3, "learning_rate": 4.7852e-05, "loss": 8.7752, "step": 537000 }, { "epoch": 4.3, "learning_rate": 4.785e-05, "loss": 8.7553, "step": 537500 }, { "epoch": 4.3, "learning_rate": 4.784800000000001e-05, "loss": 8.7655, "step": 538000 }, { "epoch": 4.31, "learning_rate": 4.7846e-05, "loss": 8.7421, "step": 538500 }, { "epoch": 4.31, "learning_rate": 4.7844e-05, "loss": 8.7465, "step": 539000 }, { "epoch": 4.32, "learning_rate": 4.7842e-05, "loss": 8.7623, "step": 539500 }, { "epoch": 4.32, "learning_rate": 4.784e-05, "loss": 8.7699, "step": 540000 }, { "epoch": 4.32, "learning_rate": 4.7838000000000005e-05, "loss": 8.7608, "step": 540500 }, { "epoch": 4.33, "learning_rate": 4.7836e-05, "loss": 8.7561, "step": 541000 }, { "epoch": 4.33, "learning_rate": 4.7834e-05, "loss": 8.7749, "step": 541500 }, { "epoch": 4.34, "learning_rate": 4.7832000000000006e-05, "loss": 8.7393, "step": 542000 }, { "epoch": 4.34, "learning_rate": 4.783e-05, "loss": 8.749, "step": 542500 }, { "epoch": 4.34, "learning_rate": 4.7828000000000004e-05, "loss": 8.7468, "step": 543000 }, { "epoch": 4.35, "learning_rate": 4.7826000000000006e-05, "loss": 8.7438, "step": 543500 }, { "epoch": 4.35, "learning_rate": 4.7824e-05, "loss": 8.7608, "step": 544000 }, { "epoch": 4.36, "learning_rate": 4.7822e-05, "loss": 8.7449, "step": 544500 }, { "epoch": 4.36, "learning_rate": 4.7820000000000006e-05, "loss": 8.7582, "step": 545000 }, { "epoch": 4.36, "learning_rate": 4.7818e-05, "loss": 8.7417, "step": 545500 }, { "epoch": 4.37, "learning_rate": 4.7816e-05, "loss": 8.7402, "step": 546000 }, { "epoch": 4.37, "learning_rate": 4.7814e-05, "loss": 8.7631, "step": 546500 }, { "epoch": 4.38, "learning_rate": 4.7812e-05, "loss": 8.7404, "step": 547000 }, { "epoch": 4.38, "learning_rate": 4.7810000000000005e-05, "loss": 8.7467, "step": 547500 }, { "epoch": 4.38, "learning_rate": 4.7808e-05, "loss": 8.7535, "step": 548000 }, { "epoch": 4.39, "learning_rate": 4.7806e-05, "loss": 8.7181, "step": 548500 }, { "epoch": 4.39, "learning_rate": 4.7804000000000005e-05, "loss": 8.7445, "step": 549000 }, { "epoch": 4.4, "learning_rate": 4.7802e-05, "loss": 8.7866, "step": 549500 }, { "epoch": 4.4, "learning_rate": 4.78e-05, "loss": 8.7482, "step": 550000 }, { "epoch": 4.4, "learning_rate": 4.7798000000000006e-05, "loss": 8.7475, "step": 550500 }, { "epoch": 4.41, "learning_rate": 4.7796e-05, "loss": 8.7636, "step": 551000 }, { "epoch": 4.41, "learning_rate": 4.7794e-05, "loss": 8.7563, "step": 551500 }, { "epoch": 4.42, "learning_rate": 4.7792000000000006e-05, "loss": 8.7802, "step": 552000 }, { "epoch": 4.42, "learning_rate": 4.779e-05, "loss": 8.7351, "step": 552500 }, { "epoch": 4.42, "learning_rate": 4.7788000000000004e-05, "loss": 8.7508, "step": 553000 }, { "epoch": 4.43, "learning_rate": 4.7786000000000006e-05, "loss": 8.7724, "step": 553500 }, { "epoch": 4.43, "learning_rate": 4.7784e-05, "loss": 8.7689, "step": 554000 }, { "epoch": 4.44, "learning_rate": 4.7782000000000004e-05, "loss": 8.7684, "step": 554500 }, { "epoch": 4.44, "learning_rate": 4.778e-05, "loss": 8.7717, "step": 555000 }, { "epoch": 4.44, "learning_rate": 4.7778e-05, "loss": 8.7718, "step": 555500 }, { "epoch": 4.45, "learning_rate": 4.7776000000000005e-05, "loss": 8.7654, "step": 556000 }, { "epoch": 4.45, "learning_rate": 4.7774e-05, "loss": 8.7481, "step": 556500 }, { "epoch": 4.46, "learning_rate": 4.7772e-05, "loss": 8.7427, "step": 557000 }, { "epoch": 4.46, "learning_rate": 4.7770000000000005e-05, "loss": 8.7705, "step": 557500 }, { "epoch": 4.46, "learning_rate": 4.7768e-05, "loss": 8.7448, "step": 558000 }, { "epoch": 4.47, "learning_rate": 4.7765999999999996e-05, "loss": 8.7338, "step": 558500 }, { "epoch": 4.47, "learning_rate": 4.7764000000000006e-05, "loss": 8.7356, "step": 559000 }, { "epoch": 4.48, "learning_rate": 4.7762e-05, "loss": 8.7676, "step": 559500 }, { "epoch": 4.48, "learning_rate": 4.7760000000000004e-05, "loss": 8.7569, "step": 560000 }, { "epoch": 4.48, "learning_rate": 4.7758000000000006e-05, "loss": 8.772, "step": 560500 }, { "epoch": 4.49, "learning_rate": 4.7756e-05, "loss": 8.7395, "step": 561000 }, { "epoch": 4.49, "learning_rate": 4.7754000000000004e-05, "loss": 8.7639, "step": 561500 }, { "epoch": 4.5, "learning_rate": 4.7752e-05, "loss": 8.7387, "step": 562000 }, { "epoch": 4.5, "learning_rate": 4.775e-05, "loss": 8.7428, "step": 562500 }, { "epoch": 4.5, "learning_rate": 4.7748000000000004e-05, "loss": 8.7465, "step": 563000 }, { "epoch": 4.51, "learning_rate": 4.7746e-05, "loss": 8.7336, "step": 563500 }, { "epoch": 4.51, "learning_rate": 4.7744e-05, "loss": 8.7565, "step": 564000 }, { "epoch": 4.52, "learning_rate": 4.7742000000000005e-05, "loss": 8.7581, "step": 564500 }, { "epoch": 4.52, "learning_rate": 4.774e-05, "loss": 8.7249, "step": 565000 }, { "epoch": 4.52, "learning_rate": 4.7738e-05, "loss": 8.7651, "step": 565500 }, { "epoch": 4.53, "learning_rate": 4.7736000000000005e-05, "loss": 8.7521, "step": 566000 }, { "epoch": 4.53, "learning_rate": 4.7734e-05, "loss": 8.7434, "step": 566500 }, { "epoch": 4.54, "learning_rate": 4.7732e-05, "loss": 8.7441, "step": 567000 }, { "epoch": 4.54, "learning_rate": 4.7730000000000005e-05, "loss": 8.7777, "step": 567500 }, { "epoch": 4.54, "learning_rate": 4.7728e-05, "loss": 8.7526, "step": 568000 }, { "epoch": 4.55, "learning_rate": 4.7726000000000004e-05, "loss": 8.7525, "step": 568500 }, { "epoch": 4.55, "learning_rate": 4.7724e-05, "loss": 8.7323, "step": 569000 }, { "epoch": 4.56, "learning_rate": 4.7722e-05, "loss": 8.7522, "step": 569500 }, { "epoch": 4.56, "learning_rate": 4.7720000000000004e-05, "loss": 8.7496, "step": 570000 }, { "epoch": 4.56, "learning_rate": 4.7718e-05, "loss": 8.7318, "step": 570500 }, { "epoch": 4.57, "learning_rate": 4.7716e-05, "loss": 8.7415, "step": 571000 }, { "epoch": 4.57, "learning_rate": 4.7714000000000004e-05, "loss": 8.7345, "step": 571500 }, { "epoch": 4.58, "learning_rate": 4.7712e-05, "loss": 8.7335, "step": 572000 }, { "epoch": 4.58, "learning_rate": 4.771e-05, "loss": 8.7719, "step": 572500 }, { "epoch": 4.58, "learning_rate": 4.7708000000000005e-05, "loss": 8.76, "step": 573000 }, { "epoch": 4.59, "learning_rate": 4.7706e-05, "loss": 8.7465, "step": 573500 }, { "epoch": 4.59, "learning_rate": 4.7704e-05, "loss": 8.7392, "step": 574000 }, { "epoch": 4.6, "learning_rate": 4.7702000000000005e-05, "loss": 8.7694, "step": 574500 }, { "epoch": 4.6, "learning_rate": 4.77e-05, "loss": 8.7435, "step": 575000 }, { "epoch": 4.6, "learning_rate": 4.7698e-05, "loss": 8.7415, "step": 575500 }, { "epoch": 4.61, "learning_rate": 4.7696e-05, "loss": 8.7538, "step": 576000 }, { "epoch": 4.61, "learning_rate": 4.7694e-05, "loss": 8.7681, "step": 576500 }, { "epoch": 4.62, "learning_rate": 4.7692000000000003e-05, "loss": 8.7615, "step": 577000 }, { "epoch": 4.62, "learning_rate": 4.769e-05, "loss": 8.7546, "step": 577500 }, { "epoch": 4.62, "learning_rate": 4.768800000000001e-05, "loss": 8.7394, "step": 578000 }, { "epoch": 4.63, "learning_rate": 4.7686000000000004e-05, "loss": 8.7509, "step": 578500 }, { "epoch": 4.63, "learning_rate": 4.7684e-05, "loss": 8.764, "step": 579000 }, { "epoch": 4.64, "learning_rate": 4.7682e-05, "loss": 8.7423, "step": 579500 }, { "epoch": 4.64, "learning_rate": 4.7680000000000004e-05, "loss": 8.758, "step": 580000 }, { "epoch": 4.64, "learning_rate": 4.7678e-05, "loss": 8.7462, "step": 580500 }, { "epoch": 4.65, "learning_rate": 4.7676e-05, "loss": 8.7461, "step": 581000 }, { "epoch": 4.65, "learning_rate": 4.7674000000000005e-05, "loss": 8.7543, "step": 581500 }, { "epoch": 4.66, "learning_rate": 4.7672e-05, "loss": 8.7428, "step": 582000 }, { "epoch": 4.66, "learning_rate": 4.767e-05, "loss": 8.7842, "step": 582500 }, { "epoch": 4.66, "learning_rate": 4.7668e-05, "loss": 8.7545, "step": 583000 }, { "epoch": 4.67, "learning_rate": 4.7666e-05, "loss": 8.7374, "step": 583500 }, { "epoch": 4.67, "learning_rate": 4.7664e-05, "loss": 8.7314, "step": 584000 }, { "epoch": 4.68, "learning_rate": 4.7662e-05, "loss": 8.7562, "step": 584500 }, { "epoch": 4.68, "learning_rate": 4.766000000000001e-05, "loss": 8.7586, "step": 585000 }, { "epoch": 4.68, "learning_rate": 4.7658000000000003e-05, "loss": 8.7468, "step": 585500 }, { "epoch": 4.69, "learning_rate": 4.7656e-05, "loss": 8.7601, "step": 586000 }, { "epoch": 4.69, "learning_rate": 4.7654e-05, "loss": 8.7366, "step": 586500 }, { "epoch": 4.7, "learning_rate": 4.7652000000000004e-05, "loss": 8.7526, "step": 587000 }, { "epoch": 4.7, "learning_rate": 4.765e-05, "loss": 8.7386, "step": 587500 }, { "epoch": 4.7, "learning_rate": 4.7648e-05, "loss": 8.7365, "step": 588000 }, { "epoch": 4.71, "learning_rate": 4.7646000000000004e-05, "loss": 8.7565, "step": 588500 }, { "epoch": 4.71, "learning_rate": 4.7644e-05, "loss": 8.753, "step": 589000 }, { "epoch": 4.72, "learning_rate": 4.7642e-05, "loss": 8.7412, "step": 589500 }, { "epoch": 4.72, "learning_rate": 4.7640000000000005e-05, "loss": 8.7316, "step": 590000 }, { "epoch": 4.72, "learning_rate": 4.763800000000001e-05, "loss": 8.7529, "step": 590500 }, { "epoch": 4.73, "learning_rate": 4.7636e-05, "loss": 8.7293, "step": 591000 }, { "epoch": 4.73, "learning_rate": 4.7634e-05, "loss": 8.7475, "step": 591500 }, { "epoch": 4.74, "learning_rate": 4.763200000000001e-05, "loss": 8.7511, "step": 592000 }, { "epoch": 4.74, "learning_rate": 4.763e-05, "loss": 8.7515, "step": 592500 }, { "epoch": 4.74, "learning_rate": 4.7628e-05, "loss": 8.7538, "step": 593000 }, { "epoch": 4.75, "learning_rate": 4.7626e-05, "loss": 8.7386, "step": 593500 }, { "epoch": 4.75, "learning_rate": 4.7624000000000003e-05, "loss": 8.7435, "step": 594000 }, { "epoch": 4.76, "learning_rate": 4.7622e-05, "loss": 8.7602, "step": 594500 }, { "epoch": 4.76, "learning_rate": 4.762e-05, "loss": 8.7542, "step": 595000 }, { "epoch": 4.76, "learning_rate": 4.7618000000000004e-05, "loss": 8.7462, "step": 595500 }, { "epoch": 4.77, "learning_rate": 4.7616000000000006e-05, "loss": 8.7542, "step": 596000 }, { "epoch": 4.77, "learning_rate": 4.7614e-05, "loss": 8.7625, "step": 596500 }, { "epoch": 4.78, "learning_rate": 4.7612000000000004e-05, "loss": 8.7306, "step": 597000 }, { "epoch": 4.78, "learning_rate": 4.761000000000001e-05, "loss": 8.7577, "step": 597500 }, { "epoch": 4.78, "learning_rate": 4.7608e-05, "loss": 8.7514, "step": 598000 }, { "epoch": 4.79, "learning_rate": 4.7606e-05, "loss": 8.7462, "step": 598500 }, { "epoch": 4.79, "learning_rate": 4.760400000000001e-05, "loss": 8.7735, "step": 599000 }, { "epoch": 4.8, "learning_rate": 4.7602e-05, "loss": 8.7299, "step": 599500 }, { "epoch": 4.8, "learning_rate": 4.76e-05, "loss": 8.7545, "step": 600000 }, { "epoch": 4.8, "learning_rate": 4.7598e-05, "loss": 8.7465, "step": 600500 }, { "epoch": 4.81, "learning_rate": 4.7596e-05, "loss": 8.7472, "step": 601000 }, { "epoch": 4.81, "learning_rate": 4.7594e-05, "loss": 8.7558, "step": 601500 }, { "epoch": 4.82, "learning_rate": 4.7592e-05, "loss": 8.748, "step": 602000 }, { "epoch": 4.82, "learning_rate": 4.7590000000000003e-05, "loss": 8.7524, "step": 602500 }, { "epoch": 4.82, "learning_rate": 4.7588000000000006e-05, "loss": 8.7839, "step": 603000 }, { "epoch": 4.83, "learning_rate": 4.7586e-05, "loss": 8.7422, "step": 603500 }, { "epoch": 4.83, "learning_rate": 4.7584000000000004e-05, "loss": 8.7628, "step": 604000 }, { "epoch": 4.84, "learning_rate": 4.7582000000000006e-05, "loss": 8.7566, "step": 604500 }, { "epoch": 4.84, "learning_rate": 4.758e-05, "loss": 8.7475, "step": 605000 }, { "epoch": 4.84, "learning_rate": 4.7578e-05, "loss": 8.7617, "step": 605500 }, { "epoch": 4.85, "learning_rate": 4.757600000000001e-05, "loss": 8.7566, "step": 606000 }, { "epoch": 4.85, "learning_rate": 4.7574e-05, "loss": 8.745, "step": 606500 }, { "epoch": 4.86, "learning_rate": 4.7572e-05, "loss": 8.742, "step": 607000 }, { "epoch": 4.86, "learning_rate": 4.757e-05, "loss": 8.743, "step": 607500 }, { "epoch": 4.86, "learning_rate": 4.7568e-05, "loss": 8.7595, "step": 608000 }, { "epoch": 4.87, "learning_rate": 4.7566000000000005e-05, "loss": 8.7555, "step": 608500 }, { "epoch": 4.87, "learning_rate": 4.7564e-05, "loss": 8.7441, "step": 609000 }, { "epoch": 4.88, "learning_rate": 4.7562e-05, "loss": 8.7324, "step": 609500 }, { "epoch": 4.88, "learning_rate": 4.7560000000000005e-05, "loss": 8.7648, "step": 610000 }, { "epoch": 4.88, "learning_rate": 4.7558e-05, "loss": 8.7539, "step": 610500 }, { "epoch": 4.89, "learning_rate": 4.7556000000000003e-05, "loss": 8.7702, "step": 611000 }, { "epoch": 4.89, "learning_rate": 4.7554000000000006e-05, "loss": 8.7426, "step": 611500 }, { "epoch": 4.9, "learning_rate": 4.7552e-05, "loss": 8.7594, "step": 612000 }, { "epoch": 4.9, "learning_rate": 4.755e-05, "loss": 8.7513, "step": 612500 }, { "epoch": 4.9, "learning_rate": 4.7548000000000006e-05, "loss": 8.756, "step": 613000 }, { "epoch": 4.91, "learning_rate": 4.7546e-05, "loss": 8.7519, "step": 613500 }, { "epoch": 4.91, "learning_rate": 4.7544e-05, "loss": 8.762, "step": 614000 }, { "epoch": 4.92, "learning_rate": 4.7542e-05, "loss": 8.7368, "step": 614500 }, { "epoch": 4.92, "learning_rate": 4.754e-05, "loss": 8.7562, "step": 615000 }, { "epoch": 4.92, "learning_rate": 4.7538000000000005e-05, "loss": 8.7536, "step": 615500 }, { "epoch": 4.93, "learning_rate": 4.7536e-05, "loss": 8.756, "step": 616000 }, { "epoch": 4.93, "learning_rate": 4.7534e-05, "loss": 8.7499, "step": 616500 }, { "epoch": 4.94, "learning_rate": 4.7532000000000005e-05, "loss": 8.749, "step": 617000 }, { "epoch": 4.94, "learning_rate": 4.753e-05, "loss": 8.755, "step": 617500 }, { "epoch": 4.94, "learning_rate": 4.7528e-05, "loss": 8.7512, "step": 618000 }, { "epoch": 4.95, "learning_rate": 4.7526000000000005e-05, "loss": 8.7558, "step": 618500 }, { "epoch": 4.95, "learning_rate": 4.7524e-05, "loss": 8.7733, "step": 619000 }, { "epoch": 4.96, "learning_rate": 4.7522e-05, "loss": 8.7631, "step": 619500 }, { "epoch": 4.96, "learning_rate": 4.7520000000000006e-05, "loss": 8.7492, "step": 620000 }, { "epoch": 4.96, "learning_rate": 4.7518e-05, "loss": 8.7531, "step": 620500 }, { "epoch": 4.97, "learning_rate": 4.7516000000000004e-05, "loss": 8.7357, "step": 621000 }, { "epoch": 4.97, "learning_rate": 4.7514000000000006e-05, "loss": 8.7442, "step": 621500 }, { "epoch": 4.98, "learning_rate": 4.7512e-05, "loss": 8.7499, "step": 622000 }, { "epoch": 4.98, "learning_rate": 4.7510000000000004e-05, "loss": 8.7549, "step": 622500 }, { "epoch": 4.98, "learning_rate": 4.7508e-05, "loss": 8.7578, "step": 623000 }, { "epoch": 4.99, "learning_rate": 4.7506e-05, "loss": 8.75, "step": 623500 }, { "epoch": 4.99, "learning_rate": 4.7504000000000005e-05, "loss": 8.7476, "step": 624000 }, { "epoch": 5.0, "learning_rate": 4.7502e-05, "loss": 8.7717, "step": 624500 }, { "epoch": 5.0, "learning_rate": 4.75e-05, "loss": 8.7518, "step": 625000 }, { "epoch": 5.0, "learning_rate": 4.7498000000000005e-05, "loss": 8.7363, "step": 625500 }, { "epoch": 5.01, "learning_rate": 4.7496e-05, "loss": 8.7565, "step": 626000 }, { "epoch": 5.01, "learning_rate": 4.7493999999999996e-05, "loss": 8.7451, "step": 626500 }, { "epoch": 5.02, "learning_rate": 4.7492000000000005e-05, "loss": 8.7622, "step": 627000 }, { "epoch": 5.02, "learning_rate": 4.749e-05, "loss": 8.743, "step": 627500 }, { "epoch": 5.02, "learning_rate": 4.7488000000000003e-05, "loss": 8.7464, "step": 628000 }, { "epoch": 5.03, "learning_rate": 4.7486000000000006e-05, "loss": 8.7468, "step": 628500 }, { "epoch": 5.03, "learning_rate": 4.7484e-05, "loss": 8.7432, "step": 629000 }, { "epoch": 5.04, "learning_rate": 4.7482000000000004e-05, "loss": 8.7428, "step": 629500 }, { "epoch": 5.04, "learning_rate": 4.748e-05, "loss": 8.7689, "step": 630000 }, { "epoch": 5.04, "learning_rate": 4.7478e-05, "loss": 8.7494, "step": 630500 }, { "epoch": 5.05, "learning_rate": 4.7476000000000004e-05, "loss": 8.769, "step": 631000 }, { "epoch": 5.05, "learning_rate": 4.7474e-05, "loss": 8.7354, "step": 631500 }, { "epoch": 5.06, "learning_rate": 4.7472e-05, "loss": 8.7484, "step": 632000 }, { "epoch": 5.06, "learning_rate": 4.7470000000000005e-05, "loss": 8.7583, "step": 632500 }, { "epoch": 5.06, "learning_rate": 4.7468e-05, "loss": 8.7605, "step": 633000 }, { "epoch": 5.07, "learning_rate": 4.7466e-05, "loss": 8.7466, "step": 633500 }, { "epoch": 5.07, "learning_rate": 4.7464000000000005e-05, "loss": 8.7359, "step": 634000 }, { "epoch": 5.08, "learning_rate": 4.7462e-05, "loss": 8.743, "step": 634500 }, { "epoch": 5.08, "learning_rate": 4.746e-05, "loss": 8.7499, "step": 635000 }, { "epoch": 5.08, "learning_rate": 4.7458000000000005e-05, "loss": 8.746, "step": 635500 }, { "epoch": 5.09, "learning_rate": 4.7456e-05, "loss": 8.7517, "step": 636000 }, { "epoch": 5.09, "learning_rate": 4.7454000000000003e-05, "loss": 8.7398, "step": 636500 }, { "epoch": 5.1, "learning_rate": 4.7452e-05, "loss": 8.7467, "step": 637000 }, { "epoch": 5.1, "learning_rate": 4.745e-05, "loss": 8.7372, "step": 637500 }, { "epoch": 5.1, "learning_rate": 4.7448000000000004e-05, "loss": 8.7508, "step": 638000 }, { "epoch": 5.11, "learning_rate": 4.7446e-05, "loss": 8.7571, "step": 638500 }, { "epoch": 5.11, "learning_rate": 4.7444e-05, "loss": 8.7559, "step": 639000 }, { "epoch": 5.12, "learning_rate": 4.7442000000000004e-05, "loss": 8.7611, "step": 639500 }, { "epoch": 5.12, "learning_rate": 4.744e-05, "loss": 8.7432, "step": 640000 }, { "epoch": 5.12, "learning_rate": 4.7438e-05, "loss": 8.7649, "step": 640500 }, { "epoch": 5.13, "learning_rate": 4.7436000000000005e-05, "loss": 8.7531, "step": 641000 }, { "epoch": 5.13, "learning_rate": 4.7434e-05, "loss": 8.7493, "step": 641500 }, { "epoch": 5.14, "learning_rate": 4.7432e-05, "loss": 8.7362, "step": 642000 }, { "epoch": 5.14, "learning_rate": 4.7430000000000005e-05, "loss": 8.7518, "step": 642500 }, { "epoch": 5.14, "learning_rate": 4.7428e-05, "loss": 8.7423, "step": 643000 }, { "epoch": 5.15, "learning_rate": 4.7426e-05, "loss": 8.7439, "step": 643500 }, { "epoch": 5.15, "learning_rate": 4.7424e-05, "loss": 8.7527, "step": 644000 }, { "epoch": 5.16, "learning_rate": 4.7422e-05, "loss": 8.7505, "step": 644500 }, { "epoch": 5.16, "learning_rate": 4.742e-05, "loss": 8.75, "step": 645000 }, { "epoch": 5.16, "learning_rate": 4.7418e-05, "loss": 8.7435, "step": 645500 }, { "epoch": 5.17, "learning_rate": 4.741600000000001e-05, "loss": 8.7461, "step": 646000 }, { "epoch": 5.17, "learning_rate": 4.7414000000000004e-05, "loss": 8.7411, "step": 646500 }, { "epoch": 5.18, "learning_rate": 4.7412e-05, "loss": 8.7669, "step": 647000 }, { "epoch": 5.18, "learning_rate": 4.741e-05, "loss": 8.7466, "step": 647500 }, { "epoch": 5.18, "learning_rate": 4.7408000000000004e-05, "loss": 8.779, "step": 648000 }, { "epoch": 5.19, "learning_rate": 4.7406e-05, "loss": 8.7669, "step": 648500 }, { "epoch": 5.19, "learning_rate": 4.7404e-05, "loss": 8.754, "step": 649000 }, { "epoch": 5.2, "learning_rate": 4.7402000000000005e-05, "loss": 8.7269, "step": 649500 }, { "epoch": 5.2, "learning_rate": 4.74e-05, "loss": 8.7516, "step": 650000 }, { "epoch": 5.2, "learning_rate": 4.7398e-05, "loss": 8.7505, "step": 650500 }, { "epoch": 5.21, "learning_rate": 4.7396e-05, "loss": 8.7532, "step": 651000 }, { "epoch": 5.21, "learning_rate": 4.7394e-05, "loss": 8.7607, "step": 651500 }, { "epoch": 5.22, "learning_rate": 4.7392e-05, "loss": 8.7454, "step": 652000 }, { "epoch": 5.22, "learning_rate": 4.739e-05, "loss": 8.7484, "step": 652500 }, { "epoch": 5.22, "learning_rate": 4.738800000000001e-05, "loss": 8.7694, "step": 653000 }, { "epoch": 5.23, "learning_rate": 4.7386e-05, "loss": 8.764, "step": 653500 }, { "epoch": 5.23, "learning_rate": 4.7384e-05, "loss": 8.7495, "step": 654000 }, { "epoch": 5.24, "learning_rate": 4.7382e-05, "loss": 8.7572, "step": 654500 }, { "epoch": 5.24, "learning_rate": 4.7380000000000004e-05, "loss": 8.7495, "step": 655000 }, { "epoch": 5.24, "learning_rate": 4.7378e-05, "loss": 8.7603, "step": 655500 }, { "epoch": 5.25, "learning_rate": 4.7376e-05, "loss": 8.7483, "step": 656000 }, { "epoch": 5.25, "learning_rate": 4.7374000000000004e-05, "loss": 8.7515, "step": 656500 }, { "epoch": 5.26, "learning_rate": 4.7372e-05, "loss": 8.7321, "step": 657000 }, { "epoch": 5.26, "learning_rate": 4.737e-05, "loss": 8.7586, "step": 657500 }, { "epoch": 5.26, "learning_rate": 4.7368000000000005e-05, "loss": 8.7697, "step": 658000 }, { "epoch": 5.27, "learning_rate": 4.736600000000001e-05, "loss": 8.7733, "step": 658500 }, { "epoch": 5.27, "learning_rate": 4.7364e-05, "loss": 8.7587, "step": 659000 }, { "epoch": 5.28, "learning_rate": 4.7362e-05, "loss": 8.757, "step": 659500 }, { "epoch": 5.28, "learning_rate": 4.736000000000001e-05, "loss": 8.7394, "step": 660000 }, { "epoch": 5.28, "learning_rate": 4.7358e-05, "loss": 8.7624, "step": 660500 }, { "epoch": 5.29, "learning_rate": 4.7356e-05, "loss": 8.7533, "step": 661000 }, { "epoch": 5.29, "learning_rate": 4.7354e-05, "loss": 8.7655, "step": 661500 }, { "epoch": 5.3, "learning_rate": 4.7352e-05, "loss": 8.7661, "step": 662000 }, { "epoch": 5.3, "learning_rate": 4.735e-05, "loss": 8.7352, "step": 662500 }, { "epoch": 5.3, "learning_rate": 4.7348e-05, "loss": 8.7334, "step": 663000 }, { "epoch": 5.31, "learning_rate": 4.7346000000000004e-05, "loss": 8.7588, "step": 663500 }, { "epoch": 5.31, "learning_rate": 4.7344000000000006e-05, "loss": 8.7459, "step": 664000 }, { "epoch": 5.32, "learning_rate": 4.7342e-05, "loss": 8.7608, "step": 664500 }, { "epoch": 5.32, "learning_rate": 4.7340000000000004e-05, "loss": 8.748, "step": 665000 }, { "epoch": 5.32, "learning_rate": 4.7338000000000007e-05, "loss": 8.7622, "step": 665500 }, { "epoch": 5.33, "learning_rate": 4.7336e-05, "loss": 8.7342, "step": 666000 }, { "epoch": 5.33, "learning_rate": 4.7334e-05, "loss": 8.7517, "step": 666500 }, { "epoch": 5.34, "learning_rate": 4.733200000000001e-05, "loss": 8.7569, "step": 667000 }, { "epoch": 5.34, "learning_rate": 4.733e-05, "loss": 8.7613, "step": 667500 }, { "epoch": 5.34, "learning_rate": 4.7328e-05, "loss": 8.7377, "step": 668000 }, { "epoch": 5.35, "learning_rate": 4.7326e-05, "loss": 8.7542, "step": 668500 }, { "epoch": 5.35, "learning_rate": 4.7324e-05, "loss": 8.7308, "step": 669000 }, { "epoch": 5.36, "learning_rate": 4.7322e-05, "loss": 8.7557, "step": 669500 }, { "epoch": 5.36, "learning_rate": 4.732e-05, "loss": 8.7381, "step": 670000 }, { "epoch": 5.36, "learning_rate": 4.7318e-05, "loss": 8.7554, "step": 670500 }, { "epoch": 5.37, "learning_rate": 4.7316000000000006e-05, "loss": 8.7487, "step": 671000 }, { "epoch": 5.37, "learning_rate": 4.7314e-05, "loss": 8.7617, "step": 671500 }, { "epoch": 5.38, "learning_rate": 4.7312000000000004e-05, "loss": 8.739, "step": 672000 }, { "epoch": 5.38, "learning_rate": 4.7310000000000006e-05, "loss": 8.7498, "step": 672500 }, { "epoch": 5.38, "learning_rate": 4.7308e-05, "loss": 8.7363, "step": 673000 }, { "epoch": 5.39, "learning_rate": 4.7306e-05, "loss": 8.7562, "step": 673500 }, { "epoch": 5.39, "learning_rate": 4.7304000000000007e-05, "loss": 8.7443, "step": 674000 }, { "epoch": 5.4, "learning_rate": 4.7302e-05, "loss": 8.7399, "step": 674500 }, { "epoch": 5.4, "learning_rate": 4.73e-05, "loss": 8.7543, "step": 675000 }, { "epoch": 5.4, "learning_rate": 4.7298e-05, "loss": 8.7369, "step": 675500 }, { "epoch": 5.41, "learning_rate": 4.7296e-05, "loss": 8.7508, "step": 676000 }, { "epoch": 5.41, "learning_rate": 4.7294000000000005e-05, "loss": 8.7427, "step": 676500 }, { "epoch": 5.42, "learning_rate": 4.7292e-05, "loss": 8.7719, "step": 677000 }, { "epoch": 5.42, "learning_rate": 4.729e-05, "loss": 8.7513, "step": 677500 }, { "epoch": 5.42, "learning_rate": 4.7288000000000005e-05, "loss": 8.7606, "step": 678000 }, { "epoch": 5.43, "learning_rate": 4.7286e-05, "loss": 8.7438, "step": 678500 }, { "epoch": 5.43, "learning_rate": 4.7284e-05, "loss": 8.7321, "step": 679000 }, { "epoch": 5.44, "learning_rate": 4.7282000000000006e-05, "loss": 8.7533, "step": 679500 }, { "epoch": 5.44, "learning_rate": 4.728e-05, "loss": 8.7865, "step": 680000 }, { "epoch": 5.44, "learning_rate": 4.7278e-05, "loss": 8.7571, "step": 680500 }, { "epoch": 5.45, "learning_rate": 4.7276000000000006e-05, "loss": 8.7375, "step": 681000 }, { "epoch": 5.45, "learning_rate": 4.7274e-05, "loss": 8.735, "step": 681500 }, { "epoch": 5.46, "learning_rate": 4.7272e-05, "loss": 8.7661, "step": 682000 }, { "epoch": 5.46, "learning_rate": 4.7270000000000007e-05, "loss": 8.7396, "step": 682500 }, { "epoch": 5.46, "learning_rate": 4.7268e-05, "loss": 8.746, "step": 683000 }, { "epoch": 5.47, "learning_rate": 4.7266000000000005e-05, "loss": 8.7234, "step": 683500 }, { "epoch": 5.47, "learning_rate": 4.7264e-05, "loss": 8.7574, "step": 684000 }, { "epoch": 5.48, "learning_rate": 4.7262e-05, "loss": 8.7484, "step": 684500 }, { "epoch": 5.48, "learning_rate": 4.7260000000000005e-05, "loss": 8.7425, "step": 685000 }, { "epoch": 5.48, "learning_rate": 4.7258e-05, "loss": 8.7636, "step": 685500 }, { "epoch": 5.49, "learning_rate": 4.7256e-05, "loss": 8.7503, "step": 686000 }, { "epoch": 5.49, "learning_rate": 4.7254000000000005e-05, "loss": 8.7699, "step": 686500 }, { "epoch": 5.5, "learning_rate": 4.7252e-05, "loss": 8.7598, "step": 687000 }, { "epoch": 5.5, "learning_rate": 4.7249999999999997e-05, "loss": 8.7505, "step": 687500 }, { "epoch": 5.5, "learning_rate": 4.7248000000000006e-05, "loss": 8.7346, "step": 688000 }, { "epoch": 5.51, "learning_rate": 4.7246e-05, "loss": 8.7663, "step": 688500 }, { "epoch": 5.51, "learning_rate": 4.7244000000000004e-05, "loss": 8.7502, "step": 689000 }, { "epoch": 5.52, "learning_rate": 4.7242000000000006e-05, "loss": 8.7637, "step": 689500 }, { "epoch": 5.52, "learning_rate": 4.724e-05, "loss": 8.7522, "step": 690000 }, { "epoch": 5.52, "learning_rate": 4.7238000000000004e-05, "loss": 8.7288, "step": 690500 }, { "epoch": 5.53, "learning_rate": 4.7236e-05, "loss": 8.7226, "step": 691000 }, { "epoch": 5.53, "learning_rate": 4.7234e-05, "loss": 8.7622, "step": 691500 }, { "epoch": 5.54, "learning_rate": 4.7232000000000005e-05, "loss": 8.7544, "step": 692000 }, { "epoch": 5.54, "learning_rate": 4.723e-05, "loss": 8.7595, "step": 692500 }, { "epoch": 5.54, "learning_rate": 4.7228e-05, "loss": 8.7517, "step": 693000 }, { "epoch": 5.55, "learning_rate": 4.7226000000000005e-05, "loss": 8.7454, "step": 693500 }, { "epoch": 5.55, "learning_rate": 4.7224e-05, "loss": 8.7437, "step": 694000 }, { "epoch": 5.56, "learning_rate": 4.7222e-05, "loss": 8.7513, "step": 694500 }, { "epoch": 5.56, "learning_rate": 4.7220000000000005e-05, "loss": 8.7477, "step": 695000 }, { "epoch": 5.56, "learning_rate": 4.7218e-05, "loss": 8.7548, "step": 695500 }, { "epoch": 5.57, "learning_rate": 4.7216e-05, "loss": 8.7475, "step": 696000 }, { "epoch": 5.57, "learning_rate": 4.7214000000000006e-05, "loss": 8.763, "step": 696500 }, { "epoch": 5.58, "learning_rate": 4.7212e-05, "loss": 8.7648, "step": 697000 }, { "epoch": 5.58, "learning_rate": 4.7210000000000004e-05, "loss": 8.7581, "step": 697500 }, { "epoch": 5.58, "learning_rate": 4.7208e-05, "loss": 8.737, "step": 698000 }, { "epoch": 5.59, "learning_rate": 4.7206e-05, "loss": 8.7625, "step": 698500 }, { "epoch": 5.59, "learning_rate": 4.7204000000000004e-05, "loss": 8.7457, "step": 699000 }, { "epoch": 5.6, "learning_rate": 4.7202e-05, "loss": 8.7416, "step": 699500 }, { "epoch": 5.6, "learning_rate": 4.72e-05, "loss": 8.7489, "step": 700000 }, { "epoch": 5.6, "learning_rate": 4.7198000000000004e-05, "loss": 8.7571, "step": 700500 }, { "epoch": 5.61, "learning_rate": 4.7196e-05, "loss": 8.7475, "step": 701000 }, { "epoch": 5.61, "learning_rate": 4.7194e-05, "loss": 8.7488, "step": 701500 }, { "epoch": 5.62, "learning_rate": 4.7192000000000005e-05, "loss": 8.7479, "step": 702000 }, { "epoch": 5.62, "learning_rate": 4.719e-05, "loss": 8.7414, "step": 702500 }, { "epoch": 5.62, "learning_rate": 4.7188e-05, "loss": 8.7295, "step": 703000 }, { "epoch": 5.63, "learning_rate": 4.7186000000000005e-05, "loss": 8.7695, "step": 703500 }, { "epoch": 5.63, "learning_rate": 4.7184e-05, "loss": 8.7448, "step": 704000 }, { "epoch": 5.64, "learning_rate": 4.7182e-05, "loss": 8.7634, "step": 704500 }, { "epoch": 5.64, "learning_rate": 4.718e-05, "loss": 8.7518, "step": 705000 }, { "epoch": 5.64, "learning_rate": 4.7178e-05, "loss": 8.755, "step": 705500 }, { "epoch": 5.65, "learning_rate": 4.7176000000000004e-05, "loss": 8.7551, "step": 706000 }, { "epoch": 5.65, "learning_rate": 4.7174e-05, "loss": 8.752, "step": 706500 }, { "epoch": 5.66, "learning_rate": 4.7172e-05, "loss": 8.7563, "step": 707000 }, { "epoch": 5.66, "learning_rate": 4.7170000000000004e-05, "loss": 8.7611, "step": 707500 }, { "epoch": 5.66, "learning_rate": 4.7168e-05, "loss": 8.7514, "step": 708000 }, { "epoch": 5.67, "learning_rate": 4.7166e-05, "loss": 8.7601, "step": 708500 }, { "epoch": 5.67, "learning_rate": 4.7164000000000004e-05, "loss": 8.7648, "step": 709000 }, { "epoch": 5.68, "learning_rate": 4.7162e-05, "loss": 8.7556, "step": 709500 }, { "epoch": 5.68, "learning_rate": 4.716e-05, "loss": 8.7652, "step": 710000 }, { "epoch": 5.68, "learning_rate": 4.7158000000000005e-05, "loss": 8.7476, "step": 710500 }, { "epoch": 5.69, "learning_rate": 4.7156e-05, "loss": 8.7524, "step": 711000 }, { "epoch": 5.69, "learning_rate": 4.7154e-05, "loss": 8.747, "step": 711500 }, { "epoch": 5.7, "learning_rate": 4.7152e-05, "loss": 8.7438, "step": 712000 }, { "epoch": 5.7, "learning_rate": 4.715e-05, "loss": 8.7535, "step": 712500 }, { "epoch": 5.7, "learning_rate": 4.7148e-05, "loss": 8.7333, "step": 713000 }, { "epoch": 5.71, "learning_rate": 4.7146e-05, "loss": 8.7589, "step": 713500 }, { "epoch": 5.71, "learning_rate": 4.714400000000001e-05, "loss": 8.7535, "step": 714000 }, { "epoch": 5.72, "learning_rate": 4.7142000000000004e-05, "loss": 8.7367, "step": 714500 }, { "epoch": 5.72, "learning_rate": 4.714e-05, "loss": 8.7474, "step": 715000 }, { "epoch": 5.72, "learning_rate": 4.7138e-05, "loss": 8.751, "step": 715500 }, { "epoch": 5.73, "learning_rate": 4.7136000000000004e-05, "loss": 8.7505, "step": 716000 }, { "epoch": 5.73, "learning_rate": 4.7134e-05, "loss": 8.7624, "step": 716500 }, { "epoch": 5.74, "learning_rate": 4.7132e-05, "loss": 8.7474, "step": 717000 }, { "epoch": 5.74, "learning_rate": 4.7130000000000004e-05, "loss": 8.7419, "step": 717500 }, { "epoch": 5.74, "learning_rate": 4.7128e-05, "loss": 8.7374, "step": 718000 }, { "epoch": 5.75, "learning_rate": 4.7126e-05, "loss": 8.7473, "step": 718500 }, { "epoch": 5.75, "learning_rate": 4.7124000000000005e-05, "loss": 8.7498, "step": 719000 }, { "epoch": 5.76, "learning_rate": 4.712200000000001e-05, "loss": 8.7616, "step": 719500 }, { "epoch": 5.76, "learning_rate": 4.712e-05, "loss": 8.7447, "step": 720000 }, { "epoch": 5.76, "learning_rate": 4.7118e-05, "loss": 8.7444, "step": 720500 }, { "epoch": 5.77, "learning_rate": 4.711600000000001e-05, "loss": 8.7532, "step": 721000 }, { "epoch": 5.77, "learning_rate": 4.7114e-05, "loss": 8.7596, "step": 721500 }, { "epoch": 5.78, "learning_rate": 4.7112e-05, "loss": 8.7504, "step": 722000 }, { "epoch": 5.78, "learning_rate": 4.711e-05, "loss": 8.7738, "step": 722500 }, { "epoch": 5.78, "learning_rate": 4.7108000000000004e-05, "loss": 8.7431, "step": 723000 }, { "epoch": 5.79, "learning_rate": 4.7106e-05, "loss": 8.7453, "step": 723500 }, { "epoch": 5.79, "learning_rate": 4.7104e-05, "loss": 8.7284, "step": 724000 }, { "epoch": 5.8, "learning_rate": 4.7102000000000004e-05, "loss": 8.7391, "step": 724500 }, { "epoch": 5.8, "learning_rate": 4.71e-05, "loss": 8.7456, "step": 725000 }, { "epoch": 5.8, "learning_rate": 4.7098e-05, "loss": 8.7534, "step": 725500 }, { "epoch": 5.81, "learning_rate": 4.7096000000000004e-05, "loss": 8.753, "step": 726000 }, { "epoch": 5.81, "learning_rate": 4.709400000000001e-05, "loss": 8.7371, "step": 726500 }, { "epoch": 5.82, "learning_rate": 4.7092e-05, "loss": 8.7457, "step": 727000 }, { "epoch": 5.82, "learning_rate": 4.709e-05, "loss": 8.7505, "step": 727500 }, { "epoch": 5.82, "learning_rate": 4.708800000000001e-05, "loss": 8.767, "step": 728000 }, { "epoch": 5.83, "learning_rate": 4.7086e-05, "loss": 8.7433, "step": 728500 }, { "epoch": 5.83, "learning_rate": 4.7084e-05, "loss": 8.7626, "step": 729000 }, { "epoch": 5.84, "learning_rate": 4.7082e-05, "loss": 8.7656, "step": 729500 }, { "epoch": 5.84, "learning_rate": 4.708e-05, "loss": 8.7567, "step": 730000 }, { "epoch": 5.84, "learning_rate": 4.7078e-05, "loss": 8.7273, "step": 730500 }, { "epoch": 5.85, "learning_rate": 4.7076e-05, "loss": 8.7502, "step": 731000 }, { "epoch": 5.85, "learning_rate": 4.7074000000000004e-05, "loss": 8.7346, "step": 731500 }, { "epoch": 5.86, "learning_rate": 4.7072000000000006e-05, "loss": 8.7323, "step": 732000 }, { "epoch": 5.86, "learning_rate": 4.707e-05, "loss": 8.7544, "step": 732500 }, { "epoch": 5.86, "learning_rate": 4.7068000000000004e-05, "loss": 8.7366, "step": 733000 }, { "epoch": 5.87, "learning_rate": 4.7066000000000006e-05, "loss": 8.7397, "step": 733500 }, { "epoch": 5.87, "learning_rate": 4.7064e-05, "loss": 8.7394, "step": 734000 }, { "epoch": 5.88, "learning_rate": 4.7062e-05, "loss": 8.7622, "step": 734500 }, { "epoch": 5.88, "learning_rate": 4.706000000000001e-05, "loss": 8.7406, "step": 735000 }, { "epoch": 5.88, "learning_rate": 4.7058e-05, "loss": 8.7603, "step": 735500 }, { "epoch": 5.89, "learning_rate": 4.7056e-05, "loss": 8.7463, "step": 736000 }, { "epoch": 5.89, "learning_rate": 4.7054e-05, "loss": 8.7447, "step": 736500 }, { "epoch": 5.9, "learning_rate": 4.7052e-05, "loss": 8.7742, "step": 737000 }, { "epoch": 5.9, "learning_rate": 4.705e-05, "loss": 8.7461, "step": 737500 }, { "epoch": 5.9, "learning_rate": 4.7048e-05, "loss": 8.7501, "step": 738000 }, { "epoch": 5.91, "learning_rate": 4.7046e-05, "loss": 8.7593, "step": 738500 }, { "epoch": 5.91, "learning_rate": 4.7044000000000006e-05, "loss": 8.7456, "step": 739000 }, { "epoch": 5.92, "learning_rate": 4.7042e-05, "loss": 8.7436, "step": 739500 }, { "epoch": 5.92, "learning_rate": 4.7040000000000004e-05, "loss": 8.7447, "step": 740000 }, { "epoch": 5.92, "learning_rate": 4.7038000000000006e-05, "loss": 8.7447, "step": 740500 }, { "epoch": 5.93, "learning_rate": 4.7036e-05, "loss": 8.759, "step": 741000 }, { "epoch": 5.93, "learning_rate": 4.7034e-05, "loss": 8.7361, "step": 741500 }, { "epoch": 5.94, "learning_rate": 4.7032000000000006e-05, "loss": 8.7455, "step": 742000 }, { "epoch": 5.94, "learning_rate": 4.703e-05, "loss": 8.7745, "step": 742500 }, { "epoch": 5.94, "learning_rate": 4.7028e-05, "loss": 8.7608, "step": 743000 }, { "epoch": 5.95, "learning_rate": 4.7026e-05, "loss": 8.7666, "step": 743500 }, { "epoch": 5.95, "learning_rate": 4.7024e-05, "loss": 8.7195, "step": 744000 }, { "epoch": 5.96, "learning_rate": 4.7022000000000005e-05, "loss": 8.7423, "step": 744500 }, { "epoch": 5.96, "learning_rate": 4.702e-05, "loss": 8.7685, "step": 745000 }, { "epoch": 5.96, "learning_rate": 4.7018e-05, "loss": 8.7496, "step": 745500 }, { "epoch": 5.97, "learning_rate": 4.7016000000000005e-05, "loss": 8.7515, "step": 746000 }, { "epoch": 5.97, "learning_rate": 4.7014e-05, "loss": 8.7565, "step": 746500 }, { "epoch": 5.98, "learning_rate": 4.7012e-05, "loss": 8.7554, "step": 747000 }, { "epoch": 5.98, "learning_rate": 4.7010000000000006e-05, "loss": 8.7551, "step": 747500 }, { "epoch": 5.98, "learning_rate": 4.7008e-05, "loss": 8.7639, "step": 748000 }, { "epoch": 5.99, "learning_rate": 4.7006e-05, "loss": 8.739, "step": 748500 }, { "epoch": 5.99, "learning_rate": 4.7004000000000006e-05, "loss": 8.7458, "step": 749000 }, { "epoch": 6.0, "learning_rate": 4.7002e-05, "loss": 8.7541, "step": 749500 }, { "epoch": 6.0, "learning_rate": 4.7e-05, "loss": 8.7386, "step": 750000 }, { "epoch": 6.0, "learning_rate": 4.6998000000000006e-05, "loss": 8.7483, "step": 750500 }, { "epoch": 6.01, "learning_rate": 4.6996e-05, "loss": 8.7668, "step": 751000 }, { "epoch": 6.01, "learning_rate": 4.6994000000000004e-05, "loss": 8.7657, "step": 751500 }, { "epoch": 6.02, "learning_rate": 4.6992e-05, "loss": 8.7456, "step": 752000 }, { "epoch": 6.02, "learning_rate": 4.699e-05, "loss": 8.7593, "step": 752500 }, { "epoch": 6.02, "learning_rate": 4.6988000000000005e-05, "loss": 8.7309, "step": 753000 }, { "epoch": 6.03, "learning_rate": 4.6986e-05, "loss": 8.7621, "step": 753500 }, { "epoch": 6.03, "learning_rate": 4.6984e-05, "loss": 8.7725, "step": 754000 }, { "epoch": 6.04, "learning_rate": 4.6982000000000005e-05, "loss": 8.7567, "step": 754500 }, { "epoch": 6.04, "learning_rate": 4.698e-05, "loss": 8.7762, "step": 755000 }, { "epoch": 6.04, "learning_rate": 4.6977999999999996e-05, "loss": 8.7624, "step": 755500 }, { "epoch": 6.05, "learning_rate": 4.6976000000000006e-05, "loss": 8.7412, "step": 756000 }, { "epoch": 6.05, "learning_rate": 4.6974e-05, "loss": 8.7485, "step": 756500 }, { "epoch": 6.06, "learning_rate": 4.6972000000000004e-05, "loss": 8.7449, "step": 757000 }, { "epoch": 6.06, "learning_rate": 4.6970000000000006e-05, "loss": 8.7556, "step": 757500 }, { "epoch": 6.06, "learning_rate": 4.6968e-05, "loss": 8.7627, "step": 758000 }, { "epoch": 6.07, "learning_rate": 4.6966000000000004e-05, "loss": 8.7467, "step": 758500 }, { "epoch": 6.07, "learning_rate": 4.6964e-05, "loss": 8.7481, "step": 759000 }, { "epoch": 6.08, "learning_rate": 4.6962e-05, "loss": 8.752, "step": 759500 }, { "epoch": 6.08, "learning_rate": 4.6960000000000004e-05, "loss": 8.762, "step": 760000 }, { "epoch": 6.08, "learning_rate": 4.6958e-05, "loss": 8.7387, "step": 760500 }, { "epoch": 6.09, "learning_rate": 4.6956e-05, "loss": 8.7311, "step": 761000 }, { "epoch": 6.09, "learning_rate": 4.6954000000000005e-05, "loss": 8.7542, "step": 761500 }, { "epoch": 6.1, "learning_rate": 4.6952e-05, "loss": 8.7595, "step": 762000 }, { "epoch": 6.1, "learning_rate": 4.695e-05, "loss": 8.7542, "step": 762500 }, { "epoch": 6.1, "learning_rate": 4.6948000000000005e-05, "loss": 8.7753, "step": 763000 }, { "epoch": 6.11, "learning_rate": 4.6946e-05, "loss": 8.7496, "step": 763500 }, { "epoch": 6.11, "learning_rate": 4.6944e-05, "loss": 8.7554, "step": 764000 }, { "epoch": 6.12, "learning_rate": 4.6942000000000006e-05, "loss": 8.7498, "step": 764500 }, { "epoch": 6.12, "learning_rate": 4.694e-05, "loss": 8.7719, "step": 765000 }, { "epoch": 6.12, "learning_rate": 4.6938000000000004e-05, "loss": 8.7646, "step": 765500 }, { "epoch": 6.13, "learning_rate": 4.6936e-05, "loss": 8.772, "step": 766000 }, { "epoch": 6.13, "learning_rate": 4.6934e-05, "loss": 8.7446, "step": 766500 }, { "epoch": 6.14, "learning_rate": 4.6932000000000004e-05, "loss": 8.7736, "step": 767000 }, { "epoch": 6.14, "learning_rate": 4.693e-05, "loss": 8.7547, "step": 767500 }, { "epoch": 6.14, "learning_rate": 4.6928e-05, "loss": 8.733, "step": 768000 }, { "epoch": 6.15, "learning_rate": 4.6926000000000004e-05, "loss": 8.7627, "step": 768500 }, { "epoch": 6.15, "learning_rate": 4.6924e-05, "loss": 8.7668, "step": 769000 }, { "epoch": 6.16, "learning_rate": 4.6922e-05, "loss": 8.7351, "step": 769500 }, { "epoch": 6.16, "learning_rate": 4.6920000000000005e-05, "loss": 8.7374, "step": 770000 }, { "epoch": 6.16, "learning_rate": 4.6918e-05, "loss": 8.7697, "step": 770500 }, { "epoch": 6.17, "learning_rate": 4.6916e-05, "loss": 8.7433, "step": 771000 }, { "epoch": 6.17, "learning_rate": 4.6914000000000005e-05, "loss": 8.7597, "step": 771500 }, { "epoch": 6.18, "learning_rate": 4.6912e-05, "loss": 8.7423, "step": 772000 }, { "epoch": 6.18, "learning_rate": 4.691e-05, "loss": 8.7428, "step": 772500 }, { "epoch": 6.18, "learning_rate": 4.6908e-05, "loss": 8.7477, "step": 773000 }, { "epoch": 6.19, "learning_rate": 4.6906e-05, "loss": 8.7647, "step": 773500 }, { "epoch": 6.19, "learning_rate": 4.6904000000000004e-05, "loss": 8.7574, "step": 774000 }, { "epoch": 6.2, "learning_rate": 4.6902e-05, "loss": 8.7484, "step": 774500 }, { "epoch": 6.2, "learning_rate": 4.69e-05, "loss": 8.7527, "step": 775000 }, { "epoch": 6.2, "learning_rate": 4.6898000000000004e-05, "loss": 8.7672, "step": 775500 }, { "epoch": 6.21, "learning_rate": 4.6896e-05, "loss": 8.7309, "step": 776000 }, { "epoch": 6.21, "learning_rate": 4.6894e-05, "loss": 8.7448, "step": 776500 }, { "epoch": 6.22, "learning_rate": 4.6892000000000004e-05, "loss": 8.7333, "step": 777000 }, { "epoch": 6.22, "learning_rate": 4.689e-05, "loss": 8.7493, "step": 777500 }, { "epoch": 6.22, "learning_rate": 4.6888e-05, "loss": 8.7766, "step": 778000 }, { "epoch": 6.23, "learning_rate": 4.6886000000000005e-05, "loss": 8.7659, "step": 778500 }, { "epoch": 6.23, "learning_rate": 4.6884e-05, "loss": 8.7443, "step": 779000 }, { "epoch": 6.24, "learning_rate": 4.6882e-05, "loss": 8.7434, "step": 779500 }, { "epoch": 6.24, "learning_rate": 4.688e-05, "loss": 8.7506, "step": 780000 }, { "epoch": 6.24, "learning_rate": 4.6878e-05, "loss": 8.7548, "step": 780500 }, { "epoch": 6.25, "learning_rate": 4.6876e-05, "loss": 8.7633, "step": 781000 }, { "epoch": 6.25, "learning_rate": 4.6874e-05, "loss": 8.7673, "step": 781500 }, { "epoch": 6.26, "learning_rate": 4.687200000000001e-05, "loss": 8.7291, "step": 782000 }, { "epoch": 6.26, "learning_rate": 4.6870000000000004e-05, "loss": 8.747, "step": 782500 }, { "epoch": 6.26, "learning_rate": 4.6868e-05, "loss": 8.735, "step": 783000 }, { "epoch": 6.27, "learning_rate": 4.6866e-05, "loss": 8.7436, "step": 783500 }, { "epoch": 6.27, "learning_rate": 4.6864000000000004e-05, "loss": 8.7532, "step": 784000 }, { "epoch": 6.28, "learning_rate": 4.6862e-05, "loss": 8.7667, "step": 784500 }, { "epoch": 6.28, "learning_rate": 4.686e-05, "loss": 8.7414, "step": 785000 }, { "epoch": 6.28, "learning_rate": 4.6858000000000004e-05, "loss": 8.7301, "step": 785500 }, { "epoch": 6.29, "learning_rate": 4.6856e-05, "loss": 8.7559, "step": 786000 }, { "epoch": 6.29, "learning_rate": 4.6854e-05, "loss": 8.7326, "step": 786500 }, { "epoch": 6.3, "learning_rate": 4.6852000000000005e-05, "loss": 8.7318, "step": 787000 }, { "epoch": 6.3, "learning_rate": 4.685000000000001e-05, "loss": 8.751, "step": 787500 }, { "epoch": 6.3, "learning_rate": 4.6848e-05, "loss": 8.7537, "step": 788000 }, { "epoch": 6.31, "learning_rate": 4.6846e-05, "loss": 8.7584, "step": 788500 }, { "epoch": 6.31, "learning_rate": 4.684400000000001e-05, "loss": 8.7564, "step": 789000 }, { "epoch": 6.32, "learning_rate": 4.6842e-05, "loss": 8.7552, "step": 789500 }, { "epoch": 6.32, "learning_rate": 4.684e-05, "loss": 8.7492, "step": 790000 }, { "epoch": 6.32, "learning_rate": 4.6838e-05, "loss": 8.7569, "step": 790500 }, { "epoch": 6.33, "learning_rate": 4.6836000000000004e-05, "loss": 8.7477, "step": 791000 }, { "epoch": 6.33, "learning_rate": 4.6834e-05, "loss": 8.745, "step": 791500 }, { "epoch": 6.34, "learning_rate": 4.6832e-05, "loss": 8.745, "step": 792000 }, { "epoch": 6.34, "learning_rate": 4.6830000000000004e-05, "loss": 8.7704, "step": 792500 }, { "epoch": 6.34, "learning_rate": 4.6828e-05, "loss": 8.7451, "step": 793000 }, { "epoch": 6.35, "learning_rate": 4.6826e-05, "loss": 8.7475, "step": 793500 }, { "epoch": 6.35, "learning_rate": 4.6824000000000004e-05, "loss": 8.738, "step": 794000 }, { "epoch": 6.36, "learning_rate": 4.682200000000001e-05, "loss": 8.7687, "step": 794500 }, { "epoch": 6.36, "learning_rate": 4.682e-05, "loss": 8.7716, "step": 795000 }, { "epoch": 6.36, "learning_rate": 4.6818e-05, "loss": 8.7517, "step": 795500 }, { "epoch": 6.37, "learning_rate": 4.681600000000001e-05, "loss": 8.7634, "step": 796000 }, { "epoch": 6.37, "learning_rate": 4.6814e-05, "loss": 8.7568, "step": 796500 }, { "epoch": 6.38, "learning_rate": 4.6812e-05, "loss": 8.7353, "step": 797000 }, { "epoch": 6.38, "learning_rate": 4.681e-05, "loss": 8.7495, "step": 797500 }, { "epoch": 6.38, "learning_rate": 4.6808e-05, "loss": 8.7548, "step": 798000 }, { "epoch": 6.39, "learning_rate": 4.6806e-05, "loss": 8.7522, "step": 798500 }, { "epoch": 6.39, "learning_rate": 4.6804e-05, "loss": 8.7529, "step": 799000 }, { "epoch": 6.4, "learning_rate": 4.6802000000000004e-05, "loss": 8.7506, "step": 799500 }, { "epoch": 6.4, "learning_rate": 4.6800000000000006e-05, "loss": 8.735, "step": 800000 }, { "epoch": 6.4, "learning_rate": 4.6798e-05, "loss": 8.7611, "step": 800500 }, { "epoch": 6.41, "learning_rate": 4.6796000000000004e-05, "loss": 8.7729, "step": 801000 }, { "epoch": 6.41, "learning_rate": 4.6794000000000006e-05, "loss": 8.755, "step": 801500 }, { "epoch": 6.42, "learning_rate": 4.6792e-05, "loss": 8.754, "step": 802000 }, { "epoch": 6.42, "learning_rate": 4.679e-05, "loss": 8.7361, "step": 802500 }, { "epoch": 6.42, "learning_rate": 4.678800000000001e-05, "loss": 8.7393, "step": 803000 }, { "epoch": 6.43, "learning_rate": 4.6786e-05, "loss": 8.752, "step": 803500 }, { "epoch": 6.43, "learning_rate": 4.6784e-05, "loss": 8.768, "step": 804000 }, { "epoch": 6.44, "learning_rate": 4.6782e-05, "loss": 8.7412, "step": 804500 }, { "epoch": 6.44, "learning_rate": 4.678e-05, "loss": 8.7351, "step": 805000 }, { "epoch": 6.44, "learning_rate": 4.6778e-05, "loss": 8.76, "step": 805500 }, { "epoch": 6.45, "learning_rate": 4.6776e-05, "loss": 8.7633, "step": 806000 }, { "epoch": 6.45, "learning_rate": 4.6774e-05, "loss": 8.7428, "step": 806500 }, { "epoch": 6.46, "learning_rate": 4.6772000000000006e-05, "loss": 8.7432, "step": 807000 }, { "epoch": 6.46, "learning_rate": 4.677e-05, "loss": 8.7577, "step": 807500 }, { "epoch": 6.46, "learning_rate": 4.6768000000000004e-05, "loss": 8.7639, "step": 808000 }, { "epoch": 6.47, "learning_rate": 4.6766000000000006e-05, "loss": 8.7474, "step": 808500 }, { "epoch": 6.47, "learning_rate": 4.6764e-05, "loss": 8.7587, "step": 809000 }, { "epoch": 6.48, "learning_rate": 4.6762e-05, "loss": 8.7553, "step": 809500 }, { "epoch": 6.48, "learning_rate": 4.6760000000000006e-05, "loss": 8.7598, "step": 810000 }, { "epoch": 6.48, "learning_rate": 4.6758e-05, "loss": 8.7403, "step": 810500 }, { "epoch": 6.49, "learning_rate": 4.6756e-05, "loss": 8.7455, "step": 811000 }, { "epoch": 6.49, "learning_rate": 4.675400000000001e-05, "loss": 8.7494, "step": 811500 }, { "epoch": 6.5, "learning_rate": 4.6752e-05, "loss": 8.7577, "step": 812000 }, { "epoch": 6.5, "learning_rate": 4.6750000000000005e-05, "loss": 8.748, "step": 812500 }, { "epoch": 6.5, "learning_rate": 4.6748e-05, "loss": 8.7401, "step": 813000 }, { "epoch": 6.51, "learning_rate": 4.6746e-05, "loss": 8.7301, "step": 813500 }, { "epoch": 6.51, "learning_rate": 4.6744000000000005e-05, "loss": 8.7598, "step": 814000 }, { "epoch": 6.52, "learning_rate": 4.6742e-05, "loss": 8.755, "step": 814500 }, { "epoch": 6.52, "learning_rate": 4.674e-05, "loss": 8.7449, "step": 815000 }, { "epoch": 6.52, "learning_rate": 4.6738000000000006e-05, "loss": 8.7321, "step": 815500 }, { "epoch": 6.53, "learning_rate": 4.6736e-05, "loss": 8.75, "step": 816000 }, { "epoch": 6.53, "learning_rate": 4.6734e-05, "loss": 8.7544, "step": 816500 }, { "epoch": 6.54, "learning_rate": 4.6732000000000006e-05, "loss": 8.7393, "step": 817000 }, { "epoch": 6.54, "learning_rate": 4.673e-05, "loss": 8.7382, "step": 817500 }, { "epoch": 6.54, "learning_rate": 4.6728e-05, "loss": 8.7405, "step": 818000 }, { "epoch": 6.55, "learning_rate": 4.6726000000000006e-05, "loss": 8.7697, "step": 818500 }, { "epoch": 6.55, "learning_rate": 4.6724e-05, "loss": 8.7469, "step": 819000 }, { "epoch": 6.56, "learning_rate": 4.6722000000000004e-05, "loss": 8.744, "step": 819500 }, { "epoch": 6.56, "learning_rate": 4.672e-05, "loss": 8.7496, "step": 820000 }, { "epoch": 6.56, "learning_rate": 4.6718e-05, "loss": 8.7391, "step": 820500 }, { "epoch": 6.57, "learning_rate": 4.6716000000000005e-05, "loss": 8.7378, "step": 821000 }, { "epoch": 6.57, "learning_rate": 4.6714e-05, "loss": 8.7796, "step": 821500 }, { "epoch": 6.58, "learning_rate": 4.6712e-05, "loss": 8.7449, "step": 822000 }, { "epoch": 6.58, "learning_rate": 4.6710000000000005e-05, "loss": 8.75, "step": 822500 }, { "epoch": 6.58, "learning_rate": 4.6708e-05, "loss": 8.7536, "step": 823000 }, { "epoch": 6.59, "learning_rate": 4.6706e-05, "loss": 8.7436, "step": 823500 }, { "epoch": 6.59, "learning_rate": 4.6704000000000005e-05, "loss": 8.7696, "step": 824000 }, { "epoch": 6.6, "learning_rate": 4.6702e-05, "loss": 8.7568, "step": 824500 }, { "epoch": 6.6, "learning_rate": 4.6700000000000003e-05, "loss": 8.7632, "step": 825000 }, { "epoch": 6.6, "learning_rate": 4.6698000000000006e-05, "loss": 8.7508, "step": 825500 }, { "epoch": 6.61, "learning_rate": 4.6696e-05, "loss": 8.7569, "step": 826000 }, { "epoch": 6.61, "learning_rate": 4.6694000000000004e-05, "loss": 8.7632, "step": 826500 }, { "epoch": 6.62, "learning_rate": 4.6692e-05, "loss": 8.761, "step": 827000 }, { "epoch": 6.62, "learning_rate": 4.669e-05, "loss": 8.7271, "step": 827500 }, { "epoch": 6.62, "learning_rate": 4.6688000000000004e-05, "loss": 8.7485, "step": 828000 }, { "epoch": 6.63, "learning_rate": 4.6686e-05, "loss": 8.745, "step": 828500 }, { "epoch": 6.63, "learning_rate": 4.6684e-05, "loss": 8.7669, "step": 829000 }, { "epoch": 6.64, "learning_rate": 4.6682000000000005e-05, "loss": 8.7584, "step": 829500 }, { "epoch": 6.64, "learning_rate": 4.668e-05, "loss": 8.7514, "step": 830000 }, { "epoch": 6.64, "learning_rate": 4.6678e-05, "loss": 8.7318, "step": 830500 }, { "epoch": 6.65, "learning_rate": 4.6676000000000005e-05, "loss": 8.7484, "step": 831000 }, { "epoch": 6.65, "learning_rate": 4.6674e-05, "loss": 8.7701, "step": 831500 }, { "epoch": 6.66, "learning_rate": 4.6672e-05, "loss": 8.7421, "step": 832000 }, { "epoch": 6.66, "learning_rate": 4.6670000000000005e-05, "loss": 8.7547, "step": 832500 }, { "epoch": 6.66, "learning_rate": 4.6668e-05, "loss": 8.7611, "step": 833000 }, { "epoch": 6.67, "learning_rate": 4.6666000000000003e-05, "loss": 8.7349, "step": 833500 }, { "epoch": 6.67, "learning_rate": 4.6664e-05, "loss": 8.7465, "step": 834000 }, { "epoch": 6.68, "learning_rate": 4.6662e-05, "loss": 8.7559, "step": 834500 }, { "epoch": 6.68, "learning_rate": 4.6660000000000004e-05, "loss": 8.7405, "step": 835000 }, { "epoch": 6.68, "learning_rate": 4.6658e-05, "loss": 8.7558, "step": 835500 }, { "epoch": 6.69, "learning_rate": 4.6656e-05, "loss": 8.7591, "step": 836000 }, { "epoch": 6.69, "learning_rate": 4.6654000000000004e-05, "loss": 8.7664, "step": 836500 }, { "epoch": 6.7, "learning_rate": 4.6652e-05, "loss": 8.7586, "step": 837000 }, { "epoch": 6.7, "learning_rate": 4.665e-05, "loss": 8.7488, "step": 837500 }, { "epoch": 6.7, "learning_rate": 4.6648000000000005e-05, "loss": 8.7643, "step": 838000 }, { "epoch": 6.71, "learning_rate": 4.6646e-05, "loss": 8.7406, "step": 838500 }, { "epoch": 6.71, "learning_rate": 4.6644e-05, "loss": 8.7555, "step": 839000 }, { "epoch": 6.72, "learning_rate": 4.6642000000000005e-05, "loss": 8.76, "step": 839500 }, { "epoch": 6.72, "learning_rate": 4.664e-05, "loss": 8.7536, "step": 840000 }, { "epoch": 6.72, "learning_rate": 4.6638e-05, "loss": 8.7293, "step": 840500 }, { "epoch": 6.73, "learning_rate": 4.6636e-05, "loss": 8.7763, "step": 841000 }, { "epoch": 6.73, "learning_rate": 4.6634e-05, "loss": 8.7408, "step": 841500 }, { "epoch": 6.74, "learning_rate": 4.6632000000000003e-05, "loss": 8.7462, "step": 842000 }, { "epoch": 6.74, "learning_rate": 4.663e-05, "loss": 8.7606, "step": 842500 }, { "epoch": 6.74, "learning_rate": 4.6628e-05, "loss": 8.7548, "step": 843000 }, { "epoch": 6.75, "learning_rate": 4.6626000000000004e-05, "loss": 8.7438, "step": 843500 }, { "epoch": 6.75, "learning_rate": 4.6624e-05, "loss": 8.7728, "step": 844000 }, { "epoch": 6.76, "learning_rate": 4.6622e-05, "loss": 8.745, "step": 844500 }, { "epoch": 6.76, "learning_rate": 4.6620000000000004e-05, "loss": 8.7669, "step": 845000 }, { "epoch": 6.76, "learning_rate": 4.6618e-05, "loss": 8.744, "step": 845500 }, { "epoch": 6.77, "learning_rate": 4.6616e-05, "loss": 8.7448, "step": 846000 }, { "epoch": 6.77, "learning_rate": 4.6614000000000005e-05, "loss": 8.7628, "step": 846500 }, { "epoch": 6.78, "learning_rate": 4.6612e-05, "loss": 8.74, "step": 847000 }, { "epoch": 6.78, "learning_rate": 4.661e-05, "loss": 8.7416, "step": 847500 }, { "epoch": 6.78, "learning_rate": 4.6608e-05, "loss": 8.7393, "step": 848000 }, { "epoch": 6.79, "learning_rate": 4.6606e-05, "loss": 8.7714, "step": 848500 }, { "epoch": 6.79, "learning_rate": 4.6604e-05, "loss": 8.7799, "step": 849000 }, { "epoch": 6.8, "learning_rate": 4.6602e-05, "loss": 8.7592, "step": 849500 }, { "epoch": 6.8, "learning_rate": 4.660000000000001e-05, "loss": 8.7547, "step": 850000 }, { "epoch": 6.8, "learning_rate": 4.6598000000000003e-05, "loss": 8.752, "step": 850500 }, { "epoch": 6.81, "learning_rate": 4.6596e-05, "loss": 8.7538, "step": 851000 }, { "epoch": 6.81, "learning_rate": 4.6594e-05, "loss": 8.7523, "step": 851500 }, { "epoch": 6.82, "learning_rate": 4.6592000000000004e-05, "loss": 8.7763, "step": 852000 }, { "epoch": 6.82, "learning_rate": 4.659e-05, "loss": 8.7771, "step": 852500 }, { "epoch": 6.82, "learning_rate": 4.6588e-05, "loss": 8.7656, "step": 853000 }, { "epoch": 6.83, "learning_rate": 4.6586000000000004e-05, "loss": 8.7343, "step": 853500 }, { "epoch": 6.83, "learning_rate": 4.6584e-05, "loss": 8.7753, "step": 854000 }, { "epoch": 6.84, "learning_rate": 4.6582e-05, "loss": 8.7371, "step": 854500 }, { "epoch": 6.84, "learning_rate": 4.6580000000000005e-05, "loss": 8.7683, "step": 855000 }, { "epoch": 6.84, "learning_rate": 4.657800000000001e-05, "loss": 8.7771, "step": 855500 }, { "epoch": 6.85, "learning_rate": 4.6576e-05, "loss": 8.7487, "step": 856000 }, { "epoch": 6.85, "learning_rate": 4.6574e-05, "loss": 8.7567, "step": 856500 }, { "epoch": 6.86, "learning_rate": 4.657200000000001e-05, "loss": 8.7579, "step": 857000 }, { "epoch": 6.86, "learning_rate": 4.657e-05, "loss": 8.7585, "step": 857500 }, { "epoch": 6.86, "learning_rate": 4.6568e-05, "loss": 8.7402, "step": 858000 }, { "epoch": 6.87, "learning_rate": 4.6566e-05, "loss": 8.7717, "step": 858500 }, { "epoch": 6.87, "learning_rate": 4.6564000000000003e-05, "loss": 8.7564, "step": 859000 }, { "epoch": 6.88, "learning_rate": 4.6562e-05, "loss": 8.7586, "step": 859500 }, { "epoch": 6.88, "learning_rate": 4.656e-05, "loss": 8.7525, "step": 860000 }, { "epoch": 6.88, "learning_rate": 4.6558000000000004e-05, "loss": 8.7611, "step": 860500 }, { "epoch": 6.89, "learning_rate": 4.6556e-05, "loss": 8.7306, "step": 861000 }, { "epoch": 6.89, "learning_rate": 4.6554e-05, "loss": 8.7462, "step": 861500 }, { "epoch": 6.9, "learning_rate": 4.6552000000000004e-05, "loss": 8.7549, "step": 862000 }, { "epoch": 6.9, "learning_rate": 4.655000000000001e-05, "loss": 8.751, "step": 862500 }, { "epoch": 6.9, "learning_rate": 4.6548e-05, "loss": 8.7388, "step": 863000 }, { "epoch": 6.91, "learning_rate": 4.6546e-05, "loss": 8.7528, "step": 863500 }, { "epoch": 6.91, "learning_rate": 4.654400000000001e-05, "loss": 8.7364, "step": 864000 }, { "epoch": 6.92, "learning_rate": 4.6542e-05, "loss": 8.7629, "step": 864500 }, { "epoch": 6.92, "learning_rate": 4.654e-05, "loss": 8.7323, "step": 865000 }, { "epoch": 6.92, "learning_rate": 4.6538e-05, "loss": 8.757, "step": 865500 }, { "epoch": 6.93, "learning_rate": 4.6536e-05, "loss": 8.7493, "step": 866000 }, { "epoch": 6.93, "learning_rate": 4.6534e-05, "loss": 8.7544, "step": 866500 }, { "epoch": 6.94, "learning_rate": 4.6532e-05, "loss": 8.7371, "step": 867000 }, { "epoch": 6.94, "learning_rate": 4.6530000000000003e-05, "loss": 8.7359, "step": 867500 }, { "epoch": 6.94, "learning_rate": 4.6528000000000006e-05, "loss": 8.7716, "step": 868000 }, { "epoch": 6.95, "learning_rate": 4.6526e-05, "loss": 8.7644, "step": 868500 }, { "epoch": 6.95, "learning_rate": 4.6524000000000004e-05, "loss": 8.7509, "step": 869000 }, { "epoch": 6.96, "learning_rate": 4.6522000000000006e-05, "loss": 8.7467, "step": 869500 }, { "epoch": 6.96, "learning_rate": 4.652e-05, "loss": 8.7418, "step": 870000 }, { "epoch": 6.96, "learning_rate": 4.6518e-05, "loss": 8.7563, "step": 870500 }, { "epoch": 6.97, "learning_rate": 4.651600000000001e-05, "loss": 8.7563, "step": 871000 }, { "epoch": 6.97, "learning_rate": 4.6514e-05, "loss": 8.7628, "step": 871500 }, { "epoch": 6.98, "learning_rate": 4.6512e-05, "loss": 8.7366, "step": 872000 }, { "epoch": 6.98, "learning_rate": 4.651e-05, "loss": 8.774, "step": 872500 }, { "epoch": 6.98, "learning_rate": 4.6508e-05, "loss": 8.7612, "step": 873000 }, { "epoch": 6.99, "learning_rate": 4.6506e-05, "loss": 8.7502, "step": 873500 }, { "epoch": 6.99, "learning_rate": 4.6504e-05, "loss": 8.7307, "step": 874000 }, { "epoch": 7.0, "learning_rate": 4.6502e-05, "loss": 8.7444, "step": 874500 }, { "epoch": 7.0, "learning_rate": 4.6500000000000005e-05, "loss": 8.7551, "step": 875000 }, { "epoch": 7.0, "learning_rate": 4.6498e-05, "loss": 8.7419, "step": 875500 }, { "epoch": 7.01, "learning_rate": 4.6496000000000003e-05, "loss": 8.753, "step": 876000 }, { "epoch": 7.01, "learning_rate": 4.6494000000000006e-05, "loss": 8.7469, "step": 876500 }, { "epoch": 7.02, "learning_rate": 4.6492e-05, "loss": 8.743, "step": 877000 }, { "epoch": 7.02, "learning_rate": 4.649e-05, "loss": 8.7461, "step": 877500 }, { "epoch": 7.02, "learning_rate": 4.6488000000000006e-05, "loss": 8.7421, "step": 878000 }, { "epoch": 7.03, "learning_rate": 4.6486e-05, "loss": 8.7455, "step": 878500 }, { "epoch": 7.03, "learning_rate": 4.6484e-05, "loss": 8.7582, "step": 879000 }, { "epoch": 7.04, "learning_rate": 4.6482000000000007e-05, "loss": 8.7316, "step": 879500 }, { "epoch": 7.04, "learning_rate": 4.648e-05, "loss": 8.756, "step": 880000 }, { "epoch": 7.04, "learning_rate": 4.6478000000000005e-05, "loss": 8.7415, "step": 880500 }, { "epoch": 7.05, "learning_rate": 4.6476e-05, "loss": 8.7314, "step": 881000 }, { "epoch": 7.05, "learning_rate": 4.6474e-05, "loss": 8.7433, "step": 881500 }, { "epoch": 7.06, "learning_rate": 4.6472000000000005e-05, "loss": 8.7494, "step": 882000 }, { "epoch": 7.06, "learning_rate": 4.647e-05, "loss": 8.7465, "step": 882500 }, { "epoch": 7.06, "learning_rate": 4.6468e-05, "loss": 8.7456, "step": 883000 }, { "epoch": 7.07, "learning_rate": 4.6466000000000005e-05, "loss": 8.7582, "step": 883500 }, { "epoch": 7.07, "learning_rate": 4.6464e-05, "loss": 8.7455, "step": 884000 }, { "epoch": 7.08, "learning_rate": 4.6462e-05, "loss": 8.7486, "step": 884500 }, { "epoch": 7.08, "learning_rate": 4.6460000000000006e-05, "loss": 8.7646, "step": 885000 }, { "epoch": 7.08, "learning_rate": 4.6458e-05, "loss": 8.7388, "step": 885500 }, { "epoch": 7.09, "learning_rate": 4.6456e-05, "loss": 8.7506, "step": 886000 }, { "epoch": 7.09, "learning_rate": 4.6454000000000006e-05, "loss": 8.7676, "step": 886500 }, { "epoch": 7.1, "learning_rate": 4.6452e-05, "loss": 8.7709, "step": 887000 }, { "epoch": 7.1, "learning_rate": 4.6450000000000004e-05, "loss": 8.7412, "step": 887500 }, { "epoch": 7.1, "learning_rate": 4.6448e-05, "loss": 8.743, "step": 888000 }, { "epoch": 7.11, "learning_rate": 4.6446e-05, "loss": 8.7279, "step": 888500 }, { "epoch": 7.11, "learning_rate": 4.6444000000000005e-05, "loss": 8.7234, "step": 889000 }, { "epoch": 7.12, "learning_rate": 4.6442e-05, "loss": 8.7318, "step": 889500 }, { "epoch": 7.12, "learning_rate": 4.644e-05, "loss": 8.7569, "step": 890000 }, { "epoch": 7.12, "learning_rate": 4.6438000000000005e-05, "loss": 8.7387, "step": 890500 }, { "epoch": 7.13, "learning_rate": 4.6436e-05, "loss": 8.7512, "step": 891000 }, { "epoch": 7.13, "learning_rate": 4.6434e-05, "loss": 8.7666, "step": 891500 }, { "epoch": 7.14, "learning_rate": 4.6432000000000005e-05, "loss": 8.7506, "step": 892000 }, { "epoch": 7.14, "learning_rate": 4.643e-05, "loss": 8.7631, "step": 892500 }, { "epoch": 7.14, "learning_rate": 4.6428000000000003e-05, "loss": 8.7471, "step": 893000 }, { "epoch": 7.15, "learning_rate": 4.6426000000000006e-05, "loss": 8.7502, "step": 893500 }, { "epoch": 7.15, "learning_rate": 4.6424e-05, "loss": 8.7741, "step": 894000 }, { "epoch": 7.16, "learning_rate": 4.6422000000000004e-05, "loss": 8.751, "step": 894500 }, { "epoch": 7.16, "learning_rate": 4.642e-05, "loss": 8.735, "step": 895000 }, { "epoch": 7.16, "learning_rate": 4.6418e-05, "loss": 8.7579, "step": 895500 }, { "epoch": 7.17, "learning_rate": 4.6416000000000004e-05, "loss": 8.7568, "step": 896000 }, { "epoch": 7.17, "learning_rate": 4.6414e-05, "loss": 8.7528, "step": 896500 }, { "epoch": 7.18, "learning_rate": 4.6412e-05, "loss": 8.756, "step": 897000 }, { "epoch": 7.18, "learning_rate": 4.6410000000000005e-05, "loss": 8.7631, "step": 897500 }, { "epoch": 7.18, "learning_rate": 4.6408e-05, "loss": 8.7734, "step": 898000 }, { "epoch": 7.19, "learning_rate": 4.6406e-05, "loss": 8.7523, "step": 898500 }, { "epoch": 7.19, "learning_rate": 4.6404000000000005e-05, "loss": 8.756, "step": 899000 }, { "epoch": 7.2, "learning_rate": 4.6402e-05, "loss": 8.7523, "step": 899500 }, { "epoch": 7.2, "learning_rate": 4.64e-05, "loss": 8.7533, "step": 900000 }, { "epoch": 7.2, "learning_rate": 4.6398000000000005e-05, "loss": 8.7466, "step": 900500 }, { "epoch": 7.21, "learning_rate": 4.6396e-05, "loss": 8.766, "step": 901000 }, { "epoch": 7.21, "learning_rate": 4.6394e-05, "loss": 8.7683, "step": 901500 }, { "epoch": 7.22, "learning_rate": 4.6392e-05, "loss": 8.7553, "step": 902000 }, { "epoch": 7.22, "learning_rate": 4.639e-05, "loss": 8.7434, "step": 902500 }, { "epoch": 7.22, "learning_rate": 4.6388000000000004e-05, "loss": 8.7682, "step": 903000 }, { "epoch": 7.23, "learning_rate": 4.6386e-05, "loss": 8.7404, "step": 903500 }, { "epoch": 7.23, "learning_rate": 4.6384e-05, "loss": 8.7508, "step": 904000 }, { "epoch": 7.24, "learning_rate": 4.6382000000000004e-05, "loss": 8.7508, "step": 904500 }, { "epoch": 7.24, "learning_rate": 4.638e-05, "loss": 8.7736, "step": 905000 }, { "epoch": 7.24, "learning_rate": 4.6378e-05, "loss": 8.7652, "step": 905500 }, { "epoch": 7.25, "learning_rate": 4.6376000000000005e-05, "loss": 8.7474, "step": 906000 }, { "epoch": 7.25, "learning_rate": 4.6374e-05, "loss": 8.7477, "step": 906500 }, { "epoch": 7.26, "learning_rate": 4.6372e-05, "loss": 8.7469, "step": 907000 }, { "epoch": 7.26, "learning_rate": 4.6370000000000005e-05, "loss": 8.7526, "step": 907500 }, { "epoch": 7.26, "learning_rate": 4.6368e-05, "loss": 8.7602, "step": 908000 }, { "epoch": 7.27, "learning_rate": 4.6366e-05, "loss": 8.7599, "step": 908500 }, { "epoch": 7.27, "learning_rate": 4.6364e-05, "loss": 8.7532, "step": 909000 }, { "epoch": 7.28, "learning_rate": 4.6362e-05, "loss": 8.7391, "step": 909500 }, { "epoch": 7.28, "learning_rate": 4.636e-05, "loss": 8.7765, "step": 910000 }, { "epoch": 7.28, "learning_rate": 4.6358e-05, "loss": 8.7576, "step": 910500 }, { "epoch": 7.29, "learning_rate": 4.635600000000001e-05, "loss": 8.7453, "step": 911000 }, { "epoch": 7.29, "learning_rate": 4.6354000000000004e-05, "loss": 8.7707, "step": 911500 }, { "epoch": 7.3, "learning_rate": 4.6352e-05, "loss": 8.7469, "step": 912000 }, { "epoch": 7.3, "learning_rate": 4.635e-05, "loss": 8.7334, "step": 912500 }, { "epoch": 7.3, "learning_rate": 4.6348000000000004e-05, "loss": 8.7304, "step": 913000 }, { "epoch": 7.31, "learning_rate": 4.6346e-05, "loss": 8.7494, "step": 913500 }, { "epoch": 7.31, "learning_rate": 4.6344e-05, "loss": 8.7511, "step": 914000 }, { "epoch": 7.32, "learning_rate": 4.6342000000000005e-05, "loss": 8.7478, "step": 914500 }, { "epoch": 7.32, "learning_rate": 4.634e-05, "loss": 8.7506, "step": 915000 }, { "epoch": 7.32, "learning_rate": 4.6338e-05, "loss": 8.7416, "step": 915500 }, { "epoch": 7.33, "learning_rate": 4.6336000000000005e-05, "loss": 8.7361, "step": 916000 }, { "epoch": 7.33, "learning_rate": 4.6334e-05, "loss": 8.7527, "step": 916500 }, { "epoch": 7.34, "learning_rate": 4.6332e-05, "loss": 8.7591, "step": 917000 }, { "epoch": 7.34, "learning_rate": 4.633e-05, "loss": 8.7497, "step": 917500 }, { "epoch": 7.34, "learning_rate": 4.632800000000001e-05, "loss": 8.7632, "step": 918000 }, { "epoch": 7.35, "learning_rate": 4.6326e-05, "loss": 8.7562, "step": 918500 }, { "epoch": 7.35, "learning_rate": 4.6324e-05, "loss": 8.7455, "step": 919000 }, { "epoch": 7.36, "learning_rate": 4.6322e-05, "loss": 8.7385, "step": 919500 }, { "epoch": 7.36, "learning_rate": 4.6320000000000004e-05, "loss": 8.7521, "step": 920000 }, { "epoch": 7.36, "learning_rate": 4.6318e-05, "loss": 8.7747, "step": 920500 }, { "epoch": 7.37, "learning_rate": 4.6316e-05, "loss": 8.7334, "step": 921000 }, { "epoch": 7.37, "learning_rate": 4.6314000000000004e-05, "loss": 8.7658, "step": 921500 }, { "epoch": 7.38, "learning_rate": 4.6312e-05, "loss": 8.7794, "step": 922000 }, { "epoch": 7.38, "learning_rate": 4.631e-05, "loss": 8.7407, "step": 922500 }, { "epoch": 7.38, "learning_rate": 4.6308000000000005e-05, "loss": 8.7533, "step": 923000 }, { "epoch": 7.39, "learning_rate": 4.630600000000001e-05, "loss": 8.7517, "step": 923500 }, { "epoch": 7.39, "learning_rate": 4.6304e-05, "loss": 8.7667, "step": 924000 }, { "epoch": 7.4, "learning_rate": 4.6302e-05, "loss": 8.7558, "step": 924500 }, { "epoch": 7.4, "learning_rate": 4.630000000000001e-05, "loss": 8.739, "step": 925000 }, { "epoch": 7.4, "learning_rate": 4.6298e-05, "loss": 8.7558, "step": 925500 }, { "epoch": 7.41, "learning_rate": 4.6296e-05, "loss": 8.7487, "step": 926000 }, { "epoch": 7.41, "learning_rate": 4.6294e-05, "loss": 8.7644, "step": 926500 }, { "epoch": 7.42, "learning_rate": 4.6292e-05, "loss": 8.7642, "step": 927000 }, { "epoch": 7.42, "learning_rate": 4.629e-05, "loss": 8.7668, "step": 927500 }, { "epoch": 7.42, "learning_rate": 4.6288e-05, "loss": 8.7595, "step": 928000 }, { "epoch": 7.43, "learning_rate": 4.6286000000000004e-05, "loss": 8.7578, "step": 928500 }, { "epoch": 7.43, "learning_rate": 4.6284e-05, "loss": 8.7754, "step": 929000 }, { "epoch": 7.44, "learning_rate": 4.6282e-05, "loss": 8.7619, "step": 929500 }, { "epoch": 7.44, "learning_rate": 4.6280000000000004e-05, "loss": 8.7482, "step": 930000 }, { "epoch": 7.44, "learning_rate": 4.6278000000000007e-05, "loss": 8.7273, "step": 930500 }, { "epoch": 7.45, "learning_rate": 4.6276e-05, "loss": 8.7513, "step": 931000 }, { "epoch": 7.45, "learning_rate": 4.6274e-05, "loss": 8.7417, "step": 931500 }, { "epoch": 7.46, "learning_rate": 4.627200000000001e-05, "loss": 8.7726, "step": 932000 }, { "epoch": 7.46, "learning_rate": 4.627e-05, "loss": 8.7473, "step": 932500 }, { "epoch": 7.46, "learning_rate": 4.6268e-05, "loss": 8.7501, "step": 933000 }, { "epoch": 7.47, "learning_rate": 4.6266e-05, "loss": 8.7592, "step": 933500 }, { "epoch": 7.47, "learning_rate": 4.6264e-05, "loss": 8.7503, "step": 934000 }, { "epoch": 7.48, "learning_rate": 4.6262e-05, "loss": 8.746, "step": 934500 }, { "epoch": 7.48, "learning_rate": 4.626e-05, "loss": 8.7629, "step": 935000 }, { "epoch": 7.48, "learning_rate": 4.6258e-05, "loss": 8.7334, "step": 935500 }, { "epoch": 7.49, "learning_rate": 4.6256000000000006e-05, "loss": 8.7368, "step": 936000 }, { "epoch": 7.49, "learning_rate": 4.6254e-05, "loss": 8.761, "step": 936500 }, { "epoch": 7.5, "learning_rate": 4.6252000000000004e-05, "loss": 8.7554, "step": 937000 }, { "epoch": 7.5, "learning_rate": 4.6250000000000006e-05, "loss": 8.7589, "step": 937500 }, { "epoch": 7.5, "learning_rate": 4.6248e-05, "loss": 8.7468, "step": 938000 }, { "epoch": 7.51, "learning_rate": 4.6246e-05, "loss": 8.745, "step": 938500 }, { "epoch": 7.51, "learning_rate": 4.6244000000000007e-05, "loss": 8.7527, "step": 939000 }, { "epoch": 7.52, "learning_rate": 4.6242e-05, "loss": 8.7717, "step": 939500 }, { "epoch": 7.52, "learning_rate": 4.624e-05, "loss": 8.7429, "step": 940000 }, { "epoch": 7.52, "learning_rate": 4.623800000000001e-05, "loss": 8.7609, "step": 940500 }, { "epoch": 7.53, "learning_rate": 4.6236e-05, "loss": 8.7705, "step": 941000 }, { "epoch": 7.53, "learning_rate": 4.6234e-05, "loss": 8.7518, "step": 941500 }, { "epoch": 7.54, "learning_rate": 4.6232e-05, "loss": 8.7496, "step": 942000 }, { "epoch": 7.54, "learning_rate": 4.623e-05, "loss": 8.7507, "step": 942500 }, { "epoch": 7.54, "learning_rate": 4.6228000000000005e-05, "loss": 8.748, "step": 943000 }, { "epoch": 7.55, "learning_rate": 4.6226e-05, "loss": 8.7688, "step": 943500 }, { "epoch": 7.55, "learning_rate": 4.6224e-05, "loss": 8.7518, "step": 944000 }, { "epoch": 7.56, "learning_rate": 4.6222000000000006e-05, "loss": 8.7468, "step": 944500 }, { "epoch": 7.56, "learning_rate": 4.622e-05, "loss": 8.7657, "step": 945000 }, { "epoch": 7.56, "learning_rate": 4.6218e-05, "loss": 8.7403, "step": 945500 }, { "epoch": 7.57, "learning_rate": 4.6216000000000006e-05, "loss": 8.7527, "step": 946000 }, { "epoch": 7.57, "learning_rate": 4.6214e-05, "loss": 8.7469, "step": 946500 }, { "epoch": 7.58, "learning_rate": 4.6212e-05, "loss": 8.7376, "step": 947000 }, { "epoch": 7.58, "learning_rate": 4.6210000000000006e-05, "loss": 8.7543, "step": 947500 }, { "epoch": 7.58, "learning_rate": 4.6208e-05, "loss": 8.7403, "step": 948000 }, { "epoch": 7.59, "learning_rate": 4.6206000000000005e-05, "loss": 8.7669, "step": 948500 }, { "epoch": 7.59, "learning_rate": 4.6204e-05, "loss": 8.757, "step": 949000 }, { "epoch": 7.6, "learning_rate": 4.6202e-05, "loss": 8.7382, "step": 949500 }, { "epoch": 7.6, "learning_rate": 4.6200000000000005e-05, "loss": 8.7674, "step": 950000 }, { "epoch": 7.6, "learning_rate": 4.6198e-05, "loss": 8.734, "step": 950500 }, { "epoch": 7.61, "learning_rate": 4.6196e-05, "loss": 8.7594, "step": 951000 }, { "epoch": 7.61, "learning_rate": 4.6194000000000005e-05, "loss": 8.738, "step": 951500 }, { "epoch": 7.62, "learning_rate": 4.6192e-05, "loss": 8.7285, "step": 952000 }, { "epoch": 7.62, "learning_rate": 4.619e-05, "loss": 8.7311, "step": 952500 }, { "epoch": 7.62, "learning_rate": 4.6188000000000006e-05, "loss": 8.7498, "step": 953000 }, { "epoch": 7.63, "learning_rate": 4.6186e-05, "loss": 8.7787, "step": 953500 }, { "epoch": 7.63, "learning_rate": 4.6184e-05, "loss": 8.7747, "step": 954000 }, { "epoch": 7.64, "learning_rate": 4.6182000000000006e-05, "loss": 8.7507, "step": 954500 }, { "epoch": 7.64, "learning_rate": 4.618e-05, "loss": 8.7527, "step": 955000 }, { "epoch": 7.64, "learning_rate": 4.6178000000000004e-05, "loss": 8.7804, "step": 955500 }, { "epoch": 7.65, "learning_rate": 4.6176e-05, "loss": 8.7654, "step": 956000 }, { "epoch": 7.65, "learning_rate": 4.6174e-05, "loss": 8.7496, "step": 956500 }, { "epoch": 7.66, "learning_rate": 4.6172000000000004e-05, "loss": 8.758, "step": 957000 }, { "epoch": 7.66, "learning_rate": 4.617e-05, "loss": 8.7512, "step": 957500 }, { "epoch": 7.66, "learning_rate": 4.6168e-05, "loss": 8.7683, "step": 958000 }, { "epoch": 7.67, "learning_rate": 4.6166000000000005e-05, "loss": 8.7577, "step": 958500 }, { "epoch": 7.67, "learning_rate": 4.6164e-05, "loss": 8.7482, "step": 959000 }, { "epoch": 7.68, "learning_rate": 4.6162e-05, "loss": 8.7481, "step": 959500 }, { "epoch": 7.68, "learning_rate": 4.6160000000000005e-05, "loss": 8.7449, "step": 960000 }, { "epoch": 7.68, "learning_rate": 4.6158e-05, "loss": 8.7511, "step": 960500 }, { "epoch": 7.69, "learning_rate": 4.6156e-05, "loss": 8.7303, "step": 961000 }, { "epoch": 7.69, "learning_rate": 4.6154000000000006e-05, "loss": 8.7855, "step": 961500 }, { "epoch": 7.7, "learning_rate": 4.6152e-05, "loss": 8.7703, "step": 962000 }, { "epoch": 7.7, "learning_rate": 4.6150000000000004e-05, "loss": 8.7406, "step": 962500 }, { "epoch": 7.7, "learning_rate": 4.6148e-05, "loss": 8.7559, "step": 963000 }, { "epoch": 7.71, "learning_rate": 4.6146e-05, "loss": 8.7682, "step": 963500 }, { "epoch": 7.71, "learning_rate": 4.6144000000000004e-05, "loss": 8.7531, "step": 964000 }, { "epoch": 7.72, "learning_rate": 4.6142e-05, "loss": 8.744, "step": 964500 }, { "epoch": 7.72, "learning_rate": 4.614e-05, "loss": 8.7477, "step": 965000 }, { "epoch": 7.72, "learning_rate": 4.6138000000000004e-05, "loss": 8.7321, "step": 965500 }, { "epoch": 7.73, "learning_rate": 4.6136e-05, "loss": 8.7567, "step": 966000 }, { "epoch": 7.73, "learning_rate": 4.6134e-05, "loss": 8.7499, "step": 966500 }, { "epoch": 7.74, "learning_rate": 4.6132000000000005e-05, "loss": 8.7618, "step": 967000 }, { "epoch": 7.74, "learning_rate": 4.613e-05, "loss": 8.7702, "step": 967500 }, { "epoch": 7.74, "learning_rate": 4.6128e-05, "loss": 8.7327, "step": 968000 }, { "epoch": 7.75, "learning_rate": 4.6126000000000005e-05, "loss": 8.7448, "step": 968500 }, { "epoch": 7.75, "learning_rate": 4.6124e-05, "loss": 8.7575, "step": 969000 }, { "epoch": 7.76, "learning_rate": 4.6122e-05, "loss": 8.7466, "step": 969500 }, { "epoch": 7.76, "learning_rate": 4.612e-05, "loss": 8.7576, "step": 970000 }, { "epoch": 7.76, "learning_rate": 4.6118e-05, "loss": 8.7494, "step": 970500 }, { "epoch": 7.77, "learning_rate": 4.6116000000000004e-05, "loss": 8.7521, "step": 971000 }, { "epoch": 7.77, "learning_rate": 4.6114e-05, "loss": 8.7711, "step": 971500 }, { "epoch": 7.78, "learning_rate": 4.6112e-05, "loss": 8.7575, "step": 972000 }, { "epoch": 7.78, "learning_rate": 4.6110000000000004e-05, "loss": 8.7581, "step": 972500 }, { "epoch": 7.78, "learning_rate": 4.6108e-05, "loss": 8.7522, "step": 973000 }, { "epoch": 7.79, "learning_rate": 4.6106e-05, "loss": 8.7435, "step": 973500 }, { "epoch": 7.79, "learning_rate": 4.6104000000000004e-05, "loss": 8.7435, "step": 974000 }, { "epoch": 7.8, "learning_rate": 4.6102e-05, "loss": 8.7487, "step": 974500 }, { "epoch": 7.8, "learning_rate": 4.61e-05, "loss": 8.7584, "step": 975000 }, { "epoch": 7.8, "learning_rate": 4.6098000000000005e-05, "loss": 8.781, "step": 975500 }, { "epoch": 7.81, "learning_rate": 4.6096e-05, "loss": 8.7488, "step": 976000 }, { "epoch": 7.81, "learning_rate": 4.6094e-05, "loss": 8.7435, "step": 976500 }, { "epoch": 7.82, "learning_rate": 4.6092e-05, "loss": 8.7671, "step": 977000 }, { "epoch": 7.82, "learning_rate": 4.609e-05, "loss": 8.7473, "step": 977500 }, { "epoch": 7.82, "learning_rate": 4.6088e-05, "loss": 8.789, "step": 978000 }, { "epoch": 7.83, "learning_rate": 4.6086e-05, "loss": 8.7573, "step": 978500 }, { "epoch": 7.83, "learning_rate": 4.608400000000001e-05, "loss": 8.7524, "step": 979000 }, { "epoch": 7.84, "learning_rate": 4.6082000000000004e-05, "loss": 8.7607, "step": 979500 }, { "epoch": 7.84, "learning_rate": 4.608e-05, "loss": 8.7431, "step": 980000 }, { "epoch": 7.84, "learning_rate": 4.6078e-05, "loss": 8.7603, "step": 980500 }, { "epoch": 7.85, "learning_rate": 4.6076000000000004e-05, "loss": 8.7607, "step": 981000 }, { "epoch": 7.85, "learning_rate": 4.6074e-05, "loss": 8.7611, "step": 981500 }, { "epoch": 7.86, "learning_rate": 4.6072e-05, "loss": 8.7727, "step": 982000 }, { "epoch": 7.86, "learning_rate": 4.6070000000000004e-05, "loss": 8.7705, "step": 982500 }, { "epoch": 7.86, "learning_rate": 4.6068e-05, "loss": 8.748, "step": 983000 }, { "epoch": 7.87, "learning_rate": 4.6066e-05, "loss": 8.7487, "step": 983500 }, { "epoch": 7.87, "learning_rate": 4.6064000000000005e-05, "loss": 8.7531, "step": 984000 }, { "epoch": 7.88, "learning_rate": 4.6062e-05, "loss": 8.7451, "step": 984500 }, { "epoch": 7.88, "learning_rate": 4.606e-05, "loss": 8.7569, "step": 985000 }, { "epoch": 7.88, "learning_rate": 4.6058e-05, "loss": 8.7437, "step": 985500 }, { "epoch": 7.89, "learning_rate": 4.605600000000001e-05, "loss": 8.7727, "step": 986000 }, { "epoch": 7.89, "learning_rate": 4.6054e-05, "loss": 8.7642, "step": 986500 }, { "epoch": 7.9, "learning_rate": 4.6052e-05, "loss": 8.7476, "step": 987000 }, { "epoch": 7.9, "learning_rate": 4.605e-05, "loss": 8.7485, "step": 987500 }, { "epoch": 7.9, "learning_rate": 4.6048000000000004e-05, "loss": 8.7622, "step": 988000 }, { "epoch": 7.91, "learning_rate": 4.6046e-05, "loss": 8.7574, "step": 988500 }, { "epoch": 7.91, "learning_rate": 4.6044e-05, "loss": 8.7454, "step": 989000 }, { "epoch": 7.92, "learning_rate": 4.6042000000000004e-05, "loss": 8.747, "step": 989500 }, { "epoch": 7.92, "learning_rate": 4.604e-05, "loss": 8.7404, "step": 990000 }, { "epoch": 7.92, "learning_rate": 4.6038e-05, "loss": 8.7213, "step": 990500 }, { "epoch": 7.93, "learning_rate": 4.6036000000000004e-05, "loss": 8.7656, "step": 991000 }, { "epoch": 7.93, "learning_rate": 4.603400000000001e-05, "loss": 8.7572, "step": 991500 }, { "epoch": 7.94, "learning_rate": 4.6032e-05, "loss": 8.7596, "step": 992000 }, { "epoch": 7.94, "learning_rate": 4.603e-05, "loss": 8.7597, "step": 992500 }, { "epoch": 7.94, "learning_rate": 4.602800000000001e-05, "loss": 8.7695, "step": 993000 }, { "epoch": 7.95, "learning_rate": 4.6026e-05, "loss": 8.7391, "step": 993500 }, { "epoch": 7.95, "learning_rate": 4.6024e-05, "loss": 8.7392, "step": 994000 }, { "epoch": 7.96, "learning_rate": 4.6022e-05, "loss": 8.7401, "step": 994500 }, { "epoch": 7.96, "learning_rate": 4.602e-05, "loss": 8.7305, "step": 995000 }, { "epoch": 7.96, "learning_rate": 4.6018e-05, "loss": 8.7626, "step": 995500 }, { "epoch": 7.97, "learning_rate": 4.6016e-05, "loss": 8.7187, "step": 996000 }, { "epoch": 7.97, "learning_rate": 4.6014000000000004e-05, "loss": 8.7574, "step": 996500 }, { "epoch": 7.98, "learning_rate": 4.6012e-05, "loss": 8.7661, "step": 997000 }, { "epoch": 7.98, "learning_rate": 4.601e-05, "loss": 8.755, "step": 997500 }, { "epoch": 7.98, "learning_rate": 4.6008000000000004e-05, "loss": 8.7419, "step": 998000 }, { "epoch": 7.99, "learning_rate": 4.6006000000000006e-05, "loss": 8.7441, "step": 998500 }, { "epoch": 7.99, "learning_rate": 4.6004e-05, "loss": 8.7499, "step": 999000 }, { "epoch": 8.0, "learning_rate": 4.6002e-05, "loss": 8.7668, "step": 999500 }, { "epoch": 8.0, "learning_rate": 4.600000000000001e-05, "loss": 8.7548, "step": 1000000 }, { "epoch": 8.0, "learning_rate": 4.5998e-05, "loss": 8.7518, "step": 1000500 }, { "epoch": 8.01, "learning_rate": 4.5996e-05, "loss": 8.7525, "step": 1001000 }, { "epoch": 8.01, "learning_rate": 4.5994e-05, "loss": 8.7594, "step": 1001500 }, { "epoch": 8.02, "learning_rate": 4.5992e-05, "loss": 8.7526, "step": 1002000 }, { "epoch": 8.02, "learning_rate": 4.599e-05, "loss": 8.7309, "step": 1002500 }, { "epoch": 8.02, "learning_rate": 4.5988e-05, "loss": 8.7528, "step": 1003000 }, { "epoch": 8.03, "learning_rate": 4.5986e-05, "loss": 8.7467, "step": 1003500 }, { "epoch": 8.03, "learning_rate": 4.5984000000000006e-05, "loss": 8.7619, "step": 1004000 }, { "epoch": 8.04, "learning_rate": 4.5982e-05, "loss": 8.7641, "step": 1004500 }, { "epoch": 8.04, "learning_rate": 4.5980000000000004e-05, "loss": 8.7859, "step": 1005000 }, { "epoch": 8.04, "learning_rate": 4.5978000000000006e-05, "loss": 8.7646, "step": 1005500 }, { "epoch": 8.05, "learning_rate": 4.5976e-05, "loss": 8.7443, "step": 1006000 }, { "epoch": 8.05, "learning_rate": 4.5974e-05, "loss": 8.7468, "step": 1006500 }, { "epoch": 8.06, "learning_rate": 4.5972000000000006e-05, "loss": 8.7516, "step": 1007000 }, { "epoch": 8.06, "learning_rate": 4.597e-05, "loss": 8.7447, "step": 1007500 }, { "epoch": 8.06, "learning_rate": 4.5968e-05, "loss": 8.7767, "step": 1008000 }, { "epoch": 8.07, "learning_rate": 4.596600000000001e-05, "loss": 8.7518, "step": 1008500 }, { "epoch": 8.07, "learning_rate": 4.5964e-05, "loss": 8.7566, "step": 1009000 }, { "epoch": 8.08, "learning_rate": 4.5962e-05, "loss": 8.7503, "step": 1009500 }, { "epoch": 8.08, "learning_rate": 4.596e-05, "loss": 8.7505, "step": 1010000 }, { "epoch": 8.08, "learning_rate": 4.5958e-05, "loss": 8.7709, "step": 1010500 }, { "epoch": 8.09, "learning_rate": 4.5956000000000005e-05, "loss": 8.7462, "step": 1011000 }, { "epoch": 8.09, "learning_rate": 4.5954e-05, "loss": 8.7429, "step": 1011500 }, { "epoch": 8.1, "learning_rate": 4.5952e-05, "loss": 8.7683, "step": 1012000 }, { "epoch": 8.1, "learning_rate": 4.5950000000000006e-05, "loss": 8.7532, "step": 1012500 }, { "epoch": 8.1, "learning_rate": 4.5948e-05, "loss": 8.7512, "step": 1013000 }, { "epoch": 8.11, "learning_rate": 4.5946e-05, "loss": 8.7598, "step": 1013500 }, { "epoch": 8.11, "learning_rate": 4.5944000000000006e-05, "loss": 8.7399, "step": 1014000 }, { "epoch": 8.12, "learning_rate": 4.5942e-05, "loss": 8.7572, "step": 1014500 }, { "epoch": 8.12, "learning_rate": 4.594e-05, "loss": 8.7477, "step": 1015000 }, { "epoch": 8.12, "learning_rate": 4.5938000000000006e-05, "loss": 8.7442, "step": 1015500 }, { "epoch": 8.13, "learning_rate": 4.5936e-05, "loss": 8.7665, "step": 1016000 }, { "epoch": 8.13, "learning_rate": 4.5934000000000004e-05, "loss": 8.7644, "step": 1016500 }, { "epoch": 8.14, "learning_rate": 4.5932e-05, "loss": 8.7552, "step": 1017000 }, { "epoch": 8.14, "learning_rate": 4.593e-05, "loss": 8.756, "step": 1017500 }, { "epoch": 8.14, "learning_rate": 4.5928000000000005e-05, "loss": 8.7607, "step": 1018000 }, { "epoch": 8.15, "learning_rate": 4.5926e-05, "loss": 8.7513, "step": 1018500 }, { "epoch": 8.15, "learning_rate": 4.5924e-05, "loss": 8.7616, "step": 1019000 }, { "epoch": 8.16, "learning_rate": 4.5922000000000005e-05, "loss": 8.7444, "step": 1019500 }, { "epoch": 8.16, "learning_rate": 4.592e-05, "loss": 8.757, "step": 1020000 }, { "epoch": 8.16, "learning_rate": 4.5918e-05, "loss": 8.7525, "step": 1020500 }, { "epoch": 8.17, "learning_rate": 4.5916000000000006e-05, "loss": 8.7414, "step": 1021000 }, { "epoch": 8.17, "learning_rate": 4.5914e-05, "loss": 8.7481, "step": 1021500 }, { "epoch": 8.18, "learning_rate": 4.5912e-05, "loss": 8.7632, "step": 1022000 }, { "epoch": 8.18, "learning_rate": 4.5910000000000006e-05, "loss": 8.7571, "step": 1022500 }, { "epoch": 8.18, "learning_rate": 4.5908e-05, "loss": 8.766, "step": 1023000 }, { "epoch": 8.19, "learning_rate": 4.5906000000000004e-05, "loss": 8.7551, "step": 1023500 }, { "epoch": 8.19, "learning_rate": 4.5904e-05, "loss": 8.7421, "step": 1024000 }, { "epoch": 8.2, "learning_rate": 4.5902e-05, "loss": 8.7492, "step": 1024500 }, { "epoch": 8.2, "learning_rate": 4.5900000000000004e-05, "loss": 8.7505, "step": 1025000 }, { "epoch": 8.2, "learning_rate": 4.5898e-05, "loss": 8.7618, "step": 1025500 }, { "epoch": 8.21, "learning_rate": 4.5896e-05, "loss": 8.7647, "step": 1026000 }, { "epoch": 8.21, "learning_rate": 4.5894000000000005e-05, "loss": 8.7412, "step": 1026500 }, { "epoch": 8.22, "learning_rate": 4.5892e-05, "loss": 8.7593, "step": 1027000 }, { "epoch": 8.22, "learning_rate": 4.589e-05, "loss": 8.7523, "step": 1027500 }, { "epoch": 8.22, "learning_rate": 4.5888000000000005e-05, "loss": 8.7619, "step": 1028000 }, { "epoch": 8.23, "learning_rate": 4.5886e-05, "loss": 8.7542, "step": 1028500 }, { "epoch": 8.23, "learning_rate": 4.5884e-05, "loss": 8.7483, "step": 1029000 }, { "epoch": 8.24, "learning_rate": 4.5882000000000006e-05, "loss": 8.737, "step": 1029500 }, { "epoch": 8.24, "learning_rate": 4.588e-05, "loss": 8.746, "step": 1030000 }, { "epoch": 8.24, "learning_rate": 4.5878000000000004e-05, "loss": 8.7424, "step": 1030500 }, { "epoch": 8.25, "learning_rate": 4.5876e-05, "loss": 8.7581, "step": 1031000 }, { "epoch": 8.25, "learning_rate": 4.5874e-05, "loss": 8.7561, "step": 1031500 }, { "epoch": 8.26, "learning_rate": 4.5872000000000004e-05, "loss": 8.7321, "step": 1032000 }, { "epoch": 8.26, "learning_rate": 4.587e-05, "loss": 8.7496, "step": 1032500 }, { "epoch": 8.26, "learning_rate": 4.5868e-05, "loss": 8.7524, "step": 1033000 }, { "epoch": 8.27, "learning_rate": 4.5866000000000004e-05, "loss": 8.7635, "step": 1033500 }, { "epoch": 8.27, "learning_rate": 4.5864e-05, "loss": 8.7616, "step": 1034000 }, { "epoch": 8.28, "learning_rate": 4.5862e-05, "loss": 8.7377, "step": 1034500 }, { "epoch": 8.28, "learning_rate": 4.5860000000000005e-05, "loss": 8.7379, "step": 1035000 }, { "epoch": 8.28, "learning_rate": 4.5858e-05, "loss": 8.7452, "step": 1035500 }, { "epoch": 8.29, "learning_rate": 4.5856e-05, "loss": 8.7661, "step": 1036000 }, { "epoch": 8.29, "learning_rate": 4.5854000000000005e-05, "loss": 8.7918, "step": 1036500 }, { "epoch": 8.3, "learning_rate": 4.5852e-05, "loss": 8.7821, "step": 1037000 }, { "epoch": 8.3, "learning_rate": 4.585e-05, "loss": 8.7515, "step": 1037500 }, { "epoch": 8.3, "learning_rate": 4.5848e-05, "loss": 8.753, "step": 1038000 }, { "epoch": 8.31, "learning_rate": 4.5846e-05, "loss": 8.7653, "step": 1038500 }, { "epoch": 8.31, "learning_rate": 4.5844000000000004e-05, "loss": 8.7434, "step": 1039000 }, { "epoch": 8.32, "learning_rate": 4.5842e-05, "loss": 8.7503, "step": 1039500 }, { "epoch": 8.32, "learning_rate": 4.584e-05, "loss": 8.7442, "step": 1040000 }, { "epoch": 8.32, "learning_rate": 4.5838000000000004e-05, "loss": 8.7556, "step": 1040500 }, { "epoch": 8.33, "learning_rate": 4.5836e-05, "loss": 8.7387, "step": 1041000 }, { "epoch": 8.33, "learning_rate": 4.5834e-05, "loss": 8.7435, "step": 1041500 }, { "epoch": 8.34, "learning_rate": 4.5832000000000004e-05, "loss": 8.7568, "step": 1042000 }, { "epoch": 8.34, "learning_rate": 4.583e-05, "loss": 8.731, "step": 1042500 }, { "epoch": 8.34, "learning_rate": 4.5828e-05, "loss": 8.7556, "step": 1043000 }, { "epoch": 8.35, "learning_rate": 4.5826000000000005e-05, "loss": 8.753, "step": 1043500 }, { "epoch": 8.35, "learning_rate": 4.5824e-05, "loss": 8.765, "step": 1044000 }, { "epoch": 8.36, "learning_rate": 4.5822e-05, "loss": 8.7608, "step": 1044500 }, { "epoch": 8.36, "learning_rate": 4.5820000000000005e-05, "loss": 8.7479, "step": 1045000 }, { "epoch": 8.36, "learning_rate": 4.5818e-05, "loss": 8.7496, "step": 1045500 }, { "epoch": 8.37, "learning_rate": 4.5816e-05, "loss": 8.7563, "step": 1046000 }, { "epoch": 8.37, "learning_rate": 4.5814e-05, "loss": 8.7536, "step": 1046500 }, { "epoch": 8.38, "learning_rate": 4.581200000000001e-05, "loss": 8.7393, "step": 1047000 }, { "epoch": 8.38, "learning_rate": 4.5810000000000004e-05, "loss": 8.7538, "step": 1047500 }, { "epoch": 8.38, "learning_rate": 4.5808e-05, "loss": 8.7405, "step": 1048000 }, { "epoch": 8.39, "learning_rate": 4.5806e-05, "loss": 8.7559, "step": 1048500 }, { "epoch": 8.39, "learning_rate": 4.5804000000000004e-05, "loss": 8.7593, "step": 1049000 }, { "epoch": 8.4, "learning_rate": 4.5802e-05, "loss": 8.7558, "step": 1049500 }, { "epoch": 8.4, "learning_rate": 4.58e-05, "loss": 8.7608, "step": 1050000 }, { "epoch": 8.4, "learning_rate": 4.5798000000000004e-05, "loss": 8.753, "step": 1050500 }, { "epoch": 8.41, "learning_rate": 4.5796e-05, "loss": 8.7595, "step": 1051000 }, { "epoch": 8.41, "learning_rate": 4.5794e-05, "loss": 8.7691, "step": 1051500 }, { "epoch": 8.42, "learning_rate": 4.5792000000000005e-05, "loss": 8.7589, "step": 1052000 }, { "epoch": 8.42, "learning_rate": 4.579e-05, "loss": 8.7563, "step": 1052500 }, { "epoch": 8.42, "learning_rate": 4.5788e-05, "loss": 8.7495, "step": 1053000 }, { "epoch": 8.43, "learning_rate": 4.5786e-05, "loss": 8.7422, "step": 1053500 }, { "epoch": 8.43, "learning_rate": 4.578400000000001e-05, "loss": 8.7573, "step": 1054000 }, { "epoch": 8.44, "learning_rate": 4.5782e-05, "loss": 8.7329, "step": 1054500 }, { "epoch": 8.44, "learning_rate": 4.578e-05, "loss": 8.7471, "step": 1055000 }, { "epoch": 8.44, "learning_rate": 4.5778e-05, "loss": 8.7595, "step": 1055500 }, { "epoch": 8.45, "learning_rate": 4.5776000000000004e-05, "loss": 8.7502, "step": 1056000 }, { "epoch": 8.45, "learning_rate": 4.5774e-05, "loss": 8.7616, "step": 1056500 }, { "epoch": 8.46, "learning_rate": 4.5772e-05, "loss": 8.7519, "step": 1057000 }, { "epoch": 8.46, "learning_rate": 4.5770000000000004e-05, "loss": 8.7762, "step": 1057500 }, { "epoch": 8.46, "learning_rate": 4.5768e-05, "loss": 8.786, "step": 1058000 }, { "epoch": 8.47, "learning_rate": 4.5766e-05, "loss": 8.7416, "step": 1058500 }, { "epoch": 8.47, "learning_rate": 4.5764000000000004e-05, "loss": 8.735, "step": 1059000 }, { "epoch": 8.48, "learning_rate": 4.576200000000001e-05, "loss": 8.7345, "step": 1059500 }, { "epoch": 8.48, "learning_rate": 4.576e-05, "loss": 8.7803, "step": 1060000 }, { "epoch": 8.48, "learning_rate": 4.5758e-05, "loss": 8.7387, "step": 1060500 }, { "epoch": 8.49, "learning_rate": 4.575600000000001e-05, "loss": 8.7484, "step": 1061000 }, { "epoch": 8.49, "learning_rate": 4.5754e-05, "loss": 8.7491, "step": 1061500 }, { "epoch": 8.5, "learning_rate": 4.5752e-05, "loss": 8.7428, "step": 1062000 }, { "epoch": 8.5, "learning_rate": 4.575e-05, "loss": 8.7552, "step": 1062500 }, { "epoch": 8.5, "learning_rate": 4.5748e-05, "loss": 8.762, "step": 1063000 }, { "epoch": 8.51, "learning_rate": 4.5746e-05, "loss": 8.7661, "step": 1063500 }, { "epoch": 8.51, "learning_rate": 4.5744e-05, "loss": 8.7571, "step": 1064000 }, { "epoch": 8.52, "learning_rate": 4.5742000000000004e-05, "loss": 8.761, "step": 1064500 }, { "epoch": 8.52, "learning_rate": 4.574e-05, "loss": 8.7609, "step": 1065000 }, { "epoch": 8.52, "learning_rate": 4.5738e-05, "loss": 8.7881, "step": 1065500 }, { "epoch": 8.53, "learning_rate": 4.5736000000000004e-05, "loss": 8.7426, "step": 1066000 }, { "epoch": 8.53, "learning_rate": 4.5734000000000006e-05, "loss": 8.7552, "step": 1066500 }, { "epoch": 8.54, "learning_rate": 4.5732e-05, "loss": 8.754, "step": 1067000 }, { "epoch": 8.54, "learning_rate": 4.573e-05, "loss": 8.7388, "step": 1067500 }, { "epoch": 8.54, "learning_rate": 4.572800000000001e-05, "loss": 8.7461, "step": 1068000 }, { "epoch": 8.55, "learning_rate": 4.5726e-05, "loss": 8.7406, "step": 1068500 }, { "epoch": 8.55, "learning_rate": 4.5724e-05, "loss": 8.7331, "step": 1069000 }, { "epoch": 8.56, "learning_rate": 4.572200000000001e-05, "loss": 8.7287, "step": 1069500 }, { "epoch": 8.56, "learning_rate": 4.572e-05, "loss": 8.7564, "step": 1070000 }, { "epoch": 8.56, "learning_rate": 4.5718e-05, "loss": 8.765, "step": 1070500 }, { "epoch": 8.57, "learning_rate": 4.5716e-05, "loss": 8.7335, "step": 1071000 }, { "epoch": 8.57, "learning_rate": 4.5714e-05, "loss": 8.7621, "step": 1071500 }, { "epoch": 8.58, "learning_rate": 4.5712000000000006e-05, "loss": 8.7647, "step": 1072000 }, { "epoch": 8.58, "learning_rate": 4.571e-05, "loss": 8.7606, "step": 1072500 }, { "epoch": 8.58, "learning_rate": 4.5708000000000004e-05, "loss": 8.7504, "step": 1073000 }, { "epoch": 8.59, "learning_rate": 4.5706000000000006e-05, "loss": 8.7628, "step": 1073500 }, { "epoch": 8.59, "learning_rate": 4.5704e-05, "loss": 8.7536, "step": 1074000 }, { "epoch": 8.6, "learning_rate": 4.5702e-05, "loss": 8.7651, "step": 1074500 }, { "epoch": 8.6, "learning_rate": 4.5700000000000006e-05, "loss": 8.7671, "step": 1075000 }, { "epoch": 8.6, "learning_rate": 4.5698e-05, "loss": 8.7596, "step": 1075500 }, { "epoch": 8.61, "learning_rate": 4.5696e-05, "loss": 8.7435, "step": 1076000 }, { "epoch": 8.61, "learning_rate": 4.569400000000001e-05, "loss": 8.741, "step": 1076500 }, { "epoch": 8.62, "learning_rate": 4.5692e-05, "loss": 8.7707, "step": 1077000 }, { "epoch": 8.62, "learning_rate": 4.569e-05, "loss": 8.7603, "step": 1077500 }, { "epoch": 8.62, "learning_rate": 4.5688e-05, "loss": 8.7325, "step": 1078000 }, { "epoch": 8.63, "learning_rate": 4.5686e-05, "loss": 8.7381, "step": 1078500 }, { "epoch": 8.63, "learning_rate": 4.5684000000000005e-05, "loss": 8.7707, "step": 1079000 }, { "epoch": 8.64, "learning_rate": 4.5682e-05, "loss": 8.7425, "step": 1079500 }, { "epoch": 8.64, "learning_rate": 4.568e-05, "loss": 8.7552, "step": 1080000 }, { "epoch": 8.64, "learning_rate": 4.5678000000000005e-05, "loss": 8.7544, "step": 1080500 }, { "epoch": 8.65, "learning_rate": 4.5676e-05, "loss": 8.7629, "step": 1081000 }, { "epoch": 8.65, "learning_rate": 4.5674000000000003e-05, "loss": 8.7533, "step": 1081500 }, { "epoch": 8.66, "learning_rate": 4.5672000000000006e-05, "loss": 8.7634, "step": 1082000 }, { "epoch": 8.66, "learning_rate": 4.567e-05, "loss": 8.769, "step": 1082500 }, { "epoch": 8.66, "learning_rate": 4.5668e-05, "loss": 8.7436, "step": 1083000 }, { "epoch": 8.67, "learning_rate": 4.5666000000000006e-05, "loss": 8.756, "step": 1083500 }, { "epoch": 8.67, "learning_rate": 4.5664e-05, "loss": 8.7361, "step": 1084000 }, { "epoch": 8.68, "learning_rate": 4.5662000000000004e-05, "loss": 8.7676, "step": 1084500 }, { "epoch": 8.68, "learning_rate": 4.566e-05, "loss": 8.754, "step": 1085000 }, { "epoch": 8.68, "learning_rate": 4.5658e-05, "loss": 8.7639, "step": 1085500 }, { "epoch": 8.69, "learning_rate": 4.5656000000000005e-05, "loss": 8.743, "step": 1086000 }, { "epoch": 8.69, "learning_rate": 4.5654e-05, "loss": 8.762, "step": 1086500 }, { "epoch": 8.7, "learning_rate": 4.5652e-05, "loss": 8.7312, "step": 1087000 }, { "epoch": 8.7, "learning_rate": 4.5650000000000005e-05, "loss": 8.7352, "step": 1087500 }, { "epoch": 8.7, "learning_rate": 4.5648e-05, "loss": 8.7593, "step": 1088000 }, { "epoch": 8.71, "learning_rate": 4.5646e-05, "loss": 8.7647, "step": 1088500 }, { "epoch": 8.71, "learning_rate": 4.5644000000000005e-05, "loss": 8.7693, "step": 1089000 }, { "epoch": 8.72, "learning_rate": 4.5642e-05, "loss": 8.7362, "step": 1089500 }, { "epoch": 8.72, "learning_rate": 4.564e-05, "loss": 8.7623, "step": 1090000 }, { "epoch": 8.72, "learning_rate": 4.5638000000000006e-05, "loss": 8.7371, "step": 1090500 }, { "epoch": 8.73, "learning_rate": 4.5636e-05, "loss": 8.7593, "step": 1091000 }, { "epoch": 8.73, "learning_rate": 4.5634000000000004e-05, "loss": 8.7447, "step": 1091500 }, { "epoch": 8.74, "learning_rate": 4.5632e-05, "loss": 8.7487, "step": 1092000 }, { "epoch": 8.74, "learning_rate": 4.563e-05, "loss": 8.7339, "step": 1092500 }, { "epoch": 8.74, "learning_rate": 4.5628000000000004e-05, "loss": 8.7495, "step": 1093000 }, { "epoch": 8.75, "learning_rate": 4.5626e-05, "loss": 8.7597, "step": 1093500 }, { "epoch": 8.75, "learning_rate": 4.5624e-05, "loss": 8.7549, "step": 1094000 }, { "epoch": 8.76, "learning_rate": 4.5622000000000005e-05, "loss": 8.7327, "step": 1094500 }, { "epoch": 8.76, "learning_rate": 4.562e-05, "loss": 8.7635, "step": 1095000 }, { "epoch": 8.76, "learning_rate": 4.5618e-05, "loss": 8.7513, "step": 1095500 }, { "epoch": 8.77, "learning_rate": 4.5616000000000005e-05, "loss": 8.748, "step": 1096000 }, { "epoch": 8.77, "learning_rate": 4.5614e-05, "loss": 8.7531, "step": 1096500 }, { "epoch": 8.78, "learning_rate": 4.5612e-05, "loss": 8.7585, "step": 1097000 }, { "epoch": 8.78, "learning_rate": 4.5610000000000005e-05, "loss": 8.7534, "step": 1097500 }, { "epoch": 8.78, "learning_rate": 4.5608e-05, "loss": 8.7641, "step": 1098000 }, { "epoch": 8.79, "learning_rate": 4.5606000000000003e-05, "loss": 8.7441, "step": 1098500 }, { "epoch": 8.79, "learning_rate": 4.5604e-05, "loss": 8.7411, "step": 1099000 }, { "epoch": 8.8, "learning_rate": 4.5602e-05, "loss": 8.7323, "step": 1099500 }, { "epoch": 8.8, "learning_rate": 4.5600000000000004e-05, "loss": 8.7527, "step": 1100000 }, { "epoch": 8.8, "learning_rate": 4.5598e-05, "loss": 8.7551, "step": 1100500 }, { "epoch": 8.81, "learning_rate": 4.5596e-05, "loss": 8.7487, "step": 1101000 }, { "epoch": 8.81, "learning_rate": 4.5594000000000004e-05, "loss": 8.765, "step": 1101500 }, { "epoch": 8.82, "learning_rate": 4.5592e-05, "loss": 8.7599, "step": 1102000 }, { "epoch": 8.82, "learning_rate": 4.559e-05, "loss": 8.747, "step": 1102500 }, { "epoch": 8.82, "learning_rate": 4.5588000000000005e-05, "loss": 8.7591, "step": 1103000 }, { "epoch": 8.83, "learning_rate": 4.5586e-05, "loss": 8.7681, "step": 1103500 }, { "epoch": 8.83, "learning_rate": 4.5584e-05, "loss": 8.7826, "step": 1104000 }, { "epoch": 8.84, "learning_rate": 4.5582000000000005e-05, "loss": 8.7503, "step": 1104500 }, { "epoch": 8.84, "learning_rate": 4.558e-05, "loss": 8.7548, "step": 1105000 }, { "epoch": 8.84, "learning_rate": 4.5578e-05, "loss": 8.7503, "step": 1105500 }, { "epoch": 8.85, "learning_rate": 4.5576e-05, "loss": 8.7616, "step": 1106000 }, { "epoch": 8.85, "learning_rate": 4.5574e-05, "loss": 8.7462, "step": 1106500 }, { "epoch": 8.86, "learning_rate": 4.5572000000000003e-05, "loss": 8.745, "step": 1107000 }, { "epoch": 8.86, "learning_rate": 4.557e-05, "loss": 8.7549, "step": 1107500 }, { "epoch": 8.86, "learning_rate": 4.5568e-05, "loss": 8.7553, "step": 1108000 }, { "epoch": 8.87, "learning_rate": 4.5566000000000004e-05, "loss": 8.7741, "step": 1108500 }, { "epoch": 8.87, "learning_rate": 4.5564e-05, "loss": 8.7531, "step": 1109000 }, { "epoch": 8.88, "learning_rate": 4.5562e-05, "loss": 8.7575, "step": 1109500 }, { "epoch": 8.88, "learning_rate": 4.5560000000000004e-05, "loss": 8.7682, "step": 1110000 }, { "epoch": 8.88, "learning_rate": 4.5558e-05, "loss": 8.7461, "step": 1110500 }, { "epoch": 8.89, "learning_rate": 4.5556e-05, "loss": 8.7475, "step": 1111000 }, { "epoch": 8.89, "learning_rate": 4.5554000000000005e-05, "loss": 8.7511, "step": 1111500 }, { "epoch": 8.9, "learning_rate": 4.5552e-05, "loss": 8.7438, "step": 1112000 }, { "epoch": 8.9, "learning_rate": 4.555e-05, "loss": 8.7435, "step": 1112500 }, { "epoch": 8.9, "learning_rate": 4.5548000000000005e-05, "loss": 8.7563, "step": 1113000 }, { "epoch": 8.91, "learning_rate": 4.5546e-05, "loss": 8.77, "step": 1113500 }, { "epoch": 8.91, "learning_rate": 4.5544e-05, "loss": 8.7655, "step": 1114000 }, { "epoch": 8.92, "learning_rate": 4.5542e-05, "loss": 8.7363, "step": 1114500 }, { "epoch": 8.92, "learning_rate": 4.554000000000001e-05, "loss": 8.768, "step": 1115000 }, { "epoch": 8.92, "learning_rate": 4.5538000000000003e-05, "loss": 8.7206, "step": 1115500 }, { "epoch": 8.93, "learning_rate": 4.5536e-05, "loss": 8.7314, "step": 1116000 }, { "epoch": 8.93, "learning_rate": 4.5534e-05, "loss": 8.747, "step": 1116500 }, { "epoch": 8.94, "learning_rate": 4.5532000000000004e-05, "loss": 8.7343, "step": 1117000 }, { "epoch": 8.94, "learning_rate": 4.553e-05, "loss": 8.753, "step": 1117500 }, { "epoch": 8.94, "learning_rate": 4.5528e-05, "loss": 8.7468, "step": 1118000 }, { "epoch": 8.95, "learning_rate": 4.5526000000000004e-05, "loss": 8.737, "step": 1118500 }, { "epoch": 8.95, "learning_rate": 4.5524e-05, "loss": 8.7333, "step": 1119000 }, { "epoch": 8.96, "learning_rate": 4.5522e-05, "loss": 8.747, "step": 1119500 }, { "epoch": 8.96, "learning_rate": 4.5520000000000005e-05, "loss": 8.7544, "step": 1120000 }, { "epoch": 8.96, "learning_rate": 4.5518e-05, "loss": 8.7659, "step": 1120500 }, { "epoch": 8.97, "learning_rate": 4.5516e-05, "loss": 8.7715, "step": 1121000 }, { "epoch": 8.97, "learning_rate": 4.5514e-05, "loss": 8.7462, "step": 1121500 }, { "epoch": 8.98, "learning_rate": 4.551200000000001e-05, "loss": 8.7711, "step": 1122000 }, { "epoch": 8.98, "learning_rate": 4.551e-05, "loss": 8.7655, "step": 1122500 }, { "epoch": 8.98, "learning_rate": 4.5508e-05, "loss": 8.7888, "step": 1123000 }, { "epoch": 8.99, "learning_rate": 4.5506e-05, "loss": 8.7536, "step": 1123500 }, { "epoch": 8.99, "learning_rate": 4.5504000000000003e-05, "loss": 8.771, "step": 1124000 }, { "epoch": 9.0, "learning_rate": 4.5502e-05, "loss": 8.7383, "step": 1124500 }, { "epoch": 9.0, "learning_rate": 4.55e-05, "loss": 8.7406, "step": 1125000 }, { "epoch": 9.0, "learning_rate": 4.5498000000000004e-05, "loss": 8.7599, "step": 1125500 }, { "epoch": 9.01, "learning_rate": 4.5496e-05, "loss": 8.7698, "step": 1126000 }, { "epoch": 9.01, "learning_rate": 4.5494e-05, "loss": 8.7651, "step": 1126500 }, { "epoch": 9.02, "learning_rate": 4.5492000000000004e-05, "loss": 8.7592, "step": 1127000 }, { "epoch": 9.02, "learning_rate": 4.549000000000001e-05, "loss": 8.7589, "step": 1127500 }, { "epoch": 9.02, "learning_rate": 4.5488e-05, "loss": 8.7567, "step": 1128000 }, { "epoch": 9.03, "learning_rate": 4.5486e-05, "loss": 8.7364, "step": 1128500 }, { "epoch": 9.03, "learning_rate": 4.548400000000001e-05, "loss": 8.774, "step": 1129000 }, { "epoch": 9.04, "learning_rate": 4.5482e-05, "loss": 8.762, "step": 1129500 }, { "epoch": 9.04, "learning_rate": 4.548e-05, "loss": 8.7673, "step": 1130000 }, { "epoch": 9.04, "learning_rate": 4.5478e-05, "loss": 8.7546, "step": 1130500 }, { "epoch": 9.05, "learning_rate": 4.5476e-05, "loss": 8.7627, "step": 1131000 }, { "epoch": 9.05, "learning_rate": 4.5474e-05, "loss": 8.7855, "step": 1131500 }, { "epoch": 9.06, "learning_rate": 4.5472e-05, "loss": 8.7556, "step": 1132000 }, { "epoch": 9.06, "learning_rate": 4.5470000000000003e-05, "loss": 8.7556, "step": 1132500 }, { "epoch": 9.06, "learning_rate": 4.5468e-05, "loss": 8.7451, "step": 1133000 }, { "epoch": 9.07, "learning_rate": 4.5466e-05, "loss": 8.7679, "step": 1133500 }, { "epoch": 9.07, "learning_rate": 4.5464000000000004e-05, "loss": 8.7587, "step": 1134000 }, { "epoch": 9.08, "learning_rate": 4.5462000000000006e-05, "loss": 8.7522, "step": 1134500 }, { "epoch": 9.08, "learning_rate": 4.546e-05, "loss": 8.733, "step": 1135000 }, { "epoch": 9.08, "learning_rate": 4.5458e-05, "loss": 8.7503, "step": 1135500 }, { "epoch": 9.09, "learning_rate": 4.5456000000000007e-05, "loss": 8.7692, "step": 1136000 }, { "epoch": 9.09, "learning_rate": 4.5454e-05, "loss": 8.7564, "step": 1136500 }, { "epoch": 9.1, "learning_rate": 4.5452e-05, "loss": 8.7352, "step": 1137000 }, { "epoch": 9.1, "learning_rate": 4.545000000000001e-05, "loss": 8.7352, "step": 1137500 }, { "epoch": 9.1, "learning_rate": 4.5448e-05, "loss": 8.7374, "step": 1138000 }, { "epoch": 9.11, "learning_rate": 4.5446e-05, "loss": 8.7624, "step": 1138500 }, { "epoch": 9.11, "learning_rate": 4.5444e-05, "loss": 8.7635, "step": 1139000 }, { "epoch": 9.12, "learning_rate": 4.5442e-05, "loss": 8.7537, "step": 1139500 }, { "epoch": 9.12, "learning_rate": 4.5440000000000005e-05, "loss": 8.7543, "step": 1140000 }, { "epoch": 9.12, "learning_rate": 4.5438e-05, "loss": 8.7552, "step": 1140500 }, { "epoch": 9.13, "learning_rate": 4.5436000000000003e-05, "loss": 8.7545, "step": 1141000 }, { "epoch": 9.13, "learning_rate": 4.5434000000000006e-05, "loss": 8.7683, "step": 1141500 }, { "epoch": 9.14, "learning_rate": 4.5432e-05, "loss": 8.7505, "step": 1142000 }, { "epoch": 9.14, "learning_rate": 4.543e-05, "loss": 8.7544, "step": 1142500 }, { "epoch": 9.14, "learning_rate": 4.5428000000000006e-05, "loss": 8.7368, "step": 1143000 }, { "epoch": 9.15, "learning_rate": 4.5426e-05, "loss": 8.7529, "step": 1143500 }, { "epoch": 9.15, "learning_rate": 4.5424e-05, "loss": 8.7587, "step": 1144000 }, { "epoch": 9.16, "learning_rate": 4.5422000000000007e-05, "loss": 8.752, "step": 1144500 }, { "epoch": 9.16, "learning_rate": 4.542e-05, "loss": 8.7557, "step": 1145000 }, { "epoch": 9.16, "learning_rate": 4.5418e-05, "loss": 8.7305, "step": 1145500 }, { "epoch": 9.17, "learning_rate": 4.5416e-05, "loss": 8.7572, "step": 1146000 }, { "epoch": 9.17, "learning_rate": 4.5414e-05, "loss": 8.7655, "step": 1146500 }, { "epoch": 9.18, "learning_rate": 4.5412000000000005e-05, "loss": 8.7714, "step": 1147000 }, { "epoch": 9.18, "learning_rate": 4.541e-05, "loss": 8.7434, "step": 1147500 }, { "epoch": 9.18, "learning_rate": 4.5408e-05, "loss": 8.7608, "step": 1148000 }, { "epoch": 9.19, "learning_rate": 4.5406000000000005e-05, "loss": 8.7613, "step": 1148500 }, { "epoch": 9.19, "learning_rate": 4.5404e-05, "loss": 8.7547, "step": 1149000 }, { "epoch": 9.2, "learning_rate": 4.5402000000000003e-05, "loss": 8.7442, "step": 1149500 }, { "epoch": 9.2, "learning_rate": 4.5400000000000006e-05, "loss": 8.7498, "step": 1150000 }, { "epoch": 9.2, "learning_rate": 4.5398e-05, "loss": 8.7669, "step": 1150500 }, { "epoch": 9.21, "learning_rate": 4.5396e-05, "loss": 8.7629, "step": 1151000 }, { "epoch": 9.21, "learning_rate": 4.5394000000000006e-05, "loss": 8.7594, "step": 1151500 }, { "epoch": 9.22, "learning_rate": 4.5392e-05, "loss": 8.7643, "step": 1152000 }, { "epoch": 9.22, "learning_rate": 4.5390000000000004e-05, "loss": 8.7613, "step": 1152500 }, { "epoch": 9.22, "learning_rate": 4.5388e-05, "loss": 8.7615, "step": 1153000 }, { "epoch": 9.23, "learning_rate": 4.5386e-05, "loss": 8.7547, "step": 1153500 }, { "epoch": 9.23, "learning_rate": 4.5384000000000005e-05, "loss": 8.7678, "step": 1154000 }, { "epoch": 9.24, "learning_rate": 4.5382e-05, "loss": 8.7531, "step": 1154500 }, { "epoch": 9.24, "learning_rate": 4.538e-05, "loss": 8.7318, "step": 1155000 }, { "epoch": 9.24, "learning_rate": 4.5378000000000005e-05, "loss": 8.7542, "step": 1155500 }, { "epoch": 9.25, "learning_rate": 4.5376e-05, "loss": 8.7568, "step": 1156000 }, { "epoch": 9.25, "learning_rate": 4.5374e-05, "loss": 8.7563, "step": 1156500 }, { "epoch": 9.26, "learning_rate": 4.5372000000000005e-05, "loss": 8.759, "step": 1157000 }, { "epoch": 9.26, "learning_rate": 4.537e-05, "loss": 8.7729, "step": 1157500 }, { "epoch": 9.26, "learning_rate": 4.5368e-05, "loss": 8.755, "step": 1158000 }, { "epoch": 9.27, "learning_rate": 4.5366000000000006e-05, "loss": 8.7435, "step": 1158500 }, { "epoch": 9.27, "learning_rate": 4.5364e-05, "loss": 8.7412, "step": 1159000 }, { "epoch": 9.28, "learning_rate": 4.5362000000000004e-05, "loss": 8.7703, "step": 1159500 }, { "epoch": 9.28, "learning_rate": 4.536e-05, "loss": 8.768, "step": 1160000 }, { "epoch": 9.28, "learning_rate": 4.5358e-05, "loss": 8.7589, "step": 1160500 }, { "epoch": 9.29, "learning_rate": 4.5356000000000004e-05, "loss": 8.7464, "step": 1161000 }, { "epoch": 9.29, "learning_rate": 4.5354e-05, "loss": 8.7481, "step": 1161500 }, { "epoch": 9.3, "learning_rate": 4.5352e-05, "loss": 8.7375, "step": 1162000 }, { "epoch": 9.3, "learning_rate": 4.5350000000000005e-05, "loss": 8.7514, "step": 1162500 }, { "epoch": 9.3, "learning_rate": 4.5348e-05, "loss": 8.7627, "step": 1163000 }, { "epoch": 9.31, "learning_rate": 4.5346e-05, "loss": 8.7656, "step": 1163500 }, { "epoch": 9.31, "learning_rate": 4.5344000000000005e-05, "loss": 8.7472, "step": 1164000 }, { "epoch": 9.32, "learning_rate": 4.5342e-05, "loss": 8.7374, "step": 1164500 }, { "epoch": 9.32, "learning_rate": 4.534e-05, "loss": 8.7591, "step": 1165000 }, { "epoch": 9.32, "learning_rate": 4.5338000000000005e-05, "loss": 8.7456, "step": 1165500 }, { "epoch": 9.33, "learning_rate": 4.5336e-05, "loss": 8.7264, "step": 1166000 }, { "epoch": 9.33, "learning_rate": 4.5334e-05, "loss": 8.7475, "step": 1166500 }, { "epoch": 9.34, "learning_rate": 4.5332e-05, "loss": 8.7525, "step": 1167000 }, { "epoch": 9.34, "learning_rate": 4.533e-05, "loss": 8.772, "step": 1167500 }, { "epoch": 9.34, "learning_rate": 4.5328000000000004e-05, "loss": 8.7502, "step": 1168000 }, { "epoch": 9.35, "learning_rate": 4.5326e-05, "loss": 8.7524, "step": 1168500 }, { "epoch": 9.35, "learning_rate": 4.5324e-05, "loss": 8.7427, "step": 1169000 }, { "epoch": 9.36, "learning_rate": 4.5322000000000004e-05, "loss": 8.7586, "step": 1169500 }, { "epoch": 9.36, "learning_rate": 4.532e-05, "loss": 8.744, "step": 1170000 }, { "epoch": 9.36, "learning_rate": 4.5318e-05, "loss": 8.7423, "step": 1170500 }, { "epoch": 9.37, "learning_rate": 4.5316000000000005e-05, "loss": 8.7525, "step": 1171000 }, { "epoch": 9.37, "learning_rate": 4.5314e-05, "loss": 8.7382, "step": 1171500 }, { "epoch": 9.38, "learning_rate": 4.5312e-05, "loss": 8.7658, "step": 1172000 }, { "epoch": 9.38, "learning_rate": 4.5310000000000005e-05, "loss": 8.7402, "step": 1172500 }, { "epoch": 9.38, "learning_rate": 4.5308e-05, "loss": 8.7607, "step": 1173000 }, { "epoch": 9.39, "learning_rate": 4.5306e-05, "loss": 8.742, "step": 1173500 }, { "epoch": 9.39, "learning_rate": 4.5304000000000005e-05, "loss": 8.7564, "step": 1174000 }, { "epoch": 9.4, "learning_rate": 4.5302e-05, "loss": 8.7478, "step": 1174500 }, { "epoch": 9.4, "learning_rate": 4.53e-05, "loss": 8.7551, "step": 1175000 }, { "epoch": 9.4, "learning_rate": 4.5298e-05, "loss": 8.7826, "step": 1175500 }, { "epoch": 9.41, "learning_rate": 4.5296e-05, "loss": 8.7596, "step": 1176000 }, { "epoch": 9.41, "learning_rate": 4.5294000000000004e-05, "loss": 8.7496, "step": 1176500 }, { "epoch": 9.42, "learning_rate": 4.5292e-05, "loss": 8.7539, "step": 1177000 }, { "epoch": 9.42, "learning_rate": 4.529e-05, "loss": 8.7526, "step": 1177500 }, { "epoch": 9.42, "learning_rate": 4.5288000000000004e-05, "loss": 8.7525, "step": 1178000 }, { "epoch": 9.43, "learning_rate": 4.5286e-05, "loss": 8.7621, "step": 1178500 }, { "epoch": 9.43, "learning_rate": 4.5284e-05, "loss": 8.7571, "step": 1179000 }, { "epoch": 9.44, "learning_rate": 4.5282000000000005e-05, "loss": 8.7664, "step": 1179500 }, { "epoch": 9.44, "learning_rate": 4.528e-05, "loss": 8.7463, "step": 1180000 }, { "epoch": 9.44, "learning_rate": 4.5278e-05, "loss": 8.7633, "step": 1180500 }, { "epoch": 9.45, "learning_rate": 4.5276000000000005e-05, "loss": 8.7597, "step": 1181000 }, { "epoch": 9.45, "learning_rate": 4.5274e-05, "loss": 8.7388, "step": 1181500 }, { "epoch": 9.46, "learning_rate": 4.5272e-05, "loss": 8.7646, "step": 1182000 }, { "epoch": 9.46, "learning_rate": 4.527e-05, "loss": 8.7659, "step": 1182500 }, { "epoch": 9.46, "learning_rate": 4.526800000000001e-05, "loss": 8.7391, "step": 1183000 }, { "epoch": 9.47, "learning_rate": 4.5266e-05, "loss": 8.7365, "step": 1183500 }, { "epoch": 9.47, "learning_rate": 4.5264e-05, "loss": 8.7684, "step": 1184000 }, { "epoch": 9.48, "learning_rate": 4.5262e-05, "loss": 8.7474, "step": 1184500 }, { "epoch": 9.48, "learning_rate": 4.5260000000000004e-05, "loss": 8.7737, "step": 1185000 }, { "epoch": 9.48, "learning_rate": 4.5258e-05, "loss": 8.7453, "step": 1185500 }, { "epoch": 9.49, "learning_rate": 4.5256e-05, "loss": 8.7527, "step": 1186000 }, { "epoch": 9.49, "learning_rate": 4.5254000000000004e-05, "loss": 8.7763, "step": 1186500 }, { "epoch": 9.5, "learning_rate": 4.5252e-05, "loss": 8.7497, "step": 1187000 }, { "epoch": 9.5, "learning_rate": 4.525e-05, "loss": 8.7292, "step": 1187500 }, { "epoch": 9.5, "learning_rate": 4.5248000000000005e-05, "loss": 8.7844, "step": 1188000 }, { "epoch": 9.51, "learning_rate": 4.5246e-05, "loss": 8.7557, "step": 1188500 }, { "epoch": 9.51, "learning_rate": 4.5244e-05, "loss": 8.74, "step": 1189000 }, { "epoch": 9.52, "learning_rate": 4.5242e-05, "loss": 8.7816, "step": 1189500 }, { "epoch": 9.52, "learning_rate": 4.524000000000001e-05, "loss": 8.7359, "step": 1190000 }, { "epoch": 9.52, "learning_rate": 4.5238e-05, "loss": 8.7397, "step": 1190500 }, { "epoch": 9.53, "learning_rate": 4.5236e-05, "loss": 8.7586, "step": 1191000 }, { "epoch": 9.53, "learning_rate": 4.5234e-05, "loss": 8.7524, "step": 1191500 }, { "epoch": 9.54, "learning_rate": 4.5232e-05, "loss": 8.7367, "step": 1192000 }, { "epoch": 9.54, "learning_rate": 4.523e-05, "loss": 8.7476, "step": 1192500 }, { "epoch": 9.54, "learning_rate": 4.5228e-05, "loss": 8.7597, "step": 1193000 }, { "epoch": 9.55, "learning_rate": 4.5226000000000004e-05, "loss": 8.7374, "step": 1193500 }, { "epoch": 9.55, "learning_rate": 4.5224e-05, "loss": 8.7652, "step": 1194000 }, { "epoch": 9.56, "learning_rate": 4.5222e-05, "loss": 8.7611, "step": 1194500 }, { "epoch": 9.56, "learning_rate": 4.5220000000000004e-05, "loss": 8.7519, "step": 1195000 }, { "epoch": 9.56, "learning_rate": 4.5218000000000007e-05, "loss": 8.7266, "step": 1195500 }, { "epoch": 9.57, "learning_rate": 4.5216e-05, "loss": 8.7592, "step": 1196000 }, { "epoch": 9.57, "learning_rate": 4.5214e-05, "loss": 8.7492, "step": 1196500 }, { "epoch": 9.58, "learning_rate": 4.521200000000001e-05, "loss": 8.7413, "step": 1197000 }, { "epoch": 9.58, "learning_rate": 4.521e-05, "loss": 8.7379, "step": 1197500 }, { "epoch": 9.58, "learning_rate": 4.5208e-05, "loss": 8.7603, "step": 1198000 }, { "epoch": 9.59, "learning_rate": 4.5206e-05, "loss": 8.7648, "step": 1198500 }, { "epoch": 9.59, "learning_rate": 4.5204e-05, "loss": 8.7292, "step": 1199000 }, { "epoch": 9.6, "learning_rate": 4.5202e-05, "loss": 8.7711, "step": 1199500 }, { "epoch": 9.6, "learning_rate": 4.52e-05, "loss": 8.749, "step": 1200000 }, { "epoch": 9.6, "learning_rate": 4.5198e-05, "loss": 8.7441, "step": 1200500 }, { "epoch": 9.61, "learning_rate": 4.5196e-05, "loss": 8.7643, "step": 1201000 }, { "epoch": 9.61, "learning_rate": 4.5194e-05, "loss": 8.7491, "step": 1201500 }, { "epoch": 9.62, "learning_rate": 4.5192000000000004e-05, "loss": 8.7535, "step": 1202000 }, { "epoch": 9.62, "learning_rate": 4.5190000000000006e-05, "loss": 8.737, "step": 1202500 }, { "epoch": 9.62, "learning_rate": 4.5188e-05, "loss": 8.7307, "step": 1203000 }, { "epoch": 9.63, "learning_rate": 4.5186e-05, "loss": 8.7495, "step": 1203500 }, { "epoch": 9.63, "learning_rate": 4.5184000000000006e-05, "loss": 8.7608, "step": 1204000 }, { "epoch": 9.64, "learning_rate": 4.5182e-05, "loss": 8.7604, "step": 1204500 }, { "epoch": 9.64, "learning_rate": 4.518e-05, "loss": 8.7589, "step": 1205000 }, { "epoch": 9.64, "learning_rate": 4.517800000000001e-05, "loss": 8.7379, "step": 1205500 }, { "epoch": 9.65, "learning_rate": 4.5176e-05, "loss": 8.7512, "step": 1206000 }, { "epoch": 9.65, "learning_rate": 4.5174e-05, "loss": 8.7443, "step": 1206500 }, { "epoch": 9.66, "learning_rate": 4.5172e-05, "loss": 8.7652, "step": 1207000 }, { "epoch": 9.66, "learning_rate": 4.517e-05, "loss": 8.7684, "step": 1207500 }, { "epoch": 9.66, "learning_rate": 4.5168000000000005e-05, "loss": 8.7629, "step": 1208000 }, { "epoch": 9.67, "learning_rate": 4.5166e-05, "loss": 8.7664, "step": 1208500 }, { "epoch": 9.67, "learning_rate": 4.5164e-05, "loss": 8.7526, "step": 1209000 }, { "epoch": 9.68, "learning_rate": 4.5162000000000006e-05, "loss": 8.7592, "step": 1209500 }, { "epoch": 9.68, "learning_rate": 4.516e-05, "loss": 8.7408, "step": 1210000 }, { "epoch": 9.68, "learning_rate": 4.5158000000000004e-05, "loss": 8.7475, "step": 1210500 }, { "epoch": 9.69, "learning_rate": 4.5156000000000006e-05, "loss": 8.7412, "step": 1211000 }, { "epoch": 9.69, "learning_rate": 4.5154e-05, "loss": 8.7561, "step": 1211500 }, { "epoch": 9.7, "learning_rate": 4.5152e-05, "loss": 8.7699, "step": 1212000 }, { "epoch": 9.7, "learning_rate": 4.5150000000000006e-05, "loss": 8.7617, "step": 1212500 }, { "epoch": 9.7, "learning_rate": 4.5148e-05, "loss": 8.7651, "step": 1213000 }, { "epoch": 9.71, "learning_rate": 4.5146e-05, "loss": 8.7427, "step": 1213500 }, { "epoch": 9.71, "learning_rate": 4.5144e-05, "loss": 8.7549, "step": 1214000 }, { "epoch": 9.72, "learning_rate": 4.5142e-05, "loss": 8.738, "step": 1214500 }, { "epoch": 9.72, "learning_rate": 4.5140000000000005e-05, "loss": 8.7624, "step": 1215000 }, { "epoch": 9.72, "learning_rate": 4.5138e-05, "loss": 8.7464, "step": 1215500 }, { "epoch": 9.73, "learning_rate": 4.5136e-05, "loss": 8.7426, "step": 1216000 }, { "epoch": 9.73, "learning_rate": 4.5134000000000005e-05, "loss": 8.7413, "step": 1216500 }, { "epoch": 9.74, "learning_rate": 4.5132e-05, "loss": 8.7588, "step": 1217000 }, { "epoch": 9.74, "learning_rate": 4.513e-05, "loss": 8.764, "step": 1217500 }, { "epoch": 9.74, "learning_rate": 4.5128000000000006e-05, "loss": 8.7513, "step": 1218000 }, { "epoch": 9.75, "learning_rate": 4.5126e-05, "loss": 8.7579, "step": 1218500 }, { "epoch": 9.75, "learning_rate": 4.5124e-05, "loss": 8.7561, "step": 1219000 }, { "epoch": 9.76, "learning_rate": 4.5122000000000006e-05, "loss": 8.75, "step": 1219500 }, { "epoch": 9.76, "learning_rate": 4.512e-05, "loss": 8.7743, "step": 1220000 }, { "epoch": 9.76, "learning_rate": 4.5118000000000004e-05, "loss": 8.7463, "step": 1220500 }, { "epoch": 9.77, "learning_rate": 4.5116e-05, "loss": 8.7353, "step": 1221000 }, { "epoch": 9.77, "learning_rate": 4.5114e-05, "loss": 8.7342, "step": 1221500 }, { "epoch": 9.78, "learning_rate": 4.5112000000000004e-05, "loss": 8.7473, "step": 1222000 }, { "epoch": 9.78, "learning_rate": 4.511e-05, "loss": 8.7529, "step": 1222500 }, { "epoch": 9.78, "learning_rate": 4.5108e-05, "loss": 8.742, "step": 1223000 }, { "epoch": 9.79, "learning_rate": 4.5106000000000005e-05, "loss": 8.7661, "step": 1223500 }, { "epoch": 9.79, "learning_rate": 4.5104e-05, "loss": 8.75, "step": 1224000 }, { "epoch": 9.8, "learning_rate": 4.5102e-05, "loss": 8.7571, "step": 1224500 }, { "epoch": 9.8, "learning_rate": 4.5100000000000005e-05, "loss": 8.7476, "step": 1225000 }, { "epoch": 9.8, "learning_rate": 4.5098e-05, "loss": 8.7513, "step": 1225500 }, { "epoch": 9.81, "learning_rate": 4.5096e-05, "loss": 8.7628, "step": 1226000 }, { "epoch": 9.81, "learning_rate": 4.5094000000000006e-05, "loss": 8.7401, "step": 1226500 }, { "epoch": 9.82, "learning_rate": 4.5092e-05, "loss": 8.7629, "step": 1227000 }, { "epoch": 9.82, "learning_rate": 4.5090000000000004e-05, "loss": 8.7687, "step": 1227500 }, { "epoch": 9.82, "learning_rate": 4.5088e-05, "loss": 8.7505, "step": 1228000 }, { "epoch": 9.83, "learning_rate": 4.5086e-05, "loss": 8.7703, "step": 1228500 }, { "epoch": 9.83, "learning_rate": 4.5084000000000004e-05, "loss": 8.766, "step": 1229000 }, { "epoch": 9.84, "learning_rate": 4.5082e-05, "loss": 8.7343, "step": 1229500 }, { "epoch": 9.84, "learning_rate": 4.508e-05, "loss": 8.7361, "step": 1230000 }, { "epoch": 9.84, "learning_rate": 4.5078000000000004e-05, "loss": 8.7571, "step": 1230500 }, { "epoch": 9.85, "learning_rate": 4.5076e-05, "loss": 8.7444, "step": 1231000 }, { "epoch": 9.85, "learning_rate": 4.5074e-05, "loss": 8.7603, "step": 1231500 }, { "epoch": 9.86, "learning_rate": 4.5072000000000005e-05, "loss": 8.756, "step": 1232000 }, { "epoch": 9.86, "learning_rate": 4.507e-05, "loss": 8.7534, "step": 1232500 }, { "epoch": 9.86, "learning_rate": 4.5068e-05, "loss": 8.781, "step": 1233000 }, { "epoch": 9.87, "learning_rate": 4.5066000000000005e-05, "loss": 8.7568, "step": 1233500 }, { "epoch": 9.87, "learning_rate": 4.5064e-05, "loss": 8.7763, "step": 1234000 }, { "epoch": 9.88, "learning_rate": 4.5062e-05, "loss": 8.749, "step": 1234500 }, { "epoch": 9.88, "learning_rate": 4.506e-05, "loss": 8.7366, "step": 1235000 }, { "epoch": 9.88, "learning_rate": 4.5058e-05, "loss": 8.7589, "step": 1235500 }, { "epoch": 9.89, "learning_rate": 4.5056000000000004e-05, "loss": 8.7579, "step": 1236000 }, { "epoch": 9.89, "learning_rate": 4.5054e-05, "loss": 8.7722, "step": 1236500 }, { "epoch": 9.9, "learning_rate": 4.5052e-05, "loss": 8.727, "step": 1237000 }, { "epoch": 9.9, "learning_rate": 4.5050000000000004e-05, "loss": 8.7718, "step": 1237500 }, { "epoch": 9.9, "learning_rate": 4.5048e-05, "loss": 8.7313, "step": 1238000 }, { "epoch": 9.91, "learning_rate": 4.5046e-05, "loss": 8.7492, "step": 1238500 }, { "epoch": 9.91, "learning_rate": 4.5044000000000004e-05, "loss": 8.7967, "step": 1239000 }, { "epoch": 9.92, "learning_rate": 4.5042e-05, "loss": 8.7551, "step": 1239500 }, { "epoch": 9.92, "learning_rate": 4.504e-05, "loss": 8.7504, "step": 1240000 }, { "epoch": 9.92, "learning_rate": 4.5038000000000005e-05, "loss": 8.7443, "step": 1240500 }, { "epoch": 9.93, "learning_rate": 4.5036e-05, "loss": 8.7583, "step": 1241000 }, { "epoch": 9.93, "learning_rate": 4.5034e-05, "loss": 8.758, "step": 1241500 }, { "epoch": 9.94, "learning_rate": 4.5032000000000005e-05, "loss": 8.7476, "step": 1242000 }, { "epoch": 9.94, "learning_rate": 4.503e-05, "loss": 8.7657, "step": 1242500 }, { "epoch": 9.94, "learning_rate": 4.5028e-05, "loss": 8.7427, "step": 1243000 }, { "epoch": 9.95, "learning_rate": 4.5026e-05, "loss": 8.7378, "step": 1243500 }, { "epoch": 9.95, "learning_rate": 4.5024e-05, "loss": 8.7701, "step": 1244000 }, { "epoch": 9.96, "learning_rate": 4.5022000000000004e-05, "loss": 8.7502, "step": 1244500 }, { "epoch": 9.96, "learning_rate": 4.502e-05, "loss": 8.7618, "step": 1245000 }, { "epoch": 9.96, "learning_rate": 4.5018e-05, "loss": 8.7589, "step": 1245500 }, { "epoch": 9.97, "learning_rate": 4.5016000000000004e-05, "loss": 8.7485, "step": 1246000 }, { "epoch": 9.97, "learning_rate": 4.5014e-05, "loss": 8.7366, "step": 1246500 }, { "epoch": 9.98, "learning_rate": 4.5012e-05, "loss": 8.7703, "step": 1247000 }, { "epoch": 9.98, "learning_rate": 4.5010000000000004e-05, "loss": 8.7501, "step": 1247500 }, { "epoch": 9.98, "learning_rate": 4.5008e-05, "loss": 8.746, "step": 1248000 }, { "epoch": 9.99, "learning_rate": 4.5006e-05, "loss": 8.7472, "step": 1248500 }, { "epoch": 9.99, "learning_rate": 4.5004000000000005e-05, "loss": 8.7504, "step": 1249000 }, { "epoch": 10.0, "learning_rate": 4.5002e-05, "loss": 8.7757, "step": 1249500 }, { "epoch": 10.0, "learning_rate": 4.5e-05, "loss": 8.7316, "step": 1250000 }, { "epoch": 10.0, "learning_rate": 4.4998e-05, "loss": 8.7486, "step": 1250500 }, { "epoch": 10.01, "learning_rate": 4.499600000000001e-05, "loss": 8.7491, "step": 1251000 }, { "epoch": 10.01, "learning_rate": 4.4994e-05, "loss": 8.7608, "step": 1251500 }, { "epoch": 10.02, "learning_rate": 4.4992e-05, "loss": 8.7386, "step": 1252000 }, { "epoch": 10.02, "learning_rate": 4.499e-05, "loss": 8.7712, "step": 1252500 }, { "epoch": 10.02, "learning_rate": 4.4988000000000004e-05, "loss": 8.7476, "step": 1253000 }, { "epoch": 10.03, "learning_rate": 4.4986e-05, "loss": 8.7362, "step": 1253500 }, { "epoch": 10.03, "learning_rate": 4.4984e-05, "loss": 8.7626, "step": 1254000 }, { "epoch": 10.04, "learning_rate": 4.4982000000000004e-05, "loss": 8.7514, "step": 1254500 }, { "epoch": 10.04, "learning_rate": 4.498e-05, "loss": 8.7538, "step": 1255000 }, { "epoch": 10.04, "learning_rate": 4.4978e-05, "loss": 8.7414, "step": 1255500 }, { "epoch": 10.05, "learning_rate": 4.4976000000000004e-05, "loss": 8.7334, "step": 1256000 }, { "epoch": 10.05, "learning_rate": 4.4974e-05, "loss": 8.7561, "step": 1256500 }, { "epoch": 10.06, "learning_rate": 4.4972e-05, "loss": 8.7475, "step": 1257000 }, { "epoch": 10.06, "learning_rate": 4.497e-05, "loss": 8.7516, "step": 1257500 }, { "epoch": 10.06, "learning_rate": 4.496800000000001e-05, "loss": 8.7553, "step": 1258000 }, { "epoch": 10.07, "learning_rate": 4.4966e-05, "loss": 8.7506, "step": 1258500 }, { "epoch": 10.07, "learning_rate": 4.4964e-05, "loss": 8.7691, "step": 1259000 }, { "epoch": 10.08, "learning_rate": 4.4962e-05, "loss": 8.7588, "step": 1259500 }, { "epoch": 10.08, "learning_rate": 4.496e-05, "loss": 8.743, "step": 1260000 }, { "epoch": 10.08, "learning_rate": 4.4958e-05, "loss": 8.7509, "step": 1260500 }, { "epoch": 10.09, "learning_rate": 4.4956e-05, "loss": 8.725, "step": 1261000 }, { "epoch": 10.09, "learning_rate": 4.4954000000000004e-05, "loss": 8.7418, "step": 1261500 }, { "epoch": 10.1, "learning_rate": 4.4952e-05, "loss": 8.7484, "step": 1262000 }, { "epoch": 10.1, "learning_rate": 4.495e-05, "loss": 8.7488, "step": 1262500 }, { "epoch": 10.1, "learning_rate": 4.4948000000000004e-05, "loss": 8.7425, "step": 1263000 }, { "epoch": 10.11, "learning_rate": 4.4946000000000006e-05, "loss": 8.7532, "step": 1263500 }, { "epoch": 10.11, "learning_rate": 4.4944e-05, "loss": 8.7418, "step": 1264000 }, { "epoch": 10.12, "learning_rate": 4.4942e-05, "loss": 8.751, "step": 1264500 }, { "epoch": 10.12, "learning_rate": 4.494000000000001e-05, "loss": 8.7276, "step": 1265000 }, { "epoch": 10.12, "learning_rate": 4.4938e-05, "loss": 8.7572, "step": 1265500 }, { "epoch": 10.13, "learning_rate": 4.4936e-05, "loss": 8.7432, "step": 1266000 }, { "epoch": 10.13, "learning_rate": 4.493400000000001e-05, "loss": 8.7424, "step": 1266500 }, { "epoch": 10.14, "learning_rate": 4.4932e-05, "loss": 8.7472, "step": 1267000 }, { "epoch": 10.14, "learning_rate": 4.493e-05, "loss": 8.7392, "step": 1267500 }, { "epoch": 10.14, "learning_rate": 4.4928e-05, "loss": 8.7519, "step": 1268000 }, { "epoch": 10.15, "learning_rate": 4.4926e-05, "loss": 8.7638, "step": 1268500 }, { "epoch": 10.15, "learning_rate": 4.4924e-05, "loss": 8.7501, "step": 1269000 }, { "epoch": 10.16, "learning_rate": 4.4922e-05, "loss": 8.7683, "step": 1269500 }, { "epoch": 10.16, "learning_rate": 4.4920000000000004e-05, "loss": 8.7598, "step": 1270000 }, { "epoch": 10.16, "learning_rate": 4.4918000000000006e-05, "loss": 8.7727, "step": 1270500 }, { "epoch": 10.17, "learning_rate": 4.4916e-05, "loss": 8.7493, "step": 1271000 }, { "epoch": 10.17, "learning_rate": 4.4914e-05, "loss": 8.739, "step": 1271500 }, { "epoch": 10.18, "learning_rate": 4.4912000000000006e-05, "loss": 8.7506, "step": 1272000 }, { "epoch": 10.18, "learning_rate": 4.491e-05, "loss": 8.7398, "step": 1272500 }, { "epoch": 10.18, "learning_rate": 4.4908e-05, "loss": 8.7718, "step": 1273000 }, { "epoch": 10.19, "learning_rate": 4.490600000000001e-05, "loss": 8.7543, "step": 1273500 }, { "epoch": 10.19, "learning_rate": 4.4904e-05, "loss": 8.7506, "step": 1274000 }, { "epoch": 10.2, "learning_rate": 4.4902e-05, "loss": 8.7429, "step": 1274500 }, { "epoch": 10.2, "learning_rate": 4.49e-05, "loss": 8.7661, "step": 1275000 }, { "epoch": 10.2, "learning_rate": 4.4898e-05, "loss": 8.7537, "step": 1275500 }, { "epoch": 10.21, "learning_rate": 4.4896000000000005e-05, "loss": 8.748, "step": 1276000 }, { "epoch": 10.21, "learning_rate": 4.4894e-05, "loss": 8.7568, "step": 1276500 }, { "epoch": 10.22, "learning_rate": 4.4892e-05, "loss": 8.7393, "step": 1277000 }, { "epoch": 10.22, "learning_rate": 4.4890000000000006e-05, "loss": 8.7437, "step": 1277500 }, { "epoch": 10.22, "learning_rate": 4.4888e-05, "loss": 8.7522, "step": 1278000 }, { "epoch": 10.23, "learning_rate": 4.4886000000000004e-05, "loss": 8.7452, "step": 1278500 }, { "epoch": 10.23, "learning_rate": 4.4884000000000006e-05, "loss": 8.7661, "step": 1279000 }, { "epoch": 10.24, "learning_rate": 4.4882e-05, "loss": 8.7415, "step": 1279500 }, { "epoch": 10.24, "learning_rate": 4.488e-05, "loss": 8.7468, "step": 1280000 }, { "epoch": 10.24, "learning_rate": 4.4878000000000006e-05, "loss": 8.7479, "step": 1280500 }, { "epoch": 10.25, "learning_rate": 4.4876e-05, "loss": 8.7557, "step": 1281000 }, { "epoch": 10.25, "learning_rate": 4.4874000000000004e-05, "loss": 8.748, "step": 1281500 }, { "epoch": 10.26, "learning_rate": 4.4872e-05, "loss": 8.7691, "step": 1282000 }, { "epoch": 10.26, "learning_rate": 4.487e-05, "loss": 8.7609, "step": 1282500 }, { "epoch": 10.26, "learning_rate": 4.4868000000000005e-05, "loss": 8.7534, "step": 1283000 }, { "epoch": 10.27, "learning_rate": 4.4866e-05, "loss": 8.7656, "step": 1283500 }, { "epoch": 10.27, "learning_rate": 4.4864e-05, "loss": 8.7617, "step": 1284000 }, { "epoch": 10.28, "learning_rate": 4.4862000000000005e-05, "loss": 8.7521, "step": 1284500 }, { "epoch": 10.28, "learning_rate": 4.486e-05, "loss": 8.7457, "step": 1285000 }, { "epoch": 10.28, "learning_rate": 4.4858e-05, "loss": 8.7383, "step": 1285500 }, { "epoch": 10.29, "learning_rate": 4.4856000000000006e-05, "loss": 8.7522, "step": 1286000 }, { "epoch": 10.29, "learning_rate": 4.4854e-05, "loss": 8.7657, "step": 1286500 }, { "epoch": 10.3, "learning_rate": 4.4852e-05, "loss": 8.732, "step": 1287000 }, { "epoch": 10.3, "learning_rate": 4.4850000000000006e-05, "loss": 8.7506, "step": 1287500 }, { "epoch": 10.3, "learning_rate": 4.4848e-05, "loss": 8.7716, "step": 1288000 }, { "epoch": 10.31, "learning_rate": 4.4846000000000004e-05, "loss": 8.7645, "step": 1288500 }, { "epoch": 10.31, "learning_rate": 4.4844e-05, "loss": 8.7553, "step": 1289000 }, { "epoch": 10.32, "learning_rate": 4.4842e-05, "loss": 8.7594, "step": 1289500 }, { "epoch": 10.32, "learning_rate": 4.4840000000000004e-05, "loss": 8.7474, "step": 1290000 }, { "epoch": 10.32, "learning_rate": 4.4838e-05, "loss": 8.7435, "step": 1290500 }, { "epoch": 10.33, "learning_rate": 4.4836e-05, "loss": 8.7434, "step": 1291000 }, { "epoch": 10.33, "learning_rate": 4.4834000000000005e-05, "loss": 8.7312, "step": 1291500 }, { "epoch": 10.34, "learning_rate": 4.4832e-05, "loss": 8.7442, "step": 1292000 }, { "epoch": 10.34, "learning_rate": 4.483e-05, "loss": 8.7685, "step": 1292500 }, { "epoch": 10.34, "learning_rate": 4.4828000000000005e-05, "loss": 8.7544, "step": 1293000 }, { "epoch": 10.35, "learning_rate": 4.4826e-05, "loss": 8.7363, "step": 1293500 }, { "epoch": 10.35, "learning_rate": 4.4824e-05, "loss": 8.7604, "step": 1294000 }, { "epoch": 10.36, "learning_rate": 4.4822000000000006e-05, "loss": 8.7621, "step": 1294500 }, { "epoch": 10.36, "learning_rate": 4.482e-05, "loss": 8.7646, "step": 1295000 }, { "epoch": 10.36, "learning_rate": 4.4818000000000004e-05, "loss": 8.7448, "step": 1295500 }, { "epoch": 10.37, "learning_rate": 4.4816e-05, "loss": 8.734, "step": 1296000 }, { "epoch": 10.37, "learning_rate": 4.4814e-05, "loss": 8.7444, "step": 1296500 }, { "epoch": 10.38, "learning_rate": 4.4812000000000004e-05, "loss": 8.7412, "step": 1297000 }, { "epoch": 10.38, "learning_rate": 4.481e-05, "loss": 8.7465, "step": 1297500 }, { "epoch": 10.38, "learning_rate": 4.4808e-05, "loss": 8.7382, "step": 1298000 }, { "epoch": 10.39, "learning_rate": 4.4806000000000004e-05, "loss": 8.7529, "step": 1298500 }, { "epoch": 10.39, "learning_rate": 4.4804e-05, "loss": 8.747, "step": 1299000 }, { "epoch": 10.4, "learning_rate": 4.4802e-05, "loss": 8.7407, "step": 1299500 }, { "epoch": 10.4, "learning_rate": 4.4800000000000005e-05, "loss": 8.7768, "step": 1300000 }, { "epoch": 10.4, "learning_rate": 4.4798e-05, "loss": 8.7576, "step": 1300500 }, { "epoch": 10.41, "learning_rate": 4.4796e-05, "loss": 8.7533, "step": 1301000 }, { "epoch": 10.41, "learning_rate": 4.4794000000000005e-05, "loss": 8.756, "step": 1301500 }, { "epoch": 10.42, "learning_rate": 4.4792e-05, "loss": 8.7583, "step": 1302000 }, { "epoch": 10.42, "learning_rate": 4.479e-05, "loss": 8.752, "step": 1302500 }, { "epoch": 10.42, "learning_rate": 4.4788000000000006e-05, "loss": 8.7591, "step": 1303000 }, { "epoch": 10.43, "learning_rate": 4.4786e-05, "loss": 8.7581, "step": 1303500 }, { "epoch": 10.43, "learning_rate": 4.4784000000000004e-05, "loss": 8.769, "step": 1304000 }, { "epoch": 10.44, "learning_rate": 4.4782e-05, "loss": 8.7675, "step": 1304500 }, { "epoch": 10.44, "learning_rate": 4.478e-05, "loss": 8.7409, "step": 1305000 }, { "epoch": 10.44, "learning_rate": 4.4778000000000004e-05, "loss": 8.7417, "step": 1305500 }, { "epoch": 10.45, "learning_rate": 4.4776e-05, "loss": 8.7418, "step": 1306000 }, { "epoch": 10.45, "learning_rate": 4.4774e-05, "loss": 8.7714, "step": 1306500 }, { "epoch": 10.46, "learning_rate": 4.4772000000000004e-05, "loss": 8.7588, "step": 1307000 }, { "epoch": 10.46, "learning_rate": 4.477e-05, "loss": 8.763, "step": 1307500 }, { "epoch": 10.46, "learning_rate": 4.4768e-05, "loss": 8.7577, "step": 1308000 }, { "epoch": 10.47, "learning_rate": 4.4766000000000005e-05, "loss": 8.7762, "step": 1308500 }, { "epoch": 10.47, "learning_rate": 4.4764e-05, "loss": 8.7591, "step": 1309000 }, { "epoch": 10.48, "learning_rate": 4.4762e-05, "loss": 8.763, "step": 1309500 }, { "epoch": 10.48, "learning_rate": 4.4760000000000005e-05, "loss": 8.765, "step": 1310000 }, { "epoch": 10.48, "learning_rate": 4.4758e-05, "loss": 8.7434, "step": 1310500 }, { "epoch": 10.49, "learning_rate": 4.4756e-05, "loss": 8.751, "step": 1311000 }, { "epoch": 10.49, "learning_rate": 4.4754e-05, "loss": 8.731, "step": 1311500 }, { "epoch": 10.5, "learning_rate": 4.4752e-05, "loss": 8.7378, "step": 1312000 }, { "epoch": 10.5, "learning_rate": 4.4750000000000004e-05, "loss": 8.7532, "step": 1312500 }, { "epoch": 10.5, "learning_rate": 4.4748e-05, "loss": 8.7495, "step": 1313000 }, { "epoch": 10.51, "learning_rate": 4.4746e-05, "loss": 8.7376, "step": 1313500 }, { "epoch": 10.51, "learning_rate": 4.4744000000000004e-05, "loss": 8.7111, "step": 1314000 }, { "epoch": 10.52, "learning_rate": 4.4742e-05, "loss": 8.7363, "step": 1314500 }, { "epoch": 10.52, "learning_rate": 4.474e-05, "loss": 8.7466, "step": 1315000 }, { "epoch": 10.52, "learning_rate": 4.4738000000000004e-05, "loss": 8.7222, "step": 1315500 }, { "epoch": 10.53, "learning_rate": 4.4736e-05, "loss": 8.7612, "step": 1316000 }, { "epoch": 10.53, "learning_rate": 4.4734e-05, "loss": 8.7468, "step": 1316500 }, { "epoch": 10.54, "learning_rate": 4.4732000000000005e-05, "loss": 8.7356, "step": 1317000 }, { "epoch": 10.54, "learning_rate": 4.473e-05, "loss": 8.7417, "step": 1317500 }, { "epoch": 10.54, "learning_rate": 4.4728e-05, "loss": 8.7459, "step": 1318000 }, { "epoch": 10.55, "learning_rate": 4.4726e-05, "loss": 8.7464, "step": 1318500 }, { "epoch": 10.55, "learning_rate": 4.472400000000001e-05, "loss": 8.7606, "step": 1319000 }, { "epoch": 10.56, "learning_rate": 4.4722e-05, "loss": 8.7577, "step": 1319500 }, { "epoch": 10.56, "learning_rate": 4.472e-05, "loss": 8.7309, "step": 1320000 }, { "epoch": 10.56, "learning_rate": 4.4718e-05, "loss": 8.7502, "step": 1320500 }, { "epoch": 10.57, "learning_rate": 4.4716000000000004e-05, "loss": 8.7573, "step": 1321000 }, { "epoch": 10.57, "learning_rate": 4.4714e-05, "loss": 8.7536, "step": 1321500 }, { "epoch": 10.58, "learning_rate": 4.4712e-05, "loss": 8.7677, "step": 1322000 }, { "epoch": 10.58, "learning_rate": 4.4710000000000004e-05, "loss": 8.7475, "step": 1322500 }, { "epoch": 10.58, "learning_rate": 4.4708e-05, "loss": 8.755, "step": 1323000 }, { "epoch": 10.59, "learning_rate": 4.4706e-05, "loss": 8.759, "step": 1323500 }, { "epoch": 10.59, "learning_rate": 4.4704000000000004e-05, "loss": 8.7721, "step": 1324000 }, { "epoch": 10.6, "learning_rate": 4.4702e-05, "loss": 8.774, "step": 1324500 }, { "epoch": 10.6, "learning_rate": 4.47e-05, "loss": 8.7667, "step": 1325000 }, { "epoch": 10.6, "learning_rate": 4.4698e-05, "loss": 8.7633, "step": 1325500 }, { "epoch": 10.61, "learning_rate": 4.469600000000001e-05, "loss": 8.7306, "step": 1326000 }, { "epoch": 10.61, "learning_rate": 4.4694e-05, "loss": 8.7729, "step": 1326500 }, { "epoch": 10.62, "learning_rate": 4.4692e-05, "loss": 8.76, "step": 1327000 }, { "epoch": 10.62, "learning_rate": 4.469e-05, "loss": 8.7561, "step": 1327500 }, { "epoch": 10.62, "learning_rate": 4.4688e-05, "loss": 8.7621, "step": 1328000 }, { "epoch": 10.63, "learning_rate": 4.4686e-05, "loss": 8.7667, "step": 1328500 }, { "epoch": 10.63, "learning_rate": 4.4684e-05, "loss": 8.7432, "step": 1329000 }, { "epoch": 10.64, "learning_rate": 4.4682000000000004e-05, "loss": 8.7685, "step": 1329500 }, { "epoch": 10.64, "learning_rate": 4.468e-05, "loss": 8.7511, "step": 1330000 }, { "epoch": 10.64, "learning_rate": 4.4678e-05, "loss": 8.7638, "step": 1330500 }, { "epoch": 10.65, "learning_rate": 4.4676000000000004e-05, "loss": 8.746, "step": 1331000 }, { "epoch": 10.65, "learning_rate": 4.4674000000000006e-05, "loss": 8.7702, "step": 1331500 }, { "epoch": 10.66, "learning_rate": 4.4672e-05, "loss": 8.7635, "step": 1332000 }, { "epoch": 10.66, "learning_rate": 4.467e-05, "loss": 8.7553, "step": 1332500 }, { "epoch": 10.66, "learning_rate": 4.466800000000001e-05, "loss": 8.7495, "step": 1333000 }, { "epoch": 10.67, "learning_rate": 4.4666e-05, "loss": 8.7388, "step": 1333500 }, { "epoch": 10.67, "learning_rate": 4.4664e-05, "loss": 8.7695, "step": 1334000 }, { "epoch": 10.68, "learning_rate": 4.466200000000001e-05, "loss": 8.7322, "step": 1334500 }, { "epoch": 10.68, "learning_rate": 4.466e-05, "loss": 8.7468, "step": 1335000 }, { "epoch": 10.68, "learning_rate": 4.4658e-05, "loss": 8.7514, "step": 1335500 }, { "epoch": 10.69, "learning_rate": 4.4656e-05, "loss": 8.7401, "step": 1336000 }, { "epoch": 10.69, "learning_rate": 4.4654e-05, "loss": 8.7458, "step": 1336500 }, { "epoch": 10.7, "learning_rate": 4.4652e-05, "loss": 8.755, "step": 1337000 }, { "epoch": 10.7, "learning_rate": 4.465e-05, "loss": 8.7579, "step": 1337500 }, { "epoch": 10.7, "learning_rate": 4.4648000000000003e-05, "loss": 8.7352, "step": 1338000 }, { "epoch": 10.71, "learning_rate": 4.4646000000000006e-05, "loss": 8.7561, "step": 1338500 }, { "epoch": 10.71, "learning_rate": 4.4644e-05, "loss": 8.7419, "step": 1339000 }, { "epoch": 10.72, "learning_rate": 4.4642e-05, "loss": 8.7292, "step": 1339500 }, { "epoch": 10.72, "learning_rate": 4.4640000000000006e-05, "loss": 8.7426, "step": 1340000 }, { "epoch": 10.72, "learning_rate": 4.4638e-05, "loss": 8.757, "step": 1340500 }, { "epoch": 10.73, "learning_rate": 4.4636e-05, "loss": 8.7442, "step": 1341000 }, { "epoch": 10.73, "learning_rate": 4.463400000000001e-05, "loss": 8.7702, "step": 1341500 }, { "epoch": 10.74, "learning_rate": 4.4632e-05, "loss": 8.7565, "step": 1342000 }, { "epoch": 10.74, "learning_rate": 4.463e-05, "loss": 8.7528, "step": 1342500 }, { "epoch": 10.74, "learning_rate": 4.4628e-05, "loss": 8.7672, "step": 1343000 }, { "epoch": 10.75, "learning_rate": 4.4626e-05, "loss": 8.7587, "step": 1343500 }, { "epoch": 10.75, "learning_rate": 4.4624000000000005e-05, "loss": 8.7554, "step": 1344000 }, { "epoch": 10.76, "learning_rate": 4.4622e-05, "loss": 8.7491, "step": 1344500 }, { "epoch": 10.76, "learning_rate": 4.462e-05, "loss": 8.7462, "step": 1345000 }, { "epoch": 10.76, "learning_rate": 4.4618000000000005e-05, "loss": 8.7392, "step": 1345500 }, { "epoch": 10.77, "learning_rate": 4.4616e-05, "loss": 8.7454, "step": 1346000 }, { "epoch": 10.77, "learning_rate": 4.4614000000000003e-05, "loss": 8.7351, "step": 1346500 }, { "epoch": 10.78, "learning_rate": 4.4612000000000006e-05, "loss": 8.745, "step": 1347000 }, { "epoch": 10.78, "learning_rate": 4.461e-05, "loss": 8.7493, "step": 1347500 }, { "epoch": 10.78, "learning_rate": 4.4608e-05, "loss": 8.7473, "step": 1348000 }, { "epoch": 10.79, "learning_rate": 4.4606000000000006e-05, "loss": 8.7405, "step": 1348500 }, { "epoch": 10.79, "learning_rate": 4.4604e-05, "loss": 8.7556, "step": 1349000 }, { "epoch": 10.8, "learning_rate": 4.4602000000000004e-05, "loss": 8.7554, "step": 1349500 }, { "epoch": 10.8, "learning_rate": 4.46e-05, "loss": 8.7442, "step": 1350000 }, { "epoch": 10.8, "learning_rate": 4.4598e-05, "loss": 8.7508, "step": 1350500 }, { "epoch": 10.81, "learning_rate": 4.4596000000000005e-05, "loss": 8.7515, "step": 1351000 }, { "epoch": 10.81, "learning_rate": 4.4594e-05, "loss": 8.7492, "step": 1351500 }, { "epoch": 10.82, "learning_rate": 4.4592e-05, "loss": 8.7634, "step": 1352000 }, { "epoch": 10.82, "learning_rate": 4.4590000000000005e-05, "loss": 8.7576, "step": 1352500 }, { "epoch": 10.82, "learning_rate": 4.4588e-05, "loss": 8.7397, "step": 1353000 }, { "epoch": 10.83, "learning_rate": 4.4586e-05, "loss": 8.7492, "step": 1353500 }, { "epoch": 10.83, "learning_rate": 4.4584000000000005e-05, "loss": 8.7698, "step": 1354000 }, { "epoch": 10.84, "learning_rate": 4.4582e-05, "loss": 8.7434, "step": 1354500 }, { "epoch": 10.84, "learning_rate": 4.458e-05, "loss": 8.7575, "step": 1355000 }, { "epoch": 10.84, "learning_rate": 4.4578000000000006e-05, "loss": 8.7407, "step": 1355500 }, { "epoch": 10.85, "learning_rate": 4.4576e-05, "loss": 8.7592, "step": 1356000 }, { "epoch": 10.85, "learning_rate": 4.4574000000000004e-05, "loss": 8.7484, "step": 1356500 }, { "epoch": 10.86, "learning_rate": 4.4572e-05, "loss": 8.7321, "step": 1357000 }, { "epoch": 10.86, "learning_rate": 4.457e-05, "loss": 8.7448, "step": 1357500 }, { "epoch": 10.86, "learning_rate": 4.4568000000000004e-05, "loss": 8.749, "step": 1358000 }, { "epoch": 10.87, "learning_rate": 4.4566e-05, "loss": 8.7558, "step": 1358500 }, { "epoch": 10.87, "learning_rate": 4.4564e-05, "loss": 8.744, "step": 1359000 }, { "epoch": 10.88, "learning_rate": 4.4562000000000005e-05, "loss": 8.7449, "step": 1359500 }, { "epoch": 10.88, "learning_rate": 4.456e-05, "loss": 8.7477, "step": 1360000 }, { "epoch": 10.88, "learning_rate": 4.4558e-05, "loss": 8.7486, "step": 1360500 }, { "epoch": 10.89, "learning_rate": 4.4556000000000005e-05, "loss": 8.7497, "step": 1361000 }, { "epoch": 10.89, "learning_rate": 4.4554e-05, "loss": 8.7356, "step": 1361500 }, { "epoch": 10.9, "learning_rate": 4.4552e-05, "loss": 8.7535, "step": 1362000 }, { "epoch": 10.9, "learning_rate": 4.4550000000000005e-05, "loss": 8.7483, "step": 1362500 }, { "epoch": 10.9, "learning_rate": 4.4548e-05, "loss": 8.7514, "step": 1363000 }, { "epoch": 10.91, "learning_rate": 4.4546000000000003e-05, "loss": 8.7608, "step": 1363500 }, { "epoch": 10.91, "learning_rate": 4.4544e-05, "loss": 8.7634, "step": 1364000 }, { "epoch": 10.92, "learning_rate": 4.4542e-05, "loss": 8.7567, "step": 1364500 }, { "epoch": 10.92, "learning_rate": 4.4540000000000004e-05, "loss": 8.7514, "step": 1365000 }, { "epoch": 10.92, "learning_rate": 4.4538e-05, "loss": 8.7566, "step": 1365500 }, { "epoch": 10.93, "learning_rate": 4.4536e-05, "loss": 8.7592, "step": 1366000 }, { "epoch": 10.93, "learning_rate": 4.4534000000000004e-05, "loss": 8.7475, "step": 1366500 }, { "epoch": 10.94, "learning_rate": 4.4532e-05, "loss": 8.7805, "step": 1367000 }, { "epoch": 10.94, "learning_rate": 4.453e-05, "loss": 8.7385, "step": 1367500 }, { "epoch": 10.94, "learning_rate": 4.4528000000000005e-05, "loss": 8.7232, "step": 1368000 }, { "epoch": 10.95, "learning_rate": 4.4526e-05, "loss": 8.7508, "step": 1368500 }, { "epoch": 10.95, "learning_rate": 4.4524e-05, "loss": 8.744, "step": 1369000 }, { "epoch": 10.96, "learning_rate": 4.4522000000000005e-05, "loss": 8.731, "step": 1369500 }, { "epoch": 10.96, "learning_rate": 4.452e-05, "loss": 8.762, "step": 1370000 }, { "epoch": 10.96, "learning_rate": 4.4518e-05, "loss": 8.7562, "step": 1370500 }, { "epoch": 10.97, "learning_rate": 4.4516000000000005e-05, "loss": 8.7491, "step": 1371000 }, { "epoch": 10.97, "learning_rate": 4.4514e-05, "loss": 8.7641, "step": 1371500 }, { "epoch": 10.98, "learning_rate": 4.4512000000000003e-05, "loss": 8.7582, "step": 1372000 }, { "epoch": 10.98, "learning_rate": 4.451e-05, "loss": 8.7495, "step": 1372500 }, { "epoch": 10.98, "learning_rate": 4.4508e-05, "loss": 8.7671, "step": 1373000 }, { "epoch": 10.99, "learning_rate": 4.4506000000000004e-05, "loss": 8.7609, "step": 1373500 }, { "epoch": 10.99, "learning_rate": 4.4504e-05, "loss": 8.7479, "step": 1374000 }, { "epoch": 11.0, "learning_rate": 4.4502e-05, "loss": 8.7651, "step": 1374500 }, { "epoch": 11.0, "learning_rate": 4.4500000000000004e-05, "loss": 8.7619, "step": 1375000 }, { "epoch": 11.0, "learning_rate": 4.4498e-05, "loss": 8.7495, "step": 1375500 }, { "epoch": 11.01, "learning_rate": 4.4496e-05, "loss": 8.739, "step": 1376000 }, { "epoch": 11.01, "learning_rate": 4.4494000000000005e-05, "loss": 8.7601, "step": 1376500 }, { "epoch": 11.02, "learning_rate": 4.4492e-05, "loss": 8.7418, "step": 1377000 }, { "epoch": 11.02, "learning_rate": 4.449e-05, "loss": 8.762, "step": 1377500 }, { "epoch": 11.02, "learning_rate": 4.4488000000000005e-05, "loss": 8.7446, "step": 1378000 }, { "epoch": 11.03, "learning_rate": 4.4486e-05, "loss": 8.7475, "step": 1378500 }, { "epoch": 11.03, "learning_rate": 4.4484e-05, "loss": 8.7526, "step": 1379000 }, { "epoch": 11.04, "learning_rate": 4.4482e-05, "loss": 8.7603, "step": 1379500 }, { "epoch": 11.04, "learning_rate": 4.448e-05, "loss": 8.7501, "step": 1380000 }, { "epoch": 11.04, "learning_rate": 4.4478000000000003e-05, "loss": 8.7627, "step": 1380500 }, { "epoch": 11.05, "learning_rate": 4.4476e-05, "loss": 8.751, "step": 1381000 }, { "epoch": 11.05, "learning_rate": 4.4474e-05, "loss": 8.7548, "step": 1381500 }, { "epoch": 11.06, "learning_rate": 4.4472000000000004e-05, "loss": 8.7402, "step": 1382000 }, { "epoch": 11.06, "learning_rate": 4.447e-05, "loss": 8.7351, "step": 1382500 }, { "epoch": 11.06, "learning_rate": 4.4468e-05, "loss": 8.7449, "step": 1383000 }, { "epoch": 11.07, "learning_rate": 4.4466000000000004e-05, "loss": 8.7463, "step": 1383500 }, { "epoch": 11.07, "learning_rate": 4.4464e-05, "loss": 8.7525, "step": 1384000 }, { "epoch": 11.08, "learning_rate": 4.4462e-05, "loss": 8.7338, "step": 1384500 }, { "epoch": 11.08, "learning_rate": 4.4460000000000005e-05, "loss": 8.7442, "step": 1385000 }, { "epoch": 11.08, "learning_rate": 4.4458e-05, "loss": 8.774, "step": 1385500 }, { "epoch": 11.09, "learning_rate": 4.4456e-05, "loss": 8.756, "step": 1386000 }, { "epoch": 11.09, "learning_rate": 4.4454e-05, "loss": 8.7652, "step": 1386500 }, { "epoch": 11.1, "learning_rate": 4.445200000000001e-05, "loss": 8.7432, "step": 1387000 }, { "epoch": 11.1, "learning_rate": 4.445e-05, "loss": 8.7401, "step": 1387500 }, { "epoch": 11.1, "learning_rate": 4.4448e-05, "loss": 8.7525, "step": 1388000 }, { "epoch": 11.11, "learning_rate": 4.4446e-05, "loss": 8.7566, "step": 1388500 }, { "epoch": 11.11, "learning_rate": 4.4444000000000003e-05, "loss": 8.7397, "step": 1389000 }, { "epoch": 11.12, "learning_rate": 4.4442e-05, "loss": 8.7674, "step": 1389500 }, { "epoch": 11.12, "learning_rate": 4.444e-05, "loss": 8.7632, "step": 1390000 }, { "epoch": 11.12, "learning_rate": 4.4438000000000004e-05, "loss": 8.7602, "step": 1390500 }, { "epoch": 11.13, "learning_rate": 4.4436e-05, "loss": 8.7396, "step": 1391000 }, { "epoch": 11.13, "learning_rate": 4.4434e-05, "loss": 8.7363, "step": 1391500 }, { "epoch": 11.14, "learning_rate": 4.4432000000000004e-05, "loss": 8.753, "step": 1392000 }, { "epoch": 11.14, "learning_rate": 4.443e-05, "loss": 8.7388, "step": 1392500 }, { "epoch": 11.14, "learning_rate": 4.4428e-05, "loss": 8.7485, "step": 1393000 }, { "epoch": 11.15, "learning_rate": 4.4426e-05, "loss": 8.7398, "step": 1393500 }, { "epoch": 11.15, "learning_rate": 4.442400000000001e-05, "loss": 8.7579, "step": 1394000 }, { "epoch": 11.16, "learning_rate": 4.4422e-05, "loss": 8.7367, "step": 1394500 }, { "epoch": 11.16, "learning_rate": 4.442e-05, "loss": 8.75, "step": 1395000 }, { "epoch": 11.16, "learning_rate": 4.441800000000001e-05, "loss": 8.7512, "step": 1395500 }, { "epoch": 11.17, "learning_rate": 4.4416e-05, "loss": 8.7411, "step": 1396000 }, { "epoch": 11.17, "learning_rate": 4.4414e-05, "loss": 8.7536, "step": 1396500 }, { "epoch": 11.18, "learning_rate": 4.4412e-05, "loss": 8.7613, "step": 1397000 }, { "epoch": 11.18, "learning_rate": 4.4410000000000003e-05, "loss": 8.7491, "step": 1397500 }, { "epoch": 11.18, "learning_rate": 4.4408e-05, "loss": 8.7605, "step": 1398000 }, { "epoch": 11.19, "learning_rate": 4.4406e-05, "loss": 8.755, "step": 1398500 }, { "epoch": 11.19, "learning_rate": 4.4404000000000004e-05, "loss": 8.7564, "step": 1399000 }, { "epoch": 11.2, "learning_rate": 4.4402000000000006e-05, "loss": 8.7718, "step": 1399500 }, { "epoch": 11.2, "learning_rate": 4.44e-05, "loss": 8.7557, "step": 1400000 }, { "epoch": 11.2, "learning_rate": 4.4398e-05, "loss": 8.7577, "step": 1400500 }, { "epoch": 11.21, "learning_rate": 4.4396000000000007e-05, "loss": 8.7426, "step": 1401000 }, { "epoch": 11.21, "learning_rate": 4.4394e-05, "loss": 8.7415, "step": 1401500 }, { "epoch": 11.22, "learning_rate": 4.4392e-05, "loss": 8.7518, "step": 1402000 }, { "epoch": 11.22, "learning_rate": 4.439000000000001e-05, "loss": 8.7307, "step": 1402500 }, { "epoch": 11.22, "learning_rate": 4.4388e-05, "loss": 8.7649, "step": 1403000 }, { "epoch": 11.23, "learning_rate": 4.4386e-05, "loss": 8.7386, "step": 1403500 }, { "epoch": 11.23, "learning_rate": 4.4384e-05, "loss": 8.7484, "step": 1404000 }, { "epoch": 11.24, "learning_rate": 4.4382e-05, "loss": 8.7429, "step": 1404500 }, { "epoch": 11.24, "learning_rate": 4.438e-05, "loss": 8.7512, "step": 1405000 }, { "epoch": 11.24, "learning_rate": 4.4378e-05, "loss": 8.7591, "step": 1405500 }, { "epoch": 11.25, "learning_rate": 4.4376e-05, "loss": 8.7523, "step": 1406000 }, { "epoch": 11.25, "learning_rate": 4.4374000000000006e-05, "loss": 8.7513, "step": 1406500 }, { "epoch": 11.26, "learning_rate": 4.4372e-05, "loss": 8.7553, "step": 1407000 }, { "epoch": 11.26, "learning_rate": 4.4370000000000004e-05, "loss": 8.7608, "step": 1407500 }, { "epoch": 11.26, "learning_rate": 4.4368000000000006e-05, "loss": 8.7569, "step": 1408000 }, { "epoch": 11.27, "learning_rate": 4.4366e-05, "loss": 8.7441, "step": 1408500 }, { "epoch": 11.27, "learning_rate": 4.4364e-05, "loss": 8.753, "step": 1409000 }, { "epoch": 11.28, "learning_rate": 4.4362000000000007e-05, "loss": 8.7453, "step": 1409500 }, { "epoch": 11.28, "learning_rate": 4.436e-05, "loss": 8.7572, "step": 1410000 }, { "epoch": 11.28, "learning_rate": 4.4358e-05, "loss": 8.7452, "step": 1410500 }, { "epoch": 11.29, "learning_rate": 4.4356e-05, "loss": 8.7639, "step": 1411000 }, { "epoch": 11.29, "learning_rate": 4.4354e-05, "loss": 8.7531, "step": 1411500 }, { "epoch": 11.3, "learning_rate": 4.4352000000000005e-05, "loss": 8.7525, "step": 1412000 }, { "epoch": 11.3, "learning_rate": 4.435e-05, "loss": 8.744, "step": 1412500 }, { "epoch": 11.3, "learning_rate": 4.4348e-05, "loss": 8.7464, "step": 1413000 }, { "epoch": 11.31, "learning_rate": 4.4346000000000005e-05, "loss": 8.7575, "step": 1413500 }, { "epoch": 11.31, "learning_rate": 4.4344e-05, "loss": 8.7282, "step": 1414000 }, { "epoch": 11.32, "learning_rate": 4.4342e-05, "loss": 8.7584, "step": 1414500 }, { "epoch": 11.32, "learning_rate": 4.4340000000000006e-05, "loss": 8.7682, "step": 1415000 }, { "epoch": 11.32, "learning_rate": 4.4338e-05, "loss": 8.7636, "step": 1415500 }, { "epoch": 11.33, "learning_rate": 4.4336e-05, "loss": 8.7536, "step": 1416000 }, { "epoch": 11.33, "learning_rate": 4.4334000000000006e-05, "loss": 8.7386, "step": 1416500 }, { "epoch": 11.34, "learning_rate": 4.4332e-05, "loss": 8.7463, "step": 1417000 }, { "epoch": 11.34, "learning_rate": 4.4330000000000004e-05, "loss": 8.756, "step": 1417500 }, { "epoch": 11.34, "learning_rate": 4.4328e-05, "loss": 8.7447, "step": 1418000 }, { "epoch": 11.35, "learning_rate": 4.4326e-05, "loss": 8.7585, "step": 1418500 }, { "epoch": 11.35, "learning_rate": 4.4324000000000005e-05, "loss": 8.7485, "step": 1419000 }, { "epoch": 11.36, "learning_rate": 4.4322e-05, "loss": 8.7623, "step": 1419500 }, { "epoch": 11.36, "learning_rate": 4.432e-05, "loss": 8.7691, "step": 1420000 }, { "epoch": 11.36, "learning_rate": 4.4318000000000005e-05, "loss": 8.7728, "step": 1420500 }, { "epoch": 11.37, "learning_rate": 4.4316e-05, "loss": 8.7563, "step": 1421000 }, { "epoch": 11.37, "learning_rate": 4.4314e-05, "loss": 8.7573, "step": 1421500 }, { "epoch": 11.38, "learning_rate": 4.4312000000000005e-05, "loss": 8.7683, "step": 1422000 }, { "epoch": 11.38, "learning_rate": 4.431e-05, "loss": 8.7504, "step": 1422500 }, { "epoch": 11.38, "learning_rate": 4.4307999999999997e-05, "loss": 8.7395, "step": 1423000 }, { "epoch": 11.39, "learning_rate": 4.4306000000000006e-05, "loss": 8.7582, "step": 1423500 }, { "epoch": 11.39, "learning_rate": 4.4304e-05, "loss": 8.7488, "step": 1424000 }, { "epoch": 11.4, "learning_rate": 4.4302000000000004e-05, "loss": 8.753, "step": 1424500 }, { "epoch": 11.4, "learning_rate": 4.43e-05, "loss": 8.751, "step": 1425000 }, { "epoch": 11.4, "learning_rate": 4.4298e-05, "loss": 8.7466, "step": 1425500 }, { "epoch": 11.41, "learning_rate": 4.4296000000000004e-05, "loss": 8.742, "step": 1426000 }, { "epoch": 11.41, "learning_rate": 4.4294e-05, "loss": 8.7412, "step": 1426500 }, { "epoch": 11.42, "learning_rate": 4.4292e-05, "loss": 8.742, "step": 1427000 }, { "epoch": 11.42, "learning_rate": 4.4290000000000005e-05, "loss": 8.754, "step": 1427500 }, { "epoch": 11.42, "learning_rate": 4.4288e-05, "loss": 8.7577, "step": 1428000 }, { "epoch": 11.43, "learning_rate": 4.4286e-05, "loss": 8.7351, "step": 1428500 }, { "epoch": 11.43, "learning_rate": 4.4284000000000005e-05, "loss": 8.7463, "step": 1429000 }, { "epoch": 11.44, "learning_rate": 4.4282e-05, "loss": 8.7633, "step": 1429500 }, { "epoch": 11.44, "learning_rate": 4.428e-05, "loss": 8.7428, "step": 1430000 }, { "epoch": 11.44, "learning_rate": 4.4278000000000005e-05, "loss": 8.7468, "step": 1430500 }, { "epoch": 11.45, "learning_rate": 4.4276e-05, "loss": 8.745, "step": 1431000 }, { "epoch": 11.45, "learning_rate": 4.4274e-05, "loss": 8.7739, "step": 1431500 }, { "epoch": 11.46, "learning_rate": 4.4272000000000006e-05, "loss": 8.7532, "step": 1432000 }, { "epoch": 11.46, "learning_rate": 4.427e-05, "loss": 8.7617, "step": 1432500 }, { "epoch": 11.46, "learning_rate": 4.4268000000000004e-05, "loss": 8.764, "step": 1433000 }, { "epoch": 11.47, "learning_rate": 4.4266e-05, "loss": 8.7445, "step": 1433500 }, { "epoch": 11.47, "learning_rate": 4.4264e-05, "loss": 8.7726, "step": 1434000 }, { "epoch": 11.48, "learning_rate": 4.4262000000000004e-05, "loss": 8.7344, "step": 1434500 }, { "epoch": 11.48, "learning_rate": 4.426e-05, "loss": 8.7348, "step": 1435000 }, { "epoch": 11.48, "learning_rate": 4.4258e-05, "loss": 8.7374, "step": 1435500 }, { "epoch": 11.49, "learning_rate": 4.4256000000000005e-05, "loss": 8.7577, "step": 1436000 }, { "epoch": 11.49, "learning_rate": 4.4254e-05, "loss": 8.778, "step": 1436500 }, { "epoch": 11.5, "learning_rate": 4.4252e-05, "loss": 8.7409, "step": 1437000 }, { "epoch": 11.5, "learning_rate": 4.4250000000000005e-05, "loss": 8.7493, "step": 1437500 }, { "epoch": 11.5, "learning_rate": 4.4248e-05, "loss": 8.7363, "step": 1438000 }, { "epoch": 11.51, "learning_rate": 4.4246e-05, "loss": 8.7439, "step": 1438500 }, { "epoch": 11.51, "learning_rate": 4.4244000000000005e-05, "loss": 8.7526, "step": 1439000 }, { "epoch": 11.52, "learning_rate": 4.4242e-05, "loss": 8.7489, "step": 1439500 }, { "epoch": 11.52, "learning_rate": 4.424e-05, "loss": 8.7562, "step": 1440000 }, { "epoch": 11.52, "learning_rate": 4.4238e-05, "loss": 8.7404, "step": 1440500 }, { "epoch": 11.53, "learning_rate": 4.4236e-05, "loss": 8.7474, "step": 1441000 }, { "epoch": 11.53, "learning_rate": 4.4234000000000004e-05, "loss": 8.7534, "step": 1441500 }, { "epoch": 11.54, "learning_rate": 4.4232e-05, "loss": 8.7588, "step": 1442000 }, { "epoch": 11.54, "learning_rate": 4.423e-05, "loss": 8.7421, "step": 1442500 }, { "epoch": 11.54, "learning_rate": 4.4228000000000004e-05, "loss": 8.7351, "step": 1443000 }, { "epoch": 11.55, "learning_rate": 4.4226e-05, "loss": 8.7537, "step": 1443500 }, { "epoch": 11.55, "learning_rate": 4.4224e-05, "loss": 8.7424, "step": 1444000 }, { "epoch": 11.56, "learning_rate": 4.4222000000000005e-05, "loss": 8.7398, "step": 1444500 }, { "epoch": 11.56, "learning_rate": 4.422e-05, "loss": 8.7445, "step": 1445000 }, { "epoch": 11.56, "learning_rate": 4.4218e-05, "loss": 8.7547, "step": 1445500 }, { "epoch": 11.57, "learning_rate": 4.4216000000000005e-05, "loss": 8.749, "step": 1446000 }, { "epoch": 11.57, "learning_rate": 4.4214e-05, "loss": 8.7404, "step": 1446500 }, { "epoch": 11.58, "learning_rate": 4.4212e-05, "loss": 8.7738, "step": 1447000 }, { "epoch": 11.58, "learning_rate": 4.421e-05, "loss": 8.7605, "step": 1447500 }, { "epoch": 11.58, "learning_rate": 4.4208e-05, "loss": 8.7541, "step": 1448000 }, { "epoch": 11.59, "learning_rate": 4.4206e-05, "loss": 8.7439, "step": 1448500 }, { "epoch": 11.59, "learning_rate": 4.4204e-05, "loss": 8.7479, "step": 1449000 }, { "epoch": 11.6, "learning_rate": 4.4202e-05, "loss": 8.7322, "step": 1449500 }, { "epoch": 11.6, "learning_rate": 4.4200000000000004e-05, "loss": 8.7695, "step": 1450000 }, { "epoch": 11.6, "learning_rate": 4.4198e-05, "loss": 8.7703, "step": 1450500 }, { "epoch": 11.61, "learning_rate": 4.4196e-05, "loss": 8.718, "step": 1451000 }, { "epoch": 11.61, "learning_rate": 4.4194000000000004e-05, "loss": 8.7577, "step": 1451500 }, { "epoch": 11.62, "learning_rate": 4.4192e-05, "loss": 8.7687, "step": 1452000 }, { "epoch": 11.62, "learning_rate": 4.419e-05, "loss": 8.7244, "step": 1452500 }, { "epoch": 11.62, "learning_rate": 4.4188000000000005e-05, "loss": 8.7567, "step": 1453000 }, { "epoch": 11.63, "learning_rate": 4.4186e-05, "loss": 8.7752, "step": 1453500 }, { "epoch": 11.63, "learning_rate": 4.4184e-05, "loss": 8.7544, "step": 1454000 }, { "epoch": 11.64, "learning_rate": 4.4182e-05, "loss": 8.7705, "step": 1454500 }, { "epoch": 11.64, "learning_rate": 4.418000000000001e-05, "loss": 8.7471, "step": 1455000 }, { "epoch": 11.64, "learning_rate": 4.4178e-05, "loss": 8.7386, "step": 1455500 }, { "epoch": 11.65, "learning_rate": 4.4176e-05, "loss": 8.7482, "step": 1456000 }, { "epoch": 11.65, "learning_rate": 4.4174e-05, "loss": 8.756, "step": 1456500 }, { "epoch": 11.66, "learning_rate": 4.4172e-05, "loss": 8.7603, "step": 1457000 }, { "epoch": 11.66, "learning_rate": 4.417e-05, "loss": 8.7548, "step": 1457500 }, { "epoch": 11.66, "learning_rate": 4.4168e-05, "loss": 8.7536, "step": 1458000 }, { "epoch": 11.67, "learning_rate": 4.4166000000000004e-05, "loss": 8.7626, "step": 1458500 }, { "epoch": 11.67, "learning_rate": 4.4164e-05, "loss": 8.74, "step": 1459000 }, { "epoch": 11.68, "learning_rate": 4.4162e-05, "loss": 8.7553, "step": 1459500 }, { "epoch": 11.68, "learning_rate": 4.4160000000000004e-05, "loss": 8.7427, "step": 1460000 }, { "epoch": 11.68, "learning_rate": 4.4158e-05, "loss": 8.76, "step": 1460500 }, { "epoch": 11.69, "learning_rate": 4.4156e-05, "loss": 8.7494, "step": 1461000 }, { "epoch": 11.69, "learning_rate": 4.4154e-05, "loss": 8.757, "step": 1461500 }, { "epoch": 11.7, "learning_rate": 4.415200000000001e-05, "loss": 8.7561, "step": 1462000 }, { "epoch": 11.7, "learning_rate": 4.415e-05, "loss": 8.746, "step": 1462500 }, { "epoch": 11.7, "learning_rate": 4.4148e-05, "loss": 8.7592, "step": 1463000 }, { "epoch": 11.71, "learning_rate": 4.414600000000001e-05, "loss": 8.7444, "step": 1463500 }, { "epoch": 11.71, "learning_rate": 4.4144e-05, "loss": 8.7317, "step": 1464000 }, { "epoch": 11.72, "learning_rate": 4.4142e-05, "loss": 8.7389, "step": 1464500 }, { "epoch": 11.72, "learning_rate": 4.414e-05, "loss": 8.7536, "step": 1465000 }, { "epoch": 11.72, "learning_rate": 4.4138e-05, "loss": 8.7616, "step": 1465500 }, { "epoch": 11.73, "learning_rate": 4.4136e-05, "loss": 8.7499, "step": 1466000 }, { "epoch": 11.73, "learning_rate": 4.4134e-05, "loss": 8.7424, "step": 1466500 }, { "epoch": 11.74, "learning_rate": 4.4132000000000004e-05, "loss": 8.7487, "step": 1467000 }, { "epoch": 11.74, "learning_rate": 4.4130000000000006e-05, "loss": 8.7538, "step": 1467500 }, { "epoch": 11.74, "learning_rate": 4.4128e-05, "loss": 8.753, "step": 1468000 }, { "epoch": 11.75, "learning_rate": 4.4126e-05, "loss": 8.7516, "step": 1468500 }, { "epoch": 11.75, "learning_rate": 4.4124000000000006e-05, "loss": 8.7441, "step": 1469000 }, { "epoch": 11.76, "learning_rate": 4.4122e-05, "loss": 8.7655, "step": 1469500 }, { "epoch": 11.76, "learning_rate": 4.412e-05, "loss": 8.754, "step": 1470000 }, { "epoch": 11.76, "learning_rate": 4.411800000000001e-05, "loss": 8.758, "step": 1470500 }, { "epoch": 11.77, "learning_rate": 4.4116e-05, "loss": 8.7432, "step": 1471000 }, { "epoch": 11.77, "learning_rate": 4.4114e-05, "loss": 8.747, "step": 1471500 }, { "epoch": 11.78, "learning_rate": 4.4112e-05, "loss": 8.7625, "step": 1472000 }, { "epoch": 11.78, "learning_rate": 4.411e-05, "loss": 8.7442, "step": 1472500 }, { "epoch": 11.78, "learning_rate": 4.4108000000000005e-05, "loss": 8.7524, "step": 1473000 }, { "epoch": 11.79, "learning_rate": 4.4106e-05, "loss": 8.7479, "step": 1473500 }, { "epoch": 11.79, "learning_rate": 4.4104e-05, "loss": 8.7682, "step": 1474000 }, { "epoch": 11.8, "learning_rate": 4.4102000000000006e-05, "loss": 8.7588, "step": 1474500 }, { "epoch": 11.8, "learning_rate": 4.41e-05, "loss": 8.751, "step": 1475000 }, { "epoch": 11.8, "learning_rate": 4.4098000000000004e-05, "loss": 8.7314, "step": 1475500 }, { "epoch": 11.81, "learning_rate": 4.4096000000000006e-05, "loss": 8.7312, "step": 1476000 }, { "epoch": 11.81, "learning_rate": 4.4094e-05, "loss": 8.7479, "step": 1476500 }, { "epoch": 11.82, "learning_rate": 4.4092e-05, "loss": 8.7519, "step": 1477000 }, { "epoch": 11.82, "learning_rate": 4.4090000000000006e-05, "loss": 8.7672, "step": 1477500 }, { "epoch": 11.82, "learning_rate": 4.4088e-05, "loss": 8.7468, "step": 1478000 }, { "epoch": 11.83, "learning_rate": 4.4086e-05, "loss": 8.7506, "step": 1478500 }, { "epoch": 11.83, "learning_rate": 4.4084e-05, "loss": 8.7538, "step": 1479000 }, { "epoch": 11.84, "learning_rate": 4.4082e-05, "loss": 8.7441, "step": 1479500 }, { "epoch": 11.84, "learning_rate": 4.4080000000000005e-05, "loss": 8.7463, "step": 1480000 }, { "epoch": 11.84, "learning_rate": 4.4078e-05, "loss": 8.7663, "step": 1480500 }, { "epoch": 11.85, "learning_rate": 4.4076e-05, "loss": 8.7654, "step": 1481000 }, { "epoch": 11.85, "learning_rate": 4.4074000000000005e-05, "loss": 8.7238, "step": 1481500 }, { "epoch": 11.86, "learning_rate": 4.4072e-05, "loss": 8.766, "step": 1482000 }, { "epoch": 11.86, "learning_rate": 4.407e-05, "loss": 8.7482, "step": 1482500 }, { "epoch": 11.86, "learning_rate": 4.4068000000000006e-05, "loss": 8.7479, "step": 1483000 }, { "epoch": 11.87, "learning_rate": 4.4066e-05, "loss": 8.7544, "step": 1483500 }, { "epoch": 11.87, "learning_rate": 4.4064e-05, "loss": 8.7641, "step": 1484000 }, { "epoch": 11.88, "learning_rate": 4.4062000000000006e-05, "loss": 8.7356, "step": 1484500 }, { "epoch": 11.88, "learning_rate": 4.406e-05, "loss": 8.7462, "step": 1485000 }, { "epoch": 11.88, "learning_rate": 4.4058000000000004e-05, "loss": 8.7466, "step": 1485500 }, { "epoch": 11.89, "learning_rate": 4.4056e-05, "loss": 8.7344, "step": 1486000 }, { "epoch": 11.89, "learning_rate": 4.4054e-05, "loss": 8.7447, "step": 1486500 }, { "epoch": 11.9, "learning_rate": 4.4052000000000004e-05, "loss": 8.7527, "step": 1487000 }, { "epoch": 11.9, "learning_rate": 4.405e-05, "loss": 8.7584, "step": 1487500 }, { "epoch": 11.9, "learning_rate": 4.4048e-05, "loss": 8.7659, "step": 1488000 }, { "epoch": 11.91, "learning_rate": 4.4046000000000005e-05, "loss": 8.7597, "step": 1488500 }, { "epoch": 11.91, "learning_rate": 4.4044e-05, "loss": 8.7435, "step": 1489000 }, { "epoch": 11.92, "learning_rate": 4.4042e-05, "loss": 8.7627, "step": 1489500 }, { "epoch": 11.92, "learning_rate": 4.4040000000000005e-05, "loss": 8.7614, "step": 1490000 }, { "epoch": 11.92, "learning_rate": 4.4038e-05, "loss": 8.7599, "step": 1490500 }, { "epoch": 11.93, "learning_rate": 4.4035999999999996e-05, "loss": 8.7522, "step": 1491000 }, { "epoch": 11.93, "learning_rate": 4.4034000000000006e-05, "loss": 8.7675, "step": 1491500 }, { "epoch": 11.94, "learning_rate": 4.4032e-05, "loss": 8.7372, "step": 1492000 }, { "epoch": 11.94, "learning_rate": 4.4030000000000004e-05, "loss": 8.7517, "step": 1492500 }, { "epoch": 11.94, "learning_rate": 4.4028e-05, "loss": 8.7604, "step": 1493000 }, { "epoch": 11.95, "learning_rate": 4.4026e-05, "loss": 8.7581, "step": 1493500 }, { "epoch": 11.95, "learning_rate": 4.4024000000000004e-05, "loss": 8.7335, "step": 1494000 }, { "epoch": 11.96, "learning_rate": 4.4022e-05, "loss": 8.739, "step": 1494500 }, { "epoch": 11.96, "learning_rate": 4.402e-05, "loss": 8.7752, "step": 1495000 }, { "epoch": 11.96, "learning_rate": 4.4018000000000004e-05, "loss": 8.7663, "step": 1495500 }, { "epoch": 11.97, "learning_rate": 4.4016e-05, "loss": 8.7561, "step": 1496000 }, { "epoch": 11.97, "learning_rate": 4.4014e-05, "loss": 8.7701, "step": 1496500 }, { "epoch": 11.98, "learning_rate": 4.4012000000000005e-05, "loss": 8.7699, "step": 1497000 }, { "epoch": 11.98, "learning_rate": 4.401e-05, "loss": 8.7582, "step": 1497500 }, { "epoch": 11.98, "learning_rate": 4.4008e-05, "loss": 8.7699, "step": 1498000 }, { "epoch": 11.99, "learning_rate": 4.4006000000000005e-05, "loss": 8.7405, "step": 1498500 }, { "epoch": 11.99, "learning_rate": 4.4004e-05, "loss": 8.7616, "step": 1499000 }, { "epoch": 12.0, "learning_rate": 4.4002e-05, "loss": 8.7742, "step": 1499500 }, { "epoch": 12.0, "learning_rate": 4.4000000000000006e-05, "loss": 8.7405, "step": 1500000 }, { "epoch": 12.0, "learning_rate": 4.3998e-05, "loss": 8.7583, "step": 1500500 }, { "epoch": 12.01, "learning_rate": 4.3996000000000004e-05, "loss": 8.7542, "step": 1501000 }, { "epoch": 12.01, "learning_rate": 4.3994e-05, "loss": 8.7522, "step": 1501500 }, { "epoch": 12.02, "learning_rate": 4.3992e-05, "loss": 8.7479, "step": 1502000 }, { "epoch": 12.02, "learning_rate": 4.3990000000000004e-05, "loss": 8.7388, "step": 1502500 }, { "epoch": 12.02, "learning_rate": 4.3988e-05, "loss": 8.7719, "step": 1503000 }, { "epoch": 12.03, "learning_rate": 4.3986e-05, "loss": 8.7372, "step": 1503500 }, { "epoch": 12.03, "learning_rate": 4.3984000000000004e-05, "loss": 8.7544, "step": 1504000 }, { "epoch": 12.04, "learning_rate": 4.3982e-05, "loss": 8.7511, "step": 1504500 }, { "epoch": 12.04, "learning_rate": 4.398e-05, "loss": 8.7601, "step": 1505000 }, { "epoch": 12.04, "learning_rate": 4.3978000000000005e-05, "loss": 8.7561, "step": 1505500 }, { "epoch": 12.05, "learning_rate": 4.3976e-05, "loss": 8.7634, "step": 1506000 }, { "epoch": 12.05, "learning_rate": 4.3974e-05, "loss": 8.7426, "step": 1506500 }, { "epoch": 12.06, "learning_rate": 4.3972000000000005e-05, "loss": 8.7575, "step": 1507000 }, { "epoch": 12.06, "learning_rate": 4.397e-05, "loss": 8.7574, "step": 1507500 }, { "epoch": 12.06, "learning_rate": 4.3968e-05, "loss": 8.7464, "step": 1508000 }, { "epoch": 12.07, "learning_rate": 4.3966e-05, "loss": 8.7475, "step": 1508500 }, { "epoch": 12.07, "learning_rate": 4.3964e-05, "loss": 8.7584, "step": 1509000 }, { "epoch": 12.08, "learning_rate": 4.3962000000000004e-05, "loss": 8.76, "step": 1509500 }, { "epoch": 12.08, "learning_rate": 4.396e-05, "loss": 8.7495, "step": 1510000 }, { "epoch": 12.08, "learning_rate": 4.3958e-05, "loss": 8.7351, "step": 1510500 }, { "epoch": 12.09, "learning_rate": 4.3956000000000004e-05, "loss": 8.7373, "step": 1511000 }, { "epoch": 12.09, "learning_rate": 4.3954e-05, "loss": 8.7542, "step": 1511500 }, { "epoch": 12.1, "learning_rate": 4.3952e-05, "loss": 8.7339, "step": 1512000 }, { "epoch": 12.1, "learning_rate": 4.3950000000000004e-05, "loss": 8.7468, "step": 1512500 }, { "epoch": 12.1, "learning_rate": 4.3948e-05, "loss": 8.7617, "step": 1513000 }, { "epoch": 12.11, "learning_rate": 4.3946e-05, "loss": 8.7596, "step": 1513500 }, { "epoch": 12.11, "learning_rate": 4.3944000000000005e-05, "loss": 8.745, "step": 1514000 }, { "epoch": 12.12, "learning_rate": 4.3942e-05, "loss": 8.762, "step": 1514500 }, { "epoch": 12.12, "learning_rate": 4.394e-05, "loss": 8.7502, "step": 1515000 }, { "epoch": 12.12, "learning_rate": 4.3938e-05, "loss": 8.7529, "step": 1515500 }, { "epoch": 12.13, "learning_rate": 4.3936e-05, "loss": 8.7658, "step": 1516000 }, { "epoch": 12.13, "learning_rate": 4.3934e-05, "loss": 8.7617, "step": 1516500 }, { "epoch": 12.14, "learning_rate": 4.3932e-05, "loss": 8.765, "step": 1517000 }, { "epoch": 12.14, "learning_rate": 4.393e-05, "loss": 8.7545, "step": 1517500 }, { "epoch": 12.14, "learning_rate": 4.3928000000000004e-05, "loss": 8.7509, "step": 1518000 }, { "epoch": 12.15, "learning_rate": 4.3926e-05, "loss": 8.7394, "step": 1518500 }, { "epoch": 12.15, "learning_rate": 4.3924e-05, "loss": 8.7646, "step": 1519000 }, { "epoch": 12.16, "learning_rate": 4.3922000000000004e-05, "loss": 8.7599, "step": 1519500 }, { "epoch": 12.16, "learning_rate": 4.392e-05, "loss": 8.7347, "step": 1520000 }, { "epoch": 12.16, "learning_rate": 4.3918e-05, "loss": 8.7409, "step": 1520500 }, { "epoch": 12.17, "learning_rate": 4.3916000000000004e-05, "loss": 8.7582, "step": 1521000 }, { "epoch": 12.17, "learning_rate": 4.3914e-05, "loss": 8.7471, "step": 1521500 }, { "epoch": 12.18, "learning_rate": 4.3912e-05, "loss": 8.7529, "step": 1522000 }, { "epoch": 12.18, "learning_rate": 4.391e-05, "loss": 8.7464, "step": 1522500 }, { "epoch": 12.18, "learning_rate": 4.390800000000001e-05, "loss": 8.7459, "step": 1523000 }, { "epoch": 12.19, "learning_rate": 4.3906e-05, "loss": 8.7509, "step": 1523500 }, { "epoch": 12.19, "learning_rate": 4.3904e-05, "loss": 8.7666, "step": 1524000 }, { "epoch": 12.2, "learning_rate": 4.390200000000001e-05, "loss": 8.7531, "step": 1524500 }, { "epoch": 12.2, "learning_rate": 4.39e-05, "loss": 8.7568, "step": 1525000 }, { "epoch": 12.2, "learning_rate": 4.3898e-05, "loss": 8.7522, "step": 1525500 }, { "epoch": 12.21, "learning_rate": 4.3896e-05, "loss": 8.741, "step": 1526000 }, { "epoch": 12.21, "learning_rate": 4.3894000000000004e-05, "loss": 8.7571, "step": 1526500 }, { "epoch": 12.22, "learning_rate": 4.3892e-05, "loss": 8.7598, "step": 1527000 }, { "epoch": 12.22, "learning_rate": 4.389e-05, "loss": 8.7568, "step": 1527500 }, { "epoch": 12.22, "learning_rate": 4.3888000000000004e-05, "loss": 8.7582, "step": 1528000 }, { "epoch": 12.23, "learning_rate": 4.3886e-05, "loss": 8.7389, "step": 1528500 }, { "epoch": 12.23, "learning_rate": 4.3884e-05, "loss": 8.7288, "step": 1529000 }, { "epoch": 12.24, "learning_rate": 4.3882e-05, "loss": 8.7551, "step": 1529500 }, { "epoch": 12.24, "learning_rate": 4.388000000000001e-05, "loss": 8.7502, "step": 1530000 }, { "epoch": 12.24, "learning_rate": 4.3878e-05, "loss": 8.7444, "step": 1530500 }, { "epoch": 12.25, "learning_rate": 4.3876e-05, "loss": 8.7414, "step": 1531000 }, { "epoch": 12.25, "learning_rate": 4.387400000000001e-05, "loss": 8.7494, "step": 1531500 }, { "epoch": 12.26, "learning_rate": 4.3872e-05, "loss": 8.7671, "step": 1532000 }, { "epoch": 12.26, "learning_rate": 4.387e-05, "loss": 8.7413, "step": 1532500 }, { "epoch": 12.26, "learning_rate": 4.3868e-05, "loss": 8.7387, "step": 1533000 }, { "epoch": 12.27, "learning_rate": 4.3866e-05, "loss": 8.7418, "step": 1533500 }, { "epoch": 12.27, "learning_rate": 4.3864e-05, "loss": 8.7273, "step": 1534000 }, { "epoch": 12.28, "learning_rate": 4.3862e-05, "loss": 8.7359, "step": 1534500 }, { "epoch": 12.28, "learning_rate": 4.3860000000000004e-05, "loss": 8.7633, "step": 1535000 }, { "epoch": 12.28, "learning_rate": 4.3858000000000006e-05, "loss": 8.7437, "step": 1535500 }, { "epoch": 12.29, "learning_rate": 4.3856e-05, "loss": 8.7427, "step": 1536000 }, { "epoch": 12.29, "learning_rate": 4.3854000000000004e-05, "loss": 8.7514, "step": 1536500 }, { "epoch": 12.3, "learning_rate": 4.3852000000000006e-05, "loss": 8.743, "step": 1537000 }, { "epoch": 12.3, "learning_rate": 4.385e-05, "loss": 8.7319, "step": 1537500 }, { "epoch": 12.3, "learning_rate": 4.3848e-05, "loss": 8.7522, "step": 1538000 }, { "epoch": 12.31, "learning_rate": 4.384600000000001e-05, "loss": 8.7458, "step": 1538500 }, { "epoch": 12.31, "learning_rate": 4.3844e-05, "loss": 8.772, "step": 1539000 }, { "epoch": 12.32, "learning_rate": 4.3842e-05, "loss": 8.7506, "step": 1539500 }, { "epoch": 12.32, "learning_rate": 4.384e-05, "loss": 8.7602, "step": 1540000 }, { "epoch": 12.32, "learning_rate": 4.3838e-05, "loss": 8.7438, "step": 1540500 }, { "epoch": 12.33, "learning_rate": 4.3836000000000005e-05, "loss": 8.7674, "step": 1541000 }, { "epoch": 12.33, "learning_rate": 4.3834e-05, "loss": 8.7582, "step": 1541500 }, { "epoch": 12.34, "learning_rate": 4.3832e-05, "loss": 8.7605, "step": 1542000 }, { "epoch": 12.34, "learning_rate": 4.3830000000000006e-05, "loss": 8.7596, "step": 1542500 }, { "epoch": 12.34, "learning_rate": 4.3828e-05, "loss": 8.7523, "step": 1543000 }, { "epoch": 12.35, "learning_rate": 4.3826000000000004e-05, "loss": 8.7665, "step": 1543500 }, { "epoch": 12.35, "learning_rate": 4.3824000000000006e-05, "loss": 8.7597, "step": 1544000 }, { "epoch": 12.36, "learning_rate": 4.3822e-05, "loss": 8.7549, "step": 1544500 }, { "epoch": 12.36, "learning_rate": 4.382e-05, "loss": 8.7812, "step": 1545000 }, { "epoch": 12.36, "learning_rate": 4.3818000000000006e-05, "loss": 8.7583, "step": 1545500 }, { "epoch": 12.37, "learning_rate": 4.3816e-05, "loss": 8.7573, "step": 1546000 }, { "epoch": 12.37, "learning_rate": 4.3814e-05, "loss": 8.7629, "step": 1546500 }, { "epoch": 12.38, "learning_rate": 4.3812e-05, "loss": 8.7322, "step": 1547000 }, { "epoch": 12.38, "learning_rate": 4.381e-05, "loss": 8.7611, "step": 1547500 }, { "epoch": 12.38, "learning_rate": 4.3808000000000005e-05, "loss": 8.7548, "step": 1548000 }, { "epoch": 12.39, "learning_rate": 4.3806e-05, "loss": 8.7422, "step": 1548500 }, { "epoch": 12.39, "learning_rate": 4.3804e-05, "loss": 8.7558, "step": 1549000 }, { "epoch": 12.4, "learning_rate": 4.3802000000000005e-05, "loss": 8.759, "step": 1549500 }, { "epoch": 12.4, "learning_rate": 4.38e-05, "loss": 8.7739, "step": 1550000 }, { "epoch": 12.4, "learning_rate": 4.3798e-05, "loss": 8.7625, "step": 1550500 }, { "epoch": 12.41, "learning_rate": 4.3796000000000006e-05, "loss": 8.7385, "step": 1551000 }, { "epoch": 12.41, "learning_rate": 4.3794e-05, "loss": 8.7452, "step": 1551500 }, { "epoch": 12.42, "learning_rate": 4.3792e-05, "loss": 8.7543, "step": 1552000 }, { "epoch": 12.42, "learning_rate": 4.3790000000000006e-05, "loss": 8.7569, "step": 1552500 }, { "epoch": 12.42, "learning_rate": 4.3788e-05, "loss": 8.7516, "step": 1553000 }, { "epoch": 12.43, "learning_rate": 4.3786000000000004e-05, "loss": 8.7466, "step": 1553500 }, { "epoch": 12.43, "learning_rate": 4.3784e-05, "loss": 8.7404, "step": 1554000 }, { "epoch": 12.44, "learning_rate": 4.3782e-05, "loss": 8.7515, "step": 1554500 }, { "epoch": 12.44, "learning_rate": 4.3780000000000004e-05, "loss": 8.764, "step": 1555000 }, { "epoch": 12.44, "learning_rate": 4.3778e-05, "loss": 8.7413, "step": 1555500 }, { "epoch": 12.45, "learning_rate": 4.3776e-05, "loss": 8.748, "step": 1556000 }, { "epoch": 12.45, "learning_rate": 4.3774000000000005e-05, "loss": 8.7584, "step": 1556500 }, { "epoch": 12.46, "learning_rate": 4.3772e-05, "loss": 8.7628, "step": 1557000 }, { "epoch": 12.46, "learning_rate": 4.377e-05, "loss": 8.7479, "step": 1557500 }, { "epoch": 12.46, "learning_rate": 4.3768000000000005e-05, "loss": 8.7568, "step": 1558000 }, { "epoch": 12.47, "learning_rate": 4.3766e-05, "loss": 8.7535, "step": 1558500 }, { "epoch": 12.47, "learning_rate": 4.3763999999999996e-05, "loss": 8.7434, "step": 1559000 }, { "epoch": 12.48, "learning_rate": 4.3762000000000006e-05, "loss": 8.753, "step": 1559500 }, { "epoch": 12.48, "learning_rate": 4.376e-05, "loss": 8.7405, "step": 1560000 }, { "epoch": 12.48, "learning_rate": 4.3758000000000004e-05, "loss": 8.7429, "step": 1560500 }, { "epoch": 12.49, "learning_rate": 4.3756e-05, "loss": 8.7708, "step": 1561000 }, { "epoch": 12.49, "learning_rate": 4.3754e-05, "loss": 8.7516, "step": 1561500 }, { "epoch": 12.5, "learning_rate": 4.3752000000000004e-05, "loss": 8.759, "step": 1562000 }, { "epoch": 12.5, "learning_rate": 4.375e-05, "loss": 8.7542, "step": 1562500 }, { "epoch": 12.5, "learning_rate": 4.3748e-05, "loss": 8.7573, "step": 1563000 }, { "epoch": 12.51, "learning_rate": 4.3746000000000004e-05, "loss": 8.7517, "step": 1563500 }, { "epoch": 12.51, "learning_rate": 4.3744e-05, "loss": 8.7648, "step": 1564000 }, { "epoch": 12.52, "learning_rate": 4.3742e-05, "loss": 8.7557, "step": 1564500 }, { "epoch": 12.52, "learning_rate": 4.3740000000000005e-05, "loss": 8.7548, "step": 1565000 }, { "epoch": 12.52, "learning_rate": 4.3738e-05, "loss": 8.7577, "step": 1565500 }, { "epoch": 12.53, "learning_rate": 4.3736e-05, "loss": 8.7465, "step": 1566000 }, { "epoch": 12.53, "learning_rate": 4.3734000000000005e-05, "loss": 8.7572, "step": 1566500 }, { "epoch": 12.54, "learning_rate": 4.3732e-05, "loss": 8.7344, "step": 1567000 }, { "epoch": 12.54, "learning_rate": 4.373e-05, "loss": 8.7518, "step": 1567500 }, { "epoch": 12.54, "learning_rate": 4.3728000000000006e-05, "loss": 8.7459, "step": 1568000 }, { "epoch": 12.55, "learning_rate": 4.3726e-05, "loss": 8.7448, "step": 1568500 }, { "epoch": 12.55, "learning_rate": 4.3724000000000004e-05, "loss": 8.7537, "step": 1569000 }, { "epoch": 12.56, "learning_rate": 4.3722e-05, "loss": 8.7372, "step": 1569500 }, { "epoch": 12.56, "learning_rate": 4.372e-05, "loss": 8.7438, "step": 1570000 }, { "epoch": 12.56, "learning_rate": 4.3718000000000004e-05, "loss": 8.7518, "step": 1570500 }, { "epoch": 12.57, "learning_rate": 4.3716e-05, "loss": 8.7591, "step": 1571000 }, { "epoch": 12.57, "learning_rate": 4.3714e-05, "loss": 8.7569, "step": 1571500 }, { "epoch": 12.58, "learning_rate": 4.3712000000000004e-05, "loss": 8.7659, "step": 1572000 }, { "epoch": 12.58, "learning_rate": 4.371e-05, "loss": 8.747, "step": 1572500 }, { "epoch": 12.58, "learning_rate": 4.3708e-05, "loss": 8.7539, "step": 1573000 }, { "epoch": 12.59, "learning_rate": 4.3706000000000005e-05, "loss": 8.7363, "step": 1573500 }, { "epoch": 12.59, "learning_rate": 4.3704e-05, "loss": 8.775, "step": 1574000 }, { "epoch": 12.6, "learning_rate": 4.3702e-05, "loss": 8.7433, "step": 1574500 }, { "epoch": 12.6, "learning_rate": 4.3700000000000005e-05, "loss": 8.7354, "step": 1575000 }, { "epoch": 12.6, "learning_rate": 4.3698e-05, "loss": 8.7518, "step": 1575500 }, { "epoch": 12.61, "learning_rate": 4.3696e-05, "loss": 8.745, "step": 1576000 }, { "epoch": 12.61, "learning_rate": 4.3694e-05, "loss": 8.7519, "step": 1576500 }, { "epoch": 12.62, "learning_rate": 4.3692e-05, "loss": 8.7414, "step": 1577000 }, { "epoch": 12.62, "learning_rate": 4.3690000000000004e-05, "loss": 8.757, "step": 1577500 }, { "epoch": 12.62, "learning_rate": 4.3688e-05, "loss": 8.7596, "step": 1578000 }, { "epoch": 12.63, "learning_rate": 4.3686e-05, "loss": 8.7414, "step": 1578500 }, { "epoch": 12.63, "learning_rate": 4.3684000000000004e-05, "loss": 8.7527, "step": 1579000 }, { "epoch": 12.64, "learning_rate": 4.3682e-05, "loss": 8.7562, "step": 1579500 }, { "epoch": 12.64, "learning_rate": 4.368e-05, "loss": 8.7458, "step": 1580000 }, { "epoch": 12.64, "learning_rate": 4.3678000000000004e-05, "loss": 8.7608, "step": 1580500 }, { "epoch": 12.65, "learning_rate": 4.3676e-05, "loss": 8.7401, "step": 1581000 }, { "epoch": 12.65, "learning_rate": 4.3674e-05, "loss": 8.7478, "step": 1581500 }, { "epoch": 12.66, "learning_rate": 4.3672000000000005e-05, "loss": 8.746, "step": 1582000 }, { "epoch": 12.66, "learning_rate": 4.367e-05, "loss": 8.7363, "step": 1582500 }, { "epoch": 12.66, "learning_rate": 4.3668e-05, "loss": 8.7604, "step": 1583000 }, { "epoch": 12.67, "learning_rate": 4.3666e-05, "loss": 8.7571, "step": 1583500 }, { "epoch": 12.67, "learning_rate": 4.3664e-05, "loss": 8.7312, "step": 1584000 }, { "epoch": 12.68, "learning_rate": 4.3662e-05, "loss": 8.7495, "step": 1584500 }, { "epoch": 12.68, "learning_rate": 4.366e-05, "loss": 8.7594, "step": 1585000 }, { "epoch": 12.68, "learning_rate": 4.3658e-05, "loss": 8.7324, "step": 1585500 }, { "epoch": 12.69, "learning_rate": 4.3656000000000004e-05, "loss": 8.7521, "step": 1586000 }, { "epoch": 12.69, "learning_rate": 4.3654e-05, "loss": 8.7497, "step": 1586500 }, { "epoch": 12.7, "learning_rate": 4.3652e-05, "loss": 8.74, "step": 1587000 }, { "epoch": 12.7, "learning_rate": 4.3650000000000004e-05, "loss": 8.7529, "step": 1587500 }, { "epoch": 12.7, "learning_rate": 4.3648e-05, "loss": 8.7492, "step": 1588000 }, { "epoch": 12.71, "learning_rate": 4.3646e-05, "loss": 8.7467, "step": 1588500 }, { "epoch": 12.71, "learning_rate": 4.3644000000000004e-05, "loss": 8.7646, "step": 1589000 }, { "epoch": 12.72, "learning_rate": 4.3642e-05, "loss": 8.7673, "step": 1589500 }, { "epoch": 12.72, "learning_rate": 4.364e-05, "loss": 8.7382, "step": 1590000 }, { "epoch": 12.72, "learning_rate": 4.3638e-05, "loss": 8.7568, "step": 1590500 }, { "epoch": 12.73, "learning_rate": 4.363600000000001e-05, "loss": 8.7544, "step": 1591000 }, { "epoch": 12.73, "learning_rate": 4.3634e-05, "loss": 8.7266, "step": 1591500 }, { "epoch": 12.74, "learning_rate": 4.3632e-05, "loss": 8.7493, "step": 1592000 }, { "epoch": 12.74, "learning_rate": 4.363000000000001e-05, "loss": 8.7719, "step": 1592500 }, { "epoch": 12.74, "learning_rate": 4.3628e-05, "loss": 8.7408, "step": 1593000 }, { "epoch": 12.75, "learning_rate": 4.3626e-05, "loss": 8.7542, "step": 1593500 }, { "epoch": 12.75, "learning_rate": 4.3624e-05, "loss": 8.7657, "step": 1594000 }, { "epoch": 12.76, "learning_rate": 4.3622000000000003e-05, "loss": 8.7509, "step": 1594500 }, { "epoch": 12.76, "learning_rate": 4.362e-05, "loss": 8.7383, "step": 1595000 }, { "epoch": 12.76, "learning_rate": 4.3618e-05, "loss": 8.7467, "step": 1595500 }, { "epoch": 12.77, "learning_rate": 4.3616000000000004e-05, "loss": 8.7405, "step": 1596000 }, { "epoch": 12.77, "learning_rate": 4.3614000000000006e-05, "loss": 8.7469, "step": 1596500 }, { "epoch": 12.78, "learning_rate": 4.3612e-05, "loss": 8.7497, "step": 1597000 }, { "epoch": 12.78, "learning_rate": 4.361e-05, "loss": 8.742, "step": 1597500 }, { "epoch": 12.78, "learning_rate": 4.360800000000001e-05, "loss": 8.7363, "step": 1598000 }, { "epoch": 12.79, "learning_rate": 4.3606e-05, "loss": 8.7687, "step": 1598500 }, { "epoch": 12.79, "learning_rate": 4.3604e-05, "loss": 8.7612, "step": 1599000 }, { "epoch": 12.8, "learning_rate": 4.360200000000001e-05, "loss": 8.7464, "step": 1599500 }, { "epoch": 12.8, "learning_rate": 4.36e-05, "loss": 8.7537, "step": 1600000 }, { "epoch": 12.8, "learning_rate": 4.3598e-05, "loss": 8.751, "step": 1600500 }, { "epoch": 12.81, "learning_rate": 4.3596e-05, "loss": 8.7465, "step": 1601000 }, { "epoch": 12.81, "learning_rate": 4.3594e-05, "loss": 8.7463, "step": 1601500 }, { "epoch": 12.82, "learning_rate": 4.3592e-05, "loss": 8.7493, "step": 1602000 }, { "epoch": 12.82, "learning_rate": 4.359e-05, "loss": 8.746, "step": 1602500 }, { "epoch": 12.82, "learning_rate": 4.3588000000000003e-05, "loss": 8.7793, "step": 1603000 }, { "epoch": 12.83, "learning_rate": 4.3586000000000006e-05, "loss": 8.7553, "step": 1603500 }, { "epoch": 12.83, "learning_rate": 4.3584e-05, "loss": 8.7534, "step": 1604000 }, { "epoch": 12.84, "learning_rate": 4.3582000000000004e-05, "loss": 8.7492, "step": 1604500 }, { "epoch": 12.84, "learning_rate": 4.3580000000000006e-05, "loss": 8.7716, "step": 1605000 }, { "epoch": 12.84, "learning_rate": 4.3578e-05, "loss": 8.7547, "step": 1605500 }, { "epoch": 12.85, "learning_rate": 4.3576e-05, "loss": 8.7435, "step": 1606000 }, { "epoch": 12.85, "learning_rate": 4.357400000000001e-05, "loss": 8.7413, "step": 1606500 }, { "epoch": 12.86, "learning_rate": 4.3572e-05, "loss": 8.737, "step": 1607000 }, { "epoch": 12.86, "learning_rate": 4.357e-05, "loss": 8.7563, "step": 1607500 }, { "epoch": 12.86, "learning_rate": 4.3568e-05, "loss": 8.7415, "step": 1608000 }, { "epoch": 12.87, "learning_rate": 4.3566e-05, "loss": 8.7294, "step": 1608500 }, { "epoch": 12.87, "learning_rate": 4.3564000000000005e-05, "loss": 8.7233, "step": 1609000 }, { "epoch": 12.88, "learning_rate": 4.3562e-05, "loss": 8.7542, "step": 1609500 }, { "epoch": 12.88, "learning_rate": 4.356e-05, "loss": 8.7596, "step": 1610000 }, { "epoch": 12.88, "learning_rate": 4.3558000000000005e-05, "loss": 8.7542, "step": 1610500 }, { "epoch": 12.89, "learning_rate": 4.3556e-05, "loss": 8.7634, "step": 1611000 }, { "epoch": 12.89, "learning_rate": 4.3554000000000003e-05, "loss": 8.7519, "step": 1611500 }, { "epoch": 12.9, "learning_rate": 4.3552000000000006e-05, "loss": 8.7538, "step": 1612000 }, { "epoch": 12.9, "learning_rate": 4.355e-05, "loss": 8.7377, "step": 1612500 }, { "epoch": 12.9, "learning_rate": 4.3548e-05, "loss": 8.7727, "step": 1613000 }, { "epoch": 12.91, "learning_rate": 4.3546000000000006e-05, "loss": 8.7723, "step": 1613500 }, { "epoch": 12.91, "learning_rate": 4.3544e-05, "loss": 8.7416, "step": 1614000 }, { "epoch": 12.92, "learning_rate": 4.3542e-05, "loss": 8.7599, "step": 1614500 }, { "epoch": 12.92, "learning_rate": 4.354e-05, "loss": 8.7347, "step": 1615000 }, { "epoch": 12.92, "learning_rate": 4.3538e-05, "loss": 8.7399, "step": 1615500 }, { "epoch": 12.93, "learning_rate": 4.3536000000000005e-05, "loss": 8.756, "step": 1616000 }, { "epoch": 12.93, "learning_rate": 4.3534e-05, "loss": 8.7533, "step": 1616500 }, { "epoch": 12.94, "learning_rate": 4.3532e-05, "loss": 8.7607, "step": 1617000 }, { "epoch": 12.94, "learning_rate": 4.3530000000000005e-05, "loss": 8.7373, "step": 1617500 }, { "epoch": 12.94, "learning_rate": 4.3528e-05, "loss": 8.7729, "step": 1618000 }, { "epoch": 12.95, "learning_rate": 4.3526e-05, "loss": 8.755, "step": 1618500 }, { "epoch": 12.95, "learning_rate": 4.3524000000000005e-05, "loss": 8.7806, "step": 1619000 }, { "epoch": 12.96, "learning_rate": 4.3522e-05, "loss": 8.7542, "step": 1619500 }, { "epoch": 12.96, "learning_rate": 4.352e-05, "loss": 8.7426, "step": 1620000 }, { "epoch": 12.96, "learning_rate": 4.3518000000000006e-05, "loss": 8.7673, "step": 1620500 }, { "epoch": 12.97, "learning_rate": 4.3516e-05, "loss": 8.7386, "step": 1621000 }, { "epoch": 12.97, "learning_rate": 4.3514000000000004e-05, "loss": 8.7731, "step": 1621500 }, { "epoch": 12.98, "learning_rate": 4.3512e-05, "loss": 8.7572, "step": 1622000 }, { "epoch": 12.98, "learning_rate": 4.351e-05, "loss": 8.7541, "step": 1622500 }, { "epoch": 12.98, "learning_rate": 4.3508000000000004e-05, "loss": 8.7225, "step": 1623000 }, { "epoch": 12.99, "learning_rate": 4.3506e-05, "loss": 8.7344, "step": 1623500 }, { "epoch": 12.99, "learning_rate": 4.3504e-05, "loss": 8.7563, "step": 1624000 }, { "epoch": 13.0, "learning_rate": 4.3502000000000005e-05, "loss": 8.7478, "step": 1624500 }, { "epoch": 13.0, "learning_rate": 4.35e-05, "loss": 8.7496, "step": 1625000 }, { "epoch": 13.0, "learning_rate": 4.3498e-05, "loss": 8.7351, "step": 1625500 }, { "epoch": 13.01, "learning_rate": 4.3496000000000005e-05, "loss": 8.7438, "step": 1626000 }, { "epoch": 13.01, "learning_rate": 4.3494e-05, "loss": 8.7563, "step": 1626500 }, { "epoch": 13.02, "learning_rate": 4.3491999999999996e-05, "loss": 8.7474, "step": 1627000 }, { "epoch": 13.02, "learning_rate": 4.3490000000000005e-05, "loss": 8.7578, "step": 1627500 }, { "epoch": 13.02, "learning_rate": 4.3488e-05, "loss": 8.7564, "step": 1628000 }, { "epoch": 13.03, "learning_rate": 4.3486000000000003e-05, "loss": 8.7243, "step": 1628500 }, { "epoch": 13.03, "learning_rate": 4.3484000000000006e-05, "loss": 8.7614, "step": 1629000 }, { "epoch": 13.04, "learning_rate": 4.3482e-05, "loss": 8.7562, "step": 1629500 }, { "epoch": 13.04, "learning_rate": 4.3480000000000004e-05, "loss": 8.7481, "step": 1630000 }, { "epoch": 13.04, "learning_rate": 4.3478e-05, "loss": 8.7468, "step": 1630500 }, { "epoch": 13.05, "learning_rate": 4.3476e-05, "loss": 8.7579, "step": 1631000 }, { "epoch": 13.05, "learning_rate": 4.3474000000000004e-05, "loss": 8.7516, "step": 1631500 }, { "epoch": 13.06, "learning_rate": 4.3472e-05, "loss": 8.7367, "step": 1632000 }, { "epoch": 13.06, "learning_rate": 4.347e-05, "loss": 8.743, "step": 1632500 }, { "epoch": 13.06, "learning_rate": 4.3468000000000005e-05, "loss": 8.7284, "step": 1633000 }, { "epoch": 13.07, "learning_rate": 4.3466e-05, "loss": 8.7424, "step": 1633500 }, { "epoch": 13.07, "learning_rate": 4.3464e-05, "loss": 8.7613, "step": 1634000 }, { "epoch": 13.08, "learning_rate": 4.3462000000000005e-05, "loss": 8.7464, "step": 1634500 }, { "epoch": 13.08, "learning_rate": 4.346e-05, "loss": 8.7342, "step": 1635000 }, { "epoch": 13.08, "learning_rate": 4.3458e-05, "loss": 8.7531, "step": 1635500 }, { "epoch": 13.09, "learning_rate": 4.3456000000000005e-05, "loss": 8.7604, "step": 1636000 }, { "epoch": 13.09, "learning_rate": 4.3454e-05, "loss": 8.7586, "step": 1636500 }, { "epoch": 13.1, "learning_rate": 4.3452000000000003e-05, "loss": 8.753, "step": 1637000 }, { "epoch": 13.1, "learning_rate": 4.345e-05, "loss": 8.7723, "step": 1637500 }, { "epoch": 13.1, "learning_rate": 4.3448e-05, "loss": 8.7842, "step": 1638000 }, { "epoch": 13.11, "learning_rate": 4.3446000000000004e-05, "loss": 8.7481, "step": 1638500 }, { "epoch": 13.11, "learning_rate": 4.3444e-05, "loss": 8.7622, "step": 1639000 }, { "epoch": 13.12, "learning_rate": 4.3442e-05, "loss": 8.7361, "step": 1639500 }, { "epoch": 13.12, "learning_rate": 4.3440000000000004e-05, "loss": 8.7626, "step": 1640000 }, { "epoch": 13.12, "learning_rate": 4.3438e-05, "loss": 8.7587, "step": 1640500 }, { "epoch": 13.13, "learning_rate": 4.3436e-05, "loss": 8.758, "step": 1641000 }, { "epoch": 13.13, "learning_rate": 4.3434000000000005e-05, "loss": 8.7505, "step": 1641500 }, { "epoch": 13.14, "learning_rate": 4.3432e-05, "loss": 8.7561, "step": 1642000 }, { "epoch": 13.14, "learning_rate": 4.343e-05, "loss": 8.7633, "step": 1642500 }, { "epoch": 13.14, "learning_rate": 4.3428000000000005e-05, "loss": 8.7615, "step": 1643000 }, { "epoch": 13.15, "learning_rate": 4.3426e-05, "loss": 8.7403, "step": 1643500 }, { "epoch": 13.15, "learning_rate": 4.3424e-05, "loss": 8.7325, "step": 1644000 }, { "epoch": 13.16, "learning_rate": 4.3422e-05, "loss": 8.7468, "step": 1644500 }, { "epoch": 13.16, "learning_rate": 4.342e-05, "loss": 8.7512, "step": 1645000 }, { "epoch": 13.16, "learning_rate": 4.3418000000000003e-05, "loss": 8.7498, "step": 1645500 }, { "epoch": 13.17, "learning_rate": 4.3416e-05, "loss": 8.7646, "step": 1646000 }, { "epoch": 13.17, "learning_rate": 4.3414e-05, "loss": 8.7605, "step": 1646500 }, { "epoch": 13.18, "learning_rate": 4.3412000000000004e-05, "loss": 8.734, "step": 1647000 }, { "epoch": 13.18, "learning_rate": 4.341e-05, "loss": 8.7627, "step": 1647500 }, { "epoch": 13.18, "learning_rate": 4.3408e-05, "loss": 8.759, "step": 1648000 }, { "epoch": 13.19, "learning_rate": 4.3406000000000004e-05, "loss": 8.7504, "step": 1648500 }, { "epoch": 13.19, "learning_rate": 4.3404e-05, "loss": 8.7486, "step": 1649000 }, { "epoch": 13.2, "learning_rate": 4.3402e-05, "loss": 8.7339, "step": 1649500 }, { "epoch": 13.2, "learning_rate": 4.3400000000000005e-05, "loss": 8.7737, "step": 1650000 }, { "epoch": 13.2, "learning_rate": 4.3398e-05, "loss": 8.7673, "step": 1650500 }, { "epoch": 13.21, "learning_rate": 4.3396e-05, "loss": 8.7609, "step": 1651000 }, { "epoch": 13.21, "learning_rate": 4.3394e-05, "loss": 8.7603, "step": 1651500 }, { "epoch": 13.22, "learning_rate": 4.3392e-05, "loss": 8.7494, "step": 1652000 }, { "epoch": 13.22, "learning_rate": 4.339e-05, "loss": 8.7367, "step": 1652500 }, { "epoch": 13.22, "learning_rate": 4.3388e-05, "loss": 8.7559, "step": 1653000 }, { "epoch": 13.23, "learning_rate": 4.338600000000001e-05, "loss": 8.7413, "step": 1653500 }, { "epoch": 13.23, "learning_rate": 4.3384000000000003e-05, "loss": 8.7683, "step": 1654000 }, { "epoch": 13.24, "learning_rate": 4.3382e-05, "loss": 8.7357, "step": 1654500 }, { "epoch": 13.24, "learning_rate": 4.338e-05, "loss": 8.7587, "step": 1655000 }, { "epoch": 13.24, "learning_rate": 4.3378000000000004e-05, "loss": 8.7576, "step": 1655500 }, { "epoch": 13.25, "learning_rate": 4.3376e-05, "loss": 8.7322, "step": 1656000 }, { "epoch": 13.25, "learning_rate": 4.3374e-05, "loss": 8.7577, "step": 1656500 }, { "epoch": 13.26, "learning_rate": 4.3372000000000004e-05, "loss": 8.7452, "step": 1657000 }, { "epoch": 13.26, "learning_rate": 4.337e-05, "loss": 8.7442, "step": 1657500 }, { "epoch": 13.26, "learning_rate": 4.3368e-05, "loss": 8.7528, "step": 1658000 }, { "epoch": 13.27, "learning_rate": 4.3366e-05, "loss": 8.7549, "step": 1658500 }, { "epoch": 13.27, "learning_rate": 4.336400000000001e-05, "loss": 8.7559, "step": 1659000 }, { "epoch": 13.28, "learning_rate": 4.3362e-05, "loss": 8.7701, "step": 1659500 }, { "epoch": 13.28, "learning_rate": 4.336e-05, "loss": 8.7816, "step": 1660000 }, { "epoch": 13.28, "learning_rate": 4.335800000000001e-05, "loss": 8.7502, "step": 1660500 }, { "epoch": 13.29, "learning_rate": 4.3356e-05, "loss": 8.7789, "step": 1661000 }, { "epoch": 13.29, "learning_rate": 4.3354e-05, "loss": 8.749, "step": 1661500 }, { "epoch": 13.3, "learning_rate": 4.3352e-05, "loss": 8.7242, "step": 1662000 }, { "epoch": 13.3, "learning_rate": 4.335e-05, "loss": 8.7619, "step": 1662500 }, { "epoch": 13.3, "learning_rate": 4.3348e-05, "loss": 8.7598, "step": 1663000 }, { "epoch": 13.31, "learning_rate": 4.3346e-05, "loss": 8.7308, "step": 1663500 }, { "epoch": 13.31, "learning_rate": 4.3344000000000004e-05, "loss": 8.7416, "step": 1664000 }, { "epoch": 13.32, "learning_rate": 4.3342000000000006e-05, "loss": 8.7714, "step": 1664500 }, { "epoch": 13.32, "learning_rate": 4.334e-05, "loss": 8.7388, "step": 1665000 }, { "epoch": 13.32, "learning_rate": 4.3338000000000004e-05, "loss": 8.7392, "step": 1665500 }, { "epoch": 13.33, "learning_rate": 4.3336000000000007e-05, "loss": 8.7396, "step": 1666000 }, { "epoch": 13.33, "learning_rate": 4.3334e-05, "loss": 8.7611, "step": 1666500 }, { "epoch": 13.34, "learning_rate": 4.3332e-05, "loss": 8.7696, "step": 1667000 }, { "epoch": 13.34, "learning_rate": 4.333000000000001e-05, "loss": 8.7571, "step": 1667500 }, { "epoch": 13.34, "learning_rate": 4.3328e-05, "loss": 8.764, "step": 1668000 }, { "epoch": 13.35, "learning_rate": 4.3326e-05, "loss": 8.7514, "step": 1668500 }, { "epoch": 13.35, "learning_rate": 4.3324e-05, "loss": 8.744, "step": 1669000 }, { "epoch": 13.36, "learning_rate": 4.3322e-05, "loss": 8.7436, "step": 1669500 }, { "epoch": 13.36, "learning_rate": 4.332e-05, "loss": 8.7441, "step": 1670000 }, { "epoch": 13.36, "learning_rate": 4.3318e-05, "loss": 8.7378, "step": 1670500 }, { "epoch": 13.37, "learning_rate": 4.3316e-05, "loss": 8.7407, "step": 1671000 }, { "epoch": 13.37, "learning_rate": 4.3314000000000006e-05, "loss": 8.7529, "step": 1671500 }, { "epoch": 13.38, "learning_rate": 4.3312e-05, "loss": 8.7275, "step": 1672000 }, { "epoch": 13.38, "learning_rate": 4.3310000000000004e-05, "loss": 8.7435, "step": 1672500 }, { "epoch": 13.38, "learning_rate": 4.3308000000000006e-05, "loss": 8.7619, "step": 1673000 }, { "epoch": 13.39, "learning_rate": 4.3306e-05, "loss": 8.7437, "step": 1673500 }, { "epoch": 13.39, "learning_rate": 4.3304e-05, "loss": 8.7532, "step": 1674000 }, { "epoch": 13.4, "learning_rate": 4.3302000000000007e-05, "loss": 8.7492, "step": 1674500 }, { "epoch": 13.4, "learning_rate": 4.33e-05, "loss": 8.7318, "step": 1675000 }, { "epoch": 13.4, "learning_rate": 4.3298e-05, "loss": 8.7677, "step": 1675500 }, { "epoch": 13.41, "learning_rate": 4.3296e-05, "loss": 8.7693, "step": 1676000 }, { "epoch": 13.41, "learning_rate": 4.3294e-05, "loss": 8.7574, "step": 1676500 }, { "epoch": 13.42, "learning_rate": 4.3292000000000005e-05, "loss": 8.7392, "step": 1677000 }, { "epoch": 13.42, "learning_rate": 4.329e-05, "loss": 8.7526, "step": 1677500 }, { "epoch": 13.42, "learning_rate": 4.3288e-05, "loss": 8.7404, "step": 1678000 }, { "epoch": 13.43, "learning_rate": 4.3286000000000005e-05, "loss": 8.7532, "step": 1678500 }, { "epoch": 13.43, "learning_rate": 4.3284e-05, "loss": 8.7773, "step": 1679000 }, { "epoch": 13.44, "learning_rate": 4.3282e-05, "loss": 8.7621, "step": 1679500 }, { "epoch": 13.44, "learning_rate": 4.3280000000000006e-05, "loss": 8.7625, "step": 1680000 }, { "epoch": 13.44, "learning_rate": 4.3278e-05, "loss": 8.7402, "step": 1680500 }, { "epoch": 13.45, "learning_rate": 4.3276e-05, "loss": 8.7611, "step": 1681000 }, { "epoch": 13.45, "learning_rate": 4.3274000000000006e-05, "loss": 8.7465, "step": 1681500 }, { "epoch": 13.46, "learning_rate": 4.3272e-05, "loss": 8.7314, "step": 1682000 }, { "epoch": 13.46, "learning_rate": 4.327e-05, "loss": 8.7356, "step": 1682500 }, { "epoch": 13.46, "learning_rate": 4.3268e-05, "loss": 8.7637, "step": 1683000 }, { "epoch": 13.47, "learning_rate": 4.3266e-05, "loss": 8.7528, "step": 1683500 }, { "epoch": 13.47, "learning_rate": 4.3264000000000005e-05, "loss": 8.7494, "step": 1684000 }, { "epoch": 13.48, "learning_rate": 4.3262e-05, "loss": 8.7453, "step": 1684500 }, { "epoch": 13.48, "learning_rate": 4.326e-05, "loss": 8.7448, "step": 1685000 }, { "epoch": 13.48, "learning_rate": 4.3258000000000005e-05, "loss": 8.7323, "step": 1685500 }, { "epoch": 13.49, "learning_rate": 4.3256e-05, "loss": 8.7664, "step": 1686000 }, { "epoch": 13.49, "learning_rate": 4.3254e-05, "loss": 8.7491, "step": 1686500 }, { "epoch": 13.5, "learning_rate": 4.3252000000000005e-05, "loss": 8.7582, "step": 1687000 }, { "epoch": 13.5, "learning_rate": 4.325e-05, "loss": 8.7366, "step": 1687500 }, { "epoch": 13.5, "learning_rate": 4.3247999999999997e-05, "loss": 8.7333, "step": 1688000 }, { "epoch": 13.51, "learning_rate": 4.3246000000000006e-05, "loss": 8.7674, "step": 1688500 }, { "epoch": 13.51, "learning_rate": 4.3244e-05, "loss": 8.747, "step": 1689000 }, { "epoch": 13.52, "learning_rate": 4.3242000000000004e-05, "loss": 8.7511, "step": 1689500 }, { "epoch": 13.52, "learning_rate": 4.324e-05, "loss": 8.7707, "step": 1690000 }, { "epoch": 13.52, "learning_rate": 4.3238e-05, "loss": 8.7629, "step": 1690500 }, { "epoch": 13.53, "learning_rate": 4.3236000000000004e-05, "loss": 8.7546, "step": 1691000 }, { "epoch": 13.53, "learning_rate": 4.3234e-05, "loss": 8.746, "step": 1691500 }, { "epoch": 13.54, "learning_rate": 4.3232e-05, "loss": 8.7469, "step": 1692000 }, { "epoch": 13.54, "learning_rate": 4.3230000000000005e-05, "loss": 8.7646, "step": 1692500 }, { "epoch": 13.54, "learning_rate": 4.3228e-05, "loss": 8.7464, "step": 1693000 }, { "epoch": 13.55, "learning_rate": 4.3226e-05, "loss": 8.7621, "step": 1693500 }, { "epoch": 13.55, "learning_rate": 4.3224000000000005e-05, "loss": 8.7483, "step": 1694000 }, { "epoch": 13.56, "learning_rate": 4.3222e-05, "loss": 8.7634, "step": 1694500 }, { "epoch": 13.56, "learning_rate": 4.3219999999999996e-05, "loss": 8.7541, "step": 1695000 }, { "epoch": 13.56, "learning_rate": 4.3218000000000005e-05, "loss": 8.7605, "step": 1695500 }, { "epoch": 13.57, "learning_rate": 4.3216e-05, "loss": 8.744, "step": 1696000 }, { "epoch": 13.57, "learning_rate": 4.3214e-05, "loss": 8.768, "step": 1696500 }, { "epoch": 13.58, "learning_rate": 4.3212000000000006e-05, "loss": 8.7667, "step": 1697000 }, { "epoch": 13.58, "learning_rate": 4.321e-05, "loss": 8.7595, "step": 1697500 }, { "epoch": 13.58, "learning_rate": 4.3208000000000004e-05, "loss": 8.7606, "step": 1698000 }, { "epoch": 13.59, "learning_rate": 4.3206e-05, "loss": 8.7506, "step": 1698500 }, { "epoch": 13.59, "learning_rate": 4.3204e-05, "loss": 8.7311, "step": 1699000 }, { "epoch": 13.6, "learning_rate": 4.3202000000000004e-05, "loss": 8.7267, "step": 1699500 }, { "epoch": 13.6, "learning_rate": 4.32e-05, "loss": 8.7574, "step": 1700000 }, { "epoch": 13.6, "learning_rate": 4.3198e-05, "loss": 8.7703, "step": 1700500 }, { "epoch": 13.61, "learning_rate": 4.3196000000000005e-05, "loss": 8.7437, "step": 1701000 }, { "epoch": 13.61, "learning_rate": 4.3194e-05, "loss": 8.7453, "step": 1701500 }, { "epoch": 13.62, "learning_rate": 4.3192e-05, "loss": 8.7623, "step": 1702000 }, { "epoch": 13.62, "learning_rate": 4.3190000000000005e-05, "loss": 8.7557, "step": 1702500 }, { "epoch": 13.62, "learning_rate": 4.3188e-05, "loss": 8.7529, "step": 1703000 }, { "epoch": 13.63, "learning_rate": 4.3186e-05, "loss": 8.7446, "step": 1703500 }, { "epoch": 13.63, "learning_rate": 4.3184000000000005e-05, "loss": 8.7496, "step": 1704000 }, { "epoch": 13.64, "learning_rate": 4.3182e-05, "loss": 8.7464, "step": 1704500 }, { "epoch": 13.64, "learning_rate": 4.318e-05, "loss": 8.7479, "step": 1705000 }, { "epoch": 13.64, "learning_rate": 4.3178e-05, "loss": 8.7544, "step": 1705500 }, { "epoch": 13.65, "learning_rate": 4.3176e-05, "loss": 8.7698, "step": 1706000 }, { "epoch": 13.65, "learning_rate": 4.3174000000000004e-05, "loss": 8.729, "step": 1706500 }, { "epoch": 13.66, "learning_rate": 4.3172e-05, "loss": 8.7513, "step": 1707000 }, { "epoch": 13.66, "learning_rate": 4.317e-05, "loss": 8.7497, "step": 1707500 }, { "epoch": 13.66, "learning_rate": 4.3168000000000004e-05, "loss": 8.7547, "step": 1708000 }, { "epoch": 13.67, "learning_rate": 4.3166e-05, "loss": 8.7654, "step": 1708500 }, { "epoch": 13.67, "learning_rate": 4.3164e-05, "loss": 8.737, "step": 1709000 }, { "epoch": 13.68, "learning_rate": 4.3162000000000005e-05, "loss": 8.7537, "step": 1709500 }, { "epoch": 13.68, "learning_rate": 4.316e-05, "loss": 8.7584, "step": 1710000 }, { "epoch": 13.68, "learning_rate": 4.3158e-05, "loss": 8.7816, "step": 1710500 }, { "epoch": 13.69, "learning_rate": 4.3156000000000005e-05, "loss": 8.7479, "step": 1711000 }, { "epoch": 13.69, "learning_rate": 4.3154e-05, "loss": 8.7474, "step": 1711500 }, { "epoch": 13.7, "learning_rate": 4.3152e-05, "loss": 8.7627, "step": 1712000 }, { "epoch": 13.7, "learning_rate": 4.315e-05, "loss": 8.7625, "step": 1712500 }, { "epoch": 13.7, "learning_rate": 4.3148e-05, "loss": 8.7774, "step": 1713000 }, { "epoch": 13.71, "learning_rate": 4.3146e-05, "loss": 8.7568, "step": 1713500 }, { "epoch": 13.71, "learning_rate": 4.3144e-05, "loss": 8.7394, "step": 1714000 }, { "epoch": 13.72, "learning_rate": 4.3142e-05, "loss": 8.735, "step": 1714500 }, { "epoch": 13.72, "learning_rate": 4.3140000000000004e-05, "loss": 8.7588, "step": 1715000 }, { "epoch": 13.72, "learning_rate": 4.3138e-05, "loss": 8.7473, "step": 1715500 }, { "epoch": 13.73, "learning_rate": 4.3136e-05, "loss": 8.7473, "step": 1716000 }, { "epoch": 13.73, "learning_rate": 4.3134000000000004e-05, "loss": 8.7541, "step": 1716500 }, { "epoch": 13.74, "learning_rate": 4.3132e-05, "loss": 8.7372, "step": 1717000 }, { "epoch": 13.74, "learning_rate": 4.313e-05, "loss": 8.7502, "step": 1717500 }, { "epoch": 13.74, "learning_rate": 4.3128000000000004e-05, "loss": 8.7407, "step": 1718000 }, { "epoch": 13.75, "learning_rate": 4.3126e-05, "loss": 8.7736, "step": 1718500 }, { "epoch": 13.75, "learning_rate": 4.3124e-05, "loss": 8.7519, "step": 1719000 }, { "epoch": 13.76, "learning_rate": 4.3122e-05, "loss": 8.7236, "step": 1719500 }, { "epoch": 13.76, "learning_rate": 4.312000000000001e-05, "loss": 8.7318, "step": 1720000 }, { "epoch": 13.76, "learning_rate": 4.3118e-05, "loss": 8.7522, "step": 1720500 }, { "epoch": 13.77, "learning_rate": 4.3116e-05, "loss": 8.753, "step": 1721000 }, { "epoch": 13.77, "learning_rate": 4.311400000000001e-05, "loss": 8.7342, "step": 1721500 }, { "epoch": 13.78, "learning_rate": 4.3112e-05, "loss": 8.7488, "step": 1722000 }, { "epoch": 13.78, "learning_rate": 4.311e-05, "loss": 8.7589, "step": 1722500 }, { "epoch": 13.78, "learning_rate": 4.3108e-05, "loss": 8.7408, "step": 1723000 }, { "epoch": 13.79, "learning_rate": 4.3106000000000004e-05, "loss": 8.7533, "step": 1723500 }, { "epoch": 13.79, "learning_rate": 4.3104e-05, "loss": 8.7644, "step": 1724000 }, { "epoch": 13.8, "learning_rate": 4.3102e-05, "loss": 8.7607, "step": 1724500 }, { "epoch": 13.8, "learning_rate": 4.3100000000000004e-05, "loss": 8.7506, "step": 1725000 }, { "epoch": 13.8, "learning_rate": 4.3098e-05, "loss": 8.7511, "step": 1725500 }, { "epoch": 13.81, "learning_rate": 4.3096e-05, "loss": 8.7652, "step": 1726000 }, { "epoch": 13.81, "learning_rate": 4.3094e-05, "loss": 8.7562, "step": 1726500 }, { "epoch": 13.82, "learning_rate": 4.309200000000001e-05, "loss": 8.7577, "step": 1727000 }, { "epoch": 13.82, "learning_rate": 4.309e-05, "loss": 8.7563, "step": 1727500 }, { "epoch": 13.82, "learning_rate": 4.3088e-05, "loss": 8.7447, "step": 1728000 }, { "epoch": 13.83, "learning_rate": 4.308600000000001e-05, "loss": 8.7537, "step": 1728500 }, { "epoch": 13.83, "learning_rate": 4.3084e-05, "loss": 8.7511, "step": 1729000 }, { "epoch": 13.84, "learning_rate": 4.3082e-05, "loss": 8.7556, "step": 1729500 }, { "epoch": 13.84, "learning_rate": 4.308e-05, "loss": 8.7831, "step": 1730000 }, { "epoch": 13.84, "learning_rate": 4.3078e-05, "loss": 8.7414, "step": 1730500 }, { "epoch": 13.85, "learning_rate": 4.3076e-05, "loss": 8.7472, "step": 1731000 }, { "epoch": 13.85, "learning_rate": 4.3074e-05, "loss": 8.7548, "step": 1731500 }, { "epoch": 13.86, "learning_rate": 4.3072000000000004e-05, "loss": 8.7525, "step": 1732000 }, { "epoch": 13.86, "learning_rate": 4.3070000000000006e-05, "loss": 8.7539, "step": 1732500 }, { "epoch": 13.86, "learning_rate": 4.3068e-05, "loss": 8.7575, "step": 1733000 }, { "epoch": 13.87, "learning_rate": 4.3066000000000004e-05, "loss": 8.7749, "step": 1733500 }, { "epoch": 13.87, "learning_rate": 4.3064000000000006e-05, "loss": 8.7497, "step": 1734000 }, { "epoch": 13.88, "learning_rate": 4.3062e-05, "loss": 8.7703, "step": 1734500 }, { "epoch": 13.88, "learning_rate": 4.306e-05, "loss": 8.74, "step": 1735000 }, { "epoch": 13.88, "learning_rate": 4.305800000000001e-05, "loss": 8.7306, "step": 1735500 }, { "epoch": 13.89, "learning_rate": 4.3056e-05, "loss": 8.7482, "step": 1736000 }, { "epoch": 13.89, "learning_rate": 4.3054e-05, "loss": 8.7421, "step": 1736500 }, { "epoch": 13.9, "learning_rate": 4.3052e-05, "loss": 8.7355, "step": 1737000 }, { "epoch": 13.9, "learning_rate": 4.305e-05, "loss": 8.7487, "step": 1737500 }, { "epoch": 13.9, "learning_rate": 4.3048e-05, "loss": 8.7735, "step": 1738000 }, { "epoch": 13.91, "learning_rate": 4.3046e-05, "loss": 8.7322, "step": 1738500 }, { "epoch": 13.91, "learning_rate": 4.3044e-05, "loss": 8.7587, "step": 1739000 }, { "epoch": 13.92, "learning_rate": 4.3042000000000006e-05, "loss": 8.7581, "step": 1739500 }, { "epoch": 13.92, "learning_rate": 4.304e-05, "loss": 8.7494, "step": 1740000 }, { "epoch": 13.92, "learning_rate": 4.3038000000000004e-05, "loss": 8.7533, "step": 1740500 }, { "epoch": 13.93, "learning_rate": 4.3036000000000006e-05, "loss": 8.7407, "step": 1741000 }, { "epoch": 13.93, "learning_rate": 4.3034e-05, "loss": 8.7531, "step": 1741500 }, { "epoch": 13.94, "learning_rate": 4.3032e-05, "loss": 8.7466, "step": 1742000 }, { "epoch": 13.94, "learning_rate": 4.3030000000000006e-05, "loss": 8.7518, "step": 1742500 }, { "epoch": 13.94, "learning_rate": 4.3028e-05, "loss": 8.7417, "step": 1743000 }, { "epoch": 13.95, "learning_rate": 4.3026e-05, "loss": 8.7506, "step": 1743500 }, { "epoch": 13.95, "learning_rate": 4.3024e-05, "loss": 8.7466, "step": 1744000 }, { "epoch": 13.96, "learning_rate": 4.3022e-05, "loss": 8.7538, "step": 1744500 }, { "epoch": 13.96, "learning_rate": 4.3020000000000005e-05, "loss": 8.7481, "step": 1745000 }, { "epoch": 13.96, "learning_rate": 4.3018e-05, "loss": 8.7454, "step": 1745500 }, { "epoch": 13.97, "learning_rate": 4.3016e-05, "loss": 8.7798, "step": 1746000 }, { "epoch": 13.97, "learning_rate": 4.3014000000000005e-05, "loss": 8.7491, "step": 1746500 }, { "epoch": 13.98, "learning_rate": 4.3012e-05, "loss": 8.7487, "step": 1747000 }, { "epoch": 13.98, "learning_rate": 4.301e-05, "loss": 8.7397, "step": 1747500 }, { "epoch": 13.98, "learning_rate": 4.3008000000000006e-05, "loss": 8.7614, "step": 1748000 }, { "epoch": 13.99, "learning_rate": 4.3006e-05, "loss": 8.7391, "step": 1748500 }, { "epoch": 13.99, "learning_rate": 4.3004e-05, "loss": 8.7525, "step": 1749000 }, { "epoch": 14.0, "learning_rate": 4.3002000000000006e-05, "loss": 8.752, "step": 1749500 }, { "epoch": 14.0, "learning_rate": 4.3e-05, "loss": 8.7395, "step": 1750000 }, { "epoch": 14.0, "learning_rate": 4.2998e-05, "loss": 8.7469, "step": 1750500 }, { "epoch": 14.01, "learning_rate": 4.2996e-05, "loss": 8.7383, "step": 1751000 }, { "epoch": 14.01, "learning_rate": 4.2994e-05, "loss": 8.7597, "step": 1751500 }, { "epoch": 14.02, "learning_rate": 4.2992000000000004e-05, "loss": 8.7532, "step": 1752000 }, { "epoch": 14.02, "learning_rate": 4.299e-05, "loss": 8.7567, "step": 1752500 }, { "epoch": 14.02, "learning_rate": 4.2988e-05, "loss": 8.7422, "step": 1753000 }, { "epoch": 14.03, "learning_rate": 4.2986000000000005e-05, "loss": 8.74, "step": 1753500 }, { "epoch": 14.03, "learning_rate": 4.2984e-05, "loss": 8.7657, "step": 1754000 }, { "epoch": 14.04, "learning_rate": 4.2982e-05, "loss": 8.7148, "step": 1754500 }, { "epoch": 14.04, "learning_rate": 4.2980000000000005e-05, "loss": 8.7483, "step": 1755000 }, { "epoch": 14.04, "learning_rate": 4.2978e-05, "loss": 8.7387, "step": 1755500 }, { "epoch": 14.05, "learning_rate": 4.2975999999999996e-05, "loss": 8.765, "step": 1756000 }, { "epoch": 14.05, "learning_rate": 4.2974000000000006e-05, "loss": 8.7496, "step": 1756500 }, { "epoch": 14.06, "learning_rate": 4.2972e-05, "loss": 8.7494, "step": 1757000 }, { "epoch": 14.06, "learning_rate": 4.2970000000000004e-05, "loss": 8.7614, "step": 1757500 }, { "epoch": 14.06, "learning_rate": 4.2968000000000006e-05, "loss": 8.7293, "step": 1758000 }, { "epoch": 14.07, "learning_rate": 4.2966e-05, "loss": 8.7427, "step": 1758500 }, { "epoch": 14.07, "learning_rate": 4.2964000000000004e-05, "loss": 8.7417, "step": 1759000 }, { "epoch": 14.08, "learning_rate": 4.2962e-05, "loss": 8.7417, "step": 1759500 }, { "epoch": 14.08, "learning_rate": 4.296e-05, "loss": 8.7462, "step": 1760000 }, { "epoch": 14.08, "learning_rate": 4.2958000000000004e-05, "loss": 8.749, "step": 1760500 }, { "epoch": 14.09, "learning_rate": 4.2956e-05, "loss": 8.7611, "step": 1761000 }, { "epoch": 14.09, "learning_rate": 4.2954e-05, "loss": 8.7551, "step": 1761500 }, { "epoch": 14.1, "learning_rate": 4.2952000000000005e-05, "loss": 8.7573, "step": 1762000 }, { "epoch": 14.1, "learning_rate": 4.295e-05, "loss": 8.7428, "step": 1762500 }, { "epoch": 14.1, "learning_rate": 4.2947999999999996e-05, "loss": 8.7665, "step": 1763000 }, { "epoch": 14.11, "learning_rate": 4.2946000000000005e-05, "loss": 8.7431, "step": 1763500 }, { "epoch": 14.11, "learning_rate": 4.2944e-05, "loss": 8.7384, "step": 1764000 }, { "epoch": 14.12, "learning_rate": 4.2942e-05, "loss": 8.7506, "step": 1764500 }, { "epoch": 14.12, "learning_rate": 4.2940000000000006e-05, "loss": 8.758, "step": 1765000 }, { "epoch": 14.12, "learning_rate": 4.2938e-05, "loss": 8.7371, "step": 1765500 }, { "epoch": 14.13, "learning_rate": 4.2936000000000004e-05, "loss": 8.7534, "step": 1766000 }, { "epoch": 14.13, "learning_rate": 4.2934e-05, "loss": 8.7491, "step": 1766500 }, { "epoch": 14.14, "learning_rate": 4.2932e-05, "loss": 8.7399, "step": 1767000 }, { "epoch": 14.14, "learning_rate": 4.2930000000000004e-05, "loss": 8.7533, "step": 1767500 }, { "epoch": 14.14, "learning_rate": 4.2928e-05, "loss": 8.7593, "step": 1768000 }, { "epoch": 14.15, "learning_rate": 4.2926e-05, "loss": 8.7669, "step": 1768500 }, { "epoch": 14.15, "learning_rate": 4.2924000000000004e-05, "loss": 8.7457, "step": 1769000 }, { "epoch": 14.16, "learning_rate": 4.2922e-05, "loss": 8.7335, "step": 1769500 }, { "epoch": 14.16, "learning_rate": 4.292e-05, "loss": 8.7535, "step": 1770000 }, { "epoch": 14.16, "learning_rate": 4.2918000000000005e-05, "loss": 8.7585, "step": 1770500 }, { "epoch": 14.17, "learning_rate": 4.2916e-05, "loss": 8.7359, "step": 1771000 }, { "epoch": 14.17, "learning_rate": 4.2914e-05, "loss": 8.7544, "step": 1771500 }, { "epoch": 14.18, "learning_rate": 4.2912000000000005e-05, "loss": 8.7793, "step": 1772000 }, { "epoch": 14.18, "learning_rate": 4.291e-05, "loss": 8.7461, "step": 1772500 }, { "epoch": 14.18, "learning_rate": 4.2908e-05, "loss": 8.737, "step": 1773000 }, { "epoch": 14.19, "learning_rate": 4.2906e-05, "loss": 8.7599, "step": 1773500 }, { "epoch": 14.19, "learning_rate": 4.2904e-05, "loss": 8.7576, "step": 1774000 }, { "epoch": 14.2, "learning_rate": 4.2902000000000004e-05, "loss": 8.7594, "step": 1774500 }, { "epoch": 14.2, "learning_rate": 4.29e-05, "loss": 8.7413, "step": 1775000 }, { "epoch": 14.2, "learning_rate": 4.2898e-05, "loss": 8.717, "step": 1775500 }, { "epoch": 14.21, "learning_rate": 4.2896000000000004e-05, "loss": 8.7402, "step": 1776000 }, { "epoch": 14.21, "learning_rate": 4.2894e-05, "loss": 8.7538, "step": 1776500 }, { "epoch": 14.22, "learning_rate": 4.2892e-05, "loss": 8.7617, "step": 1777000 }, { "epoch": 14.22, "learning_rate": 4.2890000000000004e-05, "loss": 8.7404, "step": 1777500 }, { "epoch": 14.22, "learning_rate": 4.2888e-05, "loss": 8.7493, "step": 1778000 }, { "epoch": 14.23, "learning_rate": 4.2886e-05, "loss": 8.7689, "step": 1778500 }, { "epoch": 14.23, "learning_rate": 4.2884000000000005e-05, "loss": 8.7588, "step": 1779000 }, { "epoch": 14.24, "learning_rate": 4.2882e-05, "loss": 8.7289, "step": 1779500 }, { "epoch": 14.24, "learning_rate": 4.288e-05, "loss": 8.7472, "step": 1780000 }, { "epoch": 14.24, "learning_rate": 4.2878e-05, "loss": 8.7437, "step": 1780500 }, { "epoch": 14.25, "learning_rate": 4.2876e-05, "loss": 8.7483, "step": 1781000 }, { "epoch": 14.25, "learning_rate": 4.2874e-05, "loss": 8.7592, "step": 1781500 }, { "epoch": 14.26, "learning_rate": 4.2872e-05, "loss": 8.7391, "step": 1782000 }, { "epoch": 14.26, "learning_rate": 4.287000000000001e-05, "loss": 8.7308, "step": 1782500 }, { "epoch": 14.26, "learning_rate": 4.2868000000000004e-05, "loss": 8.7643, "step": 1783000 }, { "epoch": 14.27, "learning_rate": 4.2866e-05, "loss": 8.759, "step": 1783500 }, { "epoch": 14.27, "learning_rate": 4.2864e-05, "loss": 8.7578, "step": 1784000 }, { "epoch": 14.28, "learning_rate": 4.2862000000000004e-05, "loss": 8.7355, "step": 1784500 }, { "epoch": 14.28, "learning_rate": 4.286e-05, "loss": 8.7828, "step": 1785000 }, { "epoch": 14.28, "learning_rate": 4.2858e-05, "loss": 8.7482, "step": 1785500 }, { "epoch": 14.29, "learning_rate": 4.2856000000000004e-05, "loss": 8.7495, "step": 1786000 }, { "epoch": 14.29, "learning_rate": 4.2854e-05, "loss": 8.7461, "step": 1786500 }, { "epoch": 14.3, "learning_rate": 4.2852e-05, "loss": 8.742, "step": 1787000 }, { "epoch": 14.3, "learning_rate": 4.285e-05, "loss": 8.7587, "step": 1787500 }, { "epoch": 14.3, "learning_rate": 4.284800000000001e-05, "loss": 8.7532, "step": 1788000 }, { "epoch": 14.31, "learning_rate": 4.2846e-05, "loss": 8.7614, "step": 1788500 }, { "epoch": 14.31, "learning_rate": 4.2844e-05, "loss": 8.7487, "step": 1789000 }, { "epoch": 14.32, "learning_rate": 4.284200000000001e-05, "loss": 8.7363, "step": 1789500 }, { "epoch": 14.32, "learning_rate": 4.284e-05, "loss": 8.7571, "step": 1790000 }, { "epoch": 14.32, "learning_rate": 4.2838e-05, "loss": 8.7491, "step": 1790500 }, { "epoch": 14.33, "learning_rate": 4.2836e-05, "loss": 8.7508, "step": 1791000 }, { "epoch": 14.33, "learning_rate": 4.2834000000000004e-05, "loss": 8.762, "step": 1791500 }, { "epoch": 14.34, "learning_rate": 4.2832e-05, "loss": 8.7208, "step": 1792000 }, { "epoch": 14.34, "learning_rate": 4.283e-05, "loss": 8.721, "step": 1792500 }, { "epoch": 14.34, "learning_rate": 4.2828000000000004e-05, "loss": 8.7382, "step": 1793000 }, { "epoch": 14.35, "learning_rate": 4.2826e-05, "loss": 8.7763, "step": 1793500 }, { "epoch": 14.35, "learning_rate": 4.2824e-05, "loss": 8.7608, "step": 1794000 }, { "epoch": 14.36, "learning_rate": 4.2822000000000004e-05, "loss": 8.7406, "step": 1794500 }, { "epoch": 14.36, "learning_rate": 4.282000000000001e-05, "loss": 8.7591, "step": 1795000 }, { "epoch": 14.36, "learning_rate": 4.2818e-05, "loss": 8.7503, "step": 1795500 }, { "epoch": 14.37, "learning_rate": 4.2816e-05, "loss": 8.7525, "step": 1796000 }, { "epoch": 14.37, "learning_rate": 4.281400000000001e-05, "loss": 8.7414, "step": 1796500 }, { "epoch": 14.38, "learning_rate": 4.2812e-05, "loss": 8.7548, "step": 1797000 }, { "epoch": 14.38, "learning_rate": 4.281e-05, "loss": 8.7595, "step": 1797500 }, { "epoch": 14.38, "learning_rate": 4.2808e-05, "loss": 8.7507, "step": 1798000 }, { "epoch": 14.39, "learning_rate": 4.2806e-05, "loss": 8.7254, "step": 1798500 }, { "epoch": 14.39, "learning_rate": 4.2804e-05, "loss": 8.7233, "step": 1799000 }, { "epoch": 14.4, "learning_rate": 4.2802e-05, "loss": 8.7665, "step": 1799500 }, { "epoch": 14.4, "learning_rate": 4.2800000000000004e-05, "loss": 8.7706, "step": 1800000 }, { "epoch": 14.4, "learning_rate": 4.2798000000000006e-05, "loss": 8.7425, "step": 1800500 }, { "epoch": 14.41, "learning_rate": 4.2796e-05, "loss": 8.7733, "step": 1801000 }, { "epoch": 14.41, "learning_rate": 4.2794000000000004e-05, "loss": 8.7538, "step": 1801500 }, { "epoch": 14.42, "learning_rate": 4.2792000000000006e-05, "loss": 8.7647, "step": 1802000 }, { "epoch": 14.42, "learning_rate": 4.279e-05, "loss": 8.7416, "step": 1802500 }, { "epoch": 14.42, "learning_rate": 4.2788e-05, "loss": 8.7405, "step": 1803000 }, { "epoch": 14.43, "learning_rate": 4.278600000000001e-05, "loss": 8.7533, "step": 1803500 }, { "epoch": 14.43, "learning_rate": 4.2784e-05, "loss": 8.7467, "step": 1804000 }, { "epoch": 14.44, "learning_rate": 4.2782e-05, "loss": 8.7448, "step": 1804500 }, { "epoch": 14.44, "learning_rate": 4.278e-05, "loss": 8.7493, "step": 1805000 }, { "epoch": 14.44, "learning_rate": 4.2778e-05, "loss": 8.7666, "step": 1805500 }, { "epoch": 14.45, "learning_rate": 4.2776e-05, "loss": 8.735, "step": 1806000 }, { "epoch": 14.45, "learning_rate": 4.2774e-05, "loss": 8.7528, "step": 1806500 }, { "epoch": 14.46, "learning_rate": 4.2772e-05, "loss": 8.751, "step": 1807000 }, { "epoch": 14.46, "learning_rate": 4.2770000000000006e-05, "loss": 8.7511, "step": 1807500 }, { "epoch": 14.46, "learning_rate": 4.2768e-05, "loss": 8.7446, "step": 1808000 }, { "epoch": 14.47, "learning_rate": 4.2766000000000004e-05, "loss": 8.7648, "step": 1808500 }, { "epoch": 14.47, "learning_rate": 4.2764000000000006e-05, "loss": 8.7461, "step": 1809000 }, { "epoch": 14.48, "learning_rate": 4.2762e-05, "loss": 8.7377, "step": 1809500 }, { "epoch": 14.48, "learning_rate": 4.276e-05, "loss": 8.7597, "step": 1810000 }, { "epoch": 14.48, "learning_rate": 4.2758000000000006e-05, "loss": 8.7615, "step": 1810500 }, { "epoch": 14.49, "learning_rate": 4.2756e-05, "loss": 8.7455, "step": 1811000 }, { "epoch": 14.49, "learning_rate": 4.2754e-05, "loss": 8.7401, "step": 1811500 }, { "epoch": 14.5, "learning_rate": 4.2752e-05, "loss": 8.7539, "step": 1812000 }, { "epoch": 14.5, "learning_rate": 4.275e-05, "loss": 8.7592, "step": 1812500 }, { "epoch": 14.5, "learning_rate": 4.2748000000000005e-05, "loss": 8.7339, "step": 1813000 }, { "epoch": 14.51, "learning_rate": 4.2746e-05, "loss": 8.7441, "step": 1813500 }, { "epoch": 14.51, "learning_rate": 4.2744e-05, "loss": 8.7434, "step": 1814000 }, { "epoch": 14.52, "learning_rate": 4.2742000000000005e-05, "loss": 8.7543, "step": 1814500 }, { "epoch": 14.52, "learning_rate": 4.274e-05, "loss": 8.7438, "step": 1815000 }, { "epoch": 14.52, "learning_rate": 4.2738e-05, "loss": 8.732, "step": 1815500 }, { "epoch": 14.53, "learning_rate": 4.2736000000000006e-05, "loss": 8.7363, "step": 1816000 }, { "epoch": 14.53, "learning_rate": 4.2734e-05, "loss": 8.7396, "step": 1816500 }, { "epoch": 14.54, "learning_rate": 4.2732e-05, "loss": 8.7609, "step": 1817000 }, { "epoch": 14.54, "learning_rate": 4.2730000000000006e-05, "loss": 8.7405, "step": 1817500 }, { "epoch": 14.54, "learning_rate": 4.2728e-05, "loss": 8.7308, "step": 1818000 }, { "epoch": 14.55, "learning_rate": 4.2726e-05, "loss": 8.7523, "step": 1818500 }, { "epoch": 14.55, "learning_rate": 4.2724e-05, "loss": 8.775, "step": 1819000 }, { "epoch": 14.56, "learning_rate": 4.2722e-05, "loss": 8.7374, "step": 1819500 }, { "epoch": 14.56, "learning_rate": 4.2720000000000004e-05, "loss": 8.7495, "step": 1820000 }, { "epoch": 14.56, "learning_rate": 4.2718e-05, "loss": 8.7193, "step": 1820500 }, { "epoch": 14.57, "learning_rate": 4.2716e-05, "loss": 8.7537, "step": 1821000 }, { "epoch": 14.57, "learning_rate": 4.2714000000000005e-05, "loss": 8.7539, "step": 1821500 }, { "epoch": 14.58, "learning_rate": 4.2712e-05, "loss": 8.7536, "step": 1822000 }, { "epoch": 14.58, "learning_rate": 4.271e-05, "loss": 8.7534, "step": 1822500 }, { "epoch": 14.58, "learning_rate": 4.2708000000000005e-05, "loss": 8.7438, "step": 1823000 }, { "epoch": 14.59, "learning_rate": 4.2706e-05, "loss": 8.7401, "step": 1823500 }, { "epoch": 14.59, "learning_rate": 4.2703999999999996e-05, "loss": 8.7664, "step": 1824000 }, { "epoch": 14.6, "learning_rate": 4.2702000000000006e-05, "loss": 8.7382, "step": 1824500 }, { "epoch": 14.6, "learning_rate": 4.27e-05, "loss": 8.7665, "step": 1825000 }, { "epoch": 14.6, "learning_rate": 4.2698000000000004e-05, "loss": 8.7491, "step": 1825500 }, { "epoch": 14.61, "learning_rate": 4.2696000000000006e-05, "loss": 8.7466, "step": 1826000 }, { "epoch": 14.61, "learning_rate": 4.2694e-05, "loss": 8.742, "step": 1826500 }, { "epoch": 14.62, "learning_rate": 4.2692000000000004e-05, "loss": 8.7518, "step": 1827000 }, { "epoch": 14.62, "learning_rate": 4.269e-05, "loss": 8.7368, "step": 1827500 }, { "epoch": 14.62, "learning_rate": 4.2688e-05, "loss": 8.7781, "step": 1828000 }, { "epoch": 14.63, "learning_rate": 4.2686000000000004e-05, "loss": 8.7598, "step": 1828500 }, { "epoch": 14.63, "learning_rate": 4.2684e-05, "loss": 8.7376, "step": 1829000 }, { "epoch": 14.64, "learning_rate": 4.2682e-05, "loss": 8.7617, "step": 1829500 }, { "epoch": 14.64, "learning_rate": 4.2680000000000005e-05, "loss": 8.7364, "step": 1830000 }, { "epoch": 14.64, "learning_rate": 4.2678e-05, "loss": 8.7533, "step": 1830500 }, { "epoch": 14.65, "learning_rate": 4.2675999999999996e-05, "loss": 8.7536, "step": 1831000 }, { "epoch": 14.65, "learning_rate": 4.2674000000000005e-05, "loss": 8.7588, "step": 1831500 }, { "epoch": 14.66, "learning_rate": 4.2672e-05, "loss": 8.7578, "step": 1832000 }, { "epoch": 14.66, "learning_rate": 4.267e-05, "loss": 8.7368, "step": 1832500 }, { "epoch": 14.66, "learning_rate": 4.2668000000000006e-05, "loss": 8.7277, "step": 1833000 }, { "epoch": 14.67, "learning_rate": 4.2666e-05, "loss": 8.763, "step": 1833500 }, { "epoch": 14.67, "learning_rate": 4.2664000000000004e-05, "loss": 8.7332, "step": 1834000 }, { "epoch": 14.68, "learning_rate": 4.2662e-05, "loss": 8.7552, "step": 1834500 }, { "epoch": 14.68, "learning_rate": 4.266e-05, "loss": 8.7493, "step": 1835000 }, { "epoch": 14.68, "learning_rate": 4.2658000000000004e-05, "loss": 8.763, "step": 1835500 }, { "epoch": 14.69, "learning_rate": 4.2656e-05, "loss": 8.7358, "step": 1836000 }, { "epoch": 14.69, "learning_rate": 4.2654e-05, "loss": 8.7299, "step": 1836500 }, { "epoch": 14.7, "learning_rate": 4.2652000000000004e-05, "loss": 8.7314, "step": 1837000 }, { "epoch": 14.7, "learning_rate": 4.265e-05, "loss": 8.7312, "step": 1837500 }, { "epoch": 14.7, "learning_rate": 4.2648e-05, "loss": 8.7498, "step": 1838000 }, { "epoch": 14.71, "learning_rate": 4.2646000000000005e-05, "loss": 8.7382, "step": 1838500 }, { "epoch": 14.71, "learning_rate": 4.2644e-05, "loss": 8.7614, "step": 1839000 }, { "epoch": 14.72, "learning_rate": 4.2642e-05, "loss": 8.7305, "step": 1839500 }, { "epoch": 14.72, "learning_rate": 4.2640000000000005e-05, "loss": 8.7515, "step": 1840000 }, { "epoch": 14.72, "learning_rate": 4.2638e-05, "loss": 8.7391, "step": 1840500 }, { "epoch": 14.73, "learning_rate": 4.2636e-05, "loss": 8.7568, "step": 1841000 }, { "epoch": 14.73, "learning_rate": 4.2634e-05, "loss": 8.7507, "step": 1841500 }, { "epoch": 14.74, "learning_rate": 4.2632e-05, "loss": 8.7424, "step": 1842000 }, { "epoch": 14.74, "learning_rate": 4.2630000000000004e-05, "loss": 8.7259, "step": 1842500 }, { "epoch": 14.74, "learning_rate": 4.2628e-05, "loss": 8.7571, "step": 1843000 }, { "epoch": 14.75, "learning_rate": 4.2626e-05, "loss": 8.7526, "step": 1843500 }, { "epoch": 14.75, "learning_rate": 4.2624000000000004e-05, "loss": 8.7467, "step": 1844000 }, { "epoch": 14.76, "learning_rate": 4.2622e-05, "loss": 8.7379, "step": 1844500 }, { "epoch": 14.76, "learning_rate": 4.262e-05, "loss": 8.7445, "step": 1845000 }, { "epoch": 14.76, "learning_rate": 4.2618000000000004e-05, "loss": 8.7605, "step": 1845500 }, { "epoch": 14.77, "learning_rate": 4.2616e-05, "loss": 8.7631, "step": 1846000 }, { "epoch": 14.77, "learning_rate": 4.2614e-05, "loss": 8.7334, "step": 1846500 }, { "epoch": 14.78, "learning_rate": 4.2612000000000005e-05, "loss": 8.7355, "step": 1847000 }, { "epoch": 14.78, "learning_rate": 4.261e-05, "loss": 8.771, "step": 1847500 }, { "epoch": 14.78, "learning_rate": 4.2608e-05, "loss": 8.7477, "step": 1848000 }, { "epoch": 14.79, "learning_rate": 4.2606e-05, "loss": 8.7436, "step": 1848500 }, { "epoch": 14.79, "learning_rate": 4.2604e-05, "loss": 8.7508, "step": 1849000 }, { "epoch": 14.8, "learning_rate": 4.2602e-05, "loss": 8.756, "step": 1849500 }, { "epoch": 14.8, "learning_rate": 4.26e-05, "loss": 8.7334, "step": 1850000 }, { "epoch": 14.8, "learning_rate": 4.259800000000001e-05, "loss": 8.7688, "step": 1850500 }, { "epoch": 14.81, "learning_rate": 4.2596000000000003e-05, "loss": 8.7521, "step": 1851000 }, { "epoch": 14.81, "learning_rate": 4.2594e-05, "loss": 8.7454, "step": 1851500 }, { "epoch": 14.82, "learning_rate": 4.2592e-05, "loss": 8.7555, "step": 1852000 }, { "epoch": 14.82, "learning_rate": 4.2590000000000004e-05, "loss": 8.7574, "step": 1852500 }, { "epoch": 14.82, "learning_rate": 4.2588e-05, "loss": 8.7475, "step": 1853000 }, { "epoch": 14.83, "learning_rate": 4.2586e-05, "loss": 8.7485, "step": 1853500 }, { "epoch": 14.83, "learning_rate": 4.2584000000000004e-05, "loss": 8.7653, "step": 1854000 }, { "epoch": 14.84, "learning_rate": 4.2582e-05, "loss": 8.7653, "step": 1854500 }, { "epoch": 14.84, "learning_rate": 4.258e-05, "loss": 8.764, "step": 1855000 }, { "epoch": 14.84, "learning_rate": 4.2578e-05, "loss": 8.7549, "step": 1855500 }, { "epoch": 14.85, "learning_rate": 4.257600000000001e-05, "loss": 8.7729, "step": 1856000 }, { "epoch": 14.85, "learning_rate": 4.2574e-05, "loss": 8.7356, "step": 1856500 }, { "epoch": 14.86, "learning_rate": 4.2572e-05, "loss": 8.7549, "step": 1857000 }, { "epoch": 14.86, "learning_rate": 4.257000000000001e-05, "loss": 8.7598, "step": 1857500 }, { "epoch": 14.86, "learning_rate": 4.2568e-05, "loss": 8.748, "step": 1858000 }, { "epoch": 14.87, "learning_rate": 4.2566e-05, "loss": 8.7609, "step": 1858500 }, { "epoch": 14.87, "learning_rate": 4.2564e-05, "loss": 8.7547, "step": 1859000 }, { "epoch": 14.88, "learning_rate": 4.2562000000000003e-05, "loss": 8.7384, "step": 1859500 }, { "epoch": 14.88, "learning_rate": 4.256e-05, "loss": 8.7549, "step": 1860000 }, { "epoch": 14.88, "learning_rate": 4.2558e-05, "loss": 8.7547, "step": 1860500 }, { "epoch": 14.89, "learning_rate": 4.2556000000000004e-05, "loss": 8.7374, "step": 1861000 }, { "epoch": 14.89, "learning_rate": 4.2554e-05, "loss": 8.7599, "step": 1861500 }, { "epoch": 14.9, "learning_rate": 4.2552e-05, "loss": 8.7308, "step": 1862000 }, { "epoch": 14.9, "learning_rate": 4.2550000000000004e-05, "loss": 8.7633, "step": 1862500 }, { "epoch": 14.9, "learning_rate": 4.254800000000001e-05, "loss": 8.7493, "step": 1863000 }, { "epoch": 14.91, "learning_rate": 4.2546e-05, "loss": 8.7553, "step": 1863500 }, { "epoch": 14.91, "learning_rate": 4.2544e-05, "loss": 8.7601, "step": 1864000 }, { "epoch": 14.92, "learning_rate": 4.254200000000001e-05, "loss": 8.7214, "step": 1864500 }, { "epoch": 14.92, "learning_rate": 4.254e-05, "loss": 8.7452, "step": 1865000 }, { "epoch": 14.92, "learning_rate": 4.2538e-05, "loss": 8.7551, "step": 1865500 }, { "epoch": 14.93, "learning_rate": 4.2536e-05, "loss": 8.7477, "step": 1866000 }, { "epoch": 14.93, "learning_rate": 4.2534e-05, "loss": 8.7519, "step": 1866500 }, { "epoch": 14.94, "learning_rate": 4.2532e-05, "loss": 8.7563, "step": 1867000 }, { "epoch": 14.94, "learning_rate": 4.253e-05, "loss": 8.7349, "step": 1867500 }, { "epoch": 14.94, "learning_rate": 4.2528000000000003e-05, "loss": 8.7376, "step": 1868000 }, { "epoch": 14.95, "learning_rate": 4.2526000000000006e-05, "loss": 8.7507, "step": 1868500 }, { "epoch": 14.95, "learning_rate": 4.2524e-05, "loss": 8.7618, "step": 1869000 }, { "epoch": 14.96, "learning_rate": 4.2522000000000004e-05, "loss": 8.7547, "step": 1869500 }, { "epoch": 14.96, "learning_rate": 4.2520000000000006e-05, "loss": 8.7507, "step": 1870000 }, { "epoch": 14.96, "learning_rate": 4.2518e-05, "loss": 8.7466, "step": 1870500 }, { "epoch": 14.97, "learning_rate": 4.2516e-05, "loss": 8.7686, "step": 1871000 }, { "epoch": 14.97, "learning_rate": 4.251400000000001e-05, "loss": 8.7736, "step": 1871500 }, { "epoch": 14.98, "learning_rate": 4.2512e-05, "loss": 8.7395, "step": 1872000 }, { "epoch": 14.98, "learning_rate": 4.251e-05, "loss": 8.7467, "step": 1872500 }, { "epoch": 14.98, "learning_rate": 4.2508e-05, "loss": 8.7633, "step": 1873000 }, { "epoch": 14.99, "learning_rate": 4.2506e-05, "loss": 8.7406, "step": 1873500 }, { "epoch": 14.99, "learning_rate": 4.2504e-05, "loss": 8.7455, "step": 1874000 }, { "epoch": 15.0, "learning_rate": 4.2502e-05, "loss": 8.7539, "step": 1874500 }, { "epoch": 15.0, "learning_rate": 4.25e-05, "loss": 8.7571, "step": 1875000 }, { "epoch": 15.0, "learning_rate": 4.2498000000000005e-05, "loss": 8.7432, "step": 1875500 }, { "epoch": 15.01, "learning_rate": 4.2496e-05, "loss": 8.7535, "step": 1876000 }, { "epoch": 15.01, "learning_rate": 4.2494000000000003e-05, "loss": 8.7586, "step": 1876500 }, { "epoch": 15.02, "learning_rate": 4.2492000000000006e-05, "loss": 8.7597, "step": 1877000 }, { "epoch": 15.02, "learning_rate": 4.249e-05, "loss": 8.7576, "step": 1877500 }, { "epoch": 15.02, "learning_rate": 4.2488e-05, "loss": 8.7218, "step": 1878000 }, { "epoch": 15.03, "learning_rate": 4.2486000000000006e-05, "loss": 8.7517, "step": 1878500 }, { "epoch": 15.03, "learning_rate": 4.2484e-05, "loss": 8.7514, "step": 1879000 }, { "epoch": 15.04, "learning_rate": 4.2482e-05, "loss": 8.7393, "step": 1879500 }, { "epoch": 15.04, "learning_rate": 4.248e-05, "loss": 8.7364, "step": 1880000 }, { "epoch": 15.04, "learning_rate": 4.2478e-05, "loss": 8.7526, "step": 1880500 }, { "epoch": 15.05, "learning_rate": 4.2476000000000005e-05, "loss": 8.7529, "step": 1881000 }, { "epoch": 15.05, "learning_rate": 4.2474e-05, "loss": 8.7466, "step": 1881500 }, { "epoch": 15.06, "learning_rate": 4.2472e-05, "loss": 8.7513, "step": 1882000 }, { "epoch": 15.06, "learning_rate": 4.2470000000000005e-05, "loss": 8.7543, "step": 1882500 }, { "epoch": 15.06, "learning_rate": 4.2468e-05, "loss": 8.7585, "step": 1883000 }, { "epoch": 15.07, "learning_rate": 4.2466e-05, "loss": 8.7687, "step": 1883500 }, { "epoch": 15.07, "learning_rate": 4.2464000000000005e-05, "loss": 8.7693, "step": 1884000 }, { "epoch": 15.08, "learning_rate": 4.2462e-05, "loss": 8.7316, "step": 1884500 }, { "epoch": 15.08, "learning_rate": 4.246e-05, "loss": 8.7553, "step": 1885000 }, { "epoch": 15.08, "learning_rate": 4.2458000000000006e-05, "loss": 8.7389, "step": 1885500 }, { "epoch": 15.09, "learning_rate": 4.2456e-05, "loss": 8.7366, "step": 1886000 }, { "epoch": 15.09, "learning_rate": 4.2454e-05, "loss": 8.7563, "step": 1886500 }, { "epoch": 15.1, "learning_rate": 4.2452000000000006e-05, "loss": 8.7658, "step": 1887000 }, { "epoch": 15.1, "learning_rate": 4.245e-05, "loss": 8.7598, "step": 1887500 }, { "epoch": 15.1, "learning_rate": 4.2448000000000004e-05, "loss": 8.7621, "step": 1888000 }, { "epoch": 15.11, "learning_rate": 4.2446e-05, "loss": 8.7554, "step": 1888500 }, { "epoch": 15.11, "learning_rate": 4.2444e-05, "loss": 8.7534, "step": 1889000 }, { "epoch": 15.12, "learning_rate": 4.2442000000000005e-05, "loss": 8.7267, "step": 1889500 }, { "epoch": 15.12, "learning_rate": 4.244e-05, "loss": 8.7485, "step": 1890000 }, { "epoch": 15.12, "learning_rate": 4.2438e-05, "loss": 8.7471, "step": 1890500 }, { "epoch": 15.13, "learning_rate": 4.2436000000000005e-05, "loss": 8.7274, "step": 1891000 }, { "epoch": 15.13, "learning_rate": 4.2434e-05, "loss": 8.747, "step": 1891500 }, { "epoch": 15.14, "learning_rate": 4.2431999999999996e-05, "loss": 8.776, "step": 1892000 }, { "epoch": 15.14, "learning_rate": 4.2430000000000005e-05, "loss": 8.7589, "step": 1892500 }, { "epoch": 15.14, "learning_rate": 4.2428e-05, "loss": 8.7519, "step": 1893000 }, { "epoch": 15.15, "learning_rate": 4.2426000000000003e-05, "loss": 8.7433, "step": 1893500 }, { "epoch": 15.15, "learning_rate": 4.2424000000000006e-05, "loss": 8.757, "step": 1894000 }, { "epoch": 15.16, "learning_rate": 4.2422e-05, "loss": 8.757, "step": 1894500 }, { "epoch": 15.16, "learning_rate": 4.2420000000000004e-05, "loss": 8.7747, "step": 1895000 }, { "epoch": 15.16, "learning_rate": 4.2418e-05, "loss": 8.7353, "step": 1895500 }, { "epoch": 15.17, "learning_rate": 4.2416e-05, "loss": 8.7529, "step": 1896000 }, { "epoch": 15.17, "learning_rate": 4.2414000000000004e-05, "loss": 8.7568, "step": 1896500 }, { "epoch": 15.18, "learning_rate": 4.2412e-05, "loss": 8.7509, "step": 1897000 }, { "epoch": 15.18, "learning_rate": 4.241e-05, "loss": 8.7448, "step": 1897500 }, { "epoch": 15.18, "learning_rate": 4.2408000000000005e-05, "loss": 8.7532, "step": 1898000 }, { "epoch": 15.19, "learning_rate": 4.2406e-05, "loss": 8.7393, "step": 1898500 }, { "epoch": 15.19, "learning_rate": 4.2404e-05, "loss": 8.7722, "step": 1899000 }, { "epoch": 15.2, "learning_rate": 4.2402000000000005e-05, "loss": 8.7451, "step": 1899500 }, { "epoch": 15.2, "learning_rate": 4.24e-05, "loss": 8.7562, "step": 1900000 }, { "epoch": 15.2, "learning_rate": 4.2398e-05, "loss": 8.7505, "step": 1900500 }, { "epoch": 15.21, "learning_rate": 4.2396000000000005e-05, "loss": 8.7358, "step": 1901000 }, { "epoch": 15.21, "learning_rate": 4.2394e-05, "loss": 8.7492, "step": 1901500 }, { "epoch": 15.22, "learning_rate": 4.2392000000000003e-05, "loss": 8.7551, "step": 1902000 }, { "epoch": 15.22, "learning_rate": 4.239e-05, "loss": 8.77, "step": 1902500 }, { "epoch": 15.22, "learning_rate": 4.2388e-05, "loss": 8.7395, "step": 1903000 }, { "epoch": 15.23, "learning_rate": 4.2386000000000004e-05, "loss": 8.7347, "step": 1903500 }, { "epoch": 15.23, "learning_rate": 4.2384e-05, "loss": 8.7555, "step": 1904000 }, { "epoch": 15.24, "learning_rate": 4.2382e-05, "loss": 8.7711, "step": 1904500 }, { "epoch": 15.24, "learning_rate": 4.2380000000000004e-05, "loss": 8.7591, "step": 1905000 }, { "epoch": 15.24, "learning_rate": 4.2378e-05, "loss": 8.7497, "step": 1905500 }, { "epoch": 15.25, "learning_rate": 4.2376e-05, "loss": 8.7523, "step": 1906000 }, { "epoch": 15.25, "learning_rate": 4.2374000000000005e-05, "loss": 8.7473, "step": 1906500 }, { "epoch": 15.26, "learning_rate": 4.2372e-05, "loss": 8.7607, "step": 1907000 }, { "epoch": 15.26, "learning_rate": 4.237e-05, "loss": 8.7575, "step": 1907500 }, { "epoch": 15.26, "learning_rate": 4.2368000000000005e-05, "loss": 8.7423, "step": 1908000 }, { "epoch": 15.27, "learning_rate": 4.2366e-05, "loss": 8.7416, "step": 1908500 }, { "epoch": 15.27, "learning_rate": 4.2364e-05, "loss": 8.7498, "step": 1909000 }, { "epoch": 15.28, "learning_rate": 4.2362e-05, "loss": 8.7542, "step": 1909500 }, { "epoch": 15.28, "learning_rate": 4.236e-05, "loss": 8.7447, "step": 1910000 }, { "epoch": 15.28, "learning_rate": 4.2358000000000003e-05, "loss": 8.7557, "step": 1910500 }, { "epoch": 15.29, "learning_rate": 4.2356e-05, "loss": 8.7578, "step": 1911000 }, { "epoch": 15.29, "learning_rate": 4.2354e-05, "loss": 8.7492, "step": 1911500 }, { "epoch": 15.3, "learning_rate": 4.2352000000000004e-05, "loss": 8.7581, "step": 1912000 }, { "epoch": 15.3, "learning_rate": 4.235e-05, "loss": 8.7652, "step": 1912500 }, { "epoch": 15.3, "learning_rate": 4.2348e-05, "loss": 8.7443, "step": 1913000 }, { "epoch": 15.31, "learning_rate": 4.2346000000000004e-05, "loss": 8.7467, "step": 1913500 }, { "epoch": 15.31, "learning_rate": 4.2344e-05, "loss": 8.7233, "step": 1914000 }, { "epoch": 15.32, "learning_rate": 4.2342e-05, "loss": 8.7365, "step": 1914500 }, { "epoch": 15.32, "learning_rate": 4.2340000000000005e-05, "loss": 8.7435, "step": 1915000 }, { "epoch": 15.32, "learning_rate": 4.2338e-05, "loss": 8.7517, "step": 1915500 }, { "epoch": 15.33, "learning_rate": 4.2336e-05, "loss": 8.7235, "step": 1916000 }, { "epoch": 15.33, "learning_rate": 4.2334e-05, "loss": 8.7573, "step": 1916500 }, { "epoch": 15.34, "learning_rate": 4.2332e-05, "loss": 8.7395, "step": 1917000 }, { "epoch": 15.34, "learning_rate": 4.233e-05, "loss": 8.7387, "step": 1917500 }, { "epoch": 15.34, "learning_rate": 4.2328e-05, "loss": 8.7419, "step": 1918000 }, { "epoch": 15.35, "learning_rate": 4.232600000000001e-05, "loss": 8.7652, "step": 1918500 }, { "epoch": 15.35, "learning_rate": 4.2324e-05, "loss": 8.7227, "step": 1919000 }, { "epoch": 15.36, "learning_rate": 4.2322e-05, "loss": 8.7467, "step": 1919500 }, { "epoch": 15.36, "learning_rate": 4.232e-05, "loss": 8.7507, "step": 1920000 }, { "epoch": 15.36, "learning_rate": 4.2318000000000004e-05, "loss": 8.7411, "step": 1920500 }, { "epoch": 15.37, "learning_rate": 4.2316e-05, "loss": 8.754, "step": 1921000 }, { "epoch": 15.37, "learning_rate": 4.2314e-05, "loss": 8.7512, "step": 1921500 }, { "epoch": 15.38, "learning_rate": 4.2312000000000004e-05, "loss": 8.7518, "step": 1922000 }, { "epoch": 15.38, "learning_rate": 4.231e-05, "loss": 8.7557, "step": 1922500 }, { "epoch": 15.38, "learning_rate": 4.2308e-05, "loss": 8.7531, "step": 1923000 }, { "epoch": 15.39, "learning_rate": 4.2306000000000005e-05, "loss": 8.7497, "step": 1923500 }, { "epoch": 15.39, "learning_rate": 4.230400000000001e-05, "loss": 8.7532, "step": 1924000 }, { "epoch": 15.4, "learning_rate": 4.2302e-05, "loss": 8.7288, "step": 1924500 }, { "epoch": 15.4, "learning_rate": 4.23e-05, "loss": 8.7601, "step": 1925000 }, { "epoch": 15.4, "learning_rate": 4.229800000000001e-05, "loss": 8.7532, "step": 1925500 }, { "epoch": 15.41, "learning_rate": 4.2296e-05, "loss": 8.7556, "step": 1926000 }, { "epoch": 15.41, "learning_rate": 4.2294e-05, "loss": 8.7431, "step": 1926500 }, { "epoch": 15.42, "learning_rate": 4.2292e-05, "loss": 8.7734, "step": 1927000 }, { "epoch": 15.42, "learning_rate": 4.229e-05, "loss": 8.7552, "step": 1927500 }, { "epoch": 15.42, "learning_rate": 4.2288e-05, "loss": 8.753, "step": 1928000 }, { "epoch": 15.43, "learning_rate": 4.2286e-05, "loss": 8.7461, "step": 1928500 }, { "epoch": 15.43, "learning_rate": 4.2284000000000004e-05, "loss": 8.7579, "step": 1929000 }, { "epoch": 15.44, "learning_rate": 4.2282e-05, "loss": 8.7595, "step": 1929500 }, { "epoch": 15.44, "learning_rate": 4.228e-05, "loss": 8.7541, "step": 1930000 }, { "epoch": 15.44, "learning_rate": 4.2278000000000004e-05, "loss": 8.7621, "step": 1930500 }, { "epoch": 15.45, "learning_rate": 4.2276000000000007e-05, "loss": 8.7547, "step": 1931000 }, { "epoch": 15.45, "learning_rate": 4.2274e-05, "loss": 8.761, "step": 1931500 }, { "epoch": 15.46, "learning_rate": 4.2272e-05, "loss": 8.7509, "step": 1932000 }, { "epoch": 15.46, "learning_rate": 4.227000000000001e-05, "loss": 8.7747, "step": 1932500 }, { "epoch": 15.46, "learning_rate": 4.2268e-05, "loss": 8.7609, "step": 1933000 }, { "epoch": 15.47, "learning_rate": 4.2266e-05, "loss": 8.7443, "step": 1933500 }, { "epoch": 15.47, "learning_rate": 4.2264e-05, "loss": 8.7711, "step": 1934000 }, { "epoch": 15.48, "learning_rate": 4.2262e-05, "loss": 8.7643, "step": 1934500 }, { "epoch": 15.48, "learning_rate": 4.226e-05, "loss": 8.7476, "step": 1935000 }, { "epoch": 15.48, "learning_rate": 4.2258e-05, "loss": 8.7499, "step": 1935500 }, { "epoch": 15.49, "learning_rate": 4.2256e-05, "loss": 8.7609, "step": 1936000 }, { "epoch": 15.49, "learning_rate": 4.2254000000000006e-05, "loss": 8.7587, "step": 1936500 }, { "epoch": 15.5, "learning_rate": 4.2252e-05, "loss": 8.7369, "step": 1937000 }, { "epoch": 15.5, "learning_rate": 4.2250000000000004e-05, "loss": 8.7408, "step": 1937500 }, { "epoch": 15.5, "learning_rate": 4.2248000000000006e-05, "loss": 8.7544, "step": 1938000 }, { "epoch": 15.51, "learning_rate": 4.2246e-05, "loss": 8.7695, "step": 1938500 }, { "epoch": 15.51, "learning_rate": 4.2244e-05, "loss": 8.7369, "step": 1939000 }, { "epoch": 15.52, "learning_rate": 4.2242000000000007e-05, "loss": 8.7764, "step": 1939500 }, { "epoch": 15.52, "learning_rate": 4.224e-05, "loss": 8.7439, "step": 1940000 }, { "epoch": 15.52, "learning_rate": 4.2238e-05, "loss": 8.7436, "step": 1940500 }, { "epoch": 15.53, "learning_rate": 4.2236e-05, "loss": 8.7473, "step": 1941000 }, { "epoch": 15.53, "learning_rate": 4.2234e-05, "loss": 8.7554, "step": 1941500 }, { "epoch": 15.54, "learning_rate": 4.2232e-05, "loss": 8.7416, "step": 1942000 }, { "epoch": 15.54, "learning_rate": 4.223e-05, "loss": 8.7534, "step": 1942500 }, { "epoch": 15.54, "learning_rate": 4.2228e-05, "loss": 8.7352, "step": 1943000 }, { "epoch": 15.55, "learning_rate": 4.2226000000000005e-05, "loss": 8.758, "step": 1943500 }, { "epoch": 15.55, "learning_rate": 4.2224e-05, "loss": 8.7292, "step": 1944000 }, { "epoch": 15.56, "learning_rate": 4.2222e-05, "loss": 8.7574, "step": 1944500 }, { "epoch": 15.56, "learning_rate": 4.2220000000000006e-05, "loss": 8.7613, "step": 1945000 }, { "epoch": 15.56, "learning_rate": 4.2218e-05, "loss": 8.761, "step": 1945500 }, { "epoch": 15.57, "learning_rate": 4.2216e-05, "loss": 8.7702, "step": 1946000 }, { "epoch": 15.57, "learning_rate": 4.2214000000000006e-05, "loss": 8.7531, "step": 1946500 }, { "epoch": 15.58, "learning_rate": 4.2212e-05, "loss": 8.746, "step": 1947000 }, { "epoch": 15.58, "learning_rate": 4.221e-05, "loss": 8.7363, "step": 1947500 }, { "epoch": 15.58, "learning_rate": 4.2208e-05, "loss": 8.7471, "step": 1948000 }, { "epoch": 15.59, "learning_rate": 4.2206e-05, "loss": 8.7391, "step": 1948500 }, { "epoch": 15.59, "learning_rate": 4.2204000000000005e-05, "loss": 8.7451, "step": 1949000 }, { "epoch": 15.6, "learning_rate": 4.2202e-05, "loss": 8.7484, "step": 1949500 }, { "epoch": 15.6, "learning_rate": 4.22e-05, "loss": 8.7577, "step": 1950000 }, { "epoch": 15.6, "learning_rate": 4.2198000000000005e-05, "loss": 8.7601, "step": 1950500 }, { "epoch": 15.61, "learning_rate": 4.2196e-05, "loss": 8.7374, "step": 1951000 }, { "epoch": 15.61, "learning_rate": 4.2194e-05, "loss": 8.7242, "step": 1951500 }, { "epoch": 15.62, "learning_rate": 4.2192000000000005e-05, "loss": 8.771, "step": 1952000 }, { "epoch": 15.62, "learning_rate": 4.219e-05, "loss": 8.7578, "step": 1952500 }, { "epoch": 15.62, "learning_rate": 4.2187999999999997e-05, "loss": 8.7683, "step": 1953000 }, { "epoch": 15.63, "learning_rate": 4.2186000000000006e-05, "loss": 8.7231, "step": 1953500 }, { "epoch": 15.63, "learning_rate": 4.2184e-05, "loss": 8.7282, "step": 1954000 }, { "epoch": 15.64, "learning_rate": 4.2182e-05, "loss": 8.7545, "step": 1954500 }, { "epoch": 15.64, "learning_rate": 4.2180000000000006e-05, "loss": 8.7457, "step": 1955000 }, { "epoch": 15.64, "learning_rate": 4.2178e-05, "loss": 8.762, "step": 1955500 }, { "epoch": 15.65, "learning_rate": 4.2176000000000004e-05, "loss": 8.752, "step": 1956000 }, { "epoch": 15.65, "learning_rate": 4.2174e-05, "loss": 8.7606, "step": 1956500 }, { "epoch": 15.66, "learning_rate": 4.2172e-05, "loss": 8.7566, "step": 1957000 }, { "epoch": 15.66, "learning_rate": 4.2170000000000005e-05, "loss": 8.7573, "step": 1957500 }, { "epoch": 15.66, "learning_rate": 4.2168e-05, "loss": 8.7482, "step": 1958000 }, { "epoch": 15.67, "learning_rate": 4.2166e-05, "loss": 8.7484, "step": 1958500 }, { "epoch": 15.67, "learning_rate": 4.2164000000000005e-05, "loss": 8.7486, "step": 1959000 }, { "epoch": 15.68, "learning_rate": 4.2162e-05, "loss": 8.7287, "step": 1959500 }, { "epoch": 15.68, "learning_rate": 4.2159999999999996e-05, "loss": 8.7618, "step": 1960000 }, { "epoch": 15.68, "learning_rate": 4.2158000000000005e-05, "loss": 8.7449, "step": 1960500 }, { "epoch": 15.69, "learning_rate": 4.2156e-05, "loss": 8.7586, "step": 1961000 }, { "epoch": 15.69, "learning_rate": 4.2154e-05, "loss": 8.7516, "step": 1961500 }, { "epoch": 15.7, "learning_rate": 4.2152000000000006e-05, "loss": 8.7333, "step": 1962000 }, { "epoch": 15.7, "learning_rate": 4.215e-05, "loss": 8.7387, "step": 1962500 }, { "epoch": 15.7, "learning_rate": 4.2148000000000004e-05, "loss": 8.7455, "step": 1963000 }, { "epoch": 15.71, "learning_rate": 4.2146e-05, "loss": 8.7527, "step": 1963500 }, { "epoch": 15.71, "learning_rate": 4.2144e-05, "loss": 8.7415, "step": 1964000 }, { "epoch": 15.72, "learning_rate": 4.2142000000000004e-05, "loss": 8.7468, "step": 1964500 }, { "epoch": 15.72, "learning_rate": 4.214e-05, "loss": 8.7655, "step": 1965000 }, { "epoch": 15.72, "learning_rate": 4.2138e-05, "loss": 8.7564, "step": 1965500 }, { "epoch": 15.73, "learning_rate": 4.2136000000000005e-05, "loss": 8.7452, "step": 1966000 }, { "epoch": 15.73, "learning_rate": 4.2134e-05, "loss": 8.7503, "step": 1966500 }, { "epoch": 15.74, "learning_rate": 4.2132e-05, "loss": 8.7501, "step": 1967000 }, { "epoch": 15.74, "learning_rate": 4.2130000000000005e-05, "loss": 8.7553, "step": 1967500 }, { "epoch": 15.74, "learning_rate": 4.2128e-05, "loss": 8.7449, "step": 1968000 }, { "epoch": 15.75, "learning_rate": 4.2126e-05, "loss": 8.7437, "step": 1968500 }, { "epoch": 15.75, "learning_rate": 4.2124000000000005e-05, "loss": 8.7248, "step": 1969000 }, { "epoch": 15.76, "learning_rate": 4.2122e-05, "loss": 8.7641, "step": 1969500 }, { "epoch": 15.76, "learning_rate": 4.212e-05, "loss": 8.7591, "step": 1970000 }, { "epoch": 15.76, "learning_rate": 4.2118e-05, "loss": 8.7608, "step": 1970500 }, { "epoch": 15.77, "learning_rate": 4.2116e-05, "loss": 8.7638, "step": 1971000 }, { "epoch": 15.77, "learning_rate": 4.2114000000000004e-05, "loss": 8.7409, "step": 1971500 }, { "epoch": 15.78, "learning_rate": 4.2112e-05, "loss": 8.7591, "step": 1972000 }, { "epoch": 15.78, "learning_rate": 4.211e-05, "loss": 8.746, "step": 1972500 }, { "epoch": 15.78, "learning_rate": 4.2108000000000004e-05, "loss": 8.7614, "step": 1973000 }, { "epoch": 15.79, "learning_rate": 4.2106e-05, "loss": 8.7527, "step": 1973500 }, { "epoch": 15.79, "learning_rate": 4.2104e-05, "loss": 8.7691, "step": 1974000 }, { "epoch": 15.8, "learning_rate": 4.2102000000000004e-05, "loss": 8.7461, "step": 1974500 }, { "epoch": 15.8, "learning_rate": 4.21e-05, "loss": 8.7422, "step": 1975000 }, { "epoch": 15.8, "learning_rate": 4.2098e-05, "loss": 8.7433, "step": 1975500 }, { "epoch": 15.81, "learning_rate": 4.2096000000000005e-05, "loss": 8.7551, "step": 1976000 }, { "epoch": 15.81, "learning_rate": 4.2094e-05, "loss": 8.747, "step": 1976500 }, { "epoch": 15.82, "learning_rate": 4.2092e-05, "loss": 8.7548, "step": 1977000 }, { "epoch": 15.82, "learning_rate": 4.209e-05, "loss": 8.7617, "step": 1977500 }, { "epoch": 15.82, "learning_rate": 4.2088e-05, "loss": 8.744, "step": 1978000 }, { "epoch": 15.83, "learning_rate": 4.2086e-05, "loss": 8.743, "step": 1978500 }, { "epoch": 15.83, "learning_rate": 4.2084e-05, "loss": 8.7485, "step": 1979000 }, { "epoch": 15.84, "learning_rate": 4.208200000000001e-05, "loss": 8.7315, "step": 1979500 }, { "epoch": 15.84, "learning_rate": 4.2080000000000004e-05, "loss": 8.7456, "step": 1980000 }, { "epoch": 15.84, "learning_rate": 4.2078e-05, "loss": 8.7522, "step": 1980500 }, { "epoch": 15.85, "learning_rate": 4.2076e-05, "loss": 8.7492, "step": 1981000 }, { "epoch": 15.85, "learning_rate": 4.2074000000000004e-05, "loss": 8.7588, "step": 1981500 }, { "epoch": 15.86, "learning_rate": 4.2072e-05, "loss": 8.759, "step": 1982000 }, { "epoch": 15.86, "learning_rate": 4.207e-05, "loss": 8.7703, "step": 1982500 }, { "epoch": 15.86, "learning_rate": 4.2068000000000004e-05, "loss": 8.7354, "step": 1983000 }, { "epoch": 15.87, "learning_rate": 4.2066e-05, "loss": 8.7573, "step": 1983500 }, { "epoch": 15.87, "learning_rate": 4.2064e-05, "loss": 8.7484, "step": 1984000 }, { "epoch": 15.88, "learning_rate": 4.2062e-05, "loss": 8.7359, "step": 1984500 }, { "epoch": 15.88, "learning_rate": 4.206e-05, "loss": 8.7444, "step": 1985000 }, { "epoch": 15.88, "learning_rate": 4.2058e-05, "loss": 8.7433, "step": 1985500 }, { "epoch": 15.89, "learning_rate": 4.2056e-05, "loss": 8.7501, "step": 1986000 }, { "epoch": 15.89, "learning_rate": 4.205400000000001e-05, "loss": 8.7587, "step": 1986500 }, { "epoch": 15.9, "learning_rate": 4.2052e-05, "loss": 8.7706, "step": 1987000 }, { "epoch": 15.9, "learning_rate": 4.205e-05, "loss": 8.7609, "step": 1987500 }, { "epoch": 15.9, "learning_rate": 4.2048e-05, "loss": 8.757, "step": 1988000 }, { "epoch": 15.91, "learning_rate": 4.2046000000000004e-05, "loss": 8.7595, "step": 1988500 }, { "epoch": 15.91, "learning_rate": 4.2044e-05, "loss": 8.7452, "step": 1989000 }, { "epoch": 15.92, "learning_rate": 4.2042e-05, "loss": 8.7573, "step": 1989500 }, { "epoch": 15.92, "learning_rate": 4.2040000000000004e-05, "loss": 8.7389, "step": 1990000 }, { "epoch": 15.92, "learning_rate": 4.2038e-05, "loss": 8.7406, "step": 1990500 }, { "epoch": 15.93, "learning_rate": 4.2036e-05, "loss": 8.7606, "step": 1991000 }, { "epoch": 15.93, "learning_rate": 4.2034000000000004e-05, "loss": 8.7606, "step": 1991500 }, { "epoch": 15.94, "learning_rate": 4.203200000000001e-05, "loss": 8.7428, "step": 1992000 }, { "epoch": 15.94, "learning_rate": 4.203e-05, "loss": 8.7677, "step": 1992500 }, { "epoch": 15.94, "learning_rate": 4.2028e-05, "loss": 8.7553, "step": 1993000 }, { "epoch": 15.95, "learning_rate": 4.202600000000001e-05, "loss": 8.7524, "step": 1993500 }, { "epoch": 15.95, "learning_rate": 4.2024e-05, "loss": 8.7583, "step": 1994000 }, { "epoch": 15.96, "learning_rate": 4.2022e-05, "loss": 8.7546, "step": 1994500 }, { "epoch": 15.96, "learning_rate": 4.202e-05, "loss": 8.7542, "step": 1995000 }, { "epoch": 15.96, "learning_rate": 4.2018e-05, "loss": 8.7757, "step": 1995500 }, { "epoch": 15.97, "learning_rate": 4.2016e-05, "loss": 8.7573, "step": 1996000 }, { "epoch": 15.97, "learning_rate": 4.2014e-05, "loss": 8.7318, "step": 1996500 }, { "epoch": 15.98, "learning_rate": 4.2012000000000004e-05, "loss": 8.7699, "step": 1997000 }, { "epoch": 15.98, "learning_rate": 4.201e-05, "loss": 8.7398, "step": 1997500 }, { "epoch": 15.98, "learning_rate": 4.2008e-05, "loss": 8.7609, "step": 1998000 }, { "epoch": 15.99, "learning_rate": 4.2006000000000004e-05, "loss": 8.7568, "step": 1998500 }, { "epoch": 15.99, "learning_rate": 4.2004000000000006e-05, "loss": 8.747, "step": 1999000 }, { "epoch": 16.0, "learning_rate": 4.2002e-05, "loss": 8.7171, "step": 1999500 }, { "epoch": 16.0, "learning_rate": 4.2e-05, "loss": 8.7608, "step": 2000000 }, { "epoch": 16.0, "learning_rate": 4.199800000000001e-05, "loss": 8.7603, "step": 2000500 }, { "epoch": 16.01, "learning_rate": 4.1996e-05, "loss": 8.7322, "step": 2001000 }, { "epoch": 16.01, "learning_rate": 4.1994e-05, "loss": 8.7446, "step": 2001500 }, { "epoch": 16.02, "learning_rate": 4.1992e-05, "loss": 8.7591, "step": 2002000 }, { "epoch": 16.02, "learning_rate": 4.199e-05, "loss": 8.7607, "step": 2002500 }, { "epoch": 16.02, "learning_rate": 4.1988e-05, "loss": 8.7632, "step": 2003000 }, { "epoch": 16.03, "learning_rate": 4.1986e-05, "loss": 8.7359, "step": 2003500 }, { "epoch": 16.03, "learning_rate": 4.1984e-05, "loss": 8.753, "step": 2004000 }, { "epoch": 16.04, "learning_rate": 4.1982000000000006e-05, "loss": 8.7339, "step": 2004500 }, { "epoch": 16.04, "learning_rate": 4.198e-05, "loss": 8.7643, "step": 2005000 }, { "epoch": 16.04, "learning_rate": 4.1978000000000004e-05, "loss": 8.7316, "step": 2005500 }, { "epoch": 16.05, "learning_rate": 4.1976000000000006e-05, "loss": 8.7388, "step": 2006000 }, { "epoch": 16.05, "learning_rate": 4.1974e-05, "loss": 8.7504, "step": 2006500 }, { "epoch": 16.06, "learning_rate": 4.1972e-05, "loss": 8.7563, "step": 2007000 }, { "epoch": 16.06, "learning_rate": 4.1970000000000006e-05, "loss": 8.7791, "step": 2007500 }, { "epoch": 16.06, "learning_rate": 4.1968e-05, "loss": 8.7936, "step": 2008000 }, { "epoch": 16.07, "learning_rate": 4.1966e-05, "loss": 8.7543, "step": 2008500 }, { "epoch": 16.07, "learning_rate": 4.1964e-05, "loss": 8.7398, "step": 2009000 }, { "epoch": 16.08, "learning_rate": 4.1962e-05, "loss": 8.7413, "step": 2009500 }, { "epoch": 16.08, "learning_rate": 4.196e-05, "loss": 8.7716, "step": 2010000 }, { "epoch": 16.08, "learning_rate": 4.1958e-05, "loss": 8.7591, "step": 2010500 }, { "epoch": 16.09, "learning_rate": 4.1956e-05, "loss": 8.7776, "step": 2011000 }, { "epoch": 16.09, "learning_rate": 4.1954000000000005e-05, "loss": 8.7429, "step": 2011500 }, { "epoch": 16.1, "learning_rate": 4.1952e-05, "loss": 8.7634, "step": 2012000 }, { "epoch": 16.1, "learning_rate": 4.195e-05, "loss": 8.7525, "step": 2012500 }, { "epoch": 16.1, "learning_rate": 4.1948000000000006e-05, "loss": 8.7548, "step": 2013000 }, { "epoch": 16.11, "learning_rate": 4.1946e-05, "loss": 8.7466, "step": 2013500 }, { "epoch": 16.11, "learning_rate": 4.1944e-05, "loss": 8.7492, "step": 2014000 }, { "epoch": 16.12, "learning_rate": 4.1942000000000006e-05, "loss": 8.7876, "step": 2014500 }, { "epoch": 16.12, "learning_rate": 4.194e-05, "loss": 8.7488, "step": 2015000 }, { "epoch": 16.12, "learning_rate": 4.1938e-05, "loss": 8.7558, "step": 2015500 }, { "epoch": 16.13, "learning_rate": 4.1936000000000006e-05, "loss": 8.7694, "step": 2016000 }, { "epoch": 16.13, "learning_rate": 4.1934e-05, "loss": 8.7355, "step": 2016500 }, { "epoch": 16.14, "learning_rate": 4.1932000000000004e-05, "loss": 8.7453, "step": 2017000 }, { "epoch": 16.14, "learning_rate": 4.193e-05, "loss": 8.7639, "step": 2017500 }, { "epoch": 16.14, "learning_rate": 4.1928e-05, "loss": 8.7574, "step": 2018000 }, { "epoch": 16.15, "learning_rate": 4.1926000000000005e-05, "loss": 8.7594, "step": 2018500 }, { "epoch": 16.15, "learning_rate": 4.1924e-05, "loss": 8.7552, "step": 2019000 }, { "epoch": 16.16, "learning_rate": 4.1922e-05, "loss": 8.757, "step": 2019500 }, { "epoch": 16.16, "learning_rate": 4.1920000000000005e-05, "loss": 8.7506, "step": 2020000 }, { "epoch": 16.16, "learning_rate": 4.1918e-05, "loss": 8.7554, "step": 2020500 }, { "epoch": 16.17, "learning_rate": 4.1915999999999996e-05, "loss": 8.7672, "step": 2021000 }, { "epoch": 16.17, "learning_rate": 4.1914000000000006e-05, "loss": 8.7381, "step": 2021500 }, { "epoch": 16.18, "learning_rate": 4.1912e-05, "loss": 8.7385, "step": 2022000 }, { "epoch": 16.18, "learning_rate": 4.191e-05, "loss": 8.7482, "step": 2022500 }, { "epoch": 16.18, "learning_rate": 4.1908000000000006e-05, "loss": 8.7515, "step": 2023000 }, { "epoch": 16.19, "learning_rate": 4.1906e-05, "loss": 8.7658, "step": 2023500 }, { "epoch": 16.19, "learning_rate": 4.1904000000000004e-05, "loss": 8.7434, "step": 2024000 }, { "epoch": 16.2, "learning_rate": 4.1902e-05, "loss": 8.7406, "step": 2024500 }, { "epoch": 16.2, "learning_rate": 4.19e-05, "loss": 8.756, "step": 2025000 }, { "epoch": 16.2, "learning_rate": 4.1898000000000004e-05, "loss": 8.7494, "step": 2025500 }, { "epoch": 16.21, "learning_rate": 4.1896e-05, "loss": 8.7423, "step": 2026000 }, { "epoch": 16.21, "learning_rate": 4.1894e-05, "loss": 8.7419, "step": 2026500 }, { "epoch": 16.22, "learning_rate": 4.1892000000000005e-05, "loss": 8.746, "step": 2027000 }, { "epoch": 16.22, "learning_rate": 4.189e-05, "loss": 8.7506, "step": 2027500 }, { "epoch": 16.22, "learning_rate": 4.1888e-05, "loss": 8.7708, "step": 2028000 }, { "epoch": 16.23, "learning_rate": 4.1886000000000005e-05, "loss": 8.7232, "step": 2028500 }, { "epoch": 16.23, "learning_rate": 4.1884e-05, "loss": 8.7531, "step": 2029000 }, { "epoch": 16.24, "learning_rate": 4.1882e-05, "loss": 8.7373, "step": 2029500 }, { "epoch": 16.24, "learning_rate": 4.1880000000000006e-05, "loss": 8.7798, "step": 2030000 }, { "epoch": 16.24, "learning_rate": 4.1878e-05, "loss": 8.7661, "step": 2030500 }, { "epoch": 16.25, "learning_rate": 4.1876000000000004e-05, "loss": 8.7326, "step": 2031000 }, { "epoch": 16.25, "learning_rate": 4.1874e-05, "loss": 8.758, "step": 2031500 }, { "epoch": 16.26, "learning_rate": 4.1872e-05, "loss": 8.7464, "step": 2032000 }, { "epoch": 16.26, "learning_rate": 4.1870000000000004e-05, "loss": 8.7568, "step": 2032500 }, { "epoch": 16.26, "learning_rate": 4.1868e-05, "loss": 8.7424, "step": 2033000 }, { "epoch": 16.27, "learning_rate": 4.1866e-05, "loss": 8.7383, "step": 2033500 }, { "epoch": 16.27, "learning_rate": 4.1864000000000004e-05, "loss": 8.7597, "step": 2034000 }, { "epoch": 16.28, "learning_rate": 4.1862e-05, "loss": 8.7556, "step": 2034500 }, { "epoch": 16.28, "learning_rate": 4.186e-05, "loss": 8.7518, "step": 2035000 }, { "epoch": 16.28, "learning_rate": 4.1858000000000005e-05, "loss": 8.7547, "step": 2035500 }, { "epoch": 16.29, "learning_rate": 4.1856e-05, "loss": 8.7433, "step": 2036000 }, { "epoch": 16.29, "learning_rate": 4.1854e-05, "loss": 8.7402, "step": 2036500 }, { "epoch": 16.3, "learning_rate": 4.1852000000000005e-05, "loss": 8.7634, "step": 2037000 }, { "epoch": 16.3, "learning_rate": 4.185e-05, "loss": 8.752, "step": 2037500 }, { "epoch": 16.3, "learning_rate": 4.1848e-05, "loss": 8.7599, "step": 2038000 }, { "epoch": 16.31, "learning_rate": 4.1846e-05, "loss": 8.7417, "step": 2038500 }, { "epoch": 16.31, "learning_rate": 4.1844e-05, "loss": 8.7392, "step": 2039000 }, { "epoch": 16.32, "learning_rate": 4.1842000000000004e-05, "loss": 8.7516, "step": 2039500 }, { "epoch": 16.32, "learning_rate": 4.184e-05, "loss": 8.7434, "step": 2040000 }, { "epoch": 16.32, "learning_rate": 4.1838e-05, "loss": 8.7443, "step": 2040500 }, { "epoch": 16.33, "learning_rate": 4.1836000000000004e-05, "loss": 8.7701, "step": 2041000 }, { "epoch": 16.33, "learning_rate": 4.1834e-05, "loss": 8.7575, "step": 2041500 }, { "epoch": 16.34, "learning_rate": 4.1832e-05, "loss": 8.7752, "step": 2042000 }, { "epoch": 16.34, "learning_rate": 4.1830000000000004e-05, "loss": 8.757, "step": 2042500 }, { "epoch": 16.34, "learning_rate": 4.1828e-05, "loss": 8.7495, "step": 2043000 }, { "epoch": 16.35, "learning_rate": 4.1826e-05, "loss": 8.7539, "step": 2043500 }, { "epoch": 16.35, "learning_rate": 4.1824000000000005e-05, "loss": 8.7495, "step": 2044000 }, { "epoch": 16.36, "learning_rate": 4.1822e-05, "loss": 8.7414, "step": 2044500 }, { "epoch": 16.36, "learning_rate": 4.182e-05, "loss": 8.7538, "step": 2045000 }, { "epoch": 16.36, "learning_rate": 4.1818e-05, "loss": 8.7409, "step": 2045500 }, { "epoch": 16.37, "learning_rate": 4.1816e-05, "loss": 8.7614, "step": 2046000 }, { "epoch": 16.37, "learning_rate": 4.1814e-05, "loss": 8.7634, "step": 2046500 }, { "epoch": 16.38, "learning_rate": 4.1812e-05, "loss": 8.7473, "step": 2047000 }, { "epoch": 16.38, "learning_rate": 4.181000000000001e-05, "loss": 8.7351, "step": 2047500 }, { "epoch": 16.38, "learning_rate": 4.1808000000000004e-05, "loss": 8.7791, "step": 2048000 }, { "epoch": 16.39, "learning_rate": 4.1806e-05, "loss": 8.7584, "step": 2048500 }, { "epoch": 16.39, "learning_rate": 4.1804e-05, "loss": 8.7455, "step": 2049000 }, { "epoch": 16.4, "learning_rate": 4.1802000000000004e-05, "loss": 8.7334, "step": 2049500 }, { "epoch": 16.4, "learning_rate": 4.18e-05, "loss": 8.7368, "step": 2050000 }, { "epoch": 16.4, "learning_rate": 4.1798e-05, "loss": 8.7516, "step": 2050500 }, { "epoch": 16.41, "learning_rate": 4.1796000000000004e-05, "loss": 8.7608, "step": 2051000 }, { "epoch": 16.41, "learning_rate": 4.1794e-05, "loss": 8.7496, "step": 2051500 }, { "epoch": 16.42, "learning_rate": 4.1792e-05, "loss": 8.764, "step": 2052000 }, { "epoch": 16.42, "learning_rate": 4.179e-05, "loss": 8.7511, "step": 2052500 }, { "epoch": 16.42, "learning_rate": 4.1788e-05, "loss": 8.7512, "step": 2053000 }, { "epoch": 16.43, "learning_rate": 4.1786e-05, "loss": 8.7525, "step": 2053500 }, { "epoch": 16.43, "learning_rate": 4.1784e-05, "loss": 8.7236, "step": 2054000 }, { "epoch": 16.44, "learning_rate": 4.178200000000001e-05, "loss": 8.753, "step": 2054500 }, { "epoch": 16.44, "learning_rate": 4.178e-05, "loss": 8.7447, "step": 2055000 }, { "epoch": 16.44, "learning_rate": 4.1778e-05, "loss": 8.7578, "step": 2055500 }, { "epoch": 16.45, "learning_rate": 4.1776e-05, "loss": 8.7446, "step": 2056000 }, { "epoch": 16.45, "learning_rate": 4.1774000000000004e-05, "loss": 8.7397, "step": 2056500 }, { "epoch": 16.46, "learning_rate": 4.1772e-05, "loss": 8.7553, "step": 2057000 }, { "epoch": 16.46, "learning_rate": 4.177e-05, "loss": 8.7578, "step": 2057500 }, { "epoch": 16.46, "learning_rate": 4.1768000000000004e-05, "loss": 8.743, "step": 2058000 }, { "epoch": 16.47, "learning_rate": 4.1766e-05, "loss": 8.7574, "step": 2058500 }, { "epoch": 16.47, "learning_rate": 4.1764e-05, "loss": 8.76, "step": 2059000 }, { "epoch": 16.48, "learning_rate": 4.1762000000000004e-05, "loss": 8.7383, "step": 2059500 }, { "epoch": 16.48, "learning_rate": 4.176000000000001e-05, "loss": 8.7674, "step": 2060000 }, { "epoch": 16.48, "learning_rate": 4.1758e-05, "loss": 8.747, "step": 2060500 }, { "epoch": 16.49, "learning_rate": 4.1756e-05, "loss": 8.7446, "step": 2061000 }, { "epoch": 16.49, "learning_rate": 4.175400000000001e-05, "loss": 8.7696, "step": 2061500 }, { "epoch": 16.5, "learning_rate": 4.1752e-05, "loss": 8.7492, "step": 2062000 }, { "epoch": 16.5, "learning_rate": 4.175e-05, "loss": 8.7368, "step": 2062500 }, { "epoch": 16.5, "learning_rate": 4.1748e-05, "loss": 8.7548, "step": 2063000 }, { "epoch": 16.51, "learning_rate": 4.1746e-05, "loss": 8.7737, "step": 2063500 }, { "epoch": 16.51, "learning_rate": 4.1744e-05, "loss": 8.7497, "step": 2064000 }, { "epoch": 16.52, "learning_rate": 4.1742e-05, "loss": 8.7529, "step": 2064500 }, { "epoch": 16.52, "learning_rate": 4.1740000000000004e-05, "loss": 8.7633, "step": 2065000 }, { "epoch": 16.52, "learning_rate": 4.1738e-05, "loss": 8.7667, "step": 2065500 }, { "epoch": 16.53, "learning_rate": 4.1736e-05, "loss": 8.7558, "step": 2066000 }, { "epoch": 16.53, "learning_rate": 4.1734000000000004e-05, "loss": 8.7623, "step": 2066500 }, { "epoch": 16.54, "learning_rate": 4.1732000000000006e-05, "loss": 8.7691, "step": 2067000 }, { "epoch": 16.54, "learning_rate": 4.173e-05, "loss": 8.7636, "step": 2067500 }, { "epoch": 16.54, "learning_rate": 4.1728e-05, "loss": 8.7705, "step": 2068000 }, { "epoch": 16.55, "learning_rate": 4.172600000000001e-05, "loss": 8.7511, "step": 2068500 }, { "epoch": 16.55, "learning_rate": 4.1724e-05, "loss": 8.7579, "step": 2069000 }, { "epoch": 16.56, "learning_rate": 4.1722e-05, "loss": 8.771, "step": 2069500 }, { "epoch": 16.56, "learning_rate": 4.172e-05, "loss": 8.7329, "step": 2070000 }, { "epoch": 16.56, "learning_rate": 4.1718e-05, "loss": 8.741, "step": 2070500 }, { "epoch": 16.57, "learning_rate": 4.1716e-05, "loss": 8.7696, "step": 2071000 }, { "epoch": 16.57, "learning_rate": 4.1714e-05, "loss": 8.7508, "step": 2071500 }, { "epoch": 16.58, "learning_rate": 4.1712e-05, "loss": 8.754, "step": 2072000 }, { "epoch": 16.58, "learning_rate": 4.1710000000000006e-05, "loss": 8.743, "step": 2072500 }, { "epoch": 16.58, "learning_rate": 4.1708e-05, "loss": 8.7391, "step": 2073000 }, { "epoch": 16.59, "learning_rate": 4.1706000000000004e-05, "loss": 8.7473, "step": 2073500 }, { "epoch": 16.59, "learning_rate": 4.1704000000000006e-05, "loss": 8.7513, "step": 2074000 }, { "epoch": 16.6, "learning_rate": 4.1702e-05, "loss": 8.75, "step": 2074500 }, { "epoch": 16.6, "learning_rate": 4.17e-05, "loss": 8.7375, "step": 2075000 }, { "epoch": 16.6, "learning_rate": 4.1698000000000006e-05, "loss": 8.7569, "step": 2075500 }, { "epoch": 16.61, "learning_rate": 4.1696e-05, "loss": 8.743, "step": 2076000 }, { "epoch": 16.61, "learning_rate": 4.1694e-05, "loss": 8.7508, "step": 2076500 }, { "epoch": 16.62, "learning_rate": 4.1692e-05, "loss": 8.7461, "step": 2077000 }, { "epoch": 16.62, "learning_rate": 4.169e-05, "loss": 8.7577, "step": 2077500 }, { "epoch": 16.62, "learning_rate": 4.1688e-05, "loss": 8.7492, "step": 2078000 }, { "epoch": 16.63, "learning_rate": 4.1686e-05, "loss": 8.773, "step": 2078500 }, { "epoch": 16.63, "learning_rate": 4.1684e-05, "loss": 8.7439, "step": 2079000 }, { "epoch": 16.64, "learning_rate": 4.1682000000000005e-05, "loss": 8.7541, "step": 2079500 }, { "epoch": 16.64, "learning_rate": 4.168e-05, "loss": 8.7632, "step": 2080000 }, { "epoch": 16.64, "learning_rate": 4.1678e-05, "loss": 8.7533, "step": 2080500 }, { "epoch": 16.65, "learning_rate": 4.1676000000000006e-05, "loss": 8.7469, "step": 2081000 }, { "epoch": 16.65, "learning_rate": 4.1674e-05, "loss": 8.758, "step": 2081500 }, { "epoch": 16.66, "learning_rate": 4.1672e-05, "loss": 8.7351, "step": 2082000 }, { "epoch": 16.66, "learning_rate": 4.1670000000000006e-05, "loss": 8.7452, "step": 2082500 }, { "epoch": 16.66, "learning_rate": 4.1668e-05, "loss": 8.7494, "step": 2083000 }, { "epoch": 16.67, "learning_rate": 4.1666e-05, "loss": 8.7468, "step": 2083500 }, { "epoch": 16.67, "learning_rate": 4.1664000000000006e-05, "loss": 8.7491, "step": 2084000 }, { "epoch": 16.68, "learning_rate": 4.1662e-05, "loss": 8.7462, "step": 2084500 }, { "epoch": 16.68, "learning_rate": 4.1660000000000004e-05, "loss": 8.7529, "step": 2085000 }, { "epoch": 16.68, "learning_rate": 4.1658e-05, "loss": 8.7489, "step": 2085500 }, { "epoch": 16.69, "learning_rate": 4.1656e-05, "loss": 8.7582, "step": 2086000 }, { "epoch": 16.69, "learning_rate": 4.1654000000000005e-05, "loss": 8.7739, "step": 2086500 }, { "epoch": 16.7, "learning_rate": 4.1652e-05, "loss": 8.76, "step": 2087000 }, { "epoch": 16.7, "learning_rate": 4.165e-05, "loss": 8.7592, "step": 2087500 }, { "epoch": 16.7, "learning_rate": 4.1648000000000005e-05, "loss": 8.7442, "step": 2088000 }, { "epoch": 16.71, "learning_rate": 4.1646e-05, "loss": 8.7387, "step": 2088500 }, { "epoch": 16.71, "learning_rate": 4.1643999999999996e-05, "loss": 8.7654, "step": 2089000 }, { "epoch": 16.72, "learning_rate": 4.1642000000000006e-05, "loss": 8.7713, "step": 2089500 }, { "epoch": 16.72, "learning_rate": 4.164e-05, "loss": 8.751, "step": 2090000 }, { "epoch": 16.72, "learning_rate": 4.1638e-05, "loss": 8.7748, "step": 2090500 }, { "epoch": 16.73, "learning_rate": 4.1636000000000006e-05, "loss": 8.7561, "step": 2091000 }, { "epoch": 16.73, "learning_rate": 4.1634e-05, "loss": 8.7445, "step": 2091500 }, { "epoch": 16.74, "learning_rate": 4.1632000000000004e-05, "loss": 8.7482, "step": 2092000 }, { "epoch": 16.74, "learning_rate": 4.163e-05, "loss": 8.7523, "step": 2092500 }, { "epoch": 16.74, "learning_rate": 4.1628e-05, "loss": 8.7527, "step": 2093000 }, { "epoch": 16.75, "learning_rate": 4.1626000000000004e-05, "loss": 8.7527, "step": 2093500 }, { "epoch": 16.75, "learning_rate": 4.1624e-05, "loss": 8.7656, "step": 2094000 }, { "epoch": 16.76, "learning_rate": 4.1622e-05, "loss": 8.7497, "step": 2094500 }, { "epoch": 16.76, "learning_rate": 4.1620000000000005e-05, "loss": 8.7437, "step": 2095000 }, { "epoch": 16.76, "learning_rate": 4.1618e-05, "loss": 8.7745, "step": 2095500 }, { "epoch": 16.77, "learning_rate": 4.1616e-05, "loss": 8.7408, "step": 2096000 }, { "epoch": 16.77, "learning_rate": 4.1614000000000005e-05, "loss": 8.7423, "step": 2096500 }, { "epoch": 16.78, "learning_rate": 4.1612e-05, "loss": 8.7386, "step": 2097000 }, { "epoch": 16.78, "learning_rate": 4.161e-05, "loss": 8.7614, "step": 2097500 }, { "epoch": 16.78, "learning_rate": 4.1608000000000005e-05, "loss": 8.7512, "step": 2098000 }, { "epoch": 16.79, "learning_rate": 4.1606e-05, "loss": 8.7669, "step": 2098500 }, { "epoch": 16.79, "learning_rate": 4.1604000000000004e-05, "loss": 8.7505, "step": 2099000 }, { "epoch": 16.8, "learning_rate": 4.1602e-05, "loss": 8.7555, "step": 2099500 }, { "epoch": 16.8, "learning_rate": 4.16e-05, "loss": 8.7586, "step": 2100000 }, { "epoch": 16.8, "learning_rate": 4.1598000000000004e-05, "loss": 8.7423, "step": 2100500 }, { "epoch": 16.81, "learning_rate": 4.1596e-05, "loss": 8.7527, "step": 2101000 }, { "epoch": 16.81, "learning_rate": 4.1594e-05, "loss": 8.7576, "step": 2101500 }, { "epoch": 16.82, "learning_rate": 4.1592000000000004e-05, "loss": 8.7578, "step": 2102000 }, { "epoch": 16.82, "learning_rate": 4.159e-05, "loss": 8.7496, "step": 2102500 }, { "epoch": 16.82, "learning_rate": 4.1588e-05, "loss": 8.7536, "step": 2103000 }, { "epoch": 16.83, "learning_rate": 4.1586000000000005e-05, "loss": 8.7484, "step": 2103500 }, { "epoch": 16.83, "learning_rate": 4.1584e-05, "loss": 8.7307, "step": 2104000 }, { "epoch": 16.84, "learning_rate": 4.1582e-05, "loss": 8.7598, "step": 2104500 }, { "epoch": 16.84, "learning_rate": 4.1580000000000005e-05, "loss": 8.752, "step": 2105000 }, { "epoch": 16.84, "learning_rate": 4.1578e-05, "loss": 8.7638, "step": 2105500 }, { "epoch": 16.85, "learning_rate": 4.1576e-05, "loss": 8.7455, "step": 2106000 }, { "epoch": 16.85, "learning_rate": 4.1574e-05, "loss": 8.7394, "step": 2106500 }, { "epoch": 16.86, "learning_rate": 4.1572e-05, "loss": 8.7506, "step": 2107000 }, { "epoch": 16.86, "learning_rate": 4.1570000000000003e-05, "loss": 8.7338, "step": 2107500 }, { "epoch": 16.86, "learning_rate": 4.1568e-05, "loss": 8.737, "step": 2108000 }, { "epoch": 16.87, "learning_rate": 4.1566e-05, "loss": 8.7504, "step": 2108500 }, { "epoch": 16.87, "learning_rate": 4.1564000000000004e-05, "loss": 8.7588, "step": 2109000 }, { "epoch": 16.88, "learning_rate": 4.1562e-05, "loss": 8.7361, "step": 2109500 }, { "epoch": 16.88, "learning_rate": 4.156e-05, "loss": 8.7391, "step": 2110000 }, { "epoch": 16.88, "learning_rate": 4.1558000000000004e-05, "loss": 8.7412, "step": 2110500 }, { "epoch": 16.89, "learning_rate": 4.1556e-05, "loss": 8.7386, "step": 2111000 }, { "epoch": 16.89, "learning_rate": 4.1554e-05, "loss": 8.7361, "step": 2111500 }, { "epoch": 16.9, "learning_rate": 4.1552000000000005e-05, "loss": 8.7641, "step": 2112000 }, { "epoch": 16.9, "learning_rate": 4.155e-05, "loss": 8.7459, "step": 2112500 }, { "epoch": 16.9, "learning_rate": 4.1548e-05, "loss": 8.7429, "step": 2113000 }, { "epoch": 16.91, "learning_rate": 4.1546e-05, "loss": 8.7447, "step": 2113500 }, { "epoch": 16.91, "learning_rate": 4.1544e-05, "loss": 8.75, "step": 2114000 }, { "epoch": 16.92, "learning_rate": 4.1542e-05, "loss": 8.7613, "step": 2114500 }, { "epoch": 16.92, "learning_rate": 4.154e-05, "loss": 8.7592, "step": 2115000 }, { "epoch": 16.92, "learning_rate": 4.153800000000001e-05, "loss": 8.7531, "step": 2115500 }, { "epoch": 16.93, "learning_rate": 4.1536000000000003e-05, "loss": 8.7436, "step": 2116000 }, { "epoch": 16.93, "learning_rate": 4.1534e-05, "loss": 8.7495, "step": 2116500 }, { "epoch": 16.94, "learning_rate": 4.1532e-05, "loss": 8.7435, "step": 2117000 }, { "epoch": 16.94, "learning_rate": 4.1530000000000004e-05, "loss": 8.7423, "step": 2117500 }, { "epoch": 16.94, "learning_rate": 4.1528e-05, "loss": 8.7508, "step": 2118000 }, { "epoch": 16.95, "learning_rate": 4.1526e-05, "loss": 8.7476, "step": 2118500 }, { "epoch": 16.95, "learning_rate": 4.1524000000000004e-05, "loss": 8.7404, "step": 2119000 }, { "epoch": 16.96, "learning_rate": 4.1522e-05, "loss": 8.7513, "step": 2119500 }, { "epoch": 16.96, "learning_rate": 4.152e-05, "loss": 8.7342, "step": 2120000 }, { "epoch": 16.96, "learning_rate": 4.1518000000000005e-05, "loss": 8.7574, "step": 2120500 }, { "epoch": 16.97, "learning_rate": 4.1516e-05, "loss": 8.7517, "step": 2121000 }, { "epoch": 16.97, "learning_rate": 4.1514e-05, "loss": 8.7548, "step": 2121500 }, { "epoch": 16.98, "learning_rate": 4.1512e-05, "loss": 8.7393, "step": 2122000 }, { "epoch": 16.98, "learning_rate": 4.151000000000001e-05, "loss": 8.731, "step": 2122500 }, { "epoch": 16.98, "learning_rate": 4.1508e-05, "loss": 8.7529, "step": 2123000 }, { "epoch": 16.99, "learning_rate": 4.1506e-05, "loss": 8.7616, "step": 2123500 }, { "epoch": 16.99, "learning_rate": 4.1504e-05, "loss": 8.7447, "step": 2124000 }, { "epoch": 17.0, "learning_rate": 4.1502000000000003e-05, "loss": 8.7483, "step": 2124500 }, { "epoch": 17.0, "learning_rate": 4.15e-05, "loss": 8.7396, "step": 2125000 }, { "epoch": 17.0, "learning_rate": 4.1498e-05, "loss": 8.7392, "step": 2125500 }, { "epoch": 17.01, "learning_rate": 4.1496000000000004e-05, "loss": 8.7513, "step": 2126000 }, { "epoch": 17.01, "learning_rate": 4.1494e-05, "loss": 8.7611, "step": 2126500 }, { "epoch": 17.02, "learning_rate": 4.1492e-05, "loss": 8.7619, "step": 2127000 }, { "epoch": 17.02, "learning_rate": 4.1490000000000004e-05, "loss": 8.7699, "step": 2127500 }, { "epoch": 17.02, "learning_rate": 4.148800000000001e-05, "loss": 8.7486, "step": 2128000 }, { "epoch": 17.03, "learning_rate": 4.1486e-05, "loss": 8.7597, "step": 2128500 }, { "epoch": 17.03, "learning_rate": 4.1484e-05, "loss": 8.7676, "step": 2129000 }, { "epoch": 17.04, "learning_rate": 4.148200000000001e-05, "loss": 8.731, "step": 2129500 }, { "epoch": 17.04, "learning_rate": 4.148e-05, "loss": 8.7674, "step": 2130000 }, { "epoch": 17.04, "learning_rate": 4.1478e-05, "loss": 8.7485, "step": 2130500 }, { "epoch": 17.05, "learning_rate": 4.1476e-05, "loss": 8.7298, "step": 2131000 }, { "epoch": 17.05, "learning_rate": 4.1474e-05, "loss": 8.7679, "step": 2131500 }, { "epoch": 17.06, "learning_rate": 4.1472e-05, "loss": 8.7559, "step": 2132000 }, { "epoch": 17.06, "learning_rate": 4.147e-05, "loss": 8.7398, "step": 2132500 }, { "epoch": 17.06, "learning_rate": 4.1468000000000003e-05, "loss": 8.7278, "step": 2133000 }, { "epoch": 17.07, "learning_rate": 4.1466e-05, "loss": 8.7539, "step": 2133500 }, { "epoch": 17.07, "learning_rate": 4.1464e-05, "loss": 8.7538, "step": 2134000 }, { "epoch": 17.08, "learning_rate": 4.1462000000000004e-05, "loss": 8.7639, "step": 2134500 }, { "epoch": 17.08, "learning_rate": 4.1460000000000006e-05, "loss": 8.7789, "step": 2135000 }, { "epoch": 17.08, "learning_rate": 4.1458e-05, "loss": 8.7676, "step": 2135500 }, { "epoch": 17.09, "learning_rate": 4.1456e-05, "loss": 8.7492, "step": 2136000 }, { "epoch": 17.09, "learning_rate": 4.145400000000001e-05, "loss": 8.7428, "step": 2136500 }, { "epoch": 17.1, "learning_rate": 4.1452e-05, "loss": 8.7483, "step": 2137000 }, { "epoch": 17.1, "learning_rate": 4.145e-05, "loss": 8.7386, "step": 2137500 }, { "epoch": 17.1, "learning_rate": 4.1448e-05, "loss": 8.7531, "step": 2138000 }, { "epoch": 17.11, "learning_rate": 4.1446e-05, "loss": 8.7459, "step": 2138500 }, { "epoch": 17.11, "learning_rate": 4.1444e-05, "loss": 8.7513, "step": 2139000 }, { "epoch": 17.12, "learning_rate": 4.1442e-05, "loss": 8.7511, "step": 2139500 }, { "epoch": 17.12, "learning_rate": 4.144e-05, "loss": 8.7413, "step": 2140000 }, { "epoch": 17.12, "learning_rate": 4.1438000000000005e-05, "loss": 8.761, "step": 2140500 }, { "epoch": 17.13, "learning_rate": 4.1436e-05, "loss": 8.7486, "step": 2141000 }, { "epoch": 17.13, "learning_rate": 4.1434000000000003e-05, "loss": 8.7433, "step": 2141500 }, { "epoch": 17.14, "learning_rate": 4.1432000000000006e-05, "loss": 8.7561, "step": 2142000 }, { "epoch": 17.14, "learning_rate": 4.143e-05, "loss": 8.7532, "step": 2142500 }, { "epoch": 17.14, "learning_rate": 4.1428e-05, "loss": 8.7481, "step": 2143000 }, { "epoch": 17.15, "learning_rate": 4.1426000000000006e-05, "loss": 8.7327, "step": 2143500 }, { "epoch": 17.15, "learning_rate": 4.1424e-05, "loss": 8.7619, "step": 2144000 }, { "epoch": 17.16, "learning_rate": 4.1422e-05, "loss": 8.7434, "step": 2144500 }, { "epoch": 17.16, "learning_rate": 4.142000000000001e-05, "loss": 8.764, "step": 2145000 }, { "epoch": 17.16, "learning_rate": 4.1418e-05, "loss": 8.756, "step": 2145500 }, { "epoch": 17.17, "learning_rate": 4.1416e-05, "loss": 8.7592, "step": 2146000 }, { "epoch": 17.17, "learning_rate": 4.1414e-05, "loss": 8.7613, "step": 2146500 }, { "epoch": 17.18, "learning_rate": 4.1412e-05, "loss": 8.7713, "step": 2147000 }, { "epoch": 17.18, "learning_rate": 4.1410000000000005e-05, "loss": 8.7496, "step": 2147500 }, { "epoch": 17.18, "learning_rate": 4.1408e-05, "loss": 8.7422, "step": 2148000 }, { "epoch": 17.19, "learning_rate": 4.1406e-05, "loss": 8.7649, "step": 2148500 }, { "epoch": 17.19, "learning_rate": 4.1404000000000005e-05, "loss": 8.7539, "step": 2149000 }, { "epoch": 17.2, "learning_rate": 4.1402e-05, "loss": 8.7599, "step": 2149500 }, { "epoch": 17.2, "learning_rate": 4.14e-05, "loss": 8.7614, "step": 2150000 }, { "epoch": 17.2, "learning_rate": 4.1398000000000006e-05, "loss": 8.757, "step": 2150500 }, { "epoch": 17.21, "learning_rate": 4.1396e-05, "loss": 8.7497, "step": 2151000 }, { "epoch": 17.21, "learning_rate": 4.1394e-05, "loss": 8.7425, "step": 2151500 }, { "epoch": 17.22, "learning_rate": 4.1392000000000006e-05, "loss": 8.7514, "step": 2152000 }, { "epoch": 17.22, "learning_rate": 4.139e-05, "loss": 8.7522, "step": 2152500 }, { "epoch": 17.22, "learning_rate": 4.1388000000000004e-05, "loss": 8.746, "step": 2153000 }, { "epoch": 17.23, "learning_rate": 4.1386e-05, "loss": 8.7293, "step": 2153500 }, { "epoch": 17.23, "learning_rate": 4.1384e-05, "loss": 8.7383, "step": 2154000 }, { "epoch": 17.24, "learning_rate": 4.1382000000000005e-05, "loss": 8.7321, "step": 2154500 }, { "epoch": 17.24, "learning_rate": 4.138e-05, "loss": 8.7462, "step": 2155000 }, { "epoch": 17.24, "learning_rate": 4.1378e-05, "loss": 8.7587, "step": 2155500 }, { "epoch": 17.25, "learning_rate": 4.1376000000000005e-05, "loss": 8.7471, "step": 2156000 }, { "epoch": 17.25, "learning_rate": 4.1374e-05, "loss": 8.7557, "step": 2156500 }, { "epoch": 17.26, "learning_rate": 4.1372e-05, "loss": 8.7409, "step": 2157000 }, { "epoch": 17.26, "learning_rate": 4.1370000000000005e-05, "loss": 8.7413, "step": 2157500 }, { "epoch": 17.26, "learning_rate": 4.1368e-05, "loss": 8.7308, "step": 2158000 }, { "epoch": 17.27, "learning_rate": 4.1366000000000003e-05, "loss": 8.7529, "step": 2158500 }, { "epoch": 17.27, "learning_rate": 4.1364000000000006e-05, "loss": 8.7655, "step": 2159000 }, { "epoch": 17.28, "learning_rate": 4.1362e-05, "loss": 8.7497, "step": 2159500 }, { "epoch": 17.28, "learning_rate": 4.1360000000000004e-05, "loss": 8.7527, "step": 2160000 }, { "epoch": 17.28, "learning_rate": 4.1358e-05, "loss": 8.75, "step": 2160500 }, { "epoch": 17.29, "learning_rate": 4.1356e-05, "loss": 8.7464, "step": 2161000 }, { "epoch": 17.29, "learning_rate": 4.1354000000000004e-05, "loss": 8.7635, "step": 2161500 }, { "epoch": 17.3, "learning_rate": 4.1352e-05, "loss": 8.7722, "step": 2162000 }, { "epoch": 17.3, "learning_rate": 4.135e-05, "loss": 8.7528, "step": 2162500 }, { "epoch": 17.3, "learning_rate": 4.1348000000000005e-05, "loss": 8.7665, "step": 2163000 }, { "epoch": 17.31, "learning_rate": 4.1346e-05, "loss": 8.7643, "step": 2163500 }, { "epoch": 17.31, "learning_rate": 4.1344e-05, "loss": 8.7468, "step": 2164000 }, { "epoch": 17.32, "learning_rate": 4.1342000000000005e-05, "loss": 8.7548, "step": 2164500 }, { "epoch": 17.32, "learning_rate": 4.134e-05, "loss": 8.7358, "step": 2165000 }, { "epoch": 17.32, "learning_rate": 4.1338e-05, "loss": 8.7475, "step": 2165500 }, { "epoch": 17.33, "learning_rate": 4.1336000000000005e-05, "loss": 8.7487, "step": 2166000 }, { "epoch": 17.33, "learning_rate": 4.1334e-05, "loss": 8.7386, "step": 2166500 }, { "epoch": 17.34, "learning_rate": 4.1332000000000003e-05, "loss": 8.761, "step": 2167000 }, { "epoch": 17.34, "learning_rate": 4.133e-05, "loss": 8.7576, "step": 2167500 }, { "epoch": 17.34, "learning_rate": 4.1328e-05, "loss": 8.7803, "step": 2168000 }, { "epoch": 17.35, "learning_rate": 4.1326000000000004e-05, "loss": 8.7521, "step": 2168500 }, { "epoch": 17.35, "learning_rate": 4.1324e-05, "loss": 8.746, "step": 2169000 }, { "epoch": 17.36, "learning_rate": 4.1322e-05, "loss": 8.7227, "step": 2169500 }, { "epoch": 17.36, "learning_rate": 4.1320000000000004e-05, "loss": 8.7558, "step": 2170000 }, { "epoch": 17.36, "learning_rate": 4.1318e-05, "loss": 8.7494, "step": 2170500 }, { "epoch": 17.37, "learning_rate": 4.1316e-05, "loss": 8.7446, "step": 2171000 }, { "epoch": 17.37, "learning_rate": 4.1314000000000005e-05, "loss": 8.7353, "step": 2171500 }, { "epoch": 17.38, "learning_rate": 4.1312e-05, "loss": 8.7718, "step": 2172000 }, { "epoch": 17.38, "learning_rate": 4.131e-05, "loss": 8.7369, "step": 2172500 }, { "epoch": 17.38, "learning_rate": 4.1308000000000005e-05, "loss": 8.7343, "step": 2173000 }, { "epoch": 17.39, "learning_rate": 4.1306e-05, "loss": 8.77, "step": 2173500 }, { "epoch": 17.39, "learning_rate": 4.1304e-05, "loss": 8.7412, "step": 2174000 }, { "epoch": 17.4, "learning_rate": 4.1302e-05, "loss": 8.7683, "step": 2174500 }, { "epoch": 17.4, "learning_rate": 4.13e-05, "loss": 8.7587, "step": 2175000 }, { "epoch": 17.4, "learning_rate": 4.1298e-05, "loss": 8.7617, "step": 2175500 }, { "epoch": 17.41, "learning_rate": 4.1296e-05, "loss": 8.7626, "step": 2176000 }, { "epoch": 17.41, "learning_rate": 4.1294e-05, "loss": 8.7566, "step": 2176500 }, { "epoch": 17.42, "learning_rate": 4.1292000000000004e-05, "loss": 8.7591, "step": 2177000 }, { "epoch": 17.42, "learning_rate": 4.129e-05, "loss": 8.7358, "step": 2177500 }, { "epoch": 17.42, "learning_rate": 4.1288e-05, "loss": 8.7497, "step": 2178000 }, { "epoch": 17.43, "learning_rate": 4.1286000000000004e-05, "loss": 8.7429, "step": 2178500 }, { "epoch": 17.43, "learning_rate": 4.1284e-05, "loss": 8.7421, "step": 2179000 }, { "epoch": 17.44, "learning_rate": 4.1282e-05, "loss": 8.7547, "step": 2179500 }, { "epoch": 17.44, "learning_rate": 4.1280000000000005e-05, "loss": 8.7733, "step": 2180000 }, { "epoch": 17.44, "learning_rate": 4.1278e-05, "loss": 8.7251, "step": 2180500 }, { "epoch": 17.45, "learning_rate": 4.1276e-05, "loss": 8.7632, "step": 2181000 }, { "epoch": 17.45, "learning_rate": 4.1274e-05, "loss": 8.7379, "step": 2181500 }, { "epoch": 17.46, "learning_rate": 4.1272e-05, "loss": 8.7683, "step": 2182000 }, { "epoch": 17.46, "learning_rate": 4.127e-05, "loss": 8.7495, "step": 2182500 }, { "epoch": 17.46, "learning_rate": 4.1268e-05, "loss": 8.7531, "step": 2183000 }, { "epoch": 17.47, "learning_rate": 4.126600000000001e-05, "loss": 8.7432, "step": 2183500 }, { "epoch": 17.47, "learning_rate": 4.1264e-05, "loss": 8.7459, "step": 2184000 }, { "epoch": 17.48, "learning_rate": 4.1262e-05, "loss": 8.757, "step": 2184500 }, { "epoch": 17.48, "learning_rate": 4.126e-05, "loss": 8.7456, "step": 2185000 }, { "epoch": 17.48, "learning_rate": 4.1258000000000004e-05, "loss": 8.753, "step": 2185500 }, { "epoch": 17.49, "learning_rate": 4.1256e-05, "loss": 8.7451, "step": 2186000 }, { "epoch": 17.49, "learning_rate": 4.1254e-05, "loss": 8.7453, "step": 2186500 }, { "epoch": 17.5, "learning_rate": 4.1252000000000004e-05, "loss": 8.7526, "step": 2187000 }, { "epoch": 17.5, "learning_rate": 4.125e-05, "loss": 8.7646, "step": 2187500 }, { "epoch": 17.5, "learning_rate": 4.1248e-05, "loss": 8.7458, "step": 2188000 }, { "epoch": 17.51, "learning_rate": 4.1246000000000005e-05, "loss": 8.7395, "step": 2188500 }, { "epoch": 17.51, "learning_rate": 4.1244e-05, "loss": 8.7455, "step": 2189000 }, { "epoch": 17.52, "learning_rate": 4.1242e-05, "loss": 8.7654, "step": 2189500 }, { "epoch": 17.52, "learning_rate": 4.124e-05, "loss": 8.7434, "step": 2190000 }, { "epoch": 17.52, "learning_rate": 4.123800000000001e-05, "loss": 8.7486, "step": 2190500 }, { "epoch": 17.53, "learning_rate": 4.1236e-05, "loss": 8.77, "step": 2191000 }, { "epoch": 17.53, "learning_rate": 4.1234e-05, "loss": 8.7459, "step": 2191500 }, { "epoch": 17.54, "learning_rate": 4.1232e-05, "loss": 8.7483, "step": 2192000 }, { "epoch": 17.54, "learning_rate": 4.123e-05, "loss": 8.7639, "step": 2192500 }, { "epoch": 17.54, "learning_rate": 4.1228e-05, "loss": 8.7556, "step": 2193000 }, { "epoch": 17.55, "learning_rate": 4.1226e-05, "loss": 8.7451, "step": 2193500 }, { "epoch": 17.55, "learning_rate": 4.1224000000000004e-05, "loss": 8.7405, "step": 2194000 }, { "epoch": 17.56, "learning_rate": 4.1222e-05, "loss": 8.7466, "step": 2194500 }, { "epoch": 17.56, "learning_rate": 4.122e-05, "loss": 8.7512, "step": 2195000 }, { "epoch": 17.56, "learning_rate": 4.1218000000000004e-05, "loss": 8.7596, "step": 2195500 }, { "epoch": 17.57, "learning_rate": 4.1216000000000007e-05, "loss": 8.7422, "step": 2196000 }, { "epoch": 17.57, "learning_rate": 4.1214e-05, "loss": 8.752, "step": 2196500 }, { "epoch": 17.58, "learning_rate": 4.1212e-05, "loss": 8.7513, "step": 2197000 }, { "epoch": 17.58, "learning_rate": 4.121000000000001e-05, "loss": 8.7754, "step": 2197500 }, { "epoch": 17.58, "learning_rate": 4.1208e-05, "loss": 8.7413, "step": 2198000 }, { "epoch": 17.59, "learning_rate": 4.1206e-05, "loss": 8.7396, "step": 2198500 }, { "epoch": 17.59, "learning_rate": 4.1204e-05, "loss": 8.7519, "step": 2199000 }, { "epoch": 17.6, "learning_rate": 4.1202e-05, "loss": 8.7578, "step": 2199500 }, { "epoch": 17.6, "learning_rate": 4.12e-05, "loss": 8.7426, "step": 2200000 }, { "epoch": 17.6, "learning_rate": 4.1198e-05, "loss": 8.732, "step": 2200500 }, { "epoch": 17.61, "learning_rate": 4.1196e-05, "loss": 8.7629, "step": 2201000 }, { "epoch": 17.61, "learning_rate": 4.1194e-05, "loss": 8.7584, "step": 2201500 }, { "epoch": 17.62, "learning_rate": 4.1192e-05, "loss": 8.7607, "step": 2202000 }, { "epoch": 17.62, "learning_rate": 4.1190000000000004e-05, "loss": 8.7514, "step": 2202500 }, { "epoch": 17.62, "learning_rate": 4.1188000000000006e-05, "loss": 8.7546, "step": 2203000 }, { "epoch": 17.63, "learning_rate": 4.1186e-05, "loss": 8.7497, "step": 2203500 }, { "epoch": 17.63, "learning_rate": 4.1184e-05, "loss": 8.7586, "step": 2204000 }, { "epoch": 17.64, "learning_rate": 4.1182000000000007e-05, "loss": 8.7386, "step": 2204500 }, { "epoch": 17.64, "learning_rate": 4.118e-05, "loss": 8.7645, "step": 2205000 }, { "epoch": 17.64, "learning_rate": 4.1178e-05, "loss": 8.7423, "step": 2205500 }, { "epoch": 17.65, "learning_rate": 4.1176e-05, "loss": 8.7423, "step": 2206000 }, { "epoch": 17.65, "learning_rate": 4.1174e-05, "loss": 8.7431, "step": 2206500 }, { "epoch": 17.66, "learning_rate": 4.1172e-05, "loss": 8.7319, "step": 2207000 }, { "epoch": 17.66, "learning_rate": 4.117e-05, "loss": 8.7472, "step": 2207500 }, { "epoch": 17.66, "learning_rate": 4.1168e-05, "loss": 8.7616, "step": 2208000 }, { "epoch": 17.67, "learning_rate": 4.1166000000000005e-05, "loss": 8.751, "step": 2208500 }, { "epoch": 17.67, "learning_rate": 4.1164e-05, "loss": 8.7399, "step": 2209000 }, { "epoch": 17.68, "learning_rate": 4.1162e-05, "loss": 8.7719, "step": 2209500 }, { "epoch": 17.68, "learning_rate": 4.1160000000000006e-05, "loss": 8.7497, "step": 2210000 }, { "epoch": 17.68, "learning_rate": 4.1158e-05, "loss": 8.7417, "step": 2210500 }, { "epoch": 17.69, "learning_rate": 4.1156e-05, "loss": 8.7601, "step": 2211000 }, { "epoch": 17.69, "learning_rate": 4.1154000000000006e-05, "loss": 8.7611, "step": 2211500 }, { "epoch": 17.7, "learning_rate": 4.1152e-05, "loss": 8.7676, "step": 2212000 }, { "epoch": 17.7, "learning_rate": 4.115e-05, "loss": 8.7491, "step": 2212500 }, { "epoch": 17.7, "learning_rate": 4.1148000000000007e-05, "loss": 8.7526, "step": 2213000 }, { "epoch": 17.71, "learning_rate": 4.1146e-05, "loss": 8.7613, "step": 2213500 }, { "epoch": 17.71, "learning_rate": 4.1144e-05, "loss": 8.7431, "step": 2214000 }, { "epoch": 17.72, "learning_rate": 4.1142e-05, "loss": 8.7733, "step": 2214500 }, { "epoch": 17.72, "learning_rate": 4.114e-05, "loss": 8.7616, "step": 2215000 }, { "epoch": 17.72, "learning_rate": 4.1138000000000005e-05, "loss": 8.7539, "step": 2215500 }, { "epoch": 17.73, "learning_rate": 4.1136e-05, "loss": 8.7788, "step": 2216000 }, { "epoch": 17.73, "learning_rate": 4.1134e-05, "loss": 8.7616, "step": 2216500 }, { "epoch": 17.74, "learning_rate": 4.1132000000000005e-05, "loss": 8.7679, "step": 2217000 }, { "epoch": 17.74, "learning_rate": 4.113e-05, "loss": 8.7593, "step": 2217500 }, { "epoch": 17.74, "learning_rate": 4.1127999999999997e-05, "loss": 8.7482, "step": 2218000 }, { "epoch": 17.75, "learning_rate": 4.1126000000000006e-05, "loss": 8.7599, "step": 2218500 }, { "epoch": 17.75, "learning_rate": 4.1124e-05, "loss": 8.7325, "step": 2219000 }, { "epoch": 17.76, "learning_rate": 4.1122e-05, "loss": 8.7577, "step": 2219500 }, { "epoch": 17.76, "learning_rate": 4.1120000000000006e-05, "loss": 8.7497, "step": 2220000 }, { "epoch": 17.76, "learning_rate": 4.1118e-05, "loss": 8.7649, "step": 2220500 }, { "epoch": 17.77, "learning_rate": 4.1116000000000004e-05, "loss": 8.7483, "step": 2221000 }, { "epoch": 17.77, "learning_rate": 4.1114e-05, "loss": 8.7556, "step": 2221500 }, { "epoch": 17.78, "learning_rate": 4.1112e-05, "loss": 8.7467, "step": 2222000 }, { "epoch": 17.78, "learning_rate": 4.1110000000000005e-05, "loss": 8.7596, "step": 2222500 }, { "epoch": 17.78, "learning_rate": 4.1108e-05, "loss": 8.7232, "step": 2223000 }, { "epoch": 17.79, "learning_rate": 4.1106e-05, "loss": 8.7352, "step": 2223500 }, { "epoch": 17.79, "learning_rate": 4.1104000000000005e-05, "loss": 8.7371, "step": 2224000 }, { "epoch": 17.8, "learning_rate": 4.1102e-05, "loss": 8.7604, "step": 2224500 }, { "epoch": 17.8, "learning_rate": 4.11e-05, "loss": 8.7343, "step": 2225000 }, { "epoch": 17.8, "learning_rate": 4.1098000000000005e-05, "loss": 8.7653, "step": 2225500 }, { "epoch": 17.81, "learning_rate": 4.1096e-05, "loss": 8.739, "step": 2226000 }, { "epoch": 17.81, "learning_rate": 4.1094e-05, "loss": 8.7626, "step": 2226500 }, { "epoch": 17.82, "learning_rate": 4.1092000000000006e-05, "loss": 8.7527, "step": 2227000 }, { "epoch": 17.82, "learning_rate": 4.109e-05, "loss": 8.76, "step": 2227500 }, { "epoch": 17.82, "learning_rate": 4.1088000000000004e-05, "loss": 8.7416, "step": 2228000 }, { "epoch": 17.83, "learning_rate": 4.1086e-05, "loss": 8.7662, "step": 2228500 }, { "epoch": 17.83, "learning_rate": 4.1084e-05, "loss": 8.7406, "step": 2229000 }, { "epoch": 17.84, "learning_rate": 4.1082000000000004e-05, "loss": 8.7587, "step": 2229500 }, { "epoch": 17.84, "learning_rate": 4.108e-05, "loss": 8.7452, "step": 2230000 }, { "epoch": 17.84, "learning_rate": 4.1078e-05, "loss": 8.7632, "step": 2230500 }, { "epoch": 17.85, "learning_rate": 4.1076000000000004e-05, "loss": 8.7516, "step": 2231000 }, { "epoch": 17.85, "learning_rate": 4.1074e-05, "loss": 8.74, "step": 2231500 }, { "epoch": 17.86, "learning_rate": 4.1072e-05, "loss": 8.7381, "step": 2232000 }, { "epoch": 17.86, "learning_rate": 4.1070000000000005e-05, "loss": 8.756, "step": 2232500 }, { "epoch": 17.86, "learning_rate": 4.1068e-05, "loss": 8.7566, "step": 2233000 }, { "epoch": 17.87, "learning_rate": 4.1066e-05, "loss": 8.737, "step": 2233500 }, { "epoch": 17.87, "learning_rate": 4.1064000000000005e-05, "loss": 8.7573, "step": 2234000 }, { "epoch": 17.88, "learning_rate": 4.1062e-05, "loss": 8.7623, "step": 2234500 }, { "epoch": 17.88, "learning_rate": 4.106e-05, "loss": 8.7389, "step": 2235000 }, { "epoch": 17.88, "learning_rate": 4.1058e-05, "loss": 8.753, "step": 2235500 }, { "epoch": 17.89, "learning_rate": 4.1056e-05, "loss": 8.7387, "step": 2236000 }, { "epoch": 17.89, "learning_rate": 4.1054000000000004e-05, "loss": 8.7589, "step": 2236500 }, { "epoch": 17.9, "learning_rate": 4.1052e-05, "loss": 8.7581, "step": 2237000 }, { "epoch": 17.9, "learning_rate": 4.105e-05, "loss": 8.7541, "step": 2237500 }, { "epoch": 17.9, "learning_rate": 4.1048000000000004e-05, "loss": 8.7501, "step": 2238000 }, { "epoch": 17.91, "learning_rate": 4.1046e-05, "loss": 8.7421, "step": 2238500 }, { "epoch": 17.91, "learning_rate": 4.1044e-05, "loss": 8.761, "step": 2239000 }, { "epoch": 17.92, "learning_rate": 4.1042000000000004e-05, "loss": 8.7685, "step": 2239500 }, { "epoch": 17.92, "learning_rate": 4.104e-05, "loss": 8.7564, "step": 2240000 }, { "epoch": 17.92, "learning_rate": 4.1038e-05, "loss": 8.7389, "step": 2240500 }, { "epoch": 17.93, "learning_rate": 4.1036000000000005e-05, "loss": 8.7647, "step": 2241000 }, { "epoch": 17.93, "learning_rate": 4.1034e-05, "loss": 8.7477, "step": 2241500 }, { "epoch": 17.94, "learning_rate": 4.1032e-05, "loss": 8.7442, "step": 2242000 }, { "epoch": 17.94, "learning_rate": 4.103e-05, "loss": 8.7501, "step": 2242500 }, { "epoch": 17.94, "learning_rate": 4.1028e-05, "loss": 8.7711, "step": 2243000 }, { "epoch": 17.95, "learning_rate": 4.1026e-05, "loss": 8.7403, "step": 2243500 }, { "epoch": 17.95, "learning_rate": 4.1024e-05, "loss": 8.7439, "step": 2244000 }, { "epoch": 17.96, "learning_rate": 4.1022e-05, "loss": 8.7552, "step": 2244500 }, { "epoch": 17.96, "learning_rate": 4.1020000000000004e-05, "loss": 8.7393, "step": 2245000 }, { "epoch": 17.96, "learning_rate": 4.1018e-05, "loss": 8.7533, "step": 2245500 }, { "epoch": 17.97, "learning_rate": 4.1016e-05, "loss": 8.7575, "step": 2246000 }, { "epoch": 17.97, "learning_rate": 4.1014000000000004e-05, "loss": 8.7243, "step": 2246500 }, { "epoch": 17.98, "learning_rate": 4.1012e-05, "loss": 8.7549, "step": 2247000 }, { "epoch": 17.98, "learning_rate": 4.101e-05, "loss": 8.7499, "step": 2247500 }, { "epoch": 17.98, "learning_rate": 4.1008000000000004e-05, "loss": 8.7781, "step": 2248000 }, { "epoch": 17.99, "learning_rate": 4.1006e-05, "loss": 8.7531, "step": 2248500 }, { "epoch": 17.99, "learning_rate": 4.1004e-05, "loss": 8.7644, "step": 2249000 }, { "epoch": 18.0, "learning_rate": 4.1002000000000005e-05, "loss": 8.7326, "step": 2249500 }, { "epoch": 18.0, "learning_rate": 4.1e-05, "loss": 8.7651, "step": 2250000 }, { "epoch": 18.0, "learning_rate": 4.0998e-05, "loss": 8.7542, "step": 2250500 }, { "epoch": 18.01, "learning_rate": 4.0996e-05, "loss": 8.7449, "step": 2251000 }, { "epoch": 18.01, "learning_rate": 4.099400000000001e-05, "loss": 8.735, "step": 2251500 }, { "epoch": 18.02, "learning_rate": 4.0992e-05, "loss": 8.732, "step": 2252000 }, { "epoch": 18.02, "learning_rate": 4.099e-05, "loss": 8.7457, "step": 2252500 }, { "epoch": 18.02, "learning_rate": 4.0988e-05, "loss": 8.7577, "step": 2253000 }, { "epoch": 18.03, "learning_rate": 4.0986000000000004e-05, "loss": 8.759, "step": 2253500 }, { "epoch": 18.03, "learning_rate": 4.0984e-05, "loss": 8.7445, "step": 2254000 }, { "epoch": 18.04, "learning_rate": 4.0982e-05, "loss": 8.7526, "step": 2254500 }, { "epoch": 18.04, "learning_rate": 4.0980000000000004e-05, "loss": 8.7558, "step": 2255000 }, { "epoch": 18.04, "learning_rate": 4.0978e-05, "loss": 8.739, "step": 2255500 }, { "epoch": 18.05, "learning_rate": 4.0976e-05, "loss": 8.7545, "step": 2256000 }, { "epoch": 18.05, "learning_rate": 4.0974000000000004e-05, "loss": 8.7665, "step": 2256500 }, { "epoch": 18.06, "learning_rate": 4.0972e-05, "loss": 8.7514, "step": 2257000 }, { "epoch": 18.06, "learning_rate": 4.097e-05, "loss": 8.7364, "step": 2257500 }, { "epoch": 18.06, "learning_rate": 4.0968e-05, "loss": 8.7448, "step": 2258000 }, { "epoch": 18.07, "learning_rate": 4.096600000000001e-05, "loss": 8.7452, "step": 2258500 }, { "epoch": 18.07, "learning_rate": 4.0964e-05, "loss": 8.7593, "step": 2259000 }, { "epoch": 18.08, "learning_rate": 4.0962e-05, "loss": 8.7547, "step": 2259500 }, { "epoch": 18.08, "learning_rate": 4.096e-05, "loss": 8.743, "step": 2260000 }, { "epoch": 18.08, "learning_rate": 4.0958e-05, "loss": 8.7538, "step": 2260500 }, { "epoch": 18.09, "learning_rate": 4.0956e-05, "loss": 8.7449, "step": 2261000 }, { "epoch": 18.09, "learning_rate": 4.0954e-05, "loss": 8.7529, "step": 2261500 }, { "epoch": 18.1, "learning_rate": 4.0952000000000004e-05, "loss": 8.7431, "step": 2262000 }, { "epoch": 18.1, "learning_rate": 4.095e-05, "loss": 8.7556, "step": 2262500 }, { "epoch": 18.1, "learning_rate": 4.0948e-05, "loss": 8.7457, "step": 2263000 }, { "epoch": 18.11, "learning_rate": 4.0946000000000004e-05, "loss": 8.7599, "step": 2263500 }, { "epoch": 18.11, "learning_rate": 4.0944000000000006e-05, "loss": 8.7591, "step": 2264000 }, { "epoch": 18.12, "learning_rate": 4.0942e-05, "loss": 8.7589, "step": 2264500 }, { "epoch": 18.12, "learning_rate": 4.094e-05, "loss": 8.7332, "step": 2265000 }, { "epoch": 18.12, "learning_rate": 4.093800000000001e-05, "loss": 8.7482, "step": 2265500 }, { "epoch": 18.13, "learning_rate": 4.0936e-05, "loss": 8.7531, "step": 2266000 }, { "epoch": 18.13, "learning_rate": 4.0934e-05, "loss": 8.7541, "step": 2266500 }, { "epoch": 18.14, "learning_rate": 4.0932e-05, "loss": 8.7677, "step": 2267000 }, { "epoch": 18.14, "learning_rate": 4.093e-05, "loss": 8.7695, "step": 2267500 }, { "epoch": 18.14, "learning_rate": 4.0928e-05, "loss": 8.7556, "step": 2268000 }, { "epoch": 18.15, "learning_rate": 4.0926e-05, "loss": 8.7491, "step": 2268500 }, { "epoch": 18.15, "learning_rate": 4.0924e-05, "loss": 8.7439, "step": 2269000 }, { "epoch": 18.16, "learning_rate": 4.0922e-05, "loss": 8.7603, "step": 2269500 }, { "epoch": 18.16, "learning_rate": 4.092e-05, "loss": 8.757, "step": 2270000 }, { "epoch": 18.16, "learning_rate": 4.0918000000000004e-05, "loss": 8.752, "step": 2270500 }, { "epoch": 18.17, "learning_rate": 4.0916000000000006e-05, "loss": 8.741, "step": 2271000 }, { "epoch": 18.17, "learning_rate": 4.0914e-05, "loss": 8.738, "step": 2271500 }, { "epoch": 18.18, "learning_rate": 4.0912e-05, "loss": 8.7624, "step": 2272000 }, { "epoch": 18.18, "learning_rate": 4.0910000000000006e-05, "loss": 8.7561, "step": 2272500 }, { "epoch": 18.18, "learning_rate": 4.0908e-05, "loss": 8.7531, "step": 2273000 }, { "epoch": 18.19, "learning_rate": 4.0906e-05, "loss": 8.7591, "step": 2273500 }, { "epoch": 18.19, "learning_rate": 4.090400000000001e-05, "loss": 8.7658, "step": 2274000 }, { "epoch": 18.2, "learning_rate": 4.0902e-05, "loss": 8.724, "step": 2274500 }, { "epoch": 18.2, "learning_rate": 4.09e-05, "loss": 8.7611, "step": 2275000 }, { "epoch": 18.2, "learning_rate": 4.0898e-05, "loss": 8.7662, "step": 2275500 }, { "epoch": 18.21, "learning_rate": 4.0896e-05, "loss": 8.7737, "step": 2276000 }, { "epoch": 18.21, "learning_rate": 4.0894000000000005e-05, "loss": 8.7387, "step": 2276500 }, { "epoch": 18.22, "learning_rate": 4.0892e-05, "loss": 8.7455, "step": 2277000 }, { "epoch": 18.22, "learning_rate": 4.089e-05, "loss": 8.7601, "step": 2277500 }, { "epoch": 18.22, "learning_rate": 4.0888000000000006e-05, "loss": 8.7484, "step": 2278000 }, { "epoch": 18.23, "learning_rate": 4.0886e-05, "loss": 8.7589, "step": 2278500 }, { "epoch": 18.23, "learning_rate": 4.0884e-05, "loss": 8.7627, "step": 2279000 }, { "epoch": 18.24, "learning_rate": 4.0882000000000006e-05, "loss": 8.7683, "step": 2279500 }, { "epoch": 18.24, "learning_rate": 4.088e-05, "loss": 8.7545, "step": 2280000 }, { "epoch": 18.24, "learning_rate": 4.0878e-05, "loss": 8.7585, "step": 2280500 }, { "epoch": 18.25, "learning_rate": 4.0876000000000006e-05, "loss": 8.7599, "step": 2281000 }, { "epoch": 18.25, "learning_rate": 4.0874e-05, "loss": 8.7489, "step": 2281500 }, { "epoch": 18.26, "learning_rate": 4.0872000000000004e-05, "loss": 8.7416, "step": 2282000 }, { "epoch": 18.26, "learning_rate": 4.087e-05, "loss": 8.7228, "step": 2282500 }, { "epoch": 18.26, "learning_rate": 4.0868e-05, "loss": 8.776, "step": 2283000 }, { "epoch": 18.27, "learning_rate": 4.0866000000000005e-05, "loss": 8.7368, "step": 2283500 }, { "epoch": 18.27, "learning_rate": 4.0864e-05, "loss": 8.7374, "step": 2284000 }, { "epoch": 18.28, "learning_rate": 4.0862e-05, "loss": 8.7608, "step": 2284500 }, { "epoch": 18.28, "learning_rate": 4.0860000000000005e-05, "loss": 8.734, "step": 2285000 }, { "epoch": 18.28, "learning_rate": 4.0858e-05, "loss": 8.7669, "step": 2285500 }, { "epoch": 18.29, "learning_rate": 4.0856e-05, "loss": 8.7504, "step": 2286000 }, { "epoch": 18.29, "learning_rate": 4.0854000000000006e-05, "loss": 8.7602, "step": 2286500 }, { "epoch": 18.3, "learning_rate": 4.0852e-05, "loss": 8.7587, "step": 2287000 }, { "epoch": 18.3, "learning_rate": 4.085e-05, "loss": 8.7531, "step": 2287500 }, { "epoch": 18.3, "learning_rate": 4.0848000000000006e-05, "loss": 8.7591, "step": 2288000 }, { "epoch": 18.31, "learning_rate": 4.0846e-05, "loss": 8.7294, "step": 2288500 }, { "epoch": 18.31, "learning_rate": 4.0844000000000004e-05, "loss": 8.7497, "step": 2289000 }, { "epoch": 18.32, "learning_rate": 4.0842e-05, "loss": 8.7565, "step": 2289500 }, { "epoch": 18.32, "learning_rate": 4.084e-05, "loss": 8.7615, "step": 2290000 }, { "epoch": 18.32, "learning_rate": 4.0838000000000004e-05, "loss": 8.7526, "step": 2290500 }, { "epoch": 18.33, "learning_rate": 4.0836e-05, "loss": 8.7666, "step": 2291000 }, { "epoch": 18.33, "learning_rate": 4.0834e-05, "loss": 8.76, "step": 2291500 }, { "epoch": 18.34, "learning_rate": 4.0832000000000005e-05, "loss": 8.7365, "step": 2292000 }, { "epoch": 18.34, "learning_rate": 4.083e-05, "loss": 8.7415, "step": 2292500 }, { "epoch": 18.34, "learning_rate": 4.0828e-05, "loss": 8.7276, "step": 2293000 }, { "epoch": 18.35, "learning_rate": 4.0826000000000005e-05, "loss": 8.7608, "step": 2293500 }, { "epoch": 18.35, "learning_rate": 4.0824e-05, "loss": 8.7523, "step": 2294000 }, { "epoch": 18.36, "learning_rate": 4.0822e-05, "loss": 8.7569, "step": 2294500 }, { "epoch": 18.36, "learning_rate": 4.0820000000000006e-05, "loss": 8.7397, "step": 2295000 }, { "epoch": 18.36, "learning_rate": 4.0818e-05, "loss": 8.7574, "step": 2295500 }, { "epoch": 18.37, "learning_rate": 4.0816000000000004e-05, "loss": 8.7569, "step": 2296000 }, { "epoch": 18.37, "learning_rate": 4.0814e-05, "loss": 8.7561, "step": 2296500 }, { "epoch": 18.38, "learning_rate": 4.0812e-05, "loss": 8.7581, "step": 2297000 }, { "epoch": 18.38, "learning_rate": 4.0810000000000004e-05, "loss": 8.7496, "step": 2297500 }, { "epoch": 18.38, "learning_rate": 4.0808e-05, "loss": 8.7576, "step": 2298000 }, { "epoch": 18.39, "learning_rate": 4.0806e-05, "loss": 8.7483, "step": 2298500 }, { "epoch": 18.39, "learning_rate": 4.0804000000000004e-05, "loss": 8.7602, "step": 2299000 }, { "epoch": 18.4, "learning_rate": 4.0802e-05, "loss": 8.7554, "step": 2299500 }, { "epoch": 18.4, "learning_rate": 4.08e-05, "loss": 8.7527, "step": 2300000 }, { "epoch": 18.4, "learning_rate": 4.0798000000000005e-05, "loss": 8.7314, "step": 2300500 }, { "epoch": 18.41, "learning_rate": 4.0796e-05, "loss": 8.7517, "step": 2301000 }, { "epoch": 18.41, "learning_rate": 4.0794e-05, "loss": 8.7367, "step": 2301500 }, { "epoch": 18.42, "learning_rate": 4.0792000000000005e-05, "loss": 8.7304, "step": 2302000 }, { "epoch": 18.42, "learning_rate": 4.079e-05, "loss": 8.7578, "step": 2302500 }, { "epoch": 18.42, "learning_rate": 4.0788e-05, "loss": 8.7436, "step": 2303000 }, { "epoch": 18.43, "learning_rate": 4.0786e-05, "loss": 8.7407, "step": 2303500 }, { "epoch": 18.43, "learning_rate": 4.0784e-05, "loss": 8.7603, "step": 2304000 }, { "epoch": 18.44, "learning_rate": 4.0782000000000004e-05, "loss": 8.7687, "step": 2304500 }, { "epoch": 18.44, "learning_rate": 4.078e-05, "loss": 8.7501, "step": 2305000 }, { "epoch": 18.44, "learning_rate": 4.0778e-05, "loss": 8.7622, "step": 2305500 }, { "epoch": 18.45, "learning_rate": 4.0776000000000004e-05, "loss": 8.761, "step": 2306000 }, { "epoch": 18.45, "learning_rate": 4.0774e-05, "loss": 8.743, "step": 2306500 }, { "epoch": 18.46, "learning_rate": 4.0772e-05, "loss": 8.748, "step": 2307000 }, { "epoch": 18.46, "learning_rate": 4.0770000000000004e-05, "loss": 8.7548, "step": 2307500 }, { "epoch": 18.46, "learning_rate": 4.0768e-05, "loss": 8.761, "step": 2308000 }, { "epoch": 18.47, "learning_rate": 4.0766e-05, "loss": 8.7527, "step": 2308500 }, { "epoch": 18.47, "learning_rate": 4.0764000000000005e-05, "loss": 8.766, "step": 2309000 }, { "epoch": 18.48, "learning_rate": 4.0762e-05, "loss": 8.7638, "step": 2309500 }, { "epoch": 18.48, "learning_rate": 4.076e-05, "loss": 8.7628, "step": 2310000 }, { "epoch": 18.48, "learning_rate": 4.0758e-05, "loss": 8.7547, "step": 2310500 }, { "epoch": 18.49, "learning_rate": 4.0756e-05, "loss": 8.7442, "step": 2311000 }, { "epoch": 18.49, "learning_rate": 4.0754e-05, "loss": 8.749, "step": 2311500 }, { "epoch": 18.5, "learning_rate": 4.0752e-05, "loss": 8.7483, "step": 2312000 }, { "epoch": 18.5, "learning_rate": 4.075e-05, "loss": 8.7581, "step": 2312500 }, { "epoch": 18.5, "learning_rate": 4.0748000000000004e-05, "loss": 8.756, "step": 2313000 }, { "epoch": 18.51, "learning_rate": 4.0746e-05, "loss": 8.7512, "step": 2313500 }, { "epoch": 18.51, "learning_rate": 4.0744e-05, "loss": 8.7392, "step": 2314000 }, { "epoch": 18.52, "learning_rate": 4.0742000000000004e-05, "loss": 8.7229, "step": 2314500 }, { "epoch": 18.52, "learning_rate": 4.074e-05, "loss": 8.7554, "step": 2315000 }, { "epoch": 18.52, "learning_rate": 4.0738e-05, "loss": 8.7296, "step": 2315500 }, { "epoch": 18.53, "learning_rate": 4.0736000000000004e-05, "loss": 8.7773, "step": 2316000 }, { "epoch": 18.53, "learning_rate": 4.0734e-05, "loss": 8.7578, "step": 2316500 }, { "epoch": 18.54, "learning_rate": 4.0732e-05, "loss": 8.751, "step": 2317000 }, { "epoch": 18.54, "learning_rate": 4.0730000000000005e-05, "loss": 8.7626, "step": 2317500 }, { "epoch": 18.54, "learning_rate": 4.0728e-05, "loss": 8.7568, "step": 2318000 }, { "epoch": 18.55, "learning_rate": 4.0726e-05, "loss": 8.7578, "step": 2318500 }, { "epoch": 18.55, "learning_rate": 4.0724e-05, "loss": 8.7506, "step": 2319000 }, { "epoch": 18.56, "learning_rate": 4.072200000000001e-05, "loss": 8.7426, "step": 2319500 }, { "epoch": 18.56, "learning_rate": 4.072e-05, "loss": 8.7556, "step": 2320000 }, { "epoch": 18.56, "learning_rate": 4.0718e-05, "loss": 8.7733, "step": 2320500 }, { "epoch": 18.57, "learning_rate": 4.0716e-05, "loss": 8.7407, "step": 2321000 }, { "epoch": 18.57, "learning_rate": 4.0714000000000004e-05, "loss": 8.7377, "step": 2321500 }, { "epoch": 18.58, "learning_rate": 4.0712e-05, "loss": 8.7628, "step": 2322000 }, { "epoch": 18.58, "learning_rate": 4.071e-05, "loss": 8.7474, "step": 2322500 }, { "epoch": 18.58, "learning_rate": 4.0708000000000004e-05, "loss": 8.7629, "step": 2323000 }, { "epoch": 18.59, "learning_rate": 4.0706e-05, "loss": 8.7716, "step": 2323500 }, { "epoch": 18.59, "learning_rate": 4.0704e-05, "loss": 8.737, "step": 2324000 }, { "epoch": 18.6, "learning_rate": 4.0702000000000004e-05, "loss": 8.7446, "step": 2324500 }, { "epoch": 18.6, "learning_rate": 4.07e-05, "loss": 8.7352, "step": 2325000 }, { "epoch": 18.6, "learning_rate": 4.0698e-05, "loss": 8.7476, "step": 2325500 }, { "epoch": 18.61, "learning_rate": 4.0696e-05, "loss": 8.7497, "step": 2326000 }, { "epoch": 18.61, "learning_rate": 4.069400000000001e-05, "loss": 8.7456, "step": 2326500 }, { "epoch": 18.62, "learning_rate": 4.0692e-05, "loss": 8.7487, "step": 2327000 }, { "epoch": 18.62, "learning_rate": 4.069e-05, "loss": 8.745, "step": 2327500 }, { "epoch": 18.62, "learning_rate": 4.0688e-05, "loss": 8.7619, "step": 2328000 }, { "epoch": 18.63, "learning_rate": 4.0686e-05, "loss": 8.7527, "step": 2328500 }, { "epoch": 18.63, "learning_rate": 4.0684e-05, "loss": 8.7524, "step": 2329000 }, { "epoch": 18.64, "learning_rate": 4.0682e-05, "loss": 8.7628, "step": 2329500 }, { "epoch": 18.64, "learning_rate": 4.0680000000000004e-05, "loss": 8.7539, "step": 2330000 }, { "epoch": 18.64, "learning_rate": 4.0678e-05, "loss": 8.735, "step": 2330500 }, { "epoch": 18.65, "learning_rate": 4.0676e-05, "loss": 8.7596, "step": 2331000 }, { "epoch": 18.65, "learning_rate": 4.0674000000000004e-05, "loss": 8.7777, "step": 2331500 }, { "epoch": 18.66, "learning_rate": 4.0672000000000006e-05, "loss": 8.7696, "step": 2332000 }, { "epoch": 18.66, "learning_rate": 4.067e-05, "loss": 8.7567, "step": 2332500 }, { "epoch": 18.66, "learning_rate": 4.0668e-05, "loss": 8.7586, "step": 2333000 }, { "epoch": 18.67, "learning_rate": 4.066600000000001e-05, "loss": 8.7533, "step": 2333500 }, { "epoch": 18.67, "learning_rate": 4.0664e-05, "loss": 8.7548, "step": 2334000 }, { "epoch": 18.68, "learning_rate": 4.0662e-05, "loss": 8.737, "step": 2334500 }, { "epoch": 18.68, "learning_rate": 4.066e-05, "loss": 8.7537, "step": 2335000 }, { "epoch": 18.68, "learning_rate": 4.0658e-05, "loss": 8.7335, "step": 2335500 }, { "epoch": 18.69, "learning_rate": 4.0656e-05, "loss": 8.7562, "step": 2336000 }, { "epoch": 18.69, "learning_rate": 4.0654e-05, "loss": 8.7388, "step": 2336500 }, { "epoch": 18.7, "learning_rate": 4.0652e-05, "loss": 8.7485, "step": 2337000 }, { "epoch": 18.7, "learning_rate": 4.065e-05, "loss": 8.7534, "step": 2337500 }, { "epoch": 18.7, "learning_rate": 4.0648e-05, "loss": 8.7486, "step": 2338000 }, { "epoch": 18.71, "learning_rate": 4.0646000000000004e-05, "loss": 8.7641, "step": 2338500 }, { "epoch": 18.71, "learning_rate": 4.0644000000000006e-05, "loss": 8.7509, "step": 2339000 }, { "epoch": 18.72, "learning_rate": 4.0642e-05, "loss": 8.7438, "step": 2339500 }, { "epoch": 18.72, "learning_rate": 4.064e-05, "loss": 8.7423, "step": 2340000 }, { "epoch": 18.72, "learning_rate": 4.0638000000000006e-05, "loss": 8.7705, "step": 2340500 }, { "epoch": 18.73, "learning_rate": 4.0636e-05, "loss": 8.7523, "step": 2341000 }, { "epoch": 18.73, "learning_rate": 4.0634e-05, "loss": 8.7417, "step": 2341500 }, { "epoch": 18.74, "learning_rate": 4.063200000000001e-05, "loss": 8.7348, "step": 2342000 }, { "epoch": 18.74, "learning_rate": 4.063e-05, "loss": 8.7648, "step": 2342500 }, { "epoch": 18.74, "learning_rate": 4.0628e-05, "loss": 8.76, "step": 2343000 }, { "epoch": 18.75, "learning_rate": 4.0626e-05, "loss": 8.7698, "step": 2343500 }, { "epoch": 18.75, "learning_rate": 4.0624e-05, "loss": 8.769, "step": 2344000 }, { "epoch": 18.76, "learning_rate": 4.0622000000000005e-05, "loss": 8.7388, "step": 2344500 }, { "epoch": 18.76, "learning_rate": 4.062e-05, "loss": 8.7468, "step": 2345000 }, { "epoch": 18.76, "learning_rate": 4.0618e-05, "loss": 8.742, "step": 2345500 }, { "epoch": 18.77, "learning_rate": 4.0616000000000006e-05, "loss": 8.7481, "step": 2346000 }, { "epoch": 18.77, "learning_rate": 4.0614e-05, "loss": 8.7341, "step": 2346500 }, { "epoch": 18.78, "learning_rate": 4.0612e-05, "loss": 8.7449, "step": 2347000 }, { "epoch": 18.78, "learning_rate": 4.0610000000000006e-05, "loss": 8.7729, "step": 2347500 }, { "epoch": 18.78, "learning_rate": 4.0608e-05, "loss": 8.7272, "step": 2348000 }, { "epoch": 18.79, "learning_rate": 4.0606e-05, "loss": 8.7614, "step": 2348500 }, { "epoch": 18.79, "learning_rate": 4.0604000000000006e-05, "loss": 8.7401, "step": 2349000 }, { "epoch": 18.8, "learning_rate": 4.0602e-05, "loss": 8.7552, "step": 2349500 }, { "epoch": 18.8, "learning_rate": 4.0600000000000004e-05, "loss": 8.7231, "step": 2350000 }, { "epoch": 18.8, "learning_rate": 4.0598e-05, "loss": 8.7099, "step": 2350500 }, { "epoch": 18.81, "learning_rate": 4.0596e-05, "loss": 8.7426, "step": 2351000 }, { "epoch": 18.81, "learning_rate": 4.0594000000000005e-05, "loss": 8.7478, "step": 2351500 }, { "epoch": 18.82, "learning_rate": 4.0592e-05, "loss": 8.7508, "step": 2352000 }, { "epoch": 18.82, "learning_rate": 4.059e-05, "loss": 8.7575, "step": 2352500 }, { "epoch": 18.82, "learning_rate": 4.0588000000000005e-05, "loss": 8.753, "step": 2353000 }, { "epoch": 18.83, "learning_rate": 4.0586e-05, "loss": 8.7608, "step": 2353500 }, { "epoch": 18.83, "learning_rate": 4.0584e-05, "loss": 8.7303, "step": 2354000 }, { "epoch": 18.84, "learning_rate": 4.0582000000000005e-05, "loss": 8.7408, "step": 2354500 }, { "epoch": 18.84, "learning_rate": 4.058e-05, "loss": 8.7687, "step": 2355000 }, { "epoch": 18.84, "learning_rate": 4.0578e-05, "loss": 8.7571, "step": 2355500 }, { "epoch": 18.85, "learning_rate": 4.0576000000000006e-05, "loss": 8.7358, "step": 2356000 }, { "epoch": 18.85, "learning_rate": 4.0574e-05, "loss": 8.7498, "step": 2356500 }, { "epoch": 18.86, "learning_rate": 4.0572000000000004e-05, "loss": 8.7736, "step": 2357000 }, { "epoch": 18.86, "learning_rate": 4.057e-05, "loss": 8.7463, "step": 2357500 }, { "epoch": 18.86, "learning_rate": 4.0568e-05, "loss": 8.7547, "step": 2358000 }, { "epoch": 18.87, "learning_rate": 4.0566000000000004e-05, "loss": 8.7614, "step": 2358500 }, { "epoch": 18.87, "learning_rate": 4.0564e-05, "loss": 8.733, "step": 2359000 }, { "epoch": 18.88, "learning_rate": 4.0562e-05, "loss": 8.7275, "step": 2359500 }, { "epoch": 18.88, "learning_rate": 4.0560000000000005e-05, "loss": 8.7443, "step": 2360000 }, { "epoch": 18.88, "learning_rate": 4.0558e-05, "loss": 8.7548, "step": 2360500 }, { "epoch": 18.89, "learning_rate": 4.0556e-05, "loss": 8.7427, "step": 2361000 }, { "epoch": 18.89, "learning_rate": 4.0554000000000005e-05, "loss": 8.719, "step": 2361500 }, { "epoch": 18.9, "learning_rate": 4.0552e-05, "loss": 8.7559, "step": 2362000 }, { "epoch": 18.9, "learning_rate": 4.055e-05, "loss": 8.7426, "step": 2362500 }, { "epoch": 18.9, "learning_rate": 4.0548000000000005e-05, "loss": 8.7653, "step": 2363000 }, { "epoch": 18.91, "learning_rate": 4.0546e-05, "loss": 8.7474, "step": 2363500 }, { "epoch": 18.91, "learning_rate": 4.0544000000000003e-05, "loss": 8.7501, "step": 2364000 }, { "epoch": 18.92, "learning_rate": 4.0542e-05, "loss": 8.7536, "step": 2364500 }, { "epoch": 18.92, "learning_rate": 4.054e-05, "loss": 8.7379, "step": 2365000 }, { "epoch": 18.92, "learning_rate": 4.0538000000000004e-05, "loss": 8.7425, "step": 2365500 }, { "epoch": 18.93, "learning_rate": 4.0536e-05, "loss": 8.7578, "step": 2366000 }, { "epoch": 18.93, "learning_rate": 4.0534e-05, "loss": 8.7447, "step": 2366500 }, { "epoch": 18.94, "learning_rate": 4.0532000000000004e-05, "loss": 8.765, "step": 2367000 }, { "epoch": 18.94, "learning_rate": 4.053e-05, "loss": 8.7485, "step": 2367500 }, { "epoch": 18.94, "learning_rate": 4.0528e-05, "loss": 8.7629, "step": 2368000 }, { "epoch": 18.95, "learning_rate": 4.0526000000000005e-05, "loss": 8.7524, "step": 2368500 }, { "epoch": 18.95, "learning_rate": 4.0524e-05, "loss": 8.7542, "step": 2369000 }, { "epoch": 18.96, "learning_rate": 4.0522e-05, "loss": 8.7655, "step": 2369500 }, { "epoch": 18.96, "learning_rate": 4.0520000000000005e-05, "loss": 8.7574, "step": 2370000 }, { "epoch": 18.96, "learning_rate": 4.0518e-05, "loss": 8.756, "step": 2370500 }, { "epoch": 18.97, "learning_rate": 4.0516e-05, "loss": 8.7609, "step": 2371000 }, { "epoch": 18.97, "learning_rate": 4.0514e-05, "loss": 8.7573, "step": 2371500 }, { "epoch": 18.98, "learning_rate": 4.0512e-05, "loss": 8.7338, "step": 2372000 }, { "epoch": 18.98, "learning_rate": 4.0510000000000003e-05, "loss": 8.7613, "step": 2372500 }, { "epoch": 18.98, "learning_rate": 4.0508e-05, "loss": 8.734, "step": 2373000 }, { "epoch": 18.99, "learning_rate": 4.0506e-05, "loss": 8.7489, "step": 2373500 }, { "epoch": 18.99, "learning_rate": 4.0504000000000004e-05, "loss": 8.7384, "step": 2374000 }, { "epoch": 19.0, "learning_rate": 4.0502e-05, "loss": 8.7357, "step": 2374500 }, { "epoch": 19.0, "learning_rate": 4.05e-05, "loss": 8.7563, "step": 2375000 }, { "epoch": 19.0, "learning_rate": 4.0498000000000004e-05, "loss": 8.7461, "step": 2375500 }, { "epoch": 19.01, "learning_rate": 4.0496e-05, "loss": 8.7426, "step": 2376000 }, { "epoch": 19.01, "learning_rate": 4.0494e-05, "loss": 8.7708, "step": 2376500 }, { "epoch": 19.02, "learning_rate": 4.0492000000000005e-05, "loss": 8.7647, "step": 2377000 }, { "epoch": 19.02, "learning_rate": 4.049e-05, "loss": 8.7456, "step": 2377500 }, { "epoch": 19.02, "learning_rate": 4.0488e-05, "loss": 8.7405, "step": 2378000 }, { "epoch": 19.03, "learning_rate": 4.0486000000000005e-05, "loss": 8.7477, "step": 2378500 }, { "epoch": 19.03, "learning_rate": 4.0484e-05, "loss": 8.7816, "step": 2379000 }, { "epoch": 19.04, "learning_rate": 4.0482e-05, "loss": 8.7504, "step": 2379500 }, { "epoch": 19.04, "learning_rate": 4.048e-05, "loss": 8.733, "step": 2380000 }, { "epoch": 19.04, "learning_rate": 4.0478e-05, "loss": 8.7661, "step": 2380500 }, { "epoch": 19.05, "learning_rate": 4.0476000000000003e-05, "loss": 8.7617, "step": 2381000 }, { "epoch": 19.05, "learning_rate": 4.0474e-05, "loss": 8.7434, "step": 2381500 }, { "epoch": 19.06, "learning_rate": 4.0472e-05, "loss": 8.7752, "step": 2382000 }, { "epoch": 19.06, "learning_rate": 4.0470000000000004e-05, "loss": 8.7657, "step": 2382500 }, { "epoch": 19.06, "learning_rate": 4.0468e-05, "loss": 8.7553, "step": 2383000 }, { "epoch": 19.07, "learning_rate": 4.0466e-05, "loss": 8.7644, "step": 2383500 }, { "epoch": 19.07, "learning_rate": 4.0464000000000004e-05, "loss": 8.7783, "step": 2384000 }, { "epoch": 19.08, "learning_rate": 4.0462e-05, "loss": 8.7582, "step": 2384500 }, { "epoch": 19.08, "learning_rate": 4.046e-05, "loss": 8.7423, "step": 2385000 }, { "epoch": 19.08, "learning_rate": 4.0458000000000005e-05, "loss": 8.7385, "step": 2385500 }, { "epoch": 19.09, "learning_rate": 4.0456e-05, "loss": 8.7639, "step": 2386000 }, { "epoch": 19.09, "learning_rate": 4.0454e-05, "loss": 8.761, "step": 2386500 }, { "epoch": 19.1, "learning_rate": 4.0452e-05, "loss": 8.7316, "step": 2387000 }, { "epoch": 19.1, "learning_rate": 4.045000000000001e-05, "loss": 8.7574, "step": 2387500 }, { "epoch": 19.1, "learning_rate": 4.0448e-05, "loss": 8.7431, "step": 2388000 }, { "epoch": 19.11, "learning_rate": 4.0446e-05, "loss": 8.7348, "step": 2388500 }, { "epoch": 19.11, "learning_rate": 4.0444e-05, "loss": 8.7528, "step": 2389000 }, { "epoch": 19.12, "learning_rate": 4.0442000000000003e-05, "loss": 8.7458, "step": 2389500 }, { "epoch": 19.12, "learning_rate": 4.044e-05, "loss": 8.7648, "step": 2390000 }, { "epoch": 19.12, "learning_rate": 4.0438e-05, "loss": 8.7585, "step": 2390500 }, { "epoch": 19.13, "learning_rate": 4.0436000000000004e-05, "loss": 8.7518, "step": 2391000 }, { "epoch": 19.13, "learning_rate": 4.0434e-05, "loss": 8.7612, "step": 2391500 }, { "epoch": 19.14, "learning_rate": 4.0432e-05, "loss": 8.7406, "step": 2392000 }, { "epoch": 19.14, "learning_rate": 4.0430000000000004e-05, "loss": 8.7534, "step": 2392500 }, { "epoch": 19.14, "learning_rate": 4.0428e-05, "loss": 8.7427, "step": 2393000 }, { "epoch": 19.15, "learning_rate": 4.0426e-05, "loss": 8.7663, "step": 2393500 }, { "epoch": 19.15, "learning_rate": 4.0424e-05, "loss": 8.7277, "step": 2394000 }, { "epoch": 19.16, "learning_rate": 4.042200000000001e-05, "loss": 8.7518, "step": 2394500 }, { "epoch": 19.16, "learning_rate": 4.042e-05, "loss": 8.7511, "step": 2395000 }, { "epoch": 19.16, "learning_rate": 4.0418e-05, "loss": 8.7656, "step": 2395500 }, { "epoch": 19.17, "learning_rate": 4.0416e-05, "loss": 8.7466, "step": 2396000 }, { "epoch": 19.17, "learning_rate": 4.0414e-05, "loss": 8.7625, "step": 2396500 }, { "epoch": 19.18, "learning_rate": 4.0412e-05, "loss": 8.7536, "step": 2397000 }, { "epoch": 19.18, "learning_rate": 4.041e-05, "loss": 8.7479, "step": 2397500 }, { "epoch": 19.18, "learning_rate": 4.0408000000000003e-05, "loss": 8.7536, "step": 2398000 }, { "epoch": 19.19, "learning_rate": 4.0406e-05, "loss": 8.761, "step": 2398500 }, { "epoch": 19.19, "learning_rate": 4.0404e-05, "loss": 8.7753, "step": 2399000 }, { "epoch": 19.2, "learning_rate": 4.0402000000000004e-05, "loss": 8.7438, "step": 2399500 }, { "epoch": 19.2, "learning_rate": 4.0400000000000006e-05, "loss": 8.7478, "step": 2400000 }, { "epoch": 19.2, "learning_rate": 4.0398e-05, "loss": 8.7615, "step": 2400500 }, { "epoch": 19.21, "learning_rate": 4.0396e-05, "loss": 8.7349, "step": 2401000 }, { "epoch": 19.21, "learning_rate": 4.039400000000001e-05, "loss": 8.7791, "step": 2401500 }, { "epoch": 19.22, "learning_rate": 4.0392e-05, "loss": 8.7616, "step": 2402000 }, { "epoch": 19.22, "learning_rate": 4.039e-05, "loss": 8.7566, "step": 2402500 }, { "epoch": 19.22, "learning_rate": 4.0388e-05, "loss": 8.7492, "step": 2403000 }, { "epoch": 19.23, "learning_rate": 4.0386e-05, "loss": 8.766, "step": 2403500 }, { "epoch": 19.23, "learning_rate": 4.0384e-05, "loss": 8.7346, "step": 2404000 }, { "epoch": 19.24, "learning_rate": 4.0382e-05, "loss": 8.7265, "step": 2404500 }, { "epoch": 19.24, "learning_rate": 4.038e-05, "loss": 8.751, "step": 2405000 }, { "epoch": 19.24, "learning_rate": 4.0378e-05, "loss": 8.7456, "step": 2405500 }, { "epoch": 19.25, "learning_rate": 4.0376e-05, "loss": 8.7584, "step": 2406000 }, { "epoch": 19.25, "learning_rate": 4.0374000000000003e-05, "loss": 8.7414, "step": 2406500 }, { "epoch": 19.26, "learning_rate": 4.0372000000000006e-05, "loss": 8.7558, "step": 2407000 }, { "epoch": 19.26, "learning_rate": 4.037e-05, "loss": 8.7592, "step": 2407500 }, { "epoch": 19.26, "learning_rate": 4.0368e-05, "loss": 8.7686, "step": 2408000 }, { "epoch": 19.27, "learning_rate": 4.0366000000000006e-05, "loss": 8.7685, "step": 2408500 }, { "epoch": 19.27, "learning_rate": 4.0364e-05, "loss": 8.7617, "step": 2409000 }, { "epoch": 19.28, "learning_rate": 4.0362e-05, "loss": 8.7372, "step": 2409500 }, { "epoch": 19.28, "learning_rate": 4.0360000000000007e-05, "loss": 8.752, "step": 2410000 }, { "epoch": 19.28, "learning_rate": 4.0358e-05, "loss": 8.7489, "step": 2410500 }, { "epoch": 19.29, "learning_rate": 4.0356e-05, "loss": 8.7417, "step": 2411000 }, { "epoch": 19.29, "learning_rate": 4.0354e-05, "loss": 8.7564, "step": 2411500 }, { "epoch": 19.3, "learning_rate": 4.0352e-05, "loss": 8.7469, "step": 2412000 }, { "epoch": 19.3, "learning_rate": 4.0350000000000005e-05, "loss": 8.7325, "step": 2412500 }, { "epoch": 19.3, "learning_rate": 4.0348e-05, "loss": 8.7601, "step": 2413000 }, { "epoch": 19.31, "learning_rate": 4.0346e-05, "loss": 8.7597, "step": 2413500 }, { "epoch": 19.31, "learning_rate": 4.0344000000000005e-05, "loss": 8.7579, "step": 2414000 }, { "epoch": 19.32, "learning_rate": 4.0342e-05, "loss": 8.736, "step": 2414500 }, { "epoch": 19.32, "learning_rate": 4.034e-05, "loss": 8.7605, "step": 2415000 }, { "epoch": 19.32, "learning_rate": 4.0338000000000006e-05, "loss": 8.7582, "step": 2415500 }, { "epoch": 19.33, "learning_rate": 4.0336e-05, "loss": 8.7349, "step": 2416000 }, { "epoch": 19.33, "learning_rate": 4.0334e-05, "loss": 8.7446, "step": 2416500 }, { "epoch": 19.34, "learning_rate": 4.0332000000000006e-05, "loss": 8.7501, "step": 2417000 }, { "epoch": 19.34, "learning_rate": 4.033e-05, "loss": 8.749, "step": 2417500 }, { "epoch": 19.34, "learning_rate": 4.0328000000000004e-05, "loss": 8.7446, "step": 2418000 }, { "epoch": 19.35, "learning_rate": 4.0326e-05, "loss": 8.7403, "step": 2418500 }, { "epoch": 19.35, "learning_rate": 4.0324e-05, "loss": 8.7461, "step": 2419000 }, { "epoch": 19.36, "learning_rate": 4.0322000000000005e-05, "loss": 8.7407, "step": 2419500 }, { "epoch": 19.36, "learning_rate": 4.032e-05, "loss": 8.7547, "step": 2420000 }, { "epoch": 19.36, "learning_rate": 4.0318e-05, "loss": 8.7661, "step": 2420500 }, { "epoch": 19.37, "learning_rate": 4.0316000000000005e-05, "loss": 8.7476, "step": 2421000 }, { "epoch": 19.37, "learning_rate": 4.0314e-05, "loss": 8.7448, "step": 2421500 }, { "epoch": 19.38, "learning_rate": 4.0312e-05, "loss": 8.7671, "step": 2422000 }, { "epoch": 19.38, "learning_rate": 4.0310000000000005e-05, "loss": 8.7413, "step": 2422500 }, { "epoch": 19.38, "learning_rate": 4.0308e-05, "loss": 8.7644, "step": 2423000 }, { "epoch": 19.39, "learning_rate": 4.0306e-05, "loss": 8.7589, "step": 2423500 }, { "epoch": 19.39, "learning_rate": 4.0304000000000006e-05, "loss": 8.7546, "step": 2424000 }, { "epoch": 19.4, "learning_rate": 4.0302e-05, "loss": 8.7405, "step": 2424500 }, { "epoch": 19.4, "learning_rate": 4.0300000000000004e-05, "loss": 8.7328, "step": 2425000 }, { "epoch": 19.4, "learning_rate": 4.0298e-05, "loss": 8.7554, "step": 2425500 }, { "epoch": 19.41, "learning_rate": 4.0296e-05, "loss": 8.7492, "step": 2426000 }, { "epoch": 19.41, "learning_rate": 4.0294000000000004e-05, "loss": 8.7363, "step": 2426500 }, { "epoch": 19.42, "learning_rate": 4.0292e-05, "loss": 8.7693, "step": 2427000 }, { "epoch": 19.42, "learning_rate": 4.029e-05, "loss": 8.7698, "step": 2427500 }, { "epoch": 19.42, "learning_rate": 4.0288000000000005e-05, "loss": 8.7477, "step": 2428000 }, { "epoch": 19.43, "learning_rate": 4.0286e-05, "loss": 8.7462, "step": 2428500 }, { "epoch": 19.43, "learning_rate": 4.0284e-05, "loss": 8.7616, "step": 2429000 }, { "epoch": 19.44, "learning_rate": 4.0282000000000005e-05, "loss": 8.7497, "step": 2429500 }, { "epoch": 19.44, "learning_rate": 4.028e-05, "loss": 8.7691, "step": 2430000 }, { "epoch": 19.44, "learning_rate": 4.0278e-05, "loss": 8.7368, "step": 2430500 }, { "epoch": 19.45, "learning_rate": 4.0276000000000005e-05, "loss": 8.748, "step": 2431000 }, { "epoch": 19.45, "learning_rate": 4.0274e-05, "loss": 8.748, "step": 2431500 }, { "epoch": 19.46, "learning_rate": 4.0272e-05, "loss": 8.7404, "step": 2432000 }, { "epoch": 19.46, "learning_rate": 4.027e-05, "loss": 8.7609, "step": 2432500 }, { "epoch": 19.46, "learning_rate": 4.0268e-05, "loss": 8.7639, "step": 2433000 }, { "epoch": 19.47, "learning_rate": 4.0266000000000004e-05, "loss": 8.7623, "step": 2433500 }, { "epoch": 19.47, "learning_rate": 4.0264e-05, "loss": 8.7612, "step": 2434000 }, { "epoch": 19.48, "learning_rate": 4.0262e-05, "loss": 8.7644, "step": 2434500 }, { "epoch": 19.48, "learning_rate": 4.0260000000000004e-05, "loss": 8.7645, "step": 2435000 }, { "epoch": 19.48, "learning_rate": 4.0258e-05, "loss": 8.7458, "step": 2435500 }, { "epoch": 19.49, "learning_rate": 4.0256e-05, "loss": 8.7624, "step": 2436000 }, { "epoch": 19.49, "learning_rate": 4.0254000000000005e-05, "loss": 8.7517, "step": 2436500 }, { "epoch": 19.5, "learning_rate": 4.0252e-05, "loss": 8.7472, "step": 2437000 }, { "epoch": 19.5, "learning_rate": 4.025e-05, "loss": 8.741, "step": 2437500 }, { "epoch": 19.5, "learning_rate": 4.0248000000000005e-05, "loss": 8.7517, "step": 2438000 }, { "epoch": 19.51, "learning_rate": 4.0246e-05, "loss": 8.7268, "step": 2438500 }, { "epoch": 19.51, "learning_rate": 4.0244e-05, "loss": 8.7218, "step": 2439000 }, { "epoch": 19.52, "learning_rate": 4.0242e-05, "loss": 8.7421, "step": 2439500 }, { "epoch": 19.52, "learning_rate": 4.024e-05, "loss": 8.7431, "step": 2440000 }, { "epoch": 19.52, "learning_rate": 4.0238e-05, "loss": 8.7463, "step": 2440500 }, { "epoch": 19.53, "learning_rate": 4.0236e-05, "loss": 8.7449, "step": 2441000 }, { "epoch": 19.53, "learning_rate": 4.0234e-05, "loss": 8.761, "step": 2441500 }, { "epoch": 19.54, "learning_rate": 4.0232000000000004e-05, "loss": 8.773, "step": 2442000 }, { "epoch": 19.54, "learning_rate": 4.023e-05, "loss": 8.7444, "step": 2442500 }, { "epoch": 19.54, "learning_rate": 4.0228e-05, "loss": 8.7298, "step": 2443000 }, { "epoch": 19.55, "learning_rate": 4.0226000000000004e-05, "loss": 8.752, "step": 2443500 }, { "epoch": 19.55, "learning_rate": 4.0224e-05, "loss": 8.7413, "step": 2444000 }, { "epoch": 19.56, "learning_rate": 4.0222e-05, "loss": 8.7557, "step": 2444500 }, { "epoch": 19.56, "learning_rate": 4.0220000000000005e-05, "loss": 8.7668, "step": 2445000 }, { "epoch": 19.56, "learning_rate": 4.0218e-05, "loss": 8.7495, "step": 2445500 }, { "epoch": 19.57, "learning_rate": 4.0216e-05, "loss": 8.765, "step": 2446000 }, { "epoch": 19.57, "learning_rate": 4.0214000000000005e-05, "loss": 8.7609, "step": 2446500 }, { "epoch": 19.58, "learning_rate": 4.0212e-05, "loss": 8.7564, "step": 2447000 }, { "epoch": 19.58, "learning_rate": 4.021e-05, "loss": 8.755, "step": 2447500 }, { "epoch": 19.58, "learning_rate": 4.0208e-05, "loss": 8.7406, "step": 2448000 }, { "epoch": 19.59, "learning_rate": 4.0206e-05, "loss": 8.7519, "step": 2448500 }, { "epoch": 19.59, "learning_rate": 4.0204e-05, "loss": 8.7516, "step": 2449000 }, { "epoch": 19.6, "learning_rate": 4.0202e-05, "loss": 8.7555, "step": 2449500 }, { "epoch": 19.6, "learning_rate": 4.02e-05, "loss": 8.7346, "step": 2450000 }, { "epoch": 19.6, "learning_rate": 4.0198000000000004e-05, "loss": 8.7446, "step": 2450500 }, { "epoch": 19.61, "learning_rate": 4.0196e-05, "loss": 8.746, "step": 2451000 }, { "epoch": 19.61, "learning_rate": 4.0194e-05, "loss": 8.7724, "step": 2451500 }, { "epoch": 19.62, "learning_rate": 4.0192000000000004e-05, "loss": 8.747, "step": 2452000 }, { "epoch": 19.62, "learning_rate": 4.019e-05, "loss": 8.7369, "step": 2452500 }, { "epoch": 19.62, "learning_rate": 4.0188e-05, "loss": 8.7662, "step": 2453000 }, { "epoch": 19.63, "learning_rate": 4.0186000000000005e-05, "loss": 8.7382, "step": 2453500 }, { "epoch": 19.63, "learning_rate": 4.0184e-05, "loss": 8.7463, "step": 2454000 }, { "epoch": 19.64, "learning_rate": 4.0182e-05, "loss": 8.738, "step": 2454500 }, { "epoch": 19.64, "learning_rate": 4.018e-05, "loss": 8.7372, "step": 2455000 }, { "epoch": 19.64, "learning_rate": 4.017800000000001e-05, "loss": 8.7547, "step": 2455500 }, { "epoch": 19.65, "learning_rate": 4.0176e-05, "loss": 8.7291, "step": 2456000 }, { "epoch": 19.65, "learning_rate": 4.0174e-05, "loss": 8.7378, "step": 2456500 }, { "epoch": 19.66, "learning_rate": 4.0172e-05, "loss": 8.7624, "step": 2457000 }, { "epoch": 19.66, "learning_rate": 4.017e-05, "loss": 8.7687, "step": 2457500 }, { "epoch": 19.66, "learning_rate": 4.0168e-05, "loss": 8.7551, "step": 2458000 }, { "epoch": 19.67, "learning_rate": 4.0166e-05, "loss": 8.7521, "step": 2458500 }, { "epoch": 19.67, "learning_rate": 4.0164000000000004e-05, "loss": 8.7308, "step": 2459000 }, { "epoch": 19.68, "learning_rate": 4.0162e-05, "loss": 8.7549, "step": 2459500 }, { "epoch": 19.68, "learning_rate": 4.016e-05, "loss": 8.7479, "step": 2460000 }, { "epoch": 19.68, "learning_rate": 4.0158000000000004e-05, "loss": 8.7623, "step": 2460500 }, { "epoch": 19.69, "learning_rate": 4.0156e-05, "loss": 8.7438, "step": 2461000 }, { "epoch": 19.69, "learning_rate": 4.0154e-05, "loss": 8.7415, "step": 2461500 }, { "epoch": 19.7, "learning_rate": 4.0152e-05, "loss": 8.7475, "step": 2462000 }, { "epoch": 19.7, "learning_rate": 4.015000000000001e-05, "loss": 8.7502, "step": 2462500 }, { "epoch": 19.7, "learning_rate": 4.0148e-05, "loss": 8.7435, "step": 2463000 }, { "epoch": 19.71, "learning_rate": 4.0146e-05, "loss": 8.7676, "step": 2463500 }, { "epoch": 19.71, "learning_rate": 4.0144e-05, "loss": 8.739, "step": 2464000 }, { "epoch": 19.72, "learning_rate": 4.0142e-05, "loss": 8.745, "step": 2464500 }, { "epoch": 19.72, "learning_rate": 4.014e-05, "loss": 8.7478, "step": 2465000 }, { "epoch": 19.72, "learning_rate": 4.0138e-05, "loss": 8.7578, "step": 2465500 }, { "epoch": 19.73, "learning_rate": 4.0136e-05, "loss": 8.7516, "step": 2466000 }, { "epoch": 19.73, "learning_rate": 4.0134e-05, "loss": 8.7629, "step": 2466500 }, { "epoch": 19.74, "learning_rate": 4.0132e-05, "loss": 8.7453, "step": 2467000 }, { "epoch": 19.74, "learning_rate": 4.0130000000000004e-05, "loss": 8.7713, "step": 2467500 }, { "epoch": 19.74, "learning_rate": 4.0128000000000006e-05, "loss": 8.755, "step": 2468000 }, { "epoch": 19.75, "learning_rate": 4.0126e-05, "loss": 8.7479, "step": 2468500 }, { "epoch": 19.75, "learning_rate": 4.0124e-05, "loss": 8.7455, "step": 2469000 }, { "epoch": 19.76, "learning_rate": 4.0122000000000007e-05, "loss": 8.7413, "step": 2469500 }, { "epoch": 19.76, "learning_rate": 4.012e-05, "loss": 8.7444, "step": 2470000 }, { "epoch": 19.76, "learning_rate": 4.0118e-05, "loss": 8.7411, "step": 2470500 }, { "epoch": 19.77, "learning_rate": 4.011600000000001e-05, "loss": 8.7341, "step": 2471000 }, { "epoch": 19.77, "learning_rate": 4.0114e-05, "loss": 8.7165, "step": 2471500 }, { "epoch": 19.78, "learning_rate": 4.0112e-05, "loss": 8.7461, "step": 2472000 }, { "epoch": 19.78, "learning_rate": 4.011e-05, "loss": 8.7534, "step": 2472500 }, { "epoch": 19.78, "learning_rate": 4.0108e-05, "loss": 8.7545, "step": 2473000 }, { "epoch": 19.79, "learning_rate": 4.0106000000000005e-05, "loss": 8.7669, "step": 2473500 }, { "epoch": 19.79, "learning_rate": 4.0104e-05, "loss": 8.744, "step": 2474000 }, { "epoch": 19.8, "learning_rate": 4.0102e-05, "loss": 8.7275, "step": 2474500 }, { "epoch": 19.8, "learning_rate": 4.0100000000000006e-05, "loss": 8.7572, "step": 2475000 }, { "epoch": 19.8, "learning_rate": 4.0098e-05, "loss": 8.7374, "step": 2475500 }, { "epoch": 19.81, "learning_rate": 4.0096e-05, "loss": 8.7175, "step": 2476000 }, { "epoch": 19.81, "learning_rate": 4.0094000000000006e-05, "loss": 8.7567, "step": 2476500 }, { "epoch": 19.82, "learning_rate": 4.0092e-05, "loss": 8.7663, "step": 2477000 }, { "epoch": 19.82, "learning_rate": 4.009e-05, "loss": 8.7503, "step": 2477500 }, { "epoch": 19.82, "learning_rate": 4.0088000000000006e-05, "loss": 8.763, "step": 2478000 }, { "epoch": 19.83, "learning_rate": 4.0086e-05, "loss": 8.7604, "step": 2478500 }, { "epoch": 19.83, "learning_rate": 4.0084e-05, "loss": 8.756, "step": 2479000 }, { "epoch": 19.84, "learning_rate": 4.0082e-05, "loss": 8.7316, "step": 2479500 }, { "epoch": 19.84, "learning_rate": 4.008e-05, "loss": 8.7477, "step": 2480000 }, { "epoch": 19.84, "learning_rate": 4.0078000000000005e-05, "loss": 8.7653, "step": 2480500 }, { "epoch": 19.85, "learning_rate": 4.0076e-05, "loss": 8.7437, "step": 2481000 }, { "epoch": 19.85, "learning_rate": 4.0074e-05, "loss": 8.7481, "step": 2481500 }, { "epoch": 19.86, "learning_rate": 4.0072000000000005e-05, "loss": 8.7411, "step": 2482000 }, { "epoch": 19.86, "learning_rate": 4.007e-05, "loss": 8.7691, "step": 2482500 }, { "epoch": 19.86, "learning_rate": 4.0068e-05, "loss": 8.7346, "step": 2483000 }, { "epoch": 19.87, "learning_rate": 4.0066000000000006e-05, "loss": 8.7504, "step": 2483500 }, { "epoch": 19.87, "learning_rate": 4.0064e-05, "loss": 8.7561, "step": 2484000 }, { "epoch": 19.88, "learning_rate": 4.0062e-05, "loss": 8.7586, "step": 2484500 }, { "epoch": 19.88, "learning_rate": 4.0060000000000006e-05, "loss": 8.7346, "step": 2485000 }, { "epoch": 19.88, "learning_rate": 4.0058e-05, "loss": 8.7417, "step": 2485500 }, { "epoch": 19.89, "learning_rate": 4.0056000000000004e-05, "loss": 8.7543, "step": 2486000 }, { "epoch": 19.89, "learning_rate": 4.0054e-05, "loss": 8.7291, "step": 2486500 }, { "epoch": 19.9, "learning_rate": 4.0052e-05, "loss": 8.7415, "step": 2487000 }, { "epoch": 19.9, "learning_rate": 4.0050000000000004e-05, "loss": 8.7356, "step": 2487500 }, { "epoch": 19.9, "learning_rate": 4.0048e-05, "loss": 8.7701, "step": 2488000 }, { "epoch": 19.91, "learning_rate": 4.0046e-05, "loss": 8.7674, "step": 2488500 }, { "epoch": 19.91, "learning_rate": 4.0044000000000005e-05, "loss": 8.7679, "step": 2489000 }, { "epoch": 19.92, "learning_rate": 4.0042e-05, "loss": 8.7612, "step": 2489500 }, { "epoch": 19.92, "learning_rate": 4.004e-05, "loss": 8.7544, "step": 2490000 }, { "epoch": 19.92, "learning_rate": 4.0038000000000005e-05, "loss": 8.7422, "step": 2490500 }, { "epoch": 19.93, "learning_rate": 4.0036e-05, "loss": 8.7671, "step": 2491000 }, { "epoch": 19.93, "learning_rate": 4.0033999999999997e-05, "loss": 8.7461, "step": 2491500 }, { "epoch": 19.94, "learning_rate": 4.0032000000000006e-05, "loss": 8.7468, "step": 2492000 }, { "epoch": 19.94, "learning_rate": 4.003e-05, "loss": 8.7511, "step": 2492500 }, { "epoch": 19.94, "learning_rate": 4.0028000000000004e-05, "loss": 8.7539, "step": 2493000 }, { "epoch": 19.95, "learning_rate": 4.0026e-05, "loss": 8.7582, "step": 2493500 }, { "epoch": 19.95, "learning_rate": 4.0024e-05, "loss": 8.7511, "step": 2494000 }, { "epoch": 19.96, "learning_rate": 4.0022000000000004e-05, "loss": 8.7501, "step": 2494500 }, { "epoch": 19.96, "learning_rate": 4.002e-05, "loss": 8.7371, "step": 2495000 }, { "epoch": 19.96, "learning_rate": 4.0018e-05, "loss": 8.7695, "step": 2495500 }, { "epoch": 19.97, "learning_rate": 4.0016000000000004e-05, "loss": 8.7516, "step": 2496000 }, { "epoch": 19.97, "learning_rate": 4.0014e-05, "loss": 8.7499, "step": 2496500 }, { "epoch": 19.98, "learning_rate": 4.0012e-05, "loss": 8.7461, "step": 2497000 }, { "epoch": 19.98, "learning_rate": 4.0010000000000005e-05, "loss": 8.7601, "step": 2497500 }, { "epoch": 19.98, "learning_rate": 4.0008e-05, "loss": 8.7519, "step": 2498000 }, { "epoch": 19.99, "learning_rate": 4.0006e-05, "loss": 8.7743, "step": 2498500 }, { "epoch": 19.99, "learning_rate": 4.0004000000000005e-05, "loss": 8.7511, "step": 2499000 }, { "epoch": 20.0, "learning_rate": 4.0002e-05, "loss": 8.7614, "step": 2499500 }, { "epoch": 20.0, "learning_rate": 4e-05, "loss": 8.7764, "step": 2500000 }, { "epoch": 20.0, "learning_rate": 3.9998e-05, "loss": 8.7371, "step": 2500500 }, { "epoch": 20.01, "learning_rate": 3.9996e-05, "loss": 8.7558, "step": 2501000 }, { "epoch": 20.01, "learning_rate": 3.9994000000000004e-05, "loss": 8.7452, "step": 2501500 }, { "epoch": 20.02, "learning_rate": 3.9992e-05, "loss": 8.7401, "step": 2502000 }, { "epoch": 20.02, "learning_rate": 3.999e-05, "loss": 8.7521, "step": 2502500 }, { "epoch": 20.02, "learning_rate": 3.9988000000000004e-05, "loss": 8.7534, "step": 2503000 }, { "epoch": 20.03, "learning_rate": 3.9986e-05, "loss": 8.7616, "step": 2503500 }, { "epoch": 20.03, "learning_rate": 3.9984e-05, "loss": 8.7592, "step": 2504000 }, { "epoch": 20.04, "learning_rate": 3.9982000000000004e-05, "loss": 8.7595, "step": 2504500 }, { "epoch": 20.04, "learning_rate": 3.998e-05, "loss": 8.7477, "step": 2505000 }, { "epoch": 20.04, "learning_rate": 3.9978e-05, "loss": 8.7414, "step": 2505500 }, { "epoch": 20.05, "learning_rate": 3.9976000000000005e-05, "loss": 8.7495, "step": 2506000 }, { "epoch": 20.05, "learning_rate": 3.9974e-05, "loss": 8.7413, "step": 2506500 }, { "epoch": 20.06, "learning_rate": 3.9972e-05, "loss": 8.75, "step": 2507000 }, { "epoch": 20.06, "learning_rate": 3.9970000000000005e-05, "loss": 8.7506, "step": 2507500 }, { "epoch": 20.06, "learning_rate": 3.9968e-05, "loss": 8.7277, "step": 2508000 }, { "epoch": 20.07, "learning_rate": 3.9966e-05, "loss": 8.737, "step": 2508500 }, { "epoch": 20.07, "learning_rate": 3.9964e-05, "loss": 8.7652, "step": 2509000 }, { "epoch": 20.08, "learning_rate": 3.9962e-05, "loss": 8.7609, "step": 2509500 }, { "epoch": 20.08, "learning_rate": 3.9960000000000004e-05, "loss": 8.751, "step": 2510000 }, { "epoch": 20.08, "learning_rate": 3.9958e-05, "loss": 8.7632, "step": 2510500 }, { "epoch": 20.09, "learning_rate": 3.9956e-05, "loss": 8.7489, "step": 2511000 }, { "epoch": 20.09, "learning_rate": 3.9954000000000004e-05, "loss": 8.7437, "step": 2511500 }, { "epoch": 20.1, "learning_rate": 3.9952e-05, "loss": 8.7423, "step": 2512000 }, { "epoch": 20.1, "learning_rate": 3.995e-05, "loss": 8.7464, "step": 2512500 }, { "epoch": 20.1, "learning_rate": 3.9948000000000004e-05, "loss": 8.7425, "step": 2513000 }, { "epoch": 20.11, "learning_rate": 3.9946e-05, "loss": 8.7445, "step": 2513500 }, { "epoch": 20.11, "learning_rate": 3.9944e-05, "loss": 8.745, "step": 2514000 }, { "epoch": 20.12, "learning_rate": 3.9942000000000005e-05, "loss": 8.7316, "step": 2514500 }, { "epoch": 20.12, "learning_rate": 3.994e-05, "loss": 8.7418, "step": 2515000 }, { "epoch": 20.12, "learning_rate": 3.9938e-05, "loss": 8.7455, "step": 2515500 }, { "epoch": 20.13, "learning_rate": 3.9936e-05, "loss": 8.747, "step": 2516000 }, { "epoch": 20.13, "learning_rate": 3.9934e-05, "loss": 8.7449, "step": 2516500 }, { "epoch": 20.14, "learning_rate": 3.9932e-05, "loss": 8.7609, "step": 2517000 }, { "epoch": 20.14, "learning_rate": 3.993e-05, "loss": 8.7478, "step": 2517500 }, { "epoch": 20.14, "learning_rate": 3.9928e-05, "loss": 8.7484, "step": 2518000 }, { "epoch": 20.15, "learning_rate": 3.9926000000000004e-05, "loss": 8.7603, "step": 2518500 }, { "epoch": 20.15, "learning_rate": 3.9924e-05, "loss": 8.7424, "step": 2519000 }, { "epoch": 20.16, "learning_rate": 3.9922e-05, "loss": 8.7391, "step": 2519500 }, { "epoch": 20.16, "learning_rate": 3.9920000000000004e-05, "loss": 8.7556, "step": 2520000 }, { "epoch": 20.16, "learning_rate": 3.9918e-05, "loss": 8.7489, "step": 2520500 }, { "epoch": 20.17, "learning_rate": 3.9916e-05, "loss": 8.738, "step": 2521000 }, { "epoch": 20.17, "learning_rate": 3.9914000000000004e-05, "loss": 8.7397, "step": 2521500 }, { "epoch": 20.18, "learning_rate": 3.9912e-05, "loss": 8.7666, "step": 2522000 }, { "epoch": 20.18, "learning_rate": 3.991e-05, "loss": 8.7634, "step": 2522500 }, { "epoch": 20.18, "learning_rate": 3.9908e-05, "loss": 8.766, "step": 2523000 }, { "epoch": 20.19, "learning_rate": 3.990600000000001e-05, "loss": 8.7453, "step": 2523500 }, { "epoch": 20.19, "learning_rate": 3.9904e-05, "loss": 8.7611, "step": 2524000 }, { "epoch": 20.2, "learning_rate": 3.9902e-05, "loss": 8.7573, "step": 2524500 }, { "epoch": 20.2, "learning_rate": 3.99e-05, "loss": 8.7483, "step": 2525000 }, { "epoch": 20.2, "learning_rate": 3.9898e-05, "loss": 8.7523, "step": 2525500 }, { "epoch": 20.21, "learning_rate": 3.9896e-05, "loss": 8.7371, "step": 2526000 }, { "epoch": 20.21, "learning_rate": 3.9894e-05, "loss": 8.7497, "step": 2526500 }, { "epoch": 20.22, "learning_rate": 3.9892000000000004e-05, "loss": 8.7514, "step": 2527000 }, { "epoch": 20.22, "learning_rate": 3.989e-05, "loss": 8.7436, "step": 2527500 }, { "epoch": 20.22, "learning_rate": 3.9888e-05, "loss": 8.7758, "step": 2528000 }, { "epoch": 20.23, "learning_rate": 3.9886000000000004e-05, "loss": 8.7418, "step": 2528500 }, { "epoch": 20.23, "learning_rate": 3.9884e-05, "loss": 8.7563, "step": 2529000 }, { "epoch": 20.24, "learning_rate": 3.9882e-05, "loss": 8.7427, "step": 2529500 }, { "epoch": 20.24, "learning_rate": 3.988e-05, "loss": 8.7548, "step": 2530000 }, { "epoch": 20.24, "learning_rate": 3.987800000000001e-05, "loss": 8.7452, "step": 2530500 }, { "epoch": 20.25, "learning_rate": 3.9876e-05, "loss": 8.754, "step": 2531000 }, { "epoch": 20.25, "learning_rate": 3.9874e-05, "loss": 8.7687, "step": 2531500 }, { "epoch": 20.26, "learning_rate": 3.9872e-05, "loss": 8.7575, "step": 2532000 }, { "epoch": 20.26, "learning_rate": 3.987e-05, "loss": 8.7528, "step": 2532500 }, { "epoch": 20.26, "learning_rate": 3.9868e-05, "loss": 8.7593, "step": 2533000 }, { "epoch": 20.27, "learning_rate": 3.9866e-05, "loss": 8.7557, "step": 2533500 }, { "epoch": 20.27, "learning_rate": 3.9864e-05, "loss": 8.728, "step": 2534000 }, { "epoch": 20.28, "learning_rate": 3.9862e-05, "loss": 8.7382, "step": 2534500 }, { "epoch": 20.28, "learning_rate": 3.986e-05, "loss": 8.774, "step": 2535000 }, { "epoch": 20.28, "learning_rate": 3.9858000000000004e-05, "loss": 8.7526, "step": 2535500 }, { "epoch": 20.29, "learning_rate": 3.9856000000000006e-05, "loss": 8.7463, "step": 2536000 }, { "epoch": 20.29, "learning_rate": 3.9854e-05, "loss": 8.7506, "step": 2536500 }, { "epoch": 20.3, "learning_rate": 3.9852e-05, "loss": 8.762, "step": 2537000 }, { "epoch": 20.3, "learning_rate": 3.9850000000000006e-05, "loss": 8.7645, "step": 2537500 }, { "epoch": 20.3, "learning_rate": 3.9848e-05, "loss": 8.7679, "step": 2538000 }, { "epoch": 20.31, "learning_rate": 3.9846e-05, "loss": 8.7622, "step": 2538500 }, { "epoch": 20.31, "learning_rate": 3.984400000000001e-05, "loss": 8.7486, "step": 2539000 }, { "epoch": 20.32, "learning_rate": 3.9842e-05, "loss": 8.741, "step": 2539500 }, { "epoch": 20.32, "learning_rate": 3.984e-05, "loss": 8.7501, "step": 2540000 }, { "epoch": 20.32, "learning_rate": 3.9838e-05, "loss": 8.7434, "step": 2540500 }, { "epoch": 20.33, "learning_rate": 3.9836e-05, "loss": 8.7571, "step": 2541000 }, { "epoch": 20.33, "learning_rate": 3.9834000000000005e-05, "loss": 8.7364, "step": 2541500 }, { "epoch": 20.34, "learning_rate": 3.9832e-05, "loss": 8.7423, "step": 2542000 }, { "epoch": 20.34, "learning_rate": 3.983e-05, "loss": 8.756, "step": 2542500 }, { "epoch": 20.34, "learning_rate": 3.9828000000000006e-05, "loss": 8.7345, "step": 2543000 }, { "epoch": 20.35, "learning_rate": 3.9826e-05, "loss": 8.7537, "step": 2543500 }, { "epoch": 20.35, "learning_rate": 3.9824e-05, "loss": 8.7819, "step": 2544000 }, { "epoch": 20.36, "learning_rate": 3.9822000000000006e-05, "loss": 8.7347, "step": 2544500 }, { "epoch": 20.36, "learning_rate": 3.982e-05, "loss": 8.7493, "step": 2545000 }, { "epoch": 20.36, "learning_rate": 3.9818e-05, "loss": 8.7572, "step": 2545500 }, { "epoch": 20.37, "learning_rate": 3.9816000000000006e-05, "loss": 8.7643, "step": 2546000 }, { "epoch": 20.37, "learning_rate": 3.9814e-05, "loss": 8.7455, "step": 2546500 }, { "epoch": 20.38, "learning_rate": 3.9812e-05, "loss": 8.7471, "step": 2547000 }, { "epoch": 20.38, "learning_rate": 3.981e-05, "loss": 8.7594, "step": 2547500 }, { "epoch": 20.38, "learning_rate": 3.9808e-05, "loss": 8.7679, "step": 2548000 }, { "epoch": 20.39, "learning_rate": 3.9806000000000005e-05, "loss": 8.7528, "step": 2548500 }, { "epoch": 20.39, "learning_rate": 3.9804e-05, "loss": 8.7436, "step": 2549000 }, { "epoch": 20.4, "learning_rate": 3.9802e-05, "loss": 8.7474, "step": 2549500 }, { "epoch": 20.4, "learning_rate": 3.9800000000000005e-05, "loss": 8.7466, "step": 2550000 }, { "epoch": 20.4, "learning_rate": 3.9798e-05, "loss": 8.74, "step": 2550500 }, { "epoch": 20.41, "learning_rate": 3.9796e-05, "loss": 8.7523, "step": 2551000 }, { "epoch": 20.41, "learning_rate": 3.9794000000000006e-05, "loss": 8.7484, "step": 2551500 }, { "epoch": 20.42, "learning_rate": 3.9792e-05, "loss": 8.7699, "step": 2552000 }, { "epoch": 20.42, "learning_rate": 3.979e-05, "loss": 8.7618, "step": 2552500 }, { "epoch": 20.42, "learning_rate": 3.9788000000000006e-05, "loss": 8.7496, "step": 2553000 }, { "epoch": 20.43, "learning_rate": 3.9786e-05, "loss": 8.754, "step": 2553500 }, { "epoch": 20.43, "learning_rate": 3.9784000000000004e-05, "loss": 8.7275, "step": 2554000 }, { "epoch": 20.44, "learning_rate": 3.9782e-05, "loss": 8.7431, "step": 2554500 }, { "epoch": 20.44, "learning_rate": 3.978e-05, "loss": 8.7648, "step": 2555000 }, { "epoch": 20.44, "learning_rate": 3.9778000000000004e-05, "loss": 8.757, "step": 2555500 }, { "epoch": 20.45, "learning_rate": 3.9776e-05, "loss": 8.7434, "step": 2556000 }, { "epoch": 20.45, "learning_rate": 3.9774e-05, "loss": 8.7382, "step": 2556500 }, { "epoch": 20.46, "learning_rate": 3.9772000000000005e-05, "loss": 8.7438, "step": 2557000 }, { "epoch": 20.46, "learning_rate": 3.977e-05, "loss": 8.7572, "step": 2557500 }, { "epoch": 20.46, "learning_rate": 3.9768e-05, "loss": 8.7276, "step": 2558000 }, { "epoch": 20.47, "learning_rate": 3.9766000000000005e-05, "loss": 8.7529, "step": 2558500 }, { "epoch": 20.47, "learning_rate": 3.9764e-05, "loss": 8.7441, "step": 2559000 }, { "epoch": 20.48, "learning_rate": 3.9761999999999996e-05, "loss": 8.7588, "step": 2559500 }, { "epoch": 20.48, "learning_rate": 3.9760000000000006e-05, "loss": 8.7583, "step": 2560000 }, { "epoch": 20.48, "learning_rate": 3.9758e-05, "loss": 8.7787, "step": 2560500 }, { "epoch": 20.49, "learning_rate": 3.9756000000000004e-05, "loss": 8.7775, "step": 2561000 }, { "epoch": 20.49, "learning_rate": 3.9754e-05, "loss": 8.7228, "step": 2561500 }, { "epoch": 20.5, "learning_rate": 3.9752e-05, "loss": 8.7313, "step": 2562000 }, { "epoch": 20.5, "learning_rate": 3.9750000000000004e-05, "loss": 8.7412, "step": 2562500 }, { "epoch": 20.5, "learning_rate": 3.9748e-05, "loss": 8.782, "step": 2563000 }, { "epoch": 20.51, "learning_rate": 3.9746e-05, "loss": 8.7659, "step": 2563500 }, { "epoch": 20.51, "learning_rate": 3.9744000000000004e-05, "loss": 8.7517, "step": 2564000 }, { "epoch": 20.52, "learning_rate": 3.9742e-05, "loss": 8.7642, "step": 2564500 }, { "epoch": 20.52, "learning_rate": 3.974e-05, "loss": 8.7448, "step": 2565000 }, { "epoch": 20.52, "learning_rate": 3.9738000000000005e-05, "loss": 8.7451, "step": 2565500 }, { "epoch": 20.53, "learning_rate": 3.9736e-05, "loss": 8.7637, "step": 2566000 }, { "epoch": 20.53, "learning_rate": 3.9734e-05, "loss": 8.746, "step": 2566500 }, { "epoch": 20.54, "learning_rate": 3.9732000000000005e-05, "loss": 8.7712, "step": 2567000 }, { "epoch": 20.54, "learning_rate": 3.973e-05, "loss": 8.7442, "step": 2567500 }, { "epoch": 20.54, "learning_rate": 3.9728e-05, "loss": 8.7426, "step": 2568000 }, { "epoch": 20.55, "learning_rate": 3.9726e-05, "loss": 8.7313, "step": 2568500 }, { "epoch": 20.55, "learning_rate": 3.9724e-05, "loss": 8.7444, "step": 2569000 }, { "epoch": 20.56, "learning_rate": 3.9722000000000004e-05, "loss": 8.7357, "step": 2569500 }, { "epoch": 20.56, "learning_rate": 3.972e-05, "loss": 8.756, "step": 2570000 }, { "epoch": 20.56, "learning_rate": 3.9718e-05, "loss": 8.7577, "step": 2570500 }, { "epoch": 20.57, "learning_rate": 3.9716000000000004e-05, "loss": 8.7396, "step": 2571000 }, { "epoch": 20.57, "learning_rate": 3.9714e-05, "loss": 8.7762, "step": 2571500 }, { "epoch": 20.58, "learning_rate": 3.9712e-05, "loss": 8.7765, "step": 2572000 }, { "epoch": 20.58, "learning_rate": 3.9710000000000004e-05, "loss": 8.7375, "step": 2572500 }, { "epoch": 20.58, "learning_rate": 3.9708e-05, "loss": 8.7453, "step": 2573000 }, { "epoch": 20.59, "learning_rate": 3.9706e-05, "loss": 8.7423, "step": 2573500 }, { "epoch": 20.59, "learning_rate": 3.9704000000000005e-05, "loss": 8.7519, "step": 2574000 }, { "epoch": 20.6, "learning_rate": 3.9702e-05, "loss": 8.7459, "step": 2574500 }, { "epoch": 20.6, "learning_rate": 3.97e-05, "loss": 8.7378, "step": 2575000 }, { "epoch": 20.6, "learning_rate": 3.9698000000000005e-05, "loss": 8.7495, "step": 2575500 }, { "epoch": 20.61, "learning_rate": 3.9696e-05, "loss": 8.7274, "step": 2576000 }, { "epoch": 20.61, "learning_rate": 3.9694e-05, "loss": 8.7505, "step": 2576500 }, { "epoch": 20.62, "learning_rate": 3.9692e-05, "loss": 8.7558, "step": 2577000 }, { "epoch": 20.62, "learning_rate": 3.969e-05, "loss": 8.7506, "step": 2577500 }, { "epoch": 20.62, "learning_rate": 3.9688000000000004e-05, "loss": 8.7763, "step": 2578000 }, { "epoch": 20.63, "learning_rate": 3.9686e-05, "loss": 8.7477, "step": 2578500 }, { "epoch": 20.63, "learning_rate": 3.9684e-05, "loss": 8.7461, "step": 2579000 }, { "epoch": 20.64, "learning_rate": 3.9682000000000004e-05, "loss": 8.7493, "step": 2579500 }, { "epoch": 20.64, "learning_rate": 3.968e-05, "loss": 8.7388, "step": 2580000 }, { "epoch": 20.64, "learning_rate": 3.9678e-05, "loss": 8.7664, "step": 2580500 }, { "epoch": 20.65, "learning_rate": 3.9676000000000004e-05, "loss": 8.7581, "step": 2581000 }, { "epoch": 20.65, "learning_rate": 3.9674e-05, "loss": 8.7532, "step": 2581500 }, { "epoch": 20.66, "learning_rate": 3.9672e-05, "loss": 8.736, "step": 2582000 }, { "epoch": 20.66, "learning_rate": 3.9670000000000005e-05, "loss": 8.7516, "step": 2582500 }, { "epoch": 20.66, "learning_rate": 3.9668e-05, "loss": 8.7647, "step": 2583000 }, { "epoch": 20.67, "learning_rate": 3.9666e-05, "loss": 8.746, "step": 2583500 }, { "epoch": 20.67, "learning_rate": 3.9664e-05, "loss": 8.7431, "step": 2584000 }, { "epoch": 20.68, "learning_rate": 3.9662e-05, "loss": 8.7488, "step": 2584500 }, { "epoch": 20.68, "learning_rate": 3.966e-05, "loss": 8.7619, "step": 2585000 }, { "epoch": 20.68, "learning_rate": 3.9658e-05, "loss": 8.7424, "step": 2585500 }, { "epoch": 20.69, "learning_rate": 3.9656e-05, "loss": 8.7527, "step": 2586000 }, { "epoch": 20.69, "learning_rate": 3.9654000000000004e-05, "loss": 8.7339, "step": 2586500 }, { "epoch": 20.7, "learning_rate": 3.9652e-05, "loss": 8.772, "step": 2587000 }, { "epoch": 20.7, "learning_rate": 3.965e-05, "loss": 8.7377, "step": 2587500 }, { "epoch": 20.7, "learning_rate": 3.9648000000000004e-05, "loss": 8.7291, "step": 2588000 }, { "epoch": 20.71, "learning_rate": 3.9646e-05, "loss": 8.7439, "step": 2588500 }, { "epoch": 20.71, "learning_rate": 3.9644e-05, "loss": 8.7439, "step": 2589000 }, { "epoch": 20.72, "learning_rate": 3.9642000000000004e-05, "loss": 8.7804, "step": 2589500 }, { "epoch": 20.72, "learning_rate": 3.964e-05, "loss": 8.7469, "step": 2590000 }, { "epoch": 20.72, "learning_rate": 3.9638e-05, "loss": 8.7453, "step": 2590500 }, { "epoch": 20.73, "learning_rate": 3.9636e-05, "loss": 8.7581, "step": 2591000 }, { "epoch": 20.73, "learning_rate": 3.963400000000001e-05, "loss": 8.7647, "step": 2591500 }, { "epoch": 20.74, "learning_rate": 3.9632e-05, "loss": 8.7428, "step": 2592000 }, { "epoch": 20.74, "learning_rate": 3.963e-05, "loss": 8.7661, "step": 2592500 }, { "epoch": 20.74, "learning_rate": 3.9628e-05, "loss": 8.7405, "step": 2593000 }, { "epoch": 20.75, "learning_rate": 3.9626e-05, "loss": 8.7643, "step": 2593500 }, { "epoch": 20.75, "learning_rate": 3.9624e-05, "loss": 8.763, "step": 2594000 }, { "epoch": 20.76, "learning_rate": 3.9622e-05, "loss": 8.7443, "step": 2594500 }, { "epoch": 20.76, "learning_rate": 3.9620000000000004e-05, "loss": 8.7409, "step": 2595000 }, { "epoch": 20.76, "learning_rate": 3.9618e-05, "loss": 8.7305, "step": 2595500 }, { "epoch": 20.77, "learning_rate": 3.9616e-05, "loss": 8.7561, "step": 2596000 }, { "epoch": 20.77, "learning_rate": 3.9614000000000004e-05, "loss": 8.7499, "step": 2596500 }, { "epoch": 20.78, "learning_rate": 3.9612000000000006e-05, "loss": 8.746, "step": 2597000 }, { "epoch": 20.78, "learning_rate": 3.961e-05, "loss": 8.73, "step": 2597500 }, { "epoch": 20.78, "learning_rate": 3.9608e-05, "loss": 8.7546, "step": 2598000 }, { "epoch": 20.79, "learning_rate": 3.960600000000001e-05, "loss": 8.735, "step": 2598500 }, { "epoch": 20.79, "learning_rate": 3.9604e-05, "loss": 8.7594, "step": 2599000 }, { "epoch": 20.8, "learning_rate": 3.9602e-05, "loss": 8.7451, "step": 2599500 }, { "epoch": 20.8, "learning_rate": 3.960000000000001e-05, "loss": 8.7523, "step": 2600000 }, { "epoch": 20.8, "learning_rate": 3.9598e-05, "loss": 8.7358, "step": 2600500 }, { "epoch": 20.81, "learning_rate": 3.9596e-05, "loss": 8.7465, "step": 2601000 }, { "epoch": 20.81, "learning_rate": 3.9594e-05, "loss": 8.7418, "step": 2601500 }, { "epoch": 20.82, "learning_rate": 3.9592e-05, "loss": 8.7286, "step": 2602000 }, { "epoch": 20.82, "learning_rate": 3.959e-05, "loss": 8.7478, "step": 2602500 }, { "epoch": 20.82, "learning_rate": 3.9588e-05, "loss": 8.7297, "step": 2603000 }, { "epoch": 20.83, "learning_rate": 3.9586000000000004e-05, "loss": 8.7377, "step": 2603500 }, { "epoch": 20.83, "learning_rate": 3.9584000000000006e-05, "loss": 8.7434, "step": 2604000 }, { "epoch": 20.84, "learning_rate": 3.9582e-05, "loss": 8.7356, "step": 2604500 }, { "epoch": 20.84, "learning_rate": 3.958e-05, "loss": 8.7381, "step": 2605000 }, { "epoch": 20.84, "learning_rate": 3.9578000000000006e-05, "loss": 8.7341, "step": 2605500 }, { "epoch": 20.85, "learning_rate": 3.9576e-05, "loss": 8.7779, "step": 2606000 }, { "epoch": 20.85, "learning_rate": 3.9574e-05, "loss": 8.7562, "step": 2606500 }, { "epoch": 20.86, "learning_rate": 3.957200000000001e-05, "loss": 8.7583, "step": 2607000 }, { "epoch": 20.86, "learning_rate": 3.957e-05, "loss": 8.7652, "step": 2607500 }, { "epoch": 20.86, "learning_rate": 3.9568e-05, "loss": 8.7554, "step": 2608000 }, { "epoch": 20.87, "learning_rate": 3.9566e-05, "loss": 8.7556, "step": 2608500 }, { "epoch": 20.87, "learning_rate": 3.9564e-05, "loss": 8.7468, "step": 2609000 }, { "epoch": 20.88, "learning_rate": 3.9562000000000005e-05, "loss": 8.7588, "step": 2609500 }, { "epoch": 20.88, "learning_rate": 3.956e-05, "loss": 8.7564, "step": 2610000 }, { "epoch": 20.88, "learning_rate": 3.9558e-05, "loss": 8.7609, "step": 2610500 }, { "epoch": 20.89, "learning_rate": 3.9556000000000005e-05, "loss": 8.7527, "step": 2611000 }, { "epoch": 20.89, "learning_rate": 3.9554e-05, "loss": 8.7193, "step": 2611500 }, { "epoch": 20.9, "learning_rate": 3.9552000000000003e-05, "loss": 8.7582, "step": 2612000 }, { "epoch": 20.9, "learning_rate": 3.9550000000000006e-05, "loss": 8.759, "step": 2612500 }, { "epoch": 20.9, "learning_rate": 3.9548e-05, "loss": 8.7641, "step": 2613000 }, { "epoch": 20.91, "learning_rate": 3.9546e-05, "loss": 8.755, "step": 2613500 }, { "epoch": 20.91, "learning_rate": 3.9544000000000006e-05, "loss": 8.7534, "step": 2614000 }, { "epoch": 20.92, "learning_rate": 3.9542e-05, "loss": 8.7532, "step": 2614500 }, { "epoch": 20.92, "learning_rate": 3.954e-05, "loss": 8.7431, "step": 2615000 }, { "epoch": 20.92, "learning_rate": 3.9538e-05, "loss": 8.7619, "step": 2615500 }, { "epoch": 20.93, "learning_rate": 3.9536e-05, "loss": 8.7415, "step": 2616000 }, { "epoch": 20.93, "learning_rate": 3.9534000000000005e-05, "loss": 8.7536, "step": 2616500 }, { "epoch": 20.94, "learning_rate": 3.9532e-05, "loss": 8.7386, "step": 2617000 }, { "epoch": 20.94, "learning_rate": 3.953e-05, "loss": 8.7562, "step": 2617500 }, { "epoch": 20.94, "learning_rate": 3.9528000000000005e-05, "loss": 8.7474, "step": 2618000 }, { "epoch": 20.95, "learning_rate": 3.9526e-05, "loss": 8.7442, "step": 2618500 }, { "epoch": 20.95, "learning_rate": 3.9524e-05, "loss": 8.7655, "step": 2619000 }, { "epoch": 20.96, "learning_rate": 3.9522000000000005e-05, "loss": 8.7445, "step": 2619500 }, { "epoch": 20.96, "learning_rate": 3.952e-05, "loss": 8.7439, "step": 2620000 }, { "epoch": 20.96, "learning_rate": 3.9518e-05, "loss": 8.7373, "step": 2620500 }, { "epoch": 20.97, "learning_rate": 3.9516000000000006e-05, "loss": 8.7364, "step": 2621000 }, { "epoch": 20.97, "learning_rate": 3.9514e-05, "loss": 8.7567, "step": 2621500 }, { "epoch": 20.98, "learning_rate": 3.9512000000000004e-05, "loss": 8.7445, "step": 2622000 }, { "epoch": 20.98, "learning_rate": 3.951e-05, "loss": 8.7257, "step": 2622500 }, { "epoch": 20.98, "learning_rate": 3.9508e-05, "loss": 8.7494, "step": 2623000 }, { "epoch": 20.99, "learning_rate": 3.9506000000000004e-05, "loss": 8.7625, "step": 2623500 }, { "epoch": 20.99, "learning_rate": 3.9504e-05, "loss": 8.7571, "step": 2624000 }, { "epoch": 21.0, "learning_rate": 3.9502e-05, "loss": 8.7377, "step": 2624500 }, { "epoch": 21.0, "learning_rate": 3.9500000000000005e-05, "loss": 8.7462, "step": 2625000 }, { "epoch": 21.0, "learning_rate": 3.9498e-05, "loss": 8.7537, "step": 2625500 }, { "epoch": 21.01, "learning_rate": 3.9496e-05, "loss": 8.7369, "step": 2626000 }, { "epoch": 21.01, "learning_rate": 3.9494000000000005e-05, "loss": 8.7495, "step": 2626500 }, { "epoch": 21.02, "learning_rate": 3.9492e-05, "loss": 8.7477, "step": 2627000 }, { "epoch": 21.02, "learning_rate": 3.9489999999999996e-05, "loss": 8.7414, "step": 2627500 }, { "epoch": 21.02, "learning_rate": 3.9488000000000005e-05, "loss": 8.7373, "step": 2628000 }, { "epoch": 21.03, "learning_rate": 3.9486e-05, "loss": 8.7534, "step": 2628500 }, { "epoch": 21.03, "learning_rate": 3.9484000000000003e-05, "loss": 8.7582, "step": 2629000 }, { "epoch": 21.04, "learning_rate": 3.9482e-05, "loss": 8.7478, "step": 2629500 }, { "epoch": 21.04, "learning_rate": 3.948e-05, "loss": 8.7562, "step": 2630000 }, { "epoch": 21.04, "learning_rate": 3.9478000000000004e-05, "loss": 8.7449, "step": 2630500 }, { "epoch": 21.05, "learning_rate": 3.9476e-05, "loss": 8.7414, "step": 2631000 }, { "epoch": 21.05, "learning_rate": 3.9474e-05, "loss": 8.7493, "step": 2631500 }, { "epoch": 21.06, "learning_rate": 3.9472000000000004e-05, "loss": 8.7687, "step": 2632000 }, { "epoch": 21.06, "learning_rate": 3.947e-05, "loss": 8.7414, "step": 2632500 }, { "epoch": 21.06, "learning_rate": 3.9468e-05, "loss": 8.7596, "step": 2633000 }, { "epoch": 21.07, "learning_rate": 3.9466000000000005e-05, "loss": 8.7636, "step": 2633500 }, { "epoch": 21.07, "learning_rate": 3.9464e-05, "loss": 8.7497, "step": 2634000 }, { "epoch": 21.08, "learning_rate": 3.9462e-05, "loss": 8.7399, "step": 2634500 }, { "epoch": 21.08, "learning_rate": 3.9460000000000005e-05, "loss": 8.7529, "step": 2635000 }, { "epoch": 21.08, "learning_rate": 3.9458e-05, "loss": 8.7663, "step": 2635500 }, { "epoch": 21.09, "learning_rate": 3.9456e-05, "loss": 8.7418, "step": 2636000 }, { "epoch": 21.09, "learning_rate": 3.9454000000000005e-05, "loss": 8.7522, "step": 2636500 }, { "epoch": 21.1, "learning_rate": 3.9452e-05, "loss": 8.7533, "step": 2637000 }, { "epoch": 21.1, "learning_rate": 3.9450000000000003e-05, "loss": 8.7415, "step": 2637500 }, { "epoch": 21.1, "learning_rate": 3.9448e-05, "loss": 8.7385, "step": 2638000 }, { "epoch": 21.11, "learning_rate": 3.9446e-05, "loss": 8.7419, "step": 2638500 }, { "epoch": 21.11, "learning_rate": 3.9444000000000004e-05, "loss": 8.7715, "step": 2639000 }, { "epoch": 21.12, "learning_rate": 3.9442e-05, "loss": 8.7507, "step": 2639500 }, { "epoch": 21.12, "learning_rate": 3.944e-05, "loss": 8.7477, "step": 2640000 }, { "epoch": 21.12, "learning_rate": 3.9438000000000004e-05, "loss": 8.7394, "step": 2640500 }, { "epoch": 21.13, "learning_rate": 3.9436e-05, "loss": 8.7386, "step": 2641000 }, { "epoch": 21.13, "learning_rate": 3.9434e-05, "loss": 8.7454, "step": 2641500 }, { "epoch": 21.14, "learning_rate": 3.9432000000000005e-05, "loss": 8.7578, "step": 2642000 }, { "epoch": 21.14, "learning_rate": 3.943e-05, "loss": 8.7504, "step": 2642500 }, { "epoch": 21.14, "learning_rate": 3.9428e-05, "loss": 8.7524, "step": 2643000 }, { "epoch": 21.15, "learning_rate": 3.9426000000000005e-05, "loss": 8.7521, "step": 2643500 }, { "epoch": 21.15, "learning_rate": 3.9424e-05, "loss": 8.7392, "step": 2644000 }, { "epoch": 21.16, "learning_rate": 3.9422e-05, "loss": 8.7712, "step": 2644500 }, { "epoch": 21.16, "learning_rate": 3.942e-05, "loss": 8.7487, "step": 2645000 }, { "epoch": 21.16, "learning_rate": 3.9418e-05, "loss": 8.7405, "step": 2645500 }, { "epoch": 21.17, "learning_rate": 3.9416000000000003e-05, "loss": 8.7639, "step": 2646000 }, { "epoch": 21.17, "learning_rate": 3.9414e-05, "loss": 8.7544, "step": 2646500 }, { "epoch": 21.18, "learning_rate": 3.9412e-05, "loss": 8.7579, "step": 2647000 }, { "epoch": 21.18, "learning_rate": 3.9410000000000004e-05, "loss": 8.7649, "step": 2647500 }, { "epoch": 21.18, "learning_rate": 3.9408e-05, "loss": 8.757, "step": 2648000 }, { "epoch": 21.19, "learning_rate": 3.9406e-05, "loss": 8.7403, "step": 2648500 }, { "epoch": 21.19, "learning_rate": 3.9404000000000004e-05, "loss": 8.7594, "step": 2649000 }, { "epoch": 21.2, "learning_rate": 3.9402e-05, "loss": 8.7412, "step": 2649500 }, { "epoch": 21.2, "learning_rate": 3.94e-05, "loss": 8.761, "step": 2650000 }, { "epoch": 21.2, "learning_rate": 3.9398000000000005e-05, "loss": 8.7683, "step": 2650500 }, { "epoch": 21.21, "learning_rate": 3.9396e-05, "loss": 8.7619, "step": 2651000 }, { "epoch": 21.21, "learning_rate": 3.9394e-05, "loss": 8.7598, "step": 2651500 }, { "epoch": 21.22, "learning_rate": 3.9392e-05, "loss": 8.742, "step": 2652000 }, { "epoch": 21.22, "learning_rate": 3.939e-05, "loss": 8.7523, "step": 2652500 }, { "epoch": 21.22, "learning_rate": 3.9388e-05, "loss": 8.7477, "step": 2653000 }, { "epoch": 21.23, "learning_rate": 3.9386e-05, "loss": 8.7616, "step": 2653500 }, { "epoch": 21.23, "learning_rate": 3.9384e-05, "loss": 8.7424, "step": 2654000 }, { "epoch": 21.24, "learning_rate": 3.9382000000000003e-05, "loss": 8.7301, "step": 2654500 }, { "epoch": 21.24, "learning_rate": 3.938e-05, "loss": 8.7595, "step": 2655000 }, { "epoch": 21.24, "learning_rate": 3.9378e-05, "loss": 8.7544, "step": 2655500 }, { "epoch": 21.25, "learning_rate": 3.9376000000000004e-05, "loss": 8.7263, "step": 2656000 }, { "epoch": 21.25, "learning_rate": 3.9374e-05, "loss": 8.7458, "step": 2656500 }, { "epoch": 21.26, "learning_rate": 3.9372e-05, "loss": 8.7498, "step": 2657000 }, { "epoch": 21.26, "learning_rate": 3.9370000000000004e-05, "loss": 8.7327, "step": 2657500 }, { "epoch": 21.26, "learning_rate": 3.9368e-05, "loss": 8.747, "step": 2658000 }, { "epoch": 21.27, "learning_rate": 3.9366e-05, "loss": 8.748, "step": 2658500 }, { "epoch": 21.27, "learning_rate": 3.9364e-05, "loss": 8.7489, "step": 2659000 }, { "epoch": 21.28, "learning_rate": 3.936200000000001e-05, "loss": 8.7491, "step": 2659500 }, { "epoch": 21.28, "learning_rate": 3.936e-05, "loss": 8.7658, "step": 2660000 }, { "epoch": 21.28, "learning_rate": 3.9358e-05, "loss": 8.7597, "step": 2660500 }, { "epoch": 21.29, "learning_rate": 3.9356e-05, "loss": 8.7517, "step": 2661000 }, { "epoch": 21.29, "learning_rate": 3.9354e-05, "loss": 8.7591, "step": 2661500 }, { "epoch": 21.3, "learning_rate": 3.9352e-05, "loss": 8.7671, "step": 2662000 }, { "epoch": 21.3, "learning_rate": 3.935e-05, "loss": 8.768, "step": 2662500 }, { "epoch": 21.3, "learning_rate": 3.9348000000000003e-05, "loss": 8.7603, "step": 2663000 }, { "epoch": 21.31, "learning_rate": 3.9346e-05, "loss": 8.7579, "step": 2663500 }, { "epoch": 21.31, "learning_rate": 3.9344e-05, "loss": 8.7519, "step": 2664000 }, { "epoch": 21.32, "learning_rate": 3.9342000000000004e-05, "loss": 8.7588, "step": 2664500 }, { "epoch": 21.32, "learning_rate": 3.9340000000000006e-05, "loss": 8.7488, "step": 2665000 }, { "epoch": 21.32, "learning_rate": 3.9338e-05, "loss": 8.7409, "step": 2665500 }, { "epoch": 21.33, "learning_rate": 3.9336e-05, "loss": 8.741, "step": 2666000 }, { "epoch": 21.33, "learning_rate": 3.9334000000000007e-05, "loss": 8.7455, "step": 2666500 }, { "epoch": 21.34, "learning_rate": 3.9332e-05, "loss": 8.7599, "step": 2667000 }, { "epoch": 21.34, "learning_rate": 3.933e-05, "loss": 8.7435, "step": 2667500 }, { "epoch": 21.34, "learning_rate": 3.932800000000001e-05, "loss": 8.7904, "step": 2668000 }, { "epoch": 21.35, "learning_rate": 3.9326e-05, "loss": 8.7415, "step": 2668500 }, { "epoch": 21.35, "learning_rate": 3.9324e-05, "loss": 8.7281, "step": 2669000 }, { "epoch": 21.36, "learning_rate": 3.9322e-05, "loss": 8.7484, "step": 2669500 }, { "epoch": 21.36, "learning_rate": 3.932e-05, "loss": 8.7585, "step": 2670000 }, { "epoch": 21.36, "learning_rate": 3.9318e-05, "loss": 8.7509, "step": 2670500 }, { "epoch": 21.37, "learning_rate": 3.9316e-05, "loss": 8.7465, "step": 2671000 }, { "epoch": 21.37, "learning_rate": 3.9314000000000003e-05, "loss": 8.7548, "step": 2671500 }, { "epoch": 21.38, "learning_rate": 3.9312000000000006e-05, "loss": 8.7575, "step": 2672000 }, { "epoch": 21.38, "learning_rate": 3.931e-05, "loss": 8.7693, "step": 2672500 }, { "epoch": 21.38, "learning_rate": 3.9308e-05, "loss": 8.7572, "step": 2673000 }, { "epoch": 21.39, "learning_rate": 3.9306000000000006e-05, "loss": 8.7605, "step": 2673500 }, { "epoch": 21.39, "learning_rate": 3.9304e-05, "loss": 8.7572, "step": 2674000 }, { "epoch": 21.4, "learning_rate": 3.9302e-05, "loss": 8.7421, "step": 2674500 }, { "epoch": 21.4, "learning_rate": 3.9300000000000007e-05, "loss": 8.7615, "step": 2675000 }, { "epoch": 21.4, "learning_rate": 3.9298e-05, "loss": 8.7423, "step": 2675500 }, { "epoch": 21.41, "learning_rate": 3.9296e-05, "loss": 8.76, "step": 2676000 }, { "epoch": 21.41, "learning_rate": 3.9294e-05, "loss": 8.7466, "step": 2676500 }, { "epoch": 21.42, "learning_rate": 3.9292e-05, "loss": 8.7614, "step": 2677000 }, { "epoch": 21.42, "learning_rate": 3.9290000000000005e-05, "loss": 8.7362, "step": 2677500 }, { "epoch": 21.42, "learning_rate": 3.9288e-05, "loss": 8.755, "step": 2678000 }, { "epoch": 21.43, "learning_rate": 3.9286e-05, "loss": 8.7449, "step": 2678500 }, { "epoch": 21.43, "learning_rate": 3.9284000000000005e-05, "loss": 8.7552, "step": 2679000 }, { "epoch": 21.44, "learning_rate": 3.9282e-05, "loss": 8.7674, "step": 2679500 }, { "epoch": 21.44, "learning_rate": 3.9280000000000003e-05, "loss": 8.756, "step": 2680000 }, { "epoch": 21.44, "learning_rate": 3.9278000000000006e-05, "loss": 8.7526, "step": 2680500 }, { "epoch": 21.45, "learning_rate": 3.9276e-05, "loss": 8.7271, "step": 2681000 }, { "epoch": 21.45, "learning_rate": 3.9274e-05, "loss": 8.7505, "step": 2681500 }, { "epoch": 21.46, "learning_rate": 3.9272000000000006e-05, "loss": 8.7501, "step": 2682000 }, { "epoch": 21.46, "learning_rate": 3.927e-05, "loss": 8.7527, "step": 2682500 }, { "epoch": 21.46, "learning_rate": 3.9268e-05, "loss": 8.7749, "step": 2683000 }, { "epoch": 21.47, "learning_rate": 3.9266e-05, "loss": 8.771, "step": 2683500 }, { "epoch": 21.47, "learning_rate": 3.9264e-05, "loss": 8.7672, "step": 2684000 }, { "epoch": 21.48, "learning_rate": 3.9262000000000005e-05, "loss": 8.7374, "step": 2684500 }, { "epoch": 21.48, "learning_rate": 3.926e-05, "loss": 8.7486, "step": 2685000 }, { "epoch": 21.48, "learning_rate": 3.9258e-05, "loss": 8.7362, "step": 2685500 }, { "epoch": 21.49, "learning_rate": 3.9256000000000005e-05, "loss": 8.7447, "step": 2686000 }, { "epoch": 21.49, "learning_rate": 3.9254e-05, "loss": 8.7494, "step": 2686500 }, { "epoch": 21.5, "learning_rate": 3.9252e-05, "loss": 8.7582, "step": 2687000 }, { "epoch": 21.5, "learning_rate": 3.9250000000000005e-05, "loss": 8.7494, "step": 2687500 }, { "epoch": 21.5, "learning_rate": 3.9248e-05, "loss": 8.7503, "step": 2688000 }, { "epoch": 21.51, "learning_rate": 3.9245999999999997e-05, "loss": 8.7564, "step": 2688500 }, { "epoch": 21.51, "learning_rate": 3.9244000000000006e-05, "loss": 8.7582, "step": 2689000 }, { "epoch": 21.52, "learning_rate": 3.9242e-05, "loss": 8.7445, "step": 2689500 }, { "epoch": 21.52, "learning_rate": 3.9240000000000004e-05, "loss": 8.7609, "step": 2690000 }, { "epoch": 21.52, "learning_rate": 3.9238e-05, "loss": 8.7466, "step": 2690500 }, { "epoch": 21.53, "learning_rate": 3.9236e-05, "loss": 8.7613, "step": 2691000 }, { "epoch": 21.53, "learning_rate": 3.9234000000000004e-05, "loss": 8.7652, "step": 2691500 }, { "epoch": 21.54, "learning_rate": 3.9232e-05, "loss": 8.7512, "step": 2692000 }, { "epoch": 21.54, "learning_rate": 3.923e-05, "loss": 8.7371, "step": 2692500 }, { "epoch": 21.54, "learning_rate": 3.9228000000000005e-05, "loss": 8.743, "step": 2693000 }, { "epoch": 21.55, "learning_rate": 3.9226e-05, "loss": 8.7588, "step": 2693500 }, { "epoch": 21.55, "learning_rate": 3.9224e-05, "loss": 8.7515, "step": 2694000 }, { "epoch": 21.56, "learning_rate": 3.9222000000000005e-05, "loss": 8.7558, "step": 2694500 }, { "epoch": 21.56, "learning_rate": 3.922e-05, "loss": 8.7645, "step": 2695000 }, { "epoch": 21.56, "learning_rate": 3.9217999999999996e-05, "loss": 8.7394, "step": 2695500 }, { "epoch": 21.57, "learning_rate": 3.9216000000000005e-05, "loss": 8.7447, "step": 2696000 }, { "epoch": 21.57, "learning_rate": 3.9214e-05, "loss": 8.7325, "step": 2696500 }, { "epoch": 21.58, "learning_rate": 3.9212e-05, "loss": 8.7391, "step": 2697000 }, { "epoch": 21.58, "learning_rate": 3.921e-05, "loss": 8.7545, "step": 2697500 }, { "epoch": 21.58, "learning_rate": 3.9208e-05, "loss": 8.7332, "step": 2698000 }, { "epoch": 21.59, "learning_rate": 3.9206000000000004e-05, "loss": 8.7581, "step": 2698500 }, { "epoch": 21.59, "learning_rate": 3.9204e-05, "loss": 8.7392, "step": 2699000 }, { "epoch": 21.6, "learning_rate": 3.9202e-05, "loss": 8.7479, "step": 2699500 }, { "epoch": 21.6, "learning_rate": 3.9200000000000004e-05, "loss": 8.7562, "step": 2700000 }, { "epoch": 21.6, "learning_rate": 3.9198e-05, "loss": 8.7599, "step": 2700500 }, { "epoch": 21.61, "learning_rate": 3.9196e-05, "loss": 8.7577, "step": 2701000 }, { "epoch": 21.61, "learning_rate": 3.9194000000000005e-05, "loss": 8.7517, "step": 2701500 }, { "epoch": 21.62, "learning_rate": 3.9192e-05, "loss": 8.7459, "step": 2702000 }, { "epoch": 21.62, "learning_rate": 3.919e-05, "loss": 8.747, "step": 2702500 }, { "epoch": 21.62, "learning_rate": 3.9188000000000005e-05, "loss": 8.7424, "step": 2703000 }, { "epoch": 21.63, "learning_rate": 3.9186e-05, "loss": 8.7484, "step": 2703500 }, { "epoch": 21.63, "learning_rate": 3.9184e-05, "loss": 8.7567, "step": 2704000 }, { "epoch": 21.64, "learning_rate": 3.9182000000000005e-05, "loss": 8.7457, "step": 2704500 }, { "epoch": 21.64, "learning_rate": 3.918e-05, "loss": 8.7553, "step": 2705000 }, { "epoch": 21.64, "learning_rate": 3.9178e-05, "loss": 8.7627, "step": 2705500 }, { "epoch": 21.65, "learning_rate": 3.9176e-05, "loss": 8.751, "step": 2706000 }, { "epoch": 21.65, "learning_rate": 3.9174e-05, "loss": 8.7361, "step": 2706500 }, { "epoch": 21.66, "learning_rate": 3.9172000000000004e-05, "loss": 8.758, "step": 2707000 }, { "epoch": 21.66, "learning_rate": 3.917e-05, "loss": 8.7374, "step": 2707500 }, { "epoch": 21.66, "learning_rate": 3.9168e-05, "loss": 8.7641, "step": 2708000 }, { "epoch": 21.67, "learning_rate": 3.9166000000000004e-05, "loss": 8.7491, "step": 2708500 }, { "epoch": 21.67, "learning_rate": 3.9164e-05, "loss": 8.7498, "step": 2709000 }, { "epoch": 21.68, "learning_rate": 3.9162e-05, "loss": 8.7684, "step": 2709500 }, { "epoch": 21.68, "learning_rate": 3.9160000000000005e-05, "loss": 8.7378, "step": 2710000 }, { "epoch": 21.68, "learning_rate": 3.9158e-05, "loss": 8.7516, "step": 2710500 }, { "epoch": 21.69, "learning_rate": 3.9156e-05, "loss": 8.7467, "step": 2711000 }, { "epoch": 21.69, "learning_rate": 3.9154000000000005e-05, "loss": 8.7462, "step": 2711500 }, { "epoch": 21.7, "learning_rate": 3.9152e-05, "loss": 8.7339, "step": 2712000 }, { "epoch": 21.7, "learning_rate": 3.915e-05, "loss": 8.737, "step": 2712500 }, { "epoch": 21.7, "learning_rate": 3.9148e-05, "loss": 8.7494, "step": 2713000 }, { "epoch": 21.71, "learning_rate": 3.9146e-05, "loss": 8.7419, "step": 2713500 }, { "epoch": 21.71, "learning_rate": 3.9144e-05, "loss": 8.7408, "step": 2714000 }, { "epoch": 21.72, "learning_rate": 3.9142e-05, "loss": 8.7589, "step": 2714500 }, { "epoch": 21.72, "learning_rate": 3.914e-05, "loss": 8.7455, "step": 2715000 }, { "epoch": 21.72, "learning_rate": 3.9138000000000004e-05, "loss": 8.7491, "step": 2715500 }, { "epoch": 21.73, "learning_rate": 3.9136e-05, "loss": 8.7614, "step": 2716000 }, { "epoch": 21.73, "learning_rate": 3.9134e-05, "loss": 8.7613, "step": 2716500 }, { "epoch": 21.74, "learning_rate": 3.9132000000000004e-05, "loss": 8.766, "step": 2717000 }, { "epoch": 21.74, "learning_rate": 3.913e-05, "loss": 8.743, "step": 2717500 }, { "epoch": 21.74, "learning_rate": 3.9128e-05, "loss": 8.7527, "step": 2718000 }, { "epoch": 21.75, "learning_rate": 3.9126000000000005e-05, "loss": 8.7655, "step": 2718500 }, { "epoch": 21.75, "learning_rate": 3.9124e-05, "loss": 8.7501, "step": 2719000 }, { "epoch": 21.76, "learning_rate": 3.9122e-05, "loss": 8.745, "step": 2719500 }, { "epoch": 21.76, "learning_rate": 3.912e-05, "loss": 8.7711, "step": 2720000 }, { "epoch": 21.76, "learning_rate": 3.911800000000001e-05, "loss": 8.7457, "step": 2720500 }, { "epoch": 21.77, "learning_rate": 3.9116e-05, "loss": 8.7472, "step": 2721000 }, { "epoch": 21.77, "learning_rate": 3.9114e-05, "loss": 8.7145, "step": 2721500 }, { "epoch": 21.78, "learning_rate": 3.9112e-05, "loss": 8.758, "step": 2722000 }, { "epoch": 21.78, "learning_rate": 3.911e-05, "loss": 8.7595, "step": 2722500 }, { "epoch": 21.78, "learning_rate": 3.9108e-05, "loss": 8.7386, "step": 2723000 }, { "epoch": 21.79, "learning_rate": 3.9106e-05, "loss": 8.7493, "step": 2723500 }, { "epoch": 21.79, "learning_rate": 3.9104000000000004e-05, "loss": 8.7442, "step": 2724000 }, { "epoch": 21.8, "learning_rate": 3.9102e-05, "loss": 8.7562, "step": 2724500 }, { "epoch": 21.8, "learning_rate": 3.91e-05, "loss": 8.7416, "step": 2725000 }, { "epoch": 21.8, "learning_rate": 3.9098000000000004e-05, "loss": 8.7391, "step": 2725500 }, { "epoch": 21.81, "learning_rate": 3.9096e-05, "loss": 8.7682, "step": 2726000 }, { "epoch": 21.81, "learning_rate": 3.9094e-05, "loss": 8.7512, "step": 2726500 }, { "epoch": 21.82, "learning_rate": 3.9092e-05, "loss": 8.7514, "step": 2727000 }, { "epoch": 21.82, "learning_rate": 3.909000000000001e-05, "loss": 8.7522, "step": 2727500 }, { "epoch": 21.82, "learning_rate": 3.9088e-05, "loss": 8.7911, "step": 2728000 }, { "epoch": 21.83, "learning_rate": 3.9086e-05, "loss": 8.7713, "step": 2728500 }, { "epoch": 21.83, "learning_rate": 3.908400000000001e-05, "loss": 8.7604, "step": 2729000 }, { "epoch": 21.84, "learning_rate": 3.9082e-05, "loss": 8.7477, "step": 2729500 }, { "epoch": 21.84, "learning_rate": 3.908e-05, "loss": 8.7555, "step": 2730000 }, { "epoch": 21.84, "learning_rate": 3.9078e-05, "loss": 8.742, "step": 2730500 }, { "epoch": 21.85, "learning_rate": 3.9076e-05, "loss": 8.74, "step": 2731000 }, { "epoch": 21.85, "learning_rate": 3.9074e-05, "loss": 8.7484, "step": 2731500 }, { "epoch": 21.86, "learning_rate": 3.9072e-05, "loss": 8.7578, "step": 2732000 }, { "epoch": 21.86, "learning_rate": 3.9070000000000004e-05, "loss": 8.7372, "step": 2732500 }, { "epoch": 21.86, "learning_rate": 3.9068000000000006e-05, "loss": 8.7412, "step": 2733000 }, { "epoch": 21.87, "learning_rate": 3.9066e-05, "loss": 8.7525, "step": 2733500 }, { "epoch": 21.87, "learning_rate": 3.9064e-05, "loss": 8.7627, "step": 2734000 }, { "epoch": 21.88, "learning_rate": 3.9062000000000006e-05, "loss": 8.7533, "step": 2734500 }, { "epoch": 21.88, "learning_rate": 3.906e-05, "loss": 8.7451, "step": 2735000 }, { "epoch": 21.88, "learning_rate": 3.9058e-05, "loss": 8.7315, "step": 2735500 }, { "epoch": 21.89, "learning_rate": 3.905600000000001e-05, "loss": 8.7638, "step": 2736000 }, { "epoch": 21.89, "learning_rate": 3.9054e-05, "loss": 8.7571, "step": 2736500 }, { "epoch": 21.9, "learning_rate": 3.9052e-05, "loss": 8.7506, "step": 2737000 }, { "epoch": 21.9, "learning_rate": 3.905e-05, "loss": 8.7366, "step": 2737500 }, { "epoch": 21.9, "learning_rate": 3.9048e-05, "loss": 8.7582, "step": 2738000 }, { "epoch": 21.91, "learning_rate": 3.9046e-05, "loss": 8.7469, "step": 2738500 }, { "epoch": 21.91, "learning_rate": 3.9044e-05, "loss": 8.7621, "step": 2739000 }, { "epoch": 21.92, "learning_rate": 3.9042e-05, "loss": 8.7573, "step": 2739500 }, { "epoch": 21.92, "learning_rate": 3.9040000000000006e-05, "loss": 8.75, "step": 2740000 }, { "epoch": 21.92, "learning_rate": 3.9038e-05, "loss": 8.7369, "step": 2740500 }, { "epoch": 21.93, "learning_rate": 3.9036000000000004e-05, "loss": 8.7577, "step": 2741000 }, { "epoch": 21.93, "learning_rate": 3.9034000000000006e-05, "loss": 8.7455, "step": 2741500 }, { "epoch": 21.94, "learning_rate": 3.9032e-05, "loss": 8.7336, "step": 2742000 }, { "epoch": 21.94, "learning_rate": 3.903e-05, "loss": 8.7395, "step": 2742500 }, { "epoch": 21.94, "learning_rate": 3.9028000000000006e-05, "loss": 8.7471, "step": 2743000 }, { "epoch": 21.95, "learning_rate": 3.9026e-05, "loss": 8.7546, "step": 2743500 }, { "epoch": 21.95, "learning_rate": 3.9024e-05, "loss": 8.7433, "step": 2744000 }, { "epoch": 21.96, "learning_rate": 3.9022e-05, "loss": 8.7506, "step": 2744500 }, { "epoch": 21.96, "learning_rate": 3.902e-05, "loss": 8.7792, "step": 2745000 }, { "epoch": 21.96, "learning_rate": 3.9018000000000005e-05, "loss": 8.765, "step": 2745500 }, { "epoch": 21.97, "learning_rate": 3.9016e-05, "loss": 8.7628, "step": 2746000 }, { "epoch": 21.97, "learning_rate": 3.9014e-05, "loss": 8.7517, "step": 2746500 }, { "epoch": 21.98, "learning_rate": 3.9012000000000005e-05, "loss": 8.7479, "step": 2747000 }, { "epoch": 21.98, "learning_rate": 3.901e-05, "loss": 8.7648, "step": 2747500 }, { "epoch": 21.98, "learning_rate": 3.9008e-05, "loss": 8.7723, "step": 2748000 }, { "epoch": 21.99, "learning_rate": 3.9006000000000006e-05, "loss": 8.7453, "step": 2748500 }, { "epoch": 21.99, "learning_rate": 3.9004e-05, "loss": 8.7511, "step": 2749000 }, { "epoch": 22.0, "learning_rate": 3.9002e-05, "loss": 8.7495, "step": 2749500 }, { "epoch": 22.0, "learning_rate": 3.9000000000000006e-05, "loss": 8.7403, "step": 2750000 }, { "epoch": 22.0, "learning_rate": 3.8998e-05, "loss": 8.7663, "step": 2750500 }, { "epoch": 22.01, "learning_rate": 3.8996e-05, "loss": 8.7472, "step": 2751000 }, { "epoch": 22.01, "learning_rate": 3.8994e-05, "loss": 8.7739, "step": 2751500 }, { "epoch": 22.02, "learning_rate": 3.8992e-05, "loss": 8.751, "step": 2752000 }, { "epoch": 22.02, "learning_rate": 3.8990000000000004e-05, "loss": 8.757, "step": 2752500 }, { "epoch": 22.02, "learning_rate": 3.8988e-05, "loss": 8.7633, "step": 2753000 }, { "epoch": 22.03, "learning_rate": 3.8986e-05, "loss": 8.7506, "step": 2753500 }, { "epoch": 22.03, "learning_rate": 3.8984000000000005e-05, "loss": 8.7463, "step": 2754000 }, { "epoch": 22.04, "learning_rate": 3.8982e-05, "loss": 8.773, "step": 2754500 }, { "epoch": 22.04, "learning_rate": 3.898e-05, "loss": 8.7641, "step": 2755000 }, { "epoch": 22.04, "learning_rate": 3.8978000000000005e-05, "loss": 8.771, "step": 2755500 }, { "epoch": 22.05, "learning_rate": 3.8976e-05, "loss": 8.7548, "step": 2756000 }, { "epoch": 22.05, "learning_rate": 3.8973999999999996e-05, "loss": 8.7454, "step": 2756500 }, { "epoch": 22.06, "learning_rate": 3.8972000000000006e-05, "loss": 8.771, "step": 2757000 }, { "epoch": 22.06, "learning_rate": 3.897e-05, "loss": 8.7595, "step": 2757500 }, { "epoch": 22.06, "learning_rate": 3.8968000000000004e-05, "loss": 8.7522, "step": 2758000 }, { "epoch": 22.07, "learning_rate": 3.8966e-05, "loss": 8.7468, "step": 2758500 }, { "epoch": 22.07, "learning_rate": 3.8964e-05, "loss": 8.7671, "step": 2759000 }, { "epoch": 22.08, "learning_rate": 3.8962000000000004e-05, "loss": 8.7367, "step": 2759500 }, { "epoch": 22.08, "learning_rate": 3.896e-05, "loss": 8.7453, "step": 2760000 }, { "epoch": 22.08, "learning_rate": 3.8958e-05, "loss": 8.747, "step": 2760500 }, { "epoch": 22.09, "learning_rate": 3.8956000000000004e-05, "loss": 8.7516, "step": 2761000 }, { "epoch": 22.09, "learning_rate": 3.8954e-05, "loss": 8.7363, "step": 2761500 }, { "epoch": 22.1, "learning_rate": 3.8952e-05, "loss": 8.7664, "step": 2762000 }, { "epoch": 22.1, "learning_rate": 3.8950000000000005e-05, "loss": 8.7527, "step": 2762500 }, { "epoch": 22.1, "learning_rate": 3.8948e-05, "loss": 8.7387, "step": 2763000 }, { "epoch": 22.11, "learning_rate": 3.8945999999999996e-05, "loss": 8.751, "step": 2763500 }, { "epoch": 22.11, "learning_rate": 3.8944000000000005e-05, "loss": 8.7612, "step": 2764000 }, { "epoch": 22.12, "learning_rate": 3.8942e-05, "loss": 8.7418, "step": 2764500 }, { "epoch": 22.12, "learning_rate": 3.894e-05, "loss": 8.7211, "step": 2765000 }, { "epoch": 22.12, "learning_rate": 3.8938e-05, "loss": 8.7545, "step": 2765500 }, { "epoch": 22.13, "learning_rate": 3.8936e-05, "loss": 8.7452, "step": 2766000 }, { "epoch": 22.13, "learning_rate": 3.8934000000000004e-05, "loss": 8.7511, "step": 2766500 }, { "epoch": 22.14, "learning_rate": 3.8932e-05, "loss": 8.7725, "step": 2767000 }, { "epoch": 22.14, "learning_rate": 3.893e-05, "loss": 8.7706, "step": 2767500 }, { "epoch": 22.14, "learning_rate": 3.8928000000000004e-05, "loss": 8.751, "step": 2768000 }, { "epoch": 22.15, "learning_rate": 3.8926e-05, "loss": 8.7555, "step": 2768500 }, { "epoch": 22.15, "learning_rate": 3.8924e-05, "loss": 8.7528, "step": 2769000 }, { "epoch": 22.16, "learning_rate": 3.8922000000000004e-05, "loss": 8.7328, "step": 2769500 }, { "epoch": 22.16, "learning_rate": 3.892e-05, "loss": 8.7547, "step": 2770000 }, { "epoch": 22.16, "learning_rate": 3.8918e-05, "loss": 8.7488, "step": 2770500 }, { "epoch": 22.17, "learning_rate": 3.8916000000000005e-05, "loss": 8.7404, "step": 2771000 }, { "epoch": 22.17, "learning_rate": 3.8914e-05, "loss": 8.7492, "step": 2771500 }, { "epoch": 22.18, "learning_rate": 3.8912e-05, "loss": 8.746, "step": 2772000 }, { "epoch": 22.18, "learning_rate": 3.8910000000000005e-05, "loss": 8.7703, "step": 2772500 }, { "epoch": 22.18, "learning_rate": 3.8908e-05, "loss": 8.7495, "step": 2773000 }, { "epoch": 22.19, "learning_rate": 3.8906e-05, "loss": 8.7459, "step": 2773500 }, { "epoch": 22.19, "learning_rate": 3.8904e-05, "loss": 8.7376, "step": 2774000 }, { "epoch": 22.2, "learning_rate": 3.8902e-05, "loss": 8.7666, "step": 2774500 }, { "epoch": 22.2, "learning_rate": 3.8900000000000004e-05, "loss": 8.7564, "step": 2775000 }, { "epoch": 22.2, "learning_rate": 3.8898e-05, "loss": 8.7422, "step": 2775500 }, { "epoch": 22.21, "learning_rate": 3.8896e-05, "loss": 8.733, "step": 2776000 }, { "epoch": 22.21, "learning_rate": 3.8894000000000004e-05, "loss": 8.7276, "step": 2776500 }, { "epoch": 22.22, "learning_rate": 3.8892e-05, "loss": 8.7709, "step": 2777000 }, { "epoch": 22.22, "learning_rate": 3.889e-05, "loss": 8.769, "step": 2777500 }, { "epoch": 22.22, "learning_rate": 3.8888000000000004e-05, "loss": 8.7603, "step": 2778000 }, { "epoch": 22.23, "learning_rate": 3.8886e-05, "loss": 8.7466, "step": 2778500 }, { "epoch": 22.23, "learning_rate": 3.8884e-05, "loss": 8.7354, "step": 2779000 }, { "epoch": 22.24, "learning_rate": 3.8882000000000005e-05, "loss": 8.7348, "step": 2779500 }, { "epoch": 22.24, "learning_rate": 3.888e-05, "loss": 8.7543, "step": 2780000 }, { "epoch": 22.24, "learning_rate": 3.8878e-05, "loss": 8.7529, "step": 2780500 }, { "epoch": 22.25, "learning_rate": 3.8876e-05, "loss": 8.7581, "step": 2781000 }, { "epoch": 22.25, "learning_rate": 3.8874e-05, "loss": 8.7424, "step": 2781500 }, { "epoch": 22.26, "learning_rate": 3.8872e-05, "loss": 8.7277, "step": 2782000 }, { "epoch": 22.26, "learning_rate": 3.887e-05, "loss": 8.756, "step": 2782500 }, { "epoch": 22.26, "learning_rate": 3.8868e-05, "loss": 8.7691, "step": 2783000 }, { "epoch": 22.27, "learning_rate": 3.8866000000000004e-05, "loss": 8.7414, "step": 2783500 }, { "epoch": 22.27, "learning_rate": 3.8864e-05, "loss": 8.7618, "step": 2784000 }, { "epoch": 22.28, "learning_rate": 3.8862e-05, "loss": 8.7562, "step": 2784500 }, { "epoch": 22.28, "learning_rate": 3.8860000000000004e-05, "loss": 8.7533, "step": 2785000 }, { "epoch": 22.28, "learning_rate": 3.8858e-05, "loss": 8.7558, "step": 2785500 }, { "epoch": 22.29, "learning_rate": 3.8856e-05, "loss": 8.7326, "step": 2786000 }, { "epoch": 22.29, "learning_rate": 3.8854000000000004e-05, "loss": 8.7692, "step": 2786500 }, { "epoch": 22.3, "learning_rate": 3.8852e-05, "loss": 8.7535, "step": 2787000 }, { "epoch": 22.3, "learning_rate": 3.885e-05, "loss": 8.7584, "step": 2787500 }, { "epoch": 22.3, "learning_rate": 3.8848e-05, "loss": 8.7481, "step": 2788000 }, { "epoch": 22.31, "learning_rate": 3.884600000000001e-05, "loss": 8.7528, "step": 2788500 }, { "epoch": 22.31, "learning_rate": 3.8844e-05, "loss": 8.7548, "step": 2789000 }, { "epoch": 22.32, "learning_rate": 3.8842e-05, "loss": 8.7556, "step": 2789500 }, { "epoch": 22.32, "learning_rate": 3.884e-05, "loss": 8.762, "step": 2790000 }, { "epoch": 22.32, "learning_rate": 3.8838e-05, "loss": 8.769, "step": 2790500 }, { "epoch": 22.33, "learning_rate": 3.8836e-05, "loss": 8.7511, "step": 2791000 }, { "epoch": 22.33, "learning_rate": 3.8834e-05, "loss": 8.7536, "step": 2791500 }, { "epoch": 22.34, "learning_rate": 3.8832000000000004e-05, "loss": 8.7818, "step": 2792000 }, { "epoch": 22.34, "learning_rate": 3.883e-05, "loss": 8.7559, "step": 2792500 }, { "epoch": 22.34, "learning_rate": 3.8828e-05, "loss": 8.7542, "step": 2793000 }, { "epoch": 22.35, "learning_rate": 3.8826000000000004e-05, "loss": 8.74, "step": 2793500 }, { "epoch": 22.35, "learning_rate": 3.8824e-05, "loss": 8.7555, "step": 2794000 }, { "epoch": 22.36, "learning_rate": 3.8822e-05, "loss": 8.7608, "step": 2794500 }, { "epoch": 22.36, "learning_rate": 3.882e-05, "loss": 8.7475, "step": 2795000 }, { "epoch": 22.36, "learning_rate": 3.881800000000001e-05, "loss": 8.7268, "step": 2795500 }, { "epoch": 22.37, "learning_rate": 3.8816e-05, "loss": 8.7615, "step": 2796000 }, { "epoch": 22.37, "learning_rate": 3.8814e-05, "loss": 8.7638, "step": 2796500 }, { "epoch": 22.38, "learning_rate": 3.881200000000001e-05, "loss": 8.7712, "step": 2797000 }, { "epoch": 22.38, "learning_rate": 3.881e-05, "loss": 8.726, "step": 2797500 }, { "epoch": 22.38, "learning_rate": 3.8808e-05, "loss": 8.7295, "step": 2798000 }, { "epoch": 22.39, "learning_rate": 3.8806e-05, "loss": 8.7308, "step": 2798500 }, { "epoch": 22.39, "learning_rate": 3.8804e-05, "loss": 8.745, "step": 2799000 }, { "epoch": 22.4, "learning_rate": 3.8802e-05, "loss": 8.7809, "step": 2799500 }, { "epoch": 22.4, "learning_rate": 3.88e-05, "loss": 8.7517, "step": 2800000 }, { "epoch": 22.4, "learning_rate": 3.8798000000000004e-05, "loss": 8.751, "step": 2800500 }, { "epoch": 22.41, "learning_rate": 3.8796000000000006e-05, "loss": 8.7699, "step": 2801000 }, { "epoch": 22.41, "learning_rate": 3.8794e-05, "loss": 8.7535, "step": 2801500 }, { "epoch": 22.42, "learning_rate": 3.8792e-05, "loss": 8.723, "step": 2802000 }, { "epoch": 22.42, "learning_rate": 3.8790000000000006e-05, "loss": 8.7509, "step": 2802500 }, { "epoch": 22.42, "learning_rate": 3.8788e-05, "loss": 8.7667, "step": 2803000 }, { "epoch": 22.43, "learning_rate": 3.8786e-05, "loss": 8.7559, "step": 2803500 }, { "epoch": 22.43, "learning_rate": 3.878400000000001e-05, "loss": 8.7272, "step": 2804000 }, { "epoch": 22.44, "learning_rate": 3.8782e-05, "loss": 8.7243, "step": 2804500 }, { "epoch": 22.44, "learning_rate": 3.878e-05, "loss": 8.7655, "step": 2805000 }, { "epoch": 22.44, "learning_rate": 3.8778e-05, "loss": 8.7563, "step": 2805500 }, { "epoch": 22.45, "learning_rate": 3.8776e-05, "loss": 8.7542, "step": 2806000 }, { "epoch": 22.45, "learning_rate": 3.8774e-05, "loss": 8.7496, "step": 2806500 }, { "epoch": 22.46, "learning_rate": 3.8772e-05, "loss": 8.7615, "step": 2807000 }, { "epoch": 22.46, "learning_rate": 3.877e-05, "loss": 8.7507, "step": 2807500 }, { "epoch": 22.46, "learning_rate": 3.8768000000000006e-05, "loss": 8.744, "step": 2808000 }, { "epoch": 22.47, "learning_rate": 3.8766e-05, "loss": 8.7448, "step": 2808500 }, { "epoch": 22.47, "learning_rate": 3.8764000000000004e-05, "loss": 8.7438, "step": 2809000 }, { "epoch": 22.48, "learning_rate": 3.8762000000000006e-05, "loss": 8.7536, "step": 2809500 }, { "epoch": 22.48, "learning_rate": 3.876e-05, "loss": 8.7515, "step": 2810000 }, { "epoch": 22.48, "learning_rate": 3.8758e-05, "loss": 8.7537, "step": 2810500 }, { "epoch": 22.49, "learning_rate": 3.8756000000000006e-05, "loss": 8.7621, "step": 2811000 }, { "epoch": 22.49, "learning_rate": 3.8754e-05, "loss": 8.7568, "step": 2811500 }, { "epoch": 22.5, "learning_rate": 3.8752e-05, "loss": 8.7432, "step": 2812000 }, { "epoch": 22.5, "learning_rate": 3.875e-05, "loss": 8.7484, "step": 2812500 }, { "epoch": 22.5, "learning_rate": 3.8748e-05, "loss": 8.7401, "step": 2813000 }, { "epoch": 22.51, "learning_rate": 3.8746000000000005e-05, "loss": 8.7563, "step": 2813500 }, { "epoch": 22.51, "learning_rate": 3.8744e-05, "loss": 8.741, "step": 2814000 }, { "epoch": 22.52, "learning_rate": 3.8742e-05, "loss": 8.7586, "step": 2814500 }, { "epoch": 22.52, "learning_rate": 3.8740000000000005e-05, "loss": 8.7614, "step": 2815000 }, { "epoch": 22.52, "learning_rate": 3.8738e-05, "loss": 8.7498, "step": 2815500 }, { "epoch": 22.53, "learning_rate": 3.8736e-05, "loss": 8.7428, "step": 2816000 }, { "epoch": 22.53, "learning_rate": 3.8734000000000006e-05, "loss": 8.7593, "step": 2816500 }, { "epoch": 22.54, "learning_rate": 3.8732e-05, "loss": 8.7351, "step": 2817000 }, { "epoch": 22.54, "learning_rate": 3.873e-05, "loss": 8.7543, "step": 2817500 }, { "epoch": 22.54, "learning_rate": 3.8728000000000006e-05, "loss": 8.7549, "step": 2818000 }, { "epoch": 22.55, "learning_rate": 3.8726e-05, "loss": 8.7417, "step": 2818500 }, { "epoch": 22.55, "learning_rate": 3.8724e-05, "loss": 8.7464, "step": 2819000 }, { "epoch": 22.56, "learning_rate": 3.8722e-05, "loss": 8.7536, "step": 2819500 }, { "epoch": 22.56, "learning_rate": 3.872e-05, "loss": 8.7375, "step": 2820000 }, { "epoch": 22.56, "learning_rate": 3.8718000000000004e-05, "loss": 8.7678, "step": 2820500 }, { "epoch": 22.57, "learning_rate": 3.8716e-05, "loss": 8.7356, "step": 2821000 }, { "epoch": 22.57, "learning_rate": 3.8714e-05, "loss": 8.7463, "step": 2821500 }, { "epoch": 22.58, "learning_rate": 3.8712000000000005e-05, "loss": 8.7394, "step": 2822000 }, { "epoch": 22.58, "learning_rate": 3.871e-05, "loss": 8.7551, "step": 2822500 }, { "epoch": 22.58, "learning_rate": 3.8708e-05, "loss": 8.7329, "step": 2823000 }, { "epoch": 22.59, "learning_rate": 3.8706000000000005e-05, "loss": 8.748, "step": 2823500 }, { "epoch": 22.59, "learning_rate": 3.8704e-05, "loss": 8.784, "step": 2824000 }, { "epoch": 22.6, "learning_rate": 3.8701999999999996e-05, "loss": 8.7617, "step": 2824500 }, { "epoch": 22.6, "learning_rate": 3.8700000000000006e-05, "loss": 8.7492, "step": 2825000 }, { "epoch": 22.6, "learning_rate": 3.8698e-05, "loss": 8.7554, "step": 2825500 }, { "epoch": 22.61, "learning_rate": 3.8696000000000004e-05, "loss": 8.7458, "step": 2826000 }, { "epoch": 22.61, "learning_rate": 3.8694e-05, "loss": 8.7523, "step": 2826500 }, { "epoch": 22.62, "learning_rate": 3.8692e-05, "loss": 8.7325, "step": 2827000 }, { "epoch": 22.62, "learning_rate": 3.8690000000000004e-05, "loss": 8.7542, "step": 2827500 }, { "epoch": 22.62, "learning_rate": 3.8688e-05, "loss": 8.755, "step": 2828000 }, { "epoch": 22.63, "learning_rate": 3.8686e-05, "loss": 8.7736, "step": 2828500 }, { "epoch": 22.63, "learning_rate": 3.8684000000000004e-05, "loss": 8.7517, "step": 2829000 }, { "epoch": 22.64, "learning_rate": 3.8682e-05, "loss": 8.7614, "step": 2829500 }, { "epoch": 22.64, "learning_rate": 3.868e-05, "loss": 8.7576, "step": 2830000 }, { "epoch": 22.64, "learning_rate": 3.8678000000000005e-05, "loss": 8.7328, "step": 2830500 }, { "epoch": 22.65, "learning_rate": 3.8676e-05, "loss": 8.737, "step": 2831000 }, { "epoch": 22.65, "learning_rate": 3.8673999999999996e-05, "loss": 8.7623, "step": 2831500 }, { "epoch": 22.66, "learning_rate": 3.8672000000000005e-05, "loss": 8.7528, "step": 2832000 }, { "epoch": 22.66, "learning_rate": 3.867e-05, "loss": 8.7642, "step": 2832500 }, { "epoch": 22.66, "learning_rate": 3.8668e-05, "loss": 8.7508, "step": 2833000 }, { "epoch": 22.67, "learning_rate": 3.8666000000000006e-05, "loss": 8.7572, "step": 2833500 }, { "epoch": 22.67, "learning_rate": 3.8664e-05, "loss": 8.7468, "step": 2834000 }, { "epoch": 22.68, "learning_rate": 3.8662000000000004e-05, "loss": 8.7499, "step": 2834500 }, { "epoch": 22.68, "learning_rate": 3.866e-05, "loss": 8.755, "step": 2835000 }, { "epoch": 22.68, "learning_rate": 3.8658e-05, "loss": 8.7423, "step": 2835500 }, { "epoch": 22.69, "learning_rate": 3.8656000000000004e-05, "loss": 8.7747, "step": 2836000 }, { "epoch": 22.69, "learning_rate": 3.8654e-05, "loss": 8.7392, "step": 2836500 }, { "epoch": 22.7, "learning_rate": 3.8652e-05, "loss": 8.745, "step": 2837000 }, { "epoch": 22.7, "learning_rate": 3.8650000000000004e-05, "loss": 8.7505, "step": 2837500 }, { "epoch": 22.7, "learning_rate": 3.8648e-05, "loss": 8.7496, "step": 2838000 }, { "epoch": 22.71, "learning_rate": 3.8646e-05, "loss": 8.755, "step": 2838500 }, { "epoch": 22.71, "learning_rate": 3.8644000000000005e-05, "loss": 8.7191, "step": 2839000 }, { "epoch": 22.72, "learning_rate": 3.8642e-05, "loss": 8.7182, "step": 2839500 }, { "epoch": 22.72, "learning_rate": 3.864e-05, "loss": 8.7531, "step": 2840000 }, { "epoch": 22.72, "learning_rate": 3.8638000000000005e-05, "loss": 8.7556, "step": 2840500 }, { "epoch": 22.73, "learning_rate": 3.8636e-05, "loss": 8.7539, "step": 2841000 }, { "epoch": 22.73, "learning_rate": 3.8634e-05, "loss": 8.7496, "step": 2841500 }, { "epoch": 22.74, "learning_rate": 3.8632e-05, "loss": 8.7507, "step": 2842000 }, { "epoch": 22.74, "learning_rate": 3.863e-05, "loss": 8.741, "step": 2842500 }, { "epoch": 22.74, "learning_rate": 3.8628000000000004e-05, "loss": 8.7744, "step": 2843000 }, { "epoch": 22.75, "learning_rate": 3.8626e-05, "loss": 8.7546, "step": 2843500 }, { "epoch": 22.75, "learning_rate": 3.8624e-05, "loss": 8.7697, "step": 2844000 }, { "epoch": 22.76, "learning_rate": 3.8622000000000004e-05, "loss": 8.7533, "step": 2844500 }, { "epoch": 22.76, "learning_rate": 3.862e-05, "loss": 8.7472, "step": 2845000 }, { "epoch": 22.76, "learning_rate": 3.8618e-05, "loss": 8.749, "step": 2845500 }, { "epoch": 22.77, "learning_rate": 3.8616000000000004e-05, "loss": 8.7538, "step": 2846000 }, { "epoch": 22.77, "learning_rate": 3.8614e-05, "loss": 8.7436, "step": 2846500 }, { "epoch": 22.78, "learning_rate": 3.8612e-05, "loss": 8.7519, "step": 2847000 }, { "epoch": 22.78, "learning_rate": 3.8610000000000005e-05, "loss": 8.7459, "step": 2847500 }, { "epoch": 22.78, "learning_rate": 3.8608e-05, "loss": 8.7572, "step": 2848000 }, { "epoch": 22.79, "learning_rate": 3.8606e-05, "loss": 8.7448, "step": 2848500 }, { "epoch": 22.79, "learning_rate": 3.8604e-05, "loss": 8.7557, "step": 2849000 }, { "epoch": 22.8, "learning_rate": 3.8602e-05, "loss": 8.7338, "step": 2849500 }, { "epoch": 22.8, "learning_rate": 3.86e-05, "loss": 8.734, "step": 2850000 }, { "epoch": 22.8, "learning_rate": 3.8598e-05, "loss": 8.7288, "step": 2850500 }, { "epoch": 22.81, "learning_rate": 3.8596e-05, "loss": 8.7392, "step": 2851000 }, { "epoch": 22.81, "learning_rate": 3.8594000000000004e-05, "loss": 8.758, "step": 2851500 }, { "epoch": 22.82, "learning_rate": 3.8592e-05, "loss": 8.7489, "step": 2852000 }, { "epoch": 22.82, "learning_rate": 3.859e-05, "loss": 8.7514, "step": 2852500 }, { "epoch": 22.82, "learning_rate": 3.8588000000000004e-05, "loss": 8.7464, "step": 2853000 }, { "epoch": 22.83, "learning_rate": 3.8586e-05, "loss": 8.759, "step": 2853500 }, { "epoch": 22.83, "learning_rate": 3.8584e-05, "loss": 8.7481, "step": 2854000 }, { "epoch": 22.84, "learning_rate": 3.8582000000000004e-05, "loss": 8.7589, "step": 2854500 }, { "epoch": 22.84, "learning_rate": 3.858e-05, "loss": 8.7254, "step": 2855000 }, { "epoch": 22.84, "learning_rate": 3.8578e-05, "loss": 8.7685, "step": 2855500 }, { "epoch": 22.85, "learning_rate": 3.8576e-05, "loss": 8.7347, "step": 2856000 }, { "epoch": 22.85, "learning_rate": 3.857400000000001e-05, "loss": 8.7401, "step": 2856500 }, { "epoch": 22.86, "learning_rate": 3.8572e-05, "loss": 8.7365, "step": 2857000 }, { "epoch": 22.86, "learning_rate": 3.857e-05, "loss": 8.7477, "step": 2857500 }, { "epoch": 22.86, "learning_rate": 3.856800000000001e-05, "loss": 8.739, "step": 2858000 }, { "epoch": 22.87, "learning_rate": 3.8566e-05, "loss": 8.7594, "step": 2858500 }, { "epoch": 22.87, "learning_rate": 3.8564e-05, "loss": 8.7667, "step": 2859000 }, { "epoch": 22.88, "learning_rate": 3.8562e-05, "loss": 8.7269, "step": 2859500 }, { "epoch": 22.88, "learning_rate": 3.8560000000000004e-05, "loss": 8.7622, "step": 2860000 }, { "epoch": 22.88, "learning_rate": 3.8558e-05, "loss": 8.744, "step": 2860500 }, { "epoch": 22.89, "learning_rate": 3.8556e-05, "loss": 8.739, "step": 2861000 }, { "epoch": 22.89, "learning_rate": 3.8554000000000004e-05, "loss": 8.7539, "step": 2861500 }, { "epoch": 22.9, "learning_rate": 3.8552e-05, "loss": 8.7646, "step": 2862000 }, { "epoch": 22.9, "learning_rate": 3.855e-05, "loss": 8.7542, "step": 2862500 }, { "epoch": 22.9, "learning_rate": 3.8548e-05, "loss": 8.7626, "step": 2863000 }, { "epoch": 22.91, "learning_rate": 3.854600000000001e-05, "loss": 8.7482, "step": 2863500 }, { "epoch": 22.91, "learning_rate": 3.8544e-05, "loss": 8.7636, "step": 2864000 }, { "epoch": 22.92, "learning_rate": 3.8542e-05, "loss": 8.7563, "step": 2864500 }, { "epoch": 22.92, "learning_rate": 3.854000000000001e-05, "loss": 8.7526, "step": 2865000 }, { "epoch": 22.92, "learning_rate": 3.8538e-05, "loss": 8.7501, "step": 2865500 }, { "epoch": 22.93, "learning_rate": 3.8536e-05, "loss": 8.7399, "step": 2866000 }, { "epoch": 22.93, "learning_rate": 3.8534e-05, "loss": 8.7605, "step": 2866500 }, { "epoch": 22.94, "learning_rate": 3.8532e-05, "loss": 8.7493, "step": 2867000 }, { "epoch": 22.94, "learning_rate": 3.853e-05, "loss": 8.7509, "step": 2867500 }, { "epoch": 22.94, "learning_rate": 3.8528e-05, "loss": 8.7507, "step": 2868000 }, { "epoch": 22.95, "learning_rate": 3.8526000000000003e-05, "loss": 8.7449, "step": 2868500 }, { "epoch": 22.95, "learning_rate": 3.8524000000000006e-05, "loss": 8.7563, "step": 2869000 }, { "epoch": 22.96, "learning_rate": 3.8522e-05, "loss": 8.7575, "step": 2869500 }, { "epoch": 22.96, "learning_rate": 3.8520000000000004e-05, "loss": 8.7495, "step": 2870000 }, { "epoch": 22.96, "learning_rate": 3.8518000000000006e-05, "loss": 8.7508, "step": 2870500 }, { "epoch": 22.97, "learning_rate": 3.8516e-05, "loss": 8.7508, "step": 2871000 }, { "epoch": 22.97, "learning_rate": 3.8514e-05, "loss": 8.7311, "step": 2871500 }, { "epoch": 22.98, "learning_rate": 3.851200000000001e-05, "loss": 8.7353, "step": 2872000 }, { "epoch": 22.98, "learning_rate": 3.851e-05, "loss": 8.7535, "step": 2872500 }, { "epoch": 22.98, "learning_rate": 3.8508e-05, "loss": 8.7553, "step": 2873000 }, { "epoch": 22.99, "learning_rate": 3.8506e-05, "loss": 8.7648, "step": 2873500 }, { "epoch": 22.99, "learning_rate": 3.8504e-05, "loss": 8.7461, "step": 2874000 }, { "epoch": 23.0, "learning_rate": 3.8502e-05, "loss": 8.7589, "step": 2874500 }, { "epoch": 23.0, "learning_rate": 3.85e-05, "loss": 8.7589, "step": 2875000 }, { "epoch": 23.0, "learning_rate": 3.8498e-05, "loss": 8.7601, "step": 2875500 }, { "epoch": 23.01, "learning_rate": 3.8496000000000005e-05, "loss": 8.7396, "step": 2876000 }, { "epoch": 23.01, "learning_rate": 3.8494e-05, "loss": 8.7865, "step": 2876500 }, { "epoch": 23.02, "learning_rate": 3.8492000000000003e-05, "loss": 8.7492, "step": 2877000 }, { "epoch": 23.02, "learning_rate": 3.8490000000000006e-05, "loss": 8.7813, "step": 2877500 }, { "epoch": 23.02, "learning_rate": 3.8488e-05, "loss": 8.7365, "step": 2878000 }, { "epoch": 23.03, "learning_rate": 3.8486e-05, "loss": 8.7717, "step": 2878500 }, { "epoch": 23.03, "learning_rate": 3.8484000000000006e-05, "loss": 8.7589, "step": 2879000 }, { "epoch": 23.04, "learning_rate": 3.8482e-05, "loss": 8.7571, "step": 2879500 }, { "epoch": 23.04, "learning_rate": 3.848e-05, "loss": 8.766, "step": 2880000 }, { "epoch": 23.04, "learning_rate": 3.8478e-05, "loss": 8.755, "step": 2880500 }, { "epoch": 23.05, "learning_rate": 3.8476e-05, "loss": 8.7404, "step": 2881000 }, { "epoch": 23.05, "learning_rate": 3.8474000000000005e-05, "loss": 8.7605, "step": 2881500 }, { "epoch": 23.06, "learning_rate": 3.8472e-05, "loss": 8.7619, "step": 2882000 }, { "epoch": 23.06, "learning_rate": 3.847e-05, "loss": 8.7479, "step": 2882500 }, { "epoch": 23.06, "learning_rate": 3.8468000000000005e-05, "loss": 8.7571, "step": 2883000 }, { "epoch": 23.07, "learning_rate": 3.8466e-05, "loss": 8.7492, "step": 2883500 }, { "epoch": 23.07, "learning_rate": 3.8464e-05, "loss": 8.7466, "step": 2884000 }, { "epoch": 23.08, "learning_rate": 3.8462000000000005e-05, "loss": 8.7598, "step": 2884500 }, { "epoch": 23.08, "learning_rate": 3.846e-05, "loss": 8.7613, "step": 2885000 }, { "epoch": 23.08, "learning_rate": 3.8458e-05, "loss": 8.763, "step": 2885500 }, { "epoch": 23.09, "learning_rate": 3.8456000000000006e-05, "loss": 8.7466, "step": 2886000 }, { "epoch": 23.09, "learning_rate": 3.8454e-05, "loss": 8.7745, "step": 2886500 }, { "epoch": 23.1, "learning_rate": 3.8452e-05, "loss": 8.757, "step": 2887000 }, { "epoch": 23.1, "learning_rate": 3.845e-05, "loss": 8.7519, "step": 2887500 }, { "epoch": 23.1, "learning_rate": 3.8448e-05, "loss": 8.727, "step": 2888000 }, { "epoch": 23.11, "learning_rate": 3.8446000000000004e-05, "loss": 8.7567, "step": 2888500 }, { "epoch": 23.11, "learning_rate": 3.8444e-05, "loss": 8.7511, "step": 2889000 }, { "epoch": 23.12, "learning_rate": 3.8442e-05, "loss": 8.7579, "step": 2889500 }, { "epoch": 23.12, "learning_rate": 3.8440000000000005e-05, "loss": 8.7722, "step": 2890000 }, { "epoch": 23.12, "learning_rate": 3.8438e-05, "loss": 8.7476, "step": 2890500 }, { "epoch": 23.13, "learning_rate": 3.8436e-05, "loss": 8.7659, "step": 2891000 }, { "epoch": 23.13, "learning_rate": 3.8434000000000005e-05, "loss": 8.7618, "step": 2891500 }, { "epoch": 23.14, "learning_rate": 3.8432e-05, "loss": 8.7412, "step": 2892000 }, { "epoch": 23.14, "learning_rate": 3.8429999999999996e-05, "loss": 8.7533, "step": 2892500 }, { "epoch": 23.14, "learning_rate": 3.8428000000000005e-05, "loss": 8.7514, "step": 2893000 }, { "epoch": 23.15, "learning_rate": 3.8426e-05, "loss": 8.7683, "step": 2893500 }, { "epoch": 23.15, "learning_rate": 3.8424000000000003e-05, "loss": 8.7326, "step": 2894000 }, { "epoch": 23.16, "learning_rate": 3.8422e-05, "loss": 8.7801, "step": 2894500 }, { "epoch": 23.16, "learning_rate": 3.842e-05, "loss": 8.7546, "step": 2895000 }, { "epoch": 23.16, "learning_rate": 3.8418000000000004e-05, "loss": 8.7519, "step": 2895500 }, { "epoch": 23.17, "learning_rate": 3.8416e-05, "loss": 8.7724, "step": 2896000 }, { "epoch": 23.17, "learning_rate": 3.8414e-05, "loss": 8.7193, "step": 2896500 }, { "epoch": 23.18, "learning_rate": 3.8412000000000004e-05, "loss": 8.752, "step": 2897000 }, { "epoch": 23.18, "learning_rate": 3.841e-05, "loss": 8.733, "step": 2897500 }, { "epoch": 23.18, "learning_rate": 3.8408e-05, "loss": 8.7617, "step": 2898000 }, { "epoch": 23.19, "learning_rate": 3.8406000000000005e-05, "loss": 8.7501, "step": 2898500 }, { "epoch": 23.19, "learning_rate": 3.8404e-05, "loss": 8.7558, "step": 2899000 }, { "epoch": 23.2, "learning_rate": 3.8401999999999996e-05, "loss": 8.7537, "step": 2899500 }, { "epoch": 23.2, "learning_rate": 3.8400000000000005e-05, "loss": 8.7394, "step": 2900000 }, { "epoch": 23.2, "learning_rate": 3.8398e-05, "loss": 8.7581, "step": 2900500 }, { "epoch": 23.21, "learning_rate": 3.8396e-05, "loss": 8.7653, "step": 2901000 }, { "epoch": 23.21, "learning_rate": 3.8394000000000005e-05, "loss": 8.7484, "step": 2901500 }, { "epoch": 23.22, "learning_rate": 3.8392e-05, "loss": 8.7164, "step": 2902000 }, { "epoch": 23.22, "learning_rate": 3.8390000000000003e-05, "loss": 8.7524, "step": 2902500 }, { "epoch": 23.22, "learning_rate": 3.8388e-05, "loss": 8.7524, "step": 2903000 }, { "epoch": 23.23, "learning_rate": 3.8386e-05, "loss": 8.7394, "step": 2903500 }, { "epoch": 23.23, "learning_rate": 3.8384000000000004e-05, "loss": 8.7447, "step": 2904000 }, { "epoch": 23.24, "learning_rate": 3.8382e-05, "loss": 8.7455, "step": 2904500 }, { "epoch": 23.24, "learning_rate": 3.838e-05, "loss": 8.7588, "step": 2905000 }, { "epoch": 23.24, "learning_rate": 3.8378000000000004e-05, "loss": 8.7657, "step": 2905500 }, { "epoch": 23.25, "learning_rate": 3.8376e-05, "loss": 8.7514, "step": 2906000 }, { "epoch": 23.25, "learning_rate": 3.8374e-05, "loss": 8.7485, "step": 2906500 }, { "epoch": 23.26, "learning_rate": 3.8372000000000005e-05, "loss": 8.7672, "step": 2907000 }, { "epoch": 23.26, "learning_rate": 3.837e-05, "loss": 8.7263, "step": 2907500 }, { "epoch": 23.26, "learning_rate": 3.8368e-05, "loss": 8.7347, "step": 2908000 }, { "epoch": 23.27, "learning_rate": 3.8366000000000005e-05, "loss": 8.7465, "step": 2908500 }, { "epoch": 23.27, "learning_rate": 3.8364e-05, "loss": 8.7651, "step": 2909000 }, { "epoch": 23.28, "learning_rate": 3.8362e-05, "loss": 8.7416, "step": 2909500 }, { "epoch": 23.28, "learning_rate": 3.836e-05, "loss": 8.7443, "step": 2910000 }, { "epoch": 23.28, "learning_rate": 3.8358e-05, "loss": 8.7552, "step": 2910500 }, { "epoch": 23.29, "learning_rate": 3.8356000000000003e-05, "loss": 8.7613, "step": 2911000 }, { "epoch": 23.29, "learning_rate": 3.8354e-05, "loss": 8.7333, "step": 2911500 }, { "epoch": 23.3, "learning_rate": 3.8352e-05, "loss": 8.7531, "step": 2912000 }, { "epoch": 23.3, "learning_rate": 3.8350000000000004e-05, "loss": 8.7601, "step": 2912500 }, { "epoch": 23.3, "learning_rate": 3.8348e-05, "loss": 8.7555, "step": 2913000 }, { "epoch": 23.31, "learning_rate": 3.8346e-05, "loss": 8.7526, "step": 2913500 }, { "epoch": 23.31, "learning_rate": 3.8344000000000004e-05, "loss": 8.7501, "step": 2914000 }, { "epoch": 23.32, "learning_rate": 3.8342e-05, "loss": 8.7451, "step": 2914500 }, { "epoch": 23.32, "learning_rate": 3.834e-05, "loss": 8.7665, "step": 2915000 }, { "epoch": 23.32, "learning_rate": 3.8338000000000005e-05, "loss": 8.7634, "step": 2915500 }, { "epoch": 23.33, "learning_rate": 3.8336e-05, "loss": 8.7452, "step": 2916000 }, { "epoch": 23.33, "learning_rate": 3.8334e-05, "loss": 8.7638, "step": 2916500 }, { "epoch": 23.34, "learning_rate": 3.8332e-05, "loss": 8.751, "step": 2917000 }, { "epoch": 23.34, "learning_rate": 3.833e-05, "loss": 8.7627, "step": 2917500 }, { "epoch": 23.34, "learning_rate": 3.8328e-05, "loss": 8.7581, "step": 2918000 }, { "epoch": 23.35, "learning_rate": 3.8326e-05, "loss": 8.7626, "step": 2918500 }, { "epoch": 23.35, "learning_rate": 3.8324e-05, "loss": 8.7538, "step": 2919000 }, { "epoch": 23.36, "learning_rate": 3.8322000000000003e-05, "loss": 8.7455, "step": 2919500 }, { "epoch": 23.36, "learning_rate": 3.832e-05, "loss": 8.7478, "step": 2920000 }, { "epoch": 23.36, "learning_rate": 3.8318e-05, "loss": 8.7675, "step": 2920500 }, { "epoch": 23.37, "learning_rate": 3.8316000000000004e-05, "loss": 8.7481, "step": 2921000 }, { "epoch": 23.37, "learning_rate": 3.8314e-05, "loss": 8.7404, "step": 2921500 }, { "epoch": 23.38, "learning_rate": 3.8312e-05, "loss": 8.7387, "step": 2922000 }, { "epoch": 23.38, "learning_rate": 3.8310000000000004e-05, "loss": 8.7706, "step": 2922500 }, { "epoch": 23.38, "learning_rate": 3.8308e-05, "loss": 8.7489, "step": 2923000 }, { "epoch": 23.39, "learning_rate": 3.8306e-05, "loss": 8.7753, "step": 2923500 }, { "epoch": 23.39, "learning_rate": 3.8304e-05, "loss": 8.7516, "step": 2924000 }, { "epoch": 23.4, "learning_rate": 3.830200000000001e-05, "loss": 8.7455, "step": 2924500 }, { "epoch": 23.4, "learning_rate": 3.83e-05, "loss": 8.7532, "step": 2925000 }, { "epoch": 23.4, "learning_rate": 3.8298e-05, "loss": 8.7575, "step": 2925500 }, { "epoch": 23.41, "learning_rate": 3.829600000000001e-05, "loss": 8.7292, "step": 2926000 }, { "epoch": 23.41, "learning_rate": 3.8294e-05, "loss": 8.7191, "step": 2926500 }, { "epoch": 23.42, "learning_rate": 3.8292e-05, "loss": 8.7626, "step": 2927000 }, { "epoch": 23.42, "learning_rate": 3.829e-05, "loss": 8.7376, "step": 2927500 }, { "epoch": 23.42, "learning_rate": 3.8288000000000003e-05, "loss": 8.7749, "step": 2928000 }, { "epoch": 23.43, "learning_rate": 3.8286e-05, "loss": 8.7414, "step": 2928500 }, { "epoch": 23.43, "learning_rate": 3.8284e-05, "loss": 8.7432, "step": 2929000 }, { "epoch": 23.44, "learning_rate": 3.8282000000000004e-05, "loss": 8.7485, "step": 2929500 }, { "epoch": 23.44, "learning_rate": 3.828e-05, "loss": 8.748, "step": 2930000 }, { "epoch": 23.44, "learning_rate": 3.8278e-05, "loss": 8.7513, "step": 2930500 }, { "epoch": 23.45, "learning_rate": 3.8276e-05, "loss": 8.7528, "step": 2931000 }, { "epoch": 23.45, "learning_rate": 3.8274000000000007e-05, "loss": 8.7708, "step": 2931500 }, { "epoch": 23.46, "learning_rate": 3.8272e-05, "loss": 8.7128, "step": 2932000 }, { "epoch": 23.46, "learning_rate": 3.827e-05, "loss": 8.75, "step": 2932500 }, { "epoch": 23.46, "learning_rate": 3.826800000000001e-05, "loss": 8.7537, "step": 2933000 }, { "epoch": 23.47, "learning_rate": 3.8266e-05, "loss": 8.7549, "step": 2933500 }, { "epoch": 23.47, "learning_rate": 3.8264e-05, "loss": 8.7403, "step": 2934000 }, { "epoch": 23.48, "learning_rate": 3.8262e-05, "loss": 8.7539, "step": 2934500 }, { "epoch": 23.48, "learning_rate": 3.826e-05, "loss": 8.7608, "step": 2935000 }, { "epoch": 23.48, "learning_rate": 3.8258e-05, "loss": 8.7594, "step": 2935500 }, { "epoch": 23.49, "learning_rate": 3.8256e-05, "loss": 8.7535, "step": 2936000 }, { "epoch": 23.49, "learning_rate": 3.8254e-05, "loss": 8.7603, "step": 2936500 }, { "epoch": 23.5, "learning_rate": 3.8252000000000006e-05, "loss": 8.7399, "step": 2937000 }, { "epoch": 23.5, "learning_rate": 3.825e-05, "loss": 8.7406, "step": 2937500 }, { "epoch": 23.5, "learning_rate": 3.8248000000000004e-05, "loss": 8.7787, "step": 2938000 }, { "epoch": 23.51, "learning_rate": 3.8246000000000006e-05, "loss": 8.7505, "step": 2938500 }, { "epoch": 23.51, "learning_rate": 3.8244e-05, "loss": 8.7605, "step": 2939000 }, { "epoch": 23.52, "learning_rate": 3.8242e-05, "loss": 8.7486, "step": 2939500 }, { "epoch": 23.52, "learning_rate": 3.8240000000000007e-05, "loss": 8.7463, "step": 2940000 }, { "epoch": 23.52, "learning_rate": 3.8238e-05, "loss": 8.7423, "step": 2940500 }, { "epoch": 23.53, "learning_rate": 3.8236e-05, "loss": 8.7575, "step": 2941000 }, { "epoch": 23.53, "learning_rate": 3.8234e-05, "loss": 8.7683, "step": 2941500 }, { "epoch": 23.54, "learning_rate": 3.8232e-05, "loss": 8.7455, "step": 2942000 }, { "epoch": 23.54, "learning_rate": 3.823e-05, "loss": 8.7532, "step": 2942500 }, { "epoch": 23.54, "learning_rate": 3.8228e-05, "loss": 8.7645, "step": 2943000 }, { "epoch": 23.55, "learning_rate": 3.8226e-05, "loss": 8.7521, "step": 2943500 }, { "epoch": 23.55, "learning_rate": 3.8224000000000005e-05, "loss": 8.7577, "step": 2944000 }, { "epoch": 23.56, "learning_rate": 3.8222e-05, "loss": 8.7224, "step": 2944500 }, { "epoch": 23.56, "learning_rate": 3.822e-05, "loss": 8.759, "step": 2945000 }, { "epoch": 23.56, "learning_rate": 3.8218000000000006e-05, "loss": 8.7513, "step": 2945500 }, { "epoch": 23.57, "learning_rate": 3.8216e-05, "loss": 8.7407, "step": 2946000 }, { "epoch": 23.57, "learning_rate": 3.8214e-05, "loss": 8.7476, "step": 2946500 }, { "epoch": 23.58, "learning_rate": 3.8212000000000006e-05, "loss": 8.759, "step": 2947000 }, { "epoch": 23.58, "learning_rate": 3.821e-05, "loss": 8.7761, "step": 2947500 }, { "epoch": 23.58, "learning_rate": 3.8208e-05, "loss": 8.7557, "step": 2948000 }, { "epoch": 23.59, "learning_rate": 3.8206e-05, "loss": 8.7325, "step": 2948500 }, { "epoch": 23.59, "learning_rate": 3.8204e-05, "loss": 8.7467, "step": 2949000 }, { "epoch": 23.6, "learning_rate": 3.8202000000000005e-05, "loss": 8.751, "step": 2949500 }, { "epoch": 23.6, "learning_rate": 3.82e-05, "loss": 8.7293, "step": 2950000 }, { "epoch": 23.6, "learning_rate": 3.8198e-05, "loss": 8.7381, "step": 2950500 }, { "epoch": 23.61, "learning_rate": 3.8196000000000005e-05, "loss": 8.7599, "step": 2951000 }, { "epoch": 23.61, "learning_rate": 3.8194e-05, "loss": 8.7513, "step": 2951500 }, { "epoch": 23.62, "learning_rate": 3.8192e-05, "loss": 8.7616, "step": 2952000 }, { "epoch": 23.62, "learning_rate": 3.8190000000000005e-05, "loss": 8.7262, "step": 2952500 }, { "epoch": 23.62, "learning_rate": 3.8188e-05, "loss": 8.7289, "step": 2953000 }, { "epoch": 23.63, "learning_rate": 3.8185999999999997e-05, "loss": 8.7651, "step": 2953500 }, { "epoch": 23.63, "learning_rate": 3.8184000000000006e-05, "loss": 8.754, "step": 2954000 }, { "epoch": 23.64, "learning_rate": 3.8182e-05, "loss": 8.7362, "step": 2954500 }, { "epoch": 23.64, "learning_rate": 3.818e-05, "loss": 8.7377, "step": 2955000 }, { "epoch": 23.64, "learning_rate": 3.8178e-05, "loss": 8.7493, "step": 2955500 }, { "epoch": 23.65, "learning_rate": 3.8176e-05, "loss": 8.7512, "step": 2956000 }, { "epoch": 23.65, "learning_rate": 3.8174000000000004e-05, "loss": 8.7549, "step": 2956500 }, { "epoch": 23.66, "learning_rate": 3.8172e-05, "loss": 8.7602, "step": 2957000 }, { "epoch": 23.66, "learning_rate": 3.817e-05, "loss": 8.7466, "step": 2957500 }, { "epoch": 23.66, "learning_rate": 3.8168000000000005e-05, "loss": 8.7708, "step": 2958000 }, { "epoch": 23.67, "learning_rate": 3.8166e-05, "loss": 8.7471, "step": 2958500 }, { "epoch": 23.67, "learning_rate": 3.8164e-05, "loss": 8.776, "step": 2959000 }, { "epoch": 23.68, "learning_rate": 3.8162000000000005e-05, "loss": 8.7551, "step": 2959500 }, { "epoch": 23.68, "learning_rate": 3.816e-05, "loss": 8.7698, "step": 2960000 }, { "epoch": 23.68, "learning_rate": 3.8157999999999996e-05, "loss": 8.7669, "step": 2960500 }, { "epoch": 23.69, "learning_rate": 3.8156000000000005e-05, "loss": 8.7365, "step": 2961000 }, { "epoch": 23.69, "learning_rate": 3.8154e-05, "loss": 8.7649, "step": 2961500 }, { "epoch": 23.7, "learning_rate": 3.8152e-05, "loss": 8.7334, "step": 2962000 }, { "epoch": 23.7, "learning_rate": 3.8150000000000006e-05, "loss": 8.7414, "step": 2962500 }, { "epoch": 23.7, "learning_rate": 3.8148e-05, "loss": 8.7451, "step": 2963000 }, { "epoch": 23.71, "learning_rate": 3.8146000000000004e-05, "loss": 8.7501, "step": 2963500 }, { "epoch": 23.71, "learning_rate": 3.8144e-05, "loss": 8.7675, "step": 2964000 }, { "epoch": 23.72, "learning_rate": 3.8142e-05, "loss": 8.7527, "step": 2964500 }, { "epoch": 23.72, "learning_rate": 3.8140000000000004e-05, "loss": 8.735, "step": 2965000 }, { "epoch": 23.72, "learning_rate": 3.8138e-05, "loss": 8.7508, "step": 2965500 }, { "epoch": 23.73, "learning_rate": 3.8136e-05, "loss": 8.7547, "step": 2966000 }, { "epoch": 23.73, "learning_rate": 3.8134000000000005e-05, "loss": 8.7494, "step": 2966500 }, { "epoch": 23.74, "learning_rate": 3.8132e-05, "loss": 8.7233, "step": 2967000 }, { "epoch": 23.74, "learning_rate": 3.8129999999999996e-05, "loss": 8.7449, "step": 2967500 }, { "epoch": 23.74, "learning_rate": 3.8128000000000005e-05, "loss": 8.7406, "step": 2968000 }, { "epoch": 23.75, "learning_rate": 3.8126e-05, "loss": 8.7615, "step": 2968500 }, { "epoch": 23.75, "learning_rate": 3.8124e-05, "loss": 8.746, "step": 2969000 }, { "epoch": 23.76, "learning_rate": 3.8122000000000005e-05, "loss": 8.7637, "step": 2969500 }, { "epoch": 23.76, "learning_rate": 3.812e-05, "loss": 8.7484, "step": 2970000 }, { "epoch": 23.76, "learning_rate": 3.8118e-05, "loss": 8.7482, "step": 2970500 }, { "epoch": 23.77, "learning_rate": 3.8116e-05, "loss": 8.7538, "step": 2971000 }, { "epoch": 23.77, "learning_rate": 3.8114e-05, "loss": 8.779, "step": 2971500 }, { "epoch": 23.78, "learning_rate": 3.8112000000000004e-05, "loss": 8.7539, "step": 2972000 }, { "epoch": 23.78, "learning_rate": 3.811e-05, "loss": 8.7649, "step": 2972500 }, { "epoch": 23.78, "learning_rate": 3.8108e-05, "loss": 8.7503, "step": 2973000 }, { "epoch": 23.79, "learning_rate": 3.8106000000000004e-05, "loss": 8.7707, "step": 2973500 }, { "epoch": 23.79, "learning_rate": 3.8104e-05, "loss": 8.7506, "step": 2974000 }, { "epoch": 23.8, "learning_rate": 3.8102e-05, "loss": 8.7547, "step": 2974500 }, { "epoch": 23.8, "learning_rate": 3.8100000000000005e-05, "loss": 8.7295, "step": 2975000 }, { "epoch": 23.8, "learning_rate": 3.8098e-05, "loss": 8.7586, "step": 2975500 }, { "epoch": 23.81, "learning_rate": 3.8096e-05, "loss": 8.7466, "step": 2976000 }, { "epoch": 23.81, "learning_rate": 3.8094000000000005e-05, "loss": 8.7282, "step": 2976500 }, { "epoch": 23.82, "learning_rate": 3.8092e-05, "loss": 8.7545, "step": 2977000 }, { "epoch": 23.82, "learning_rate": 3.809e-05, "loss": 8.7389, "step": 2977500 }, { "epoch": 23.82, "learning_rate": 3.8088e-05, "loss": 8.7589, "step": 2978000 }, { "epoch": 23.83, "learning_rate": 3.8086e-05, "loss": 8.7614, "step": 2978500 }, { "epoch": 23.83, "learning_rate": 3.8084e-05, "loss": 8.7581, "step": 2979000 }, { "epoch": 23.84, "learning_rate": 3.8082e-05, "loss": 8.7445, "step": 2979500 }, { "epoch": 23.84, "learning_rate": 3.808e-05, "loss": 8.7605, "step": 2980000 }, { "epoch": 23.84, "learning_rate": 3.8078000000000004e-05, "loss": 8.7526, "step": 2980500 }, { "epoch": 23.85, "learning_rate": 3.8076e-05, "loss": 8.7467, "step": 2981000 }, { "epoch": 23.85, "learning_rate": 3.8074e-05, "loss": 8.7568, "step": 2981500 }, { "epoch": 23.86, "learning_rate": 3.8072000000000004e-05, "loss": 8.7291, "step": 2982000 }, { "epoch": 23.86, "learning_rate": 3.807e-05, "loss": 8.7417, "step": 2982500 }, { "epoch": 23.86, "learning_rate": 3.8068e-05, "loss": 8.7538, "step": 2983000 }, { "epoch": 23.87, "learning_rate": 3.8066000000000005e-05, "loss": 8.7569, "step": 2983500 }, { "epoch": 23.87, "learning_rate": 3.8064e-05, "loss": 8.7438, "step": 2984000 }, { "epoch": 23.88, "learning_rate": 3.8062e-05, "loss": 8.7515, "step": 2984500 }, { "epoch": 23.88, "learning_rate": 3.806e-05, "loss": 8.7562, "step": 2985000 }, { "epoch": 23.88, "learning_rate": 3.8058e-05, "loss": 8.7492, "step": 2985500 }, { "epoch": 23.89, "learning_rate": 3.8056e-05, "loss": 8.7783, "step": 2986000 }, { "epoch": 23.89, "learning_rate": 3.8054e-05, "loss": 8.7563, "step": 2986500 }, { "epoch": 23.9, "learning_rate": 3.805200000000001e-05, "loss": 8.7293, "step": 2987000 }, { "epoch": 23.9, "learning_rate": 3.805e-05, "loss": 8.7598, "step": 2987500 }, { "epoch": 23.9, "learning_rate": 3.8048e-05, "loss": 8.7647, "step": 2988000 }, { "epoch": 23.91, "learning_rate": 3.8046e-05, "loss": 8.7482, "step": 2988500 }, { "epoch": 23.91, "learning_rate": 3.8044000000000004e-05, "loss": 8.7452, "step": 2989000 }, { "epoch": 23.92, "learning_rate": 3.8042e-05, "loss": 8.7502, "step": 2989500 }, { "epoch": 23.92, "learning_rate": 3.804e-05, "loss": 8.7368, "step": 2990000 }, { "epoch": 23.92, "learning_rate": 3.8038000000000004e-05, "loss": 8.7449, "step": 2990500 }, { "epoch": 23.93, "learning_rate": 3.8036e-05, "loss": 8.7317, "step": 2991000 }, { "epoch": 23.93, "learning_rate": 3.8034e-05, "loss": 8.7703, "step": 2991500 }, { "epoch": 23.94, "learning_rate": 3.8032e-05, "loss": 8.7617, "step": 2992000 }, { "epoch": 23.94, "learning_rate": 3.803000000000001e-05, "loss": 8.758, "step": 2992500 }, { "epoch": 23.94, "learning_rate": 3.8028e-05, "loss": 8.7456, "step": 2993000 }, { "epoch": 23.95, "learning_rate": 3.8026e-05, "loss": 8.7588, "step": 2993500 }, { "epoch": 23.95, "learning_rate": 3.802400000000001e-05, "loss": 8.7479, "step": 2994000 }, { "epoch": 23.96, "learning_rate": 3.8022e-05, "loss": 8.7611, "step": 2994500 }, { "epoch": 23.96, "learning_rate": 3.802e-05, "loss": 8.7399, "step": 2995000 }, { "epoch": 23.96, "learning_rate": 3.8018e-05, "loss": 8.7453, "step": 2995500 }, { "epoch": 23.97, "learning_rate": 3.8016e-05, "loss": 8.7443, "step": 2996000 }, { "epoch": 23.97, "learning_rate": 3.8014e-05, "loss": 8.7509, "step": 2996500 }, { "epoch": 23.98, "learning_rate": 3.8012e-05, "loss": 8.7365, "step": 2997000 }, { "epoch": 23.98, "learning_rate": 3.8010000000000004e-05, "loss": 8.7355, "step": 2997500 }, { "epoch": 23.98, "learning_rate": 3.8008e-05, "loss": 8.7505, "step": 2998000 }, { "epoch": 23.99, "learning_rate": 3.8006e-05, "loss": 8.7613, "step": 2998500 }, { "epoch": 23.99, "learning_rate": 3.8004000000000004e-05, "loss": 8.7453, "step": 2999000 }, { "epoch": 24.0, "learning_rate": 3.8002000000000006e-05, "loss": 8.7467, "step": 2999500 }, { "epoch": 24.0, "learning_rate": 3.8e-05, "loss": 8.7427, "step": 3000000 }, { "epoch": 24.0, "learning_rate": 3.7998e-05, "loss": 8.7541, "step": 3000500 }, { "epoch": 24.01, "learning_rate": 3.799600000000001e-05, "loss": 8.739, "step": 3001000 }, { "epoch": 24.01, "learning_rate": 3.7994e-05, "loss": 8.7353, "step": 3001500 }, { "epoch": 24.02, "learning_rate": 3.7992e-05, "loss": 8.7579, "step": 3002000 }, { "epoch": 24.02, "learning_rate": 3.799e-05, "loss": 8.7459, "step": 3002500 }, { "epoch": 24.02, "learning_rate": 3.7988e-05, "loss": 8.745, "step": 3003000 }, { "epoch": 24.03, "learning_rate": 3.7986e-05, "loss": 8.755, "step": 3003500 }, { "epoch": 24.03, "learning_rate": 3.7984e-05, "loss": 8.7412, "step": 3004000 }, { "epoch": 24.04, "learning_rate": 3.7982e-05, "loss": 8.7462, "step": 3004500 }, { "epoch": 24.04, "learning_rate": 3.7980000000000006e-05, "loss": 8.7634, "step": 3005000 }, { "epoch": 24.04, "learning_rate": 3.7978e-05, "loss": 8.7574, "step": 3005500 }, { "epoch": 24.05, "learning_rate": 3.7976000000000004e-05, "loss": 8.7548, "step": 3006000 }, { "epoch": 24.05, "learning_rate": 3.7974000000000006e-05, "loss": 8.7386, "step": 3006500 }, { "epoch": 24.06, "learning_rate": 3.7972e-05, "loss": 8.7411, "step": 3007000 }, { "epoch": 24.06, "learning_rate": 3.797e-05, "loss": 8.7467, "step": 3007500 }, { "epoch": 24.06, "learning_rate": 3.7968000000000006e-05, "loss": 8.7522, "step": 3008000 }, { "epoch": 24.07, "learning_rate": 3.7966e-05, "loss": 8.7563, "step": 3008500 }, { "epoch": 24.07, "learning_rate": 3.7964e-05, "loss": 8.7515, "step": 3009000 }, { "epoch": 24.08, "learning_rate": 3.7962e-05, "loss": 8.7549, "step": 3009500 }, { "epoch": 24.08, "learning_rate": 3.796e-05, "loss": 8.7357, "step": 3010000 }, { "epoch": 24.08, "learning_rate": 3.7958e-05, "loss": 8.7637, "step": 3010500 }, { "epoch": 24.09, "learning_rate": 3.7956e-05, "loss": 8.7472, "step": 3011000 }, { "epoch": 24.09, "learning_rate": 3.7954e-05, "loss": 8.7609, "step": 3011500 }, { "epoch": 24.1, "learning_rate": 3.7952000000000005e-05, "loss": 8.746, "step": 3012000 }, { "epoch": 24.1, "learning_rate": 3.795e-05, "loss": 8.7536, "step": 3012500 }, { "epoch": 24.1, "learning_rate": 3.7948e-05, "loss": 8.7427, "step": 3013000 }, { "epoch": 24.11, "learning_rate": 3.7946000000000006e-05, "loss": 8.7414, "step": 3013500 }, { "epoch": 24.11, "learning_rate": 3.7944e-05, "loss": 8.7538, "step": 3014000 }, { "epoch": 24.12, "learning_rate": 3.7942e-05, "loss": 8.7697, "step": 3014500 }, { "epoch": 24.12, "learning_rate": 3.7940000000000006e-05, "loss": 8.7591, "step": 3015000 }, { "epoch": 24.12, "learning_rate": 3.7938e-05, "loss": 8.7592, "step": 3015500 }, { "epoch": 24.13, "learning_rate": 3.7936e-05, "loss": 8.7526, "step": 3016000 }, { "epoch": 24.13, "learning_rate": 3.7934e-05, "loss": 8.7555, "step": 3016500 }, { "epoch": 24.14, "learning_rate": 3.7932e-05, "loss": 8.7533, "step": 3017000 }, { "epoch": 24.14, "learning_rate": 3.7930000000000004e-05, "loss": 8.7462, "step": 3017500 }, { "epoch": 24.14, "learning_rate": 3.7928e-05, "loss": 8.7479, "step": 3018000 }, { "epoch": 24.15, "learning_rate": 3.7926e-05, "loss": 8.7527, "step": 3018500 }, { "epoch": 24.15, "learning_rate": 3.7924000000000005e-05, "loss": 8.7587, "step": 3019000 }, { "epoch": 24.16, "learning_rate": 3.7922e-05, "loss": 8.743, "step": 3019500 }, { "epoch": 24.16, "learning_rate": 3.792e-05, "loss": 8.7393, "step": 3020000 }, { "epoch": 24.16, "learning_rate": 3.7918000000000005e-05, "loss": 8.7546, "step": 3020500 }, { "epoch": 24.17, "learning_rate": 3.7916e-05, "loss": 8.7554, "step": 3021000 }, { "epoch": 24.17, "learning_rate": 3.7913999999999996e-05, "loss": 8.7398, "step": 3021500 }, { "epoch": 24.18, "learning_rate": 3.7912000000000006e-05, "loss": 8.7538, "step": 3022000 }, { "epoch": 24.18, "learning_rate": 3.791e-05, "loss": 8.7544, "step": 3022500 }, { "epoch": 24.18, "learning_rate": 3.7908e-05, "loss": 8.7264, "step": 3023000 }, { "epoch": 24.19, "learning_rate": 3.7906e-05, "loss": 8.7519, "step": 3023500 }, { "epoch": 24.19, "learning_rate": 3.7904e-05, "loss": 8.7544, "step": 3024000 }, { "epoch": 24.2, "learning_rate": 3.7902000000000004e-05, "loss": 8.7652, "step": 3024500 }, { "epoch": 24.2, "learning_rate": 3.79e-05, "loss": 8.7674, "step": 3025000 }, { "epoch": 24.2, "learning_rate": 3.7898e-05, "loss": 8.7607, "step": 3025500 }, { "epoch": 24.21, "learning_rate": 3.7896000000000004e-05, "loss": 8.7626, "step": 3026000 }, { "epoch": 24.21, "learning_rate": 3.7894e-05, "loss": 8.7792, "step": 3026500 }, { "epoch": 24.22, "learning_rate": 3.7892e-05, "loss": 8.7447, "step": 3027000 }, { "epoch": 24.22, "learning_rate": 3.7890000000000005e-05, "loss": 8.7542, "step": 3027500 }, { "epoch": 24.22, "learning_rate": 3.7888e-05, "loss": 8.7415, "step": 3028000 }, { "epoch": 24.23, "learning_rate": 3.7885999999999996e-05, "loss": 8.7451, "step": 3028500 }, { "epoch": 24.23, "learning_rate": 3.7884000000000005e-05, "loss": 8.7436, "step": 3029000 }, { "epoch": 24.24, "learning_rate": 3.7882e-05, "loss": 8.755, "step": 3029500 }, { "epoch": 24.24, "learning_rate": 3.788e-05, "loss": 8.7437, "step": 3030000 }, { "epoch": 24.24, "learning_rate": 3.7878000000000006e-05, "loss": 8.7626, "step": 3030500 }, { "epoch": 24.25, "learning_rate": 3.7876e-05, "loss": 8.7518, "step": 3031000 }, { "epoch": 24.25, "learning_rate": 3.7874000000000004e-05, "loss": 8.7543, "step": 3031500 }, { "epoch": 24.26, "learning_rate": 3.7872e-05, "loss": 8.7667, "step": 3032000 }, { "epoch": 24.26, "learning_rate": 3.787e-05, "loss": 8.7287, "step": 3032500 }, { "epoch": 24.26, "learning_rate": 3.7868000000000004e-05, "loss": 8.7586, "step": 3033000 }, { "epoch": 24.27, "learning_rate": 3.7866e-05, "loss": 8.7706, "step": 3033500 }, { "epoch": 24.27, "learning_rate": 3.7864e-05, "loss": 8.7426, "step": 3034000 }, { "epoch": 24.28, "learning_rate": 3.7862000000000004e-05, "loss": 8.751, "step": 3034500 }, { "epoch": 24.28, "learning_rate": 3.786e-05, "loss": 8.7608, "step": 3035000 }, { "epoch": 24.28, "learning_rate": 3.7858e-05, "loss": 8.7466, "step": 3035500 }, { "epoch": 24.29, "learning_rate": 3.7856000000000005e-05, "loss": 8.7489, "step": 3036000 }, { "epoch": 24.29, "learning_rate": 3.7854e-05, "loss": 8.7432, "step": 3036500 }, { "epoch": 24.3, "learning_rate": 3.7852e-05, "loss": 8.7521, "step": 3037000 }, { "epoch": 24.3, "learning_rate": 3.7850000000000005e-05, "loss": 8.7661, "step": 3037500 }, { "epoch": 24.3, "learning_rate": 3.7848e-05, "loss": 8.7416, "step": 3038000 }, { "epoch": 24.31, "learning_rate": 3.7846e-05, "loss": 8.7547, "step": 3038500 }, { "epoch": 24.31, "learning_rate": 3.7844e-05, "loss": 8.7544, "step": 3039000 }, { "epoch": 24.32, "learning_rate": 3.7842e-05, "loss": 8.7236, "step": 3039500 }, { "epoch": 24.32, "learning_rate": 3.7840000000000004e-05, "loss": 8.7408, "step": 3040000 }, { "epoch": 24.32, "learning_rate": 3.7838e-05, "loss": 8.7488, "step": 3040500 }, { "epoch": 24.33, "learning_rate": 3.7836e-05, "loss": 8.7478, "step": 3041000 }, { "epoch": 24.33, "learning_rate": 3.7834000000000004e-05, "loss": 8.7235, "step": 3041500 }, { "epoch": 24.34, "learning_rate": 3.7832e-05, "loss": 8.7537, "step": 3042000 }, { "epoch": 24.34, "learning_rate": 3.783e-05, "loss": 8.7504, "step": 3042500 }, { "epoch": 24.34, "learning_rate": 3.7828000000000004e-05, "loss": 8.7568, "step": 3043000 }, { "epoch": 24.35, "learning_rate": 3.7826e-05, "loss": 8.7402, "step": 3043500 }, { "epoch": 24.35, "learning_rate": 3.7824e-05, "loss": 8.7386, "step": 3044000 }, { "epoch": 24.36, "learning_rate": 3.7822000000000005e-05, "loss": 8.7588, "step": 3044500 }, { "epoch": 24.36, "learning_rate": 3.782e-05, "loss": 8.7355, "step": 3045000 }, { "epoch": 24.36, "learning_rate": 3.7818e-05, "loss": 8.7446, "step": 3045500 }, { "epoch": 24.37, "learning_rate": 3.7816e-05, "loss": 8.779, "step": 3046000 }, { "epoch": 24.37, "learning_rate": 3.7814e-05, "loss": 8.7405, "step": 3046500 }, { "epoch": 24.38, "learning_rate": 3.7812e-05, "loss": 8.7422, "step": 3047000 }, { "epoch": 24.38, "learning_rate": 3.781e-05, "loss": 8.7451, "step": 3047500 }, { "epoch": 24.38, "learning_rate": 3.7808e-05, "loss": 8.7434, "step": 3048000 }, { "epoch": 24.39, "learning_rate": 3.7806000000000004e-05, "loss": 8.7462, "step": 3048500 }, { "epoch": 24.39, "learning_rate": 3.7804e-05, "loss": 8.7444, "step": 3049000 }, { "epoch": 24.4, "learning_rate": 3.7802e-05, "loss": 8.7614, "step": 3049500 }, { "epoch": 24.4, "learning_rate": 3.7800000000000004e-05, "loss": 8.7578, "step": 3050000 }, { "epoch": 24.4, "learning_rate": 3.7798e-05, "loss": 8.7588, "step": 3050500 }, { "epoch": 24.41, "learning_rate": 3.7796e-05, "loss": 8.7713, "step": 3051000 }, { "epoch": 24.41, "learning_rate": 3.7794000000000004e-05, "loss": 8.7349, "step": 3051500 }, { "epoch": 24.42, "learning_rate": 3.7792e-05, "loss": 8.7253, "step": 3052000 }, { "epoch": 24.42, "learning_rate": 3.779e-05, "loss": 8.7437, "step": 3052500 }, { "epoch": 24.42, "learning_rate": 3.7788e-05, "loss": 8.7382, "step": 3053000 }, { "epoch": 24.43, "learning_rate": 3.7786e-05, "loss": 8.7692, "step": 3053500 }, { "epoch": 24.43, "learning_rate": 3.7784e-05, "loss": 8.7321, "step": 3054000 }, { "epoch": 24.44, "learning_rate": 3.7782e-05, "loss": 8.75, "step": 3054500 }, { "epoch": 24.44, "learning_rate": 3.778000000000001e-05, "loss": 8.7589, "step": 3055000 }, { "epoch": 24.44, "learning_rate": 3.7778e-05, "loss": 8.7522, "step": 3055500 }, { "epoch": 24.45, "learning_rate": 3.7776e-05, "loss": 8.7484, "step": 3056000 }, { "epoch": 24.45, "learning_rate": 3.7774e-05, "loss": 8.7679, "step": 3056500 }, { "epoch": 24.46, "learning_rate": 3.7772000000000004e-05, "loss": 8.7431, "step": 3057000 }, { "epoch": 24.46, "learning_rate": 3.777e-05, "loss": 8.7322, "step": 3057500 }, { "epoch": 24.46, "learning_rate": 3.7768e-05, "loss": 8.7277, "step": 3058000 }, { "epoch": 24.47, "learning_rate": 3.7766000000000004e-05, "loss": 8.7552, "step": 3058500 }, { "epoch": 24.47, "learning_rate": 3.7764e-05, "loss": 8.7547, "step": 3059000 }, { "epoch": 24.48, "learning_rate": 3.7762e-05, "loss": 8.759, "step": 3059500 }, { "epoch": 24.48, "learning_rate": 3.776e-05, "loss": 8.731, "step": 3060000 }, { "epoch": 24.48, "learning_rate": 3.775800000000001e-05, "loss": 8.7551, "step": 3060500 }, { "epoch": 24.49, "learning_rate": 3.7756e-05, "loss": 8.7346, "step": 3061000 }, { "epoch": 24.49, "learning_rate": 3.7754e-05, "loss": 8.7401, "step": 3061500 }, { "epoch": 24.5, "learning_rate": 3.775200000000001e-05, "loss": 8.7744, "step": 3062000 }, { "epoch": 24.5, "learning_rate": 3.775e-05, "loss": 8.7454, "step": 3062500 }, { "epoch": 24.5, "learning_rate": 3.7748e-05, "loss": 8.7602, "step": 3063000 }, { "epoch": 24.51, "learning_rate": 3.7746e-05, "loss": 8.7369, "step": 3063500 }, { "epoch": 24.51, "learning_rate": 3.7744e-05, "loss": 8.7381, "step": 3064000 }, { "epoch": 24.52, "learning_rate": 3.7742e-05, "loss": 8.764, "step": 3064500 }, { "epoch": 24.52, "learning_rate": 3.774e-05, "loss": 8.7475, "step": 3065000 }, { "epoch": 24.52, "learning_rate": 3.7738000000000004e-05, "loss": 8.7485, "step": 3065500 }, { "epoch": 24.53, "learning_rate": 3.7736e-05, "loss": 8.756, "step": 3066000 }, { "epoch": 24.53, "learning_rate": 3.7734e-05, "loss": 8.7564, "step": 3066500 }, { "epoch": 24.54, "learning_rate": 3.7732000000000004e-05, "loss": 8.7511, "step": 3067000 }, { "epoch": 24.54, "learning_rate": 3.7730000000000006e-05, "loss": 8.7515, "step": 3067500 }, { "epoch": 24.54, "learning_rate": 3.7728e-05, "loss": 8.7567, "step": 3068000 }, { "epoch": 24.55, "learning_rate": 3.7726e-05, "loss": 8.7575, "step": 3068500 }, { "epoch": 24.55, "learning_rate": 3.772400000000001e-05, "loss": 8.7203, "step": 3069000 }, { "epoch": 24.56, "learning_rate": 3.7722e-05, "loss": 8.7592, "step": 3069500 }, { "epoch": 24.56, "learning_rate": 3.772e-05, "loss": 8.7365, "step": 3070000 }, { "epoch": 24.56, "learning_rate": 3.7718e-05, "loss": 8.7703, "step": 3070500 }, { "epoch": 24.57, "learning_rate": 3.7716e-05, "loss": 8.7561, "step": 3071000 }, { "epoch": 24.57, "learning_rate": 3.7714e-05, "loss": 8.7463, "step": 3071500 }, { "epoch": 24.58, "learning_rate": 3.7712e-05, "loss": 8.778, "step": 3072000 }, { "epoch": 24.58, "learning_rate": 3.771e-05, "loss": 8.7551, "step": 3072500 }, { "epoch": 24.58, "learning_rate": 3.7708000000000006e-05, "loss": 8.7569, "step": 3073000 }, { "epoch": 24.59, "learning_rate": 3.7706e-05, "loss": 8.7585, "step": 3073500 }, { "epoch": 24.59, "learning_rate": 3.7704000000000004e-05, "loss": 8.7546, "step": 3074000 }, { "epoch": 24.6, "learning_rate": 3.7702000000000006e-05, "loss": 8.7528, "step": 3074500 }, { "epoch": 24.6, "learning_rate": 3.77e-05, "loss": 8.7253, "step": 3075000 }, { "epoch": 24.6, "learning_rate": 3.7698e-05, "loss": 8.7633, "step": 3075500 }, { "epoch": 24.61, "learning_rate": 3.7696000000000006e-05, "loss": 8.7436, "step": 3076000 }, { "epoch": 24.61, "learning_rate": 3.7694e-05, "loss": 8.7578, "step": 3076500 }, { "epoch": 24.62, "learning_rate": 3.7692e-05, "loss": 8.7362, "step": 3077000 }, { "epoch": 24.62, "learning_rate": 3.769e-05, "loss": 8.7499, "step": 3077500 }, { "epoch": 24.62, "learning_rate": 3.7688e-05, "loss": 8.7427, "step": 3078000 }, { "epoch": 24.63, "learning_rate": 3.7686e-05, "loss": 8.7559, "step": 3078500 }, { "epoch": 24.63, "learning_rate": 3.7684e-05, "loss": 8.7493, "step": 3079000 }, { "epoch": 24.64, "learning_rate": 3.7682e-05, "loss": 8.7307, "step": 3079500 }, { "epoch": 24.64, "learning_rate": 3.7680000000000005e-05, "loss": 8.7574, "step": 3080000 }, { "epoch": 24.64, "learning_rate": 3.7678e-05, "loss": 8.7348, "step": 3080500 }, { "epoch": 24.65, "learning_rate": 3.7676e-05, "loss": 8.7557, "step": 3081000 }, { "epoch": 24.65, "learning_rate": 3.7674000000000006e-05, "loss": 8.7681, "step": 3081500 }, { "epoch": 24.66, "learning_rate": 3.7672e-05, "loss": 8.7337, "step": 3082000 }, { "epoch": 24.66, "learning_rate": 3.767e-05, "loss": 8.7491, "step": 3082500 }, { "epoch": 24.66, "learning_rate": 3.7668000000000006e-05, "loss": 8.7667, "step": 3083000 }, { "epoch": 24.67, "learning_rate": 3.7666e-05, "loss": 8.7491, "step": 3083500 }, { "epoch": 24.67, "learning_rate": 3.7664e-05, "loss": 8.7381, "step": 3084000 }, { "epoch": 24.68, "learning_rate": 3.7662e-05, "loss": 8.7476, "step": 3084500 }, { "epoch": 24.68, "learning_rate": 3.766e-05, "loss": 8.7613, "step": 3085000 }, { "epoch": 24.68, "learning_rate": 3.7658000000000004e-05, "loss": 8.7474, "step": 3085500 }, { "epoch": 24.69, "learning_rate": 3.7656e-05, "loss": 8.7432, "step": 3086000 }, { "epoch": 24.69, "learning_rate": 3.7654e-05, "loss": 8.7484, "step": 3086500 }, { "epoch": 24.7, "learning_rate": 3.7652000000000005e-05, "loss": 8.7548, "step": 3087000 }, { "epoch": 24.7, "learning_rate": 3.765e-05, "loss": 8.7502, "step": 3087500 }, { "epoch": 24.7, "learning_rate": 3.7648e-05, "loss": 8.7784, "step": 3088000 }, { "epoch": 24.71, "learning_rate": 3.7646000000000005e-05, "loss": 8.7495, "step": 3088500 }, { "epoch": 24.71, "learning_rate": 3.7644e-05, "loss": 8.7457, "step": 3089000 }, { "epoch": 24.72, "learning_rate": 3.7641999999999996e-05, "loss": 8.7511, "step": 3089500 }, { "epoch": 24.72, "learning_rate": 3.7640000000000006e-05, "loss": 8.7598, "step": 3090000 }, { "epoch": 24.72, "learning_rate": 3.7638e-05, "loss": 8.7611, "step": 3090500 }, { "epoch": 24.73, "learning_rate": 3.7636e-05, "loss": 8.7623, "step": 3091000 }, { "epoch": 24.73, "learning_rate": 3.7634000000000006e-05, "loss": 8.7445, "step": 3091500 }, { "epoch": 24.74, "learning_rate": 3.7632e-05, "loss": 8.7462, "step": 3092000 }, { "epoch": 24.74, "learning_rate": 3.7630000000000004e-05, "loss": 8.7597, "step": 3092500 }, { "epoch": 24.74, "learning_rate": 3.7628e-05, "loss": 8.7444, "step": 3093000 }, { "epoch": 24.75, "learning_rate": 3.7626e-05, "loss": 8.7636, "step": 3093500 }, { "epoch": 24.75, "learning_rate": 3.7624000000000004e-05, "loss": 8.7592, "step": 3094000 }, { "epoch": 24.76, "learning_rate": 3.7622e-05, "loss": 8.7434, "step": 3094500 }, { "epoch": 24.76, "learning_rate": 3.762e-05, "loss": 8.7353, "step": 3095000 }, { "epoch": 24.76, "learning_rate": 3.7618000000000005e-05, "loss": 8.7399, "step": 3095500 }, { "epoch": 24.77, "learning_rate": 3.7616e-05, "loss": 8.757, "step": 3096000 }, { "epoch": 24.77, "learning_rate": 3.7613999999999996e-05, "loss": 8.7707, "step": 3096500 }, { "epoch": 24.78, "learning_rate": 3.7612000000000005e-05, "loss": 8.7484, "step": 3097000 }, { "epoch": 24.78, "learning_rate": 3.761e-05, "loss": 8.7618, "step": 3097500 }, { "epoch": 24.78, "learning_rate": 3.7608e-05, "loss": 8.7666, "step": 3098000 }, { "epoch": 24.79, "learning_rate": 3.7606000000000006e-05, "loss": 8.747, "step": 3098500 }, { "epoch": 24.79, "learning_rate": 3.7604e-05, "loss": 8.7278, "step": 3099000 }, { "epoch": 24.8, "learning_rate": 3.7602000000000004e-05, "loss": 8.7661, "step": 3099500 }, { "epoch": 24.8, "learning_rate": 3.76e-05, "loss": 8.739, "step": 3100000 }, { "epoch": 24.8, "learning_rate": 3.7598e-05, "loss": 8.7547, "step": 3100500 }, { "epoch": 24.81, "learning_rate": 3.7596000000000004e-05, "loss": 8.742, "step": 3101000 }, { "epoch": 24.81, "learning_rate": 3.7594e-05, "loss": 8.7621, "step": 3101500 }, { "epoch": 24.82, "learning_rate": 3.7592e-05, "loss": 8.7498, "step": 3102000 }, { "epoch": 24.82, "learning_rate": 3.7590000000000004e-05, "loss": 8.7501, "step": 3102500 }, { "epoch": 24.82, "learning_rate": 3.7588e-05, "loss": 8.7603, "step": 3103000 }, { "epoch": 24.83, "learning_rate": 3.7586e-05, "loss": 8.7359, "step": 3103500 }, { "epoch": 24.83, "learning_rate": 3.7584000000000005e-05, "loss": 8.7583, "step": 3104000 }, { "epoch": 24.84, "learning_rate": 3.7582e-05, "loss": 8.7343, "step": 3104500 }, { "epoch": 24.84, "learning_rate": 3.758e-05, "loss": 8.7498, "step": 3105000 }, { "epoch": 24.84, "learning_rate": 3.7578000000000005e-05, "loss": 8.7324, "step": 3105500 }, { "epoch": 24.85, "learning_rate": 3.7576e-05, "loss": 8.7484, "step": 3106000 }, { "epoch": 24.85, "learning_rate": 3.7574e-05, "loss": 8.7714, "step": 3106500 }, { "epoch": 24.86, "learning_rate": 3.7572e-05, "loss": 8.7436, "step": 3107000 }, { "epoch": 24.86, "learning_rate": 3.757e-05, "loss": 8.7696, "step": 3107500 }, { "epoch": 24.86, "learning_rate": 3.7568000000000004e-05, "loss": 8.7612, "step": 3108000 }, { "epoch": 24.87, "learning_rate": 3.7566e-05, "loss": 8.7524, "step": 3108500 }, { "epoch": 24.87, "learning_rate": 3.7564e-05, "loss": 8.7546, "step": 3109000 }, { "epoch": 24.88, "learning_rate": 3.7562000000000004e-05, "loss": 8.7528, "step": 3109500 }, { "epoch": 24.88, "learning_rate": 3.756e-05, "loss": 8.7677, "step": 3110000 }, { "epoch": 24.88, "learning_rate": 3.7558e-05, "loss": 8.7551, "step": 3110500 }, { "epoch": 24.89, "learning_rate": 3.7556000000000004e-05, "loss": 8.7524, "step": 3111000 }, { "epoch": 24.89, "learning_rate": 3.7554e-05, "loss": 8.7571, "step": 3111500 }, { "epoch": 24.9, "learning_rate": 3.7552e-05, "loss": 8.7684, "step": 3112000 }, { "epoch": 24.9, "learning_rate": 3.7550000000000005e-05, "loss": 8.7259, "step": 3112500 }, { "epoch": 24.9, "learning_rate": 3.7548e-05, "loss": 8.7519, "step": 3113000 }, { "epoch": 24.91, "learning_rate": 3.7546e-05, "loss": 8.7306, "step": 3113500 }, { "epoch": 24.91, "learning_rate": 3.7544e-05, "loss": 8.7628, "step": 3114000 }, { "epoch": 24.92, "learning_rate": 3.7542e-05, "loss": 8.7744, "step": 3114500 }, { "epoch": 24.92, "learning_rate": 3.754e-05, "loss": 8.7533, "step": 3115000 }, { "epoch": 24.92, "learning_rate": 3.7538e-05, "loss": 8.7687, "step": 3115500 }, { "epoch": 24.93, "learning_rate": 3.7536e-05, "loss": 8.7445, "step": 3116000 }, { "epoch": 24.93, "learning_rate": 3.7534000000000004e-05, "loss": 8.7544, "step": 3116500 }, { "epoch": 24.94, "learning_rate": 3.7532e-05, "loss": 8.753, "step": 3117000 }, { "epoch": 24.94, "learning_rate": 3.753e-05, "loss": 8.7639, "step": 3117500 }, { "epoch": 24.94, "learning_rate": 3.7528000000000004e-05, "loss": 8.7509, "step": 3118000 }, { "epoch": 24.95, "learning_rate": 3.7526e-05, "loss": 8.762, "step": 3118500 }, { "epoch": 24.95, "learning_rate": 3.7524e-05, "loss": 8.7494, "step": 3119000 }, { "epoch": 24.96, "learning_rate": 3.7522000000000004e-05, "loss": 8.7459, "step": 3119500 }, { "epoch": 24.96, "learning_rate": 3.752e-05, "loss": 8.749, "step": 3120000 }, { "epoch": 24.96, "learning_rate": 3.7518e-05, "loss": 8.7701, "step": 3120500 }, { "epoch": 24.97, "learning_rate": 3.7516e-05, "loss": 8.727, "step": 3121000 }, { "epoch": 24.97, "learning_rate": 3.7514e-05, "loss": 8.7345, "step": 3121500 }, { "epoch": 24.98, "learning_rate": 3.7512e-05, "loss": 8.7476, "step": 3122000 }, { "epoch": 24.98, "learning_rate": 3.751e-05, "loss": 8.7448, "step": 3122500 }, { "epoch": 24.98, "learning_rate": 3.750800000000001e-05, "loss": 8.7484, "step": 3123000 }, { "epoch": 24.99, "learning_rate": 3.7506e-05, "loss": 8.7676, "step": 3123500 }, { "epoch": 24.99, "learning_rate": 3.7504e-05, "loss": 8.7557, "step": 3124000 }, { "epoch": 25.0, "learning_rate": 3.7502e-05, "loss": 8.7508, "step": 3124500 }, { "epoch": 25.0, "learning_rate": 3.7500000000000003e-05, "loss": 8.7332, "step": 3125000 }, { "epoch": 25.0, "learning_rate": 3.7498e-05, "loss": 8.7517, "step": 3125500 }, { "epoch": 25.01, "learning_rate": 3.7496e-05, "loss": 8.7541, "step": 3126000 }, { "epoch": 25.01, "learning_rate": 3.7494000000000004e-05, "loss": 8.7709, "step": 3126500 }, { "epoch": 25.02, "learning_rate": 3.7492e-05, "loss": 8.7482, "step": 3127000 }, { "epoch": 25.02, "learning_rate": 3.749e-05, "loss": 8.7732, "step": 3127500 }, { "epoch": 25.02, "learning_rate": 3.7488000000000004e-05, "loss": 8.7742, "step": 3128000 }, { "epoch": 25.03, "learning_rate": 3.748600000000001e-05, "loss": 8.7517, "step": 3128500 }, { "epoch": 25.03, "learning_rate": 3.7484e-05, "loss": 8.7557, "step": 3129000 }, { "epoch": 25.04, "learning_rate": 3.7482e-05, "loss": 8.7574, "step": 3129500 }, { "epoch": 25.04, "learning_rate": 3.748000000000001e-05, "loss": 8.7532, "step": 3130000 }, { "epoch": 25.04, "learning_rate": 3.7478e-05, "loss": 8.7468, "step": 3130500 }, { "epoch": 25.05, "learning_rate": 3.7476e-05, "loss": 8.7402, "step": 3131000 }, { "epoch": 25.05, "learning_rate": 3.7474e-05, "loss": 8.7342, "step": 3131500 }, { "epoch": 25.06, "learning_rate": 3.7472e-05, "loss": 8.7497, "step": 3132000 }, { "epoch": 25.06, "learning_rate": 3.747e-05, "loss": 8.7676, "step": 3132500 }, { "epoch": 25.06, "learning_rate": 3.7468e-05, "loss": 8.7513, "step": 3133000 }, { "epoch": 25.07, "learning_rate": 3.7466000000000003e-05, "loss": 8.7418, "step": 3133500 }, { "epoch": 25.07, "learning_rate": 3.7464e-05, "loss": 8.7741, "step": 3134000 }, { "epoch": 25.08, "learning_rate": 3.7462e-05, "loss": 8.7465, "step": 3134500 }, { "epoch": 25.08, "learning_rate": 3.7460000000000004e-05, "loss": 8.7552, "step": 3135000 }, { "epoch": 25.08, "learning_rate": 3.7458000000000006e-05, "loss": 8.7354, "step": 3135500 }, { "epoch": 25.09, "learning_rate": 3.7456e-05, "loss": 8.7424, "step": 3136000 }, { "epoch": 25.09, "learning_rate": 3.7454e-05, "loss": 8.7527, "step": 3136500 }, { "epoch": 25.1, "learning_rate": 3.745200000000001e-05, "loss": 8.7609, "step": 3137000 }, { "epoch": 25.1, "learning_rate": 3.745e-05, "loss": 8.7475, "step": 3137500 }, { "epoch": 25.1, "learning_rate": 3.7448e-05, "loss": 8.7523, "step": 3138000 }, { "epoch": 25.11, "learning_rate": 3.7446e-05, "loss": 8.7661, "step": 3138500 }, { "epoch": 25.11, "learning_rate": 3.7444e-05, "loss": 8.7576, "step": 3139000 }, { "epoch": 25.12, "learning_rate": 3.7442e-05, "loss": 8.7413, "step": 3139500 }, { "epoch": 25.12, "learning_rate": 3.744e-05, "loss": 8.761, "step": 3140000 }, { "epoch": 25.12, "learning_rate": 3.7438e-05, "loss": 8.7605, "step": 3140500 }, { "epoch": 25.13, "learning_rate": 3.7436000000000005e-05, "loss": 8.7505, "step": 3141000 }, { "epoch": 25.13, "learning_rate": 3.7434e-05, "loss": 8.7373, "step": 3141500 }, { "epoch": 25.14, "learning_rate": 3.7432000000000003e-05, "loss": 8.7379, "step": 3142000 }, { "epoch": 25.14, "learning_rate": 3.7430000000000006e-05, "loss": 8.7245, "step": 3142500 }, { "epoch": 25.14, "learning_rate": 3.7428e-05, "loss": 8.7587, "step": 3143000 }, { "epoch": 25.15, "learning_rate": 3.7426e-05, "loss": 8.7323, "step": 3143500 }, { "epoch": 25.15, "learning_rate": 3.7424000000000006e-05, "loss": 8.7649, "step": 3144000 }, { "epoch": 25.16, "learning_rate": 3.7422e-05, "loss": 8.747, "step": 3144500 }, { "epoch": 25.16, "learning_rate": 3.742e-05, "loss": 8.7659, "step": 3145000 }, { "epoch": 25.16, "learning_rate": 3.7418e-05, "loss": 8.7409, "step": 3145500 }, { "epoch": 25.17, "learning_rate": 3.7416e-05, "loss": 8.7609, "step": 3146000 }, { "epoch": 25.17, "learning_rate": 3.7414e-05, "loss": 8.7348, "step": 3146500 }, { "epoch": 25.18, "learning_rate": 3.7412e-05, "loss": 8.7426, "step": 3147000 }, { "epoch": 25.18, "learning_rate": 3.741e-05, "loss": 8.7457, "step": 3147500 }, { "epoch": 25.18, "learning_rate": 3.7408000000000005e-05, "loss": 8.7339, "step": 3148000 }, { "epoch": 25.19, "learning_rate": 3.7406e-05, "loss": 8.7315, "step": 3148500 }, { "epoch": 25.19, "learning_rate": 3.7404e-05, "loss": 8.7605, "step": 3149000 }, { "epoch": 25.2, "learning_rate": 3.7402000000000005e-05, "loss": 8.7583, "step": 3149500 }, { "epoch": 25.2, "learning_rate": 3.74e-05, "loss": 8.7513, "step": 3150000 }, { "epoch": 25.2, "learning_rate": 3.7398e-05, "loss": 8.7552, "step": 3150500 }, { "epoch": 25.21, "learning_rate": 3.7396000000000006e-05, "loss": 8.7491, "step": 3151000 }, { "epoch": 25.21, "learning_rate": 3.7394e-05, "loss": 8.7528, "step": 3151500 }, { "epoch": 25.22, "learning_rate": 3.7392e-05, "loss": 8.7529, "step": 3152000 }, { "epoch": 25.22, "learning_rate": 3.739e-05, "loss": 8.7669, "step": 3152500 }, { "epoch": 25.22, "learning_rate": 3.7388e-05, "loss": 8.7674, "step": 3153000 }, { "epoch": 25.23, "learning_rate": 3.7386000000000004e-05, "loss": 8.749, "step": 3153500 }, { "epoch": 25.23, "learning_rate": 3.7384e-05, "loss": 8.7585, "step": 3154000 }, { "epoch": 25.24, "learning_rate": 3.7382e-05, "loss": 8.7616, "step": 3154500 }, { "epoch": 25.24, "learning_rate": 3.7380000000000005e-05, "loss": 8.75, "step": 3155000 }, { "epoch": 25.24, "learning_rate": 3.7378e-05, "loss": 8.7574, "step": 3155500 }, { "epoch": 25.25, "learning_rate": 3.7376e-05, "loss": 8.7508, "step": 3156000 }, { "epoch": 25.25, "learning_rate": 3.7374000000000005e-05, "loss": 8.7329, "step": 3156500 }, { "epoch": 25.26, "learning_rate": 3.7372e-05, "loss": 8.7706, "step": 3157000 }, { "epoch": 25.26, "learning_rate": 3.7369999999999996e-05, "loss": 8.763, "step": 3157500 }, { "epoch": 25.26, "learning_rate": 3.7368000000000005e-05, "loss": 8.7394, "step": 3158000 }, { "epoch": 25.27, "learning_rate": 3.7366e-05, "loss": 8.7474, "step": 3158500 }, { "epoch": 25.27, "learning_rate": 3.7364000000000003e-05, "loss": 8.7454, "step": 3159000 }, { "epoch": 25.28, "learning_rate": 3.7362000000000006e-05, "loss": 8.765, "step": 3159500 }, { "epoch": 25.28, "learning_rate": 3.736e-05, "loss": 8.7483, "step": 3160000 }, { "epoch": 25.28, "learning_rate": 3.7358000000000004e-05, "loss": 8.761, "step": 3160500 }, { "epoch": 25.29, "learning_rate": 3.7356e-05, "loss": 8.7573, "step": 3161000 }, { "epoch": 25.29, "learning_rate": 3.7354e-05, "loss": 8.7656, "step": 3161500 }, { "epoch": 25.3, "learning_rate": 3.7352000000000004e-05, "loss": 8.7354, "step": 3162000 }, { "epoch": 25.3, "learning_rate": 3.735e-05, "loss": 8.7465, "step": 3162500 }, { "epoch": 25.3, "learning_rate": 3.7348e-05, "loss": 8.7508, "step": 3163000 }, { "epoch": 25.31, "learning_rate": 3.7346000000000005e-05, "loss": 8.7452, "step": 3163500 }, { "epoch": 25.31, "learning_rate": 3.7344e-05, "loss": 8.7574, "step": 3164000 }, { "epoch": 25.32, "learning_rate": 3.7341999999999996e-05, "loss": 8.75, "step": 3164500 }, { "epoch": 25.32, "learning_rate": 3.7340000000000005e-05, "loss": 8.7399, "step": 3165000 }, { "epoch": 25.32, "learning_rate": 3.7338e-05, "loss": 8.7545, "step": 3165500 }, { "epoch": 25.33, "learning_rate": 3.7336e-05, "loss": 8.7386, "step": 3166000 }, { "epoch": 25.33, "learning_rate": 3.7334000000000005e-05, "loss": 8.7456, "step": 3166500 }, { "epoch": 25.34, "learning_rate": 3.7332e-05, "loss": 8.7455, "step": 3167000 }, { "epoch": 25.34, "learning_rate": 3.7330000000000003e-05, "loss": 8.7585, "step": 3167500 }, { "epoch": 25.34, "learning_rate": 3.7328e-05, "loss": 8.7372, "step": 3168000 }, { "epoch": 25.35, "learning_rate": 3.7326e-05, "loss": 8.749, "step": 3168500 }, { "epoch": 25.35, "learning_rate": 3.7324000000000004e-05, "loss": 8.7528, "step": 3169000 }, { "epoch": 25.36, "learning_rate": 3.7322e-05, "loss": 8.7333, "step": 3169500 }, { "epoch": 25.36, "learning_rate": 3.732e-05, "loss": 8.7637, "step": 3170000 }, { "epoch": 25.36, "learning_rate": 3.7318000000000004e-05, "loss": 8.7604, "step": 3170500 }, { "epoch": 25.37, "learning_rate": 3.7316e-05, "loss": 8.7531, "step": 3171000 }, { "epoch": 25.37, "learning_rate": 3.7314e-05, "loss": 8.7433, "step": 3171500 }, { "epoch": 25.38, "learning_rate": 3.7312000000000005e-05, "loss": 8.733, "step": 3172000 }, { "epoch": 25.38, "learning_rate": 3.731e-05, "loss": 8.741, "step": 3172500 }, { "epoch": 25.38, "learning_rate": 3.7308e-05, "loss": 8.7642, "step": 3173000 }, { "epoch": 25.39, "learning_rate": 3.7306000000000005e-05, "loss": 8.7457, "step": 3173500 }, { "epoch": 25.39, "learning_rate": 3.7304e-05, "loss": 8.7407, "step": 3174000 }, { "epoch": 25.4, "learning_rate": 3.7302e-05, "loss": 8.7515, "step": 3174500 }, { "epoch": 25.4, "learning_rate": 3.73e-05, "loss": 8.766, "step": 3175000 }, { "epoch": 25.4, "learning_rate": 3.7298e-05, "loss": 8.7638, "step": 3175500 }, { "epoch": 25.41, "learning_rate": 3.7296000000000003e-05, "loss": 8.7683, "step": 3176000 }, { "epoch": 25.41, "learning_rate": 3.7294e-05, "loss": 8.7597, "step": 3176500 }, { "epoch": 25.42, "learning_rate": 3.7292e-05, "loss": 8.7674, "step": 3177000 }, { "epoch": 25.42, "learning_rate": 3.7290000000000004e-05, "loss": 8.7341, "step": 3177500 }, { "epoch": 25.42, "learning_rate": 3.7288e-05, "loss": 8.7576, "step": 3178000 }, { "epoch": 25.43, "learning_rate": 3.7286e-05, "loss": 8.7579, "step": 3178500 }, { "epoch": 25.43, "learning_rate": 3.7284000000000004e-05, "loss": 8.7493, "step": 3179000 }, { "epoch": 25.44, "learning_rate": 3.7282e-05, "loss": 8.759, "step": 3179500 }, { "epoch": 25.44, "learning_rate": 3.728e-05, "loss": 8.7247, "step": 3180000 }, { "epoch": 25.44, "learning_rate": 3.7278000000000005e-05, "loss": 8.732, "step": 3180500 }, { "epoch": 25.45, "learning_rate": 3.7276e-05, "loss": 8.7409, "step": 3181000 }, { "epoch": 25.45, "learning_rate": 3.7274e-05, "loss": 8.7475, "step": 3181500 }, { "epoch": 25.46, "learning_rate": 3.7272e-05, "loss": 8.7494, "step": 3182000 }, { "epoch": 25.46, "learning_rate": 3.727e-05, "loss": 8.7335, "step": 3182500 }, { "epoch": 25.46, "learning_rate": 3.7268e-05, "loss": 8.7571, "step": 3183000 }, { "epoch": 25.47, "learning_rate": 3.7266e-05, "loss": 8.7396, "step": 3183500 }, { "epoch": 25.47, "learning_rate": 3.726400000000001e-05, "loss": 8.7505, "step": 3184000 }, { "epoch": 25.48, "learning_rate": 3.7262000000000003e-05, "loss": 8.748, "step": 3184500 }, { "epoch": 25.48, "learning_rate": 3.726e-05, "loss": 8.7304, "step": 3185000 }, { "epoch": 25.48, "learning_rate": 3.7258e-05, "loss": 8.7413, "step": 3185500 }, { "epoch": 25.49, "learning_rate": 3.7256000000000004e-05, "loss": 8.7656, "step": 3186000 }, { "epoch": 25.49, "learning_rate": 3.7254e-05, "loss": 8.7491, "step": 3186500 }, { "epoch": 25.5, "learning_rate": 3.7252e-05, "loss": 8.728, "step": 3187000 }, { "epoch": 25.5, "learning_rate": 3.7250000000000004e-05, "loss": 8.7336, "step": 3187500 }, { "epoch": 25.5, "learning_rate": 3.7248e-05, "loss": 8.7682, "step": 3188000 }, { "epoch": 25.51, "learning_rate": 3.7246e-05, "loss": 8.7455, "step": 3188500 }, { "epoch": 25.51, "learning_rate": 3.7244e-05, "loss": 8.7489, "step": 3189000 }, { "epoch": 25.52, "learning_rate": 3.7242e-05, "loss": 8.7733, "step": 3189500 }, { "epoch": 25.52, "learning_rate": 3.724e-05, "loss": 8.7607, "step": 3190000 }, { "epoch": 25.52, "learning_rate": 3.7238e-05, "loss": 8.7681, "step": 3190500 }, { "epoch": 25.53, "learning_rate": 3.723600000000001e-05, "loss": 8.7648, "step": 3191000 }, { "epoch": 25.53, "learning_rate": 3.7234e-05, "loss": 8.7566, "step": 3191500 }, { "epoch": 25.54, "learning_rate": 3.7232e-05, "loss": 8.7509, "step": 3192000 }, { "epoch": 25.54, "learning_rate": 3.723e-05, "loss": 8.7704, "step": 3192500 }, { "epoch": 25.54, "learning_rate": 3.7228e-05, "loss": 8.7438, "step": 3193000 }, { "epoch": 25.55, "learning_rate": 3.7226e-05, "loss": 8.7808, "step": 3193500 }, { "epoch": 25.55, "learning_rate": 3.7224e-05, "loss": 8.7368, "step": 3194000 }, { "epoch": 25.56, "learning_rate": 3.7222000000000004e-05, "loss": 8.7421, "step": 3194500 }, { "epoch": 25.56, "learning_rate": 3.722e-05, "loss": 8.7524, "step": 3195000 }, { "epoch": 25.56, "learning_rate": 3.7218e-05, "loss": 8.7243, "step": 3195500 }, { "epoch": 25.57, "learning_rate": 3.7216000000000004e-05, "loss": 8.7637, "step": 3196000 }, { "epoch": 25.57, "learning_rate": 3.7214000000000007e-05, "loss": 8.7414, "step": 3196500 }, { "epoch": 25.58, "learning_rate": 3.7212e-05, "loss": 8.7553, "step": 3197000 }, { "epoch": 25.58, "learning_rate": 3.721e-05, "loss": 8.7659, "step": 3197500 }, { "epoch": 25.58, "learning_rate": 3.720800000000001e-05, "loss": 8.7446, "step": 3198000 }, { "epoch": 25.59, "learning_rate": 3.7206e-05, "loss": 8.7576, "step": 3198500 }, { "epoch": 25.59, "learning_rate": 3.7204e-05, "loss": 8.75, "step": 3199000 }, { "epoch": 25.6, "learning_rate": 3.7202e-05, "loss": 8.7414, "step": 3199500 }, { "epoch": 25.6, "learning_rate": 3.72e-05, "loss": 8.7625, "step": 3200000 }, { "epoch": 25.6, "learning_rate": 3.7198e-05, "loss": 8.7672, "step": 3200500 }, { "epoch": 25.61, "learning_rate": 3.7196e-05, "loss": 8.7455, "step": 3201000 }, { "epoch": 25.61, "learning_rate": 3.7194e-05, "loss": 8.7344, "step": 3201500 }, { "epoch": 25.62, "learning_rate": 3.7192e-05, "loss": 8.7595, "step": 3202000 }, { "epoch": 25.62, "learning_rate": 3.719e-05, "loss": 8.742, "step": 3202500 }, { "epoch": 25.62, "learning_rate": 3.7188000000000004e-05, "loss": 8.7617, "step": 3203000 }, { "epoch": 25.63, "learning_rate": 3.7186000000000006e-05, "loss": 8.7673, "step": 3203500 }, { "epoch": 25.63, "learning_rate": 3.7184e-05, "loss": 8.761, "step": 3204000 }, { "epoch": 25.64, "learning_rate": 3.7182e-05, "loss": 8.7515, "step": 3204500 }, { "epoch": 25.64, "learning_rate": 3.7180000000000007e-05, "loss": 8.7381, "step": 3205000 }, { "epoch": 25.64, "learning_rate": 3.7178e-05, "loss": 8.7473, "step": 3205500 }, { "epoch": 25.65, "learning_rate": 3.7176e-05, "loss": 8.7526, "step": 3206000 }, { "epoch": 25.65, "learning_rate": 3.7174e-05, "loss": 8.7728, "step": 3206500 }, { "epoch": 25.66, "learning_rate": 3.7172e-05, "loss": 8.7635, "step": 3207000 }, { "epoch": 25.66, "learning_rate": 3.717e-05, "loss": 8.7643, "step": 3207500 }, { "epoch": 25.66, "learning_rate": 3.7168e-05, "loss": 8.7323, "step": 3208000 }, { "epoch": 25.67, "learning_rate": 3.7166e-05, "loss": 8.7558, "step": 3208500 }, { "epoch": 25.67, "learning_rate": 3.7164000000000005e-05, "loss": 8.734, "step": 3209000 }, { "epoch": 25.68, "learning_rate": 3.7162e-05, "loss": 8.7409, "step": 3209500 }, { "epoch": 25.68, "learning_rate": 3.716e-05, "loss": 8.7335, "step": 3210000 }, { "epoch": 25.68, "learning_rate": 3.7158000000000006e-05, "loss": 8.7527, "step": 3210500 }, { "epoch": 25.69, "learning_rate": 3.7156e-05, "loss": 8.7348, "step": 3211000 }, { "epoch": 25.69, "learning_rate": 3.7154e-05, "loss": 8.7426, "step": 3211500 }, { "epoch": 25.7, "learning_rate": 3.7152000000000006e-05, "loss": 8.745, "step": 3212000 }, { "epoch": 25.7, "learning_rate": 3.715e-05, "loss": 8.7437, "step": 3212500 }, { "epoch": 25.7, "learning_rate": 3.7148e-05, "loss": 8.7688, "step": 3213000 }, { "epoch": 25.71, "learning_rate": 3.7146e-05, "loss": 8.7112, "step": 3213500 }, { "epoch": 25.71, "learning_rate": 3.7144e-05, "loss": 8.7544, "step": 3214000 }, { "epoch": 25.72, "learning_rate": 3.7142e-05, "loss": 8.756, "step": 3214500 }, { "epoch": 25.72, "learning_rate": 3.714e-05, "loss": 8.7357, "step": 3215000 }, { "epoch": 25.72, "learning_rate": 3.7138e-05, "loss": 8.7383, "step": 3215500 }, { "epoch": 25.73, "learning_rate": 3.7136000000000005e-05, "loss": 8.7395, "step": 3216000 }, { "epoch": 25.73, "learning_rate": 3.7134e-05, "loss": 8.7748, "step": 3216500 }, { "epoch": 25.74, "learning_rate": 3.7132e-05, "loss": 8.7768, "step": 3217000 }, { "epoch": 25.74, "learning_rate": 3.7130000000000005e-05, "loss": 8.7262, "step": 3217500 }, { "epoch": 25.74, "learning_rate": 3.7128e-05, "loss": 8.7536, "step": 3218000 }, { "epoch": 25.75, "learning_rate": 3.7125999999999997e-05, "loss": 8.7517, "step": 3218500 }, { "epoch": 25.75, "learning_rate": 3.7124000000000006e-05, "loss": 8.7583, "step": 3219000 }, { "epoch": 25.76, "learning_rate": 3.7122e-05, "loss": 8.7627, "step": 3219500 }, { "epoch": 25.76, "learning_rate": 3.712e-05, "loss": 8.7577, "step": 3220000 }, { "epoch": 25.76, "learning_rate": 3.7118000000000006e-05, "loss": 8.746, "step": 3220500 }, { "epoch": 25.77, "learning_rate": 3.7116e-05, "loss": 8.7514, "step": 3221000 }, { "epoch": 25.77, "learning_rate": 3.7114000000000004e-05, "loss": 8.7597, "step": 3221500 }, { "epoch": 25.78, "learning_rate": 3.7112e-05, "loss": 8.7536, "step": 3222000 }, { "epoch": 25.78, "learning_rate": 3.711e-05, "loss": 8.773, "step": 3222500 }, { "epoch": 25.78, "learning_rate": 3.7108000000000005e-05, "loss": 8.7555, "step": 3223000 }, { "epoch": 25.79, "learning_rate": 3.7106e-05, "loss": 8.7407, "step": 3223500 }, { "epoch": 25.79, "learning_rate": 3.7104e-05, "loss": 8.7603, "step": 3224000 }, { "epoch": 25.8, "learning_rate": 3.7102000000000005e-05, "loss": 8.7497, "step": 3224500 }, { "epoch": 25.8, "learning_rate": 3.71e-05, "loss": 8.7446, "step": 3225000 }, { "epoch": 25.8, "learning_rate": 3.7097999999999996e-05, "loss": 8.7474, "step": 3225500 }, { "epoch": 25.81, "learning_rate": 3.7096000000000005e-05, "loss": 8.7303, "step": 3226000 }, { "epoch": 25.81, "learning_rate": 3.7094e-05, "loss": 8.7409, "step": 3226500 }, { "epoch": 25.82, "learning_rate": 3.7092e-05, "loss": 8.7398, "step": 3227000 }, { "epoch": 25.82, "learning_rate": 3.7090000000000006e-05, "loss": 8.7489, "step": 3227500 }, { "epoch": 25.82, "learning_rate": 3.7088e-05, "loss": 8.7556, "step": 3228000 }, { "epoch": 25.83, "learning_rate": 3.7086000000000004e-05, "loss": 8.7551, "step": 3228500 }, { "epoch": 25.83, "learning_rate": 3.7084e-05, "loss": 8.7205, "step": 3229000 }, { "epoch": 25.84, "learning_rate": 3.7082e-05, "loss": 8.77, "step": 3229500 }, { "epoch": 25.84, "learning_rate": 3.7080000000000004e-05, "loss": 8.765, "step": 3230000 }, { "epoch": 25.84, "learning_rate": 3.7078e-05, "loss": 8.7532, "step": 3230500 }, { "epoch": 25.85, "learning_rate": 3.7076e-05, "loss": 8.7472, "step": 3231000 }, { "epoch": 25.85, "learning_rate": 3.7074000000000005e-05, "loss": 8.7432, "step": 3231500 }, { "epoch": 25.86, "learning_rate": 3.7072e-05, "loss": 8.7456, "step": 3232000 }, { "epoch": 25.86, "learning_rate": 3.707e-05, "loss": 8.761, "step": 3232500 }, { "epoch": 25.86, "learning_rate": 3.7068000000000005e-05, "loss": 8.7293, "step": 3233000 }, { "epoch": 25.87, "learning_rate": 3.7066e-05, "loss": 8.7424, "step": 3233500 }, { "epoch": 25.87, "learning_rate": 3.7064e-05, "loss": 8.7538, "step": 3234000 }, { "epoch": 25.88, "learning_rate": 3.7062000000000005e-05, "loss": 8.7586, "step": 3234500 }, { "epoch": 25.88, "learning_rate": 3.706e-05, "loss": 8.7242, "step": 3235000 }, { "epoch": 25.88, "learning_rate": 3.7058e-05, "loss": 8.7318, "step": 3235500 }, { "epoch": 25.89, "learning_rate": 3.7056e-05, "loss": 8.7581, "step": 3236000 }, { "epoch": 25.89, "learning_rate": 3.7054e-05, "loss": 8.7677, "step": 3236500 }, { "epoch": 25.9, "learning_rate": 3.7052000000000004e-05, "loss": 8.7676, "step": 3237000 }, { "epoch": 25.9, "learning_rate": 3.705e-05, "loss": 8.7461, "step": 3237500 }, { "epoch": 25.9, "learning_rate": 3.7048e-05, "loss": 8.7457, "step": 3238000 }, { "epoch": 25.91, "learning_rate": 3.7046000000000004e-05, "loss": 8.7572, "step": 3238500 }, { "epoch": 25.91, "learning_rate": 3.7044e-05, "loss": 8.7581, "step": 3239000 }, { "epoch": 25.92, "learning_rate": 3.7042e-05, "loss": 8.738, "step": 3239500 }, { "epoch": 25.92, "learning_rate": 3.7040000000000005e-05, "loss": 8.7501, "step": 3240000 }, { "epoch": 25.92, "learning_rate": 3.7038e-05, "loss": 8.7498, "step": 3240500 }, { "epoch": 25.93, "learning_rate": 3.7036e-05, "loss": 8.753, "step": 3241000 }, { "epoch": 25.93, "learning_rate": 3.7034000000000005e-05, "loss": 8.774, "step": 3241500 }, { "epoch": 25.94, "learning_rate": 3.7032e-05, "loss": 8.7439, "step": 3242000 }, { "epoch": 25.94, "learning_rate": 3.703e-05, "loss": 8.7464, "step": 3242500 }, { "epoch": 25.94, "learning_rate": 3.7028e-05, "loss": 8.7611, "step": 3243000 }, { "epoch": 25.95, "learning_rate": 3.7026e-05, "loss": 8.7507, "step": 3243500 }, { "epoch": 25.95, "learning_rate": 3.7024e-05, "loss": 8.7673, "step": 3244000 }, { "epoch": 25.96, "learning_rate": 3.7022e-05, "loss": 8.7656, "step": 3244500 }, { "epoch": 25.96, "learning_rate": 3.702e-05, "loss": 8.7737, "step": 3245000 }, { "epoch": 25.96, "learning_rate": 3.7018000000000004e-05, "loss": 8.7492, "step": 3245500 }, { "epoch": 25.97, "learning_rate": 3.7016e-05, "loss": 8.7572, "step": 3246000 }, { "epoch": 25.97, "learning_rate": 3.7014e-05, "loss": 8.7305, "step": 3246500 }, { "epoch": 25.98, "learning_rate": 3.7012000000000004e-05, "loss": 8.7725, "step": 3247000 }, { "epoch": 25.98, "learning_rate": 3.701e-05, "loss": 8.7392, "step": 3247500 }, { "epoch": 25.98, "learning_rate": 3.7008e-05, "loss": 8.7754, "step": 3248000 }, { "epoch": 25.99, "learning_rate": 3.7006000000000004e-05, "loss": 8.7617, "step": 3248500 }, { "epoch": 25.99, "learning_rate": 3.7004e-05, "loss": 8.7304, "step": 3249000 }, { "epoch": 26.0, "learning_rate": 3.7002e-05, "loss": 8.7367, "step": 3249500 }, { "epoch": 26.0, "learning_rate": 3.7e-05, "loss": 8.7544, "step": 3250000 }, { "epoch": 26.0, "learning_rate": 3.6998e-05, "loss": 8.752, "step": 3250500 }, { "epoch": 26.01, "learning_rate": 3.6996e-05, "loss": 8.7435, "step": 3251000 }, { "epoch": 26.01, "learning_rate": 3.6994e-05, "loss": 8.7576, "step": 3251500 }, { "epoch": 26.02, "learning_rate": 3.699200000000001e-05, "loss": 8.7421, "step": 3252000 }, { "epoch": 26.02, "learning_rate": 3.699e-05, "loss": 8.7463, "step": 3252500 }, { "epoch": 26.02, "learning_rate": 3.6988e-05, "loss": 8.7779, "step": 3253000 }, { "epoch": 26.03, "learning_rate": 3.6986e-05, "loss": 8.7627, "step": 3253500 }, { "epoch": 26.03, "learning_rate": 3.6984000000000004e-05, "loss": 8.745, "step": 3254000 }, { "epoch": 26.04, "learning_rate": 3.6982e-05, "loss": 8.7305, "step": 3254500 }, { "epoch": 26.04, "learning_rate": 3.698e-05, "loss": 8.7398, "step": 3255000 }, { "epoch": 26.04, "learning_rate": 3.6978000000000004e-05, "loss": 8.7442, "step": 3255500 }, { "epoch": 26.05, "learning_rate": 3.6976e-05, "loss": 8.7633, "step": 3256000 }, { "epoch": 26.05, "learning_rate": 3.6974e-05, "loss": 8.7649, "step": 3256500 }, { "epoch": 26.06, "learning_rate": 3.6972e-05, "loss": 8.7439, "step": 3257000 }, { "epoch": 26.06, "learning_rate": 3.697e-05, "loss": 8.7624, "step": 3257500 }, { "epoch": 26.06, "learning_rate": 3.6968e-05, "loss": 8.7714, "step": 3258000 }, { "epoch": 26.07, "learning_rate": 3.6966e-05, "loss": 8.7686, "step": 3258500 }, { "epoch": 26.07, "learning_rate": 3.696400000000001e-05, "loss": 8.7558, "step": 3259000 }, { "epoch": 26.08, "learning_rate": 3.6962e-05, "loss": 8.7554, "step": 3259500 }, { "epoch": 26.08, "learning_rate": 3.696e-05, "loss": 8.7222, "step": 3260000 }, { "epoch": 26.08, "learning_rate": 3.6958e-05, "loss": 8.7441, "step": 3260500 }, { "epoch": 26.09, "learning_rate": 3.6956e-05, "loss": 8.7577, "step": 3261000 }, { "epoch": 26.09, "learning_rate": 3.6954e-05, "loss": 8.7421, "step": 3261500 }, { "epoch": 26.1, "learning_rate": 3.6952e-05, "loss": 8.7367, "step": 3262000 }, { "epoch": 26.1, "learning_rate": 3.6950000000000004e-05, "loss": 8.7695, "step": 3262500 }, { "epoch": 26.1, "learning_rate": 3.6948e-05, "loss": 8.758, "step": 3263000 }, { "epoch": 26.11, "learning_rate": 3.6946e-05, "loss": 8.7475, "step": 3263500 }, { "epoch": 26.11, "learning_rate": 3.6944000000000004e-05, "loss": 8.7355, "step": 3264000 }, { "epoch": 26.12, "learning_rate": 3.6942000000000006e-05, "loss": 8.7467, "step": 3264500 }, { "epoch": 26.12, "learning_rate": 3.694e-05, "loss": 8.7379, "step": 3265000 }, { "epoch": 26.12, "learning_rate": 3.6938e-05, "loss": 8.7431, "step": 3265500 }, { "epoch": 26.13, "learning_rate": 3.693600000000001e-05, "loss": 8.7503, "step": 3266000 }, { "epoch": 26.13, "learning_rate": 3.6934e-05, "loss": 8.7566, "step": 3266500 }, { "epoch": 26.14, "learning_rate": 3.6932e-05, "loss": 8.7466, "step": 3267000 }, { "epoch": 26.14, "learning_rate": 3.693e-05, "loss": 8.7587, "step": 3267500 }, { "epoch": 26.14, "learning_rate": 3.6928e-05, "loss": 8.7805, "step": 3268000 }, { "epoch": 26.15, "learning_rate": 3.6926e-05, "loss": 8.7623, "step": 3268500 }, { "epoch": 26.15, "learning_rate": 3.6924e-05, "loss": 8.7254, "step": 3269000 }, { "epoch": 26.16, "learning_rate": 3.6922e-05, "loss": 8.7437, "step": 3269500 }, { "epoch": 26.16, "learning_rate": 3.692e-05, "loss": 8.7603, "step": 3270000 }, { "epoch": 26.16, "learning_rate": 3.6918e-05, "loss": 8.7438, "step": 3270500 }, { "epoch": 26.17, "learning_rate": 3.6916000000000004e-05, "loss": 8.7518, "step": 3271000 }, { "epoch": 26.17, "learning_rate": 3.6914000000000006e-05, "loss": 8.7569, "step": 3271500 }, { "epoch": 26.18, "learning_rate": 3.6912e-05, "loss": 8.7443, "step": 3272000 }, { "epoch": 26.18, "learning_rate": 3.691e-05, "loss": 8.744, "step": 3272500 }, { "epoch": 26.18, "learning_rate": 3.6908000000000006e-05, "loss": 8.7405, "step": 3273000 }, { "epoch": 26.19, "learning_rate": 3.6906e-05, "loss": 8.7488, "step": 3273500 }, { "epoch": 26.19, "learning_rate": 3.6904e-05, "loss": 8.7509, "step": 3274000 }, { "epoch": 26.2, "learning_rate": 3.6902e-05, "loss": 8.7396, "step": 3274500 }, { "epoch": 26.2, "learning_rate": 3.69e-05, "loss": 8.7531, "step": 3275000 }, { "epoch": 26.2, "learning_rate": 3.6898e-05, "loss": 8.7408, "step": 3275500 }, { "epoch": 26.21, "learning_rate": 3.6896e-05, "loss": 8.7662, "step": 3276000 }, { "epoch": 26.21, "learning_rate": 3.6894e-05, "loss": 8.7608, "step": 3276500 }, { "epoch": 26.22, "learning_rate": 3.6892000000000005e-05, "loss": 8.7601, "step": 3277000 }, { "epoch": 26.22, "learning_rate": 3.689e-05, "loss": 8.7732, "step": 3277500 }, { "epoch": 26.22, "learning_rate": 3.6888e-05, "loss": 8.7422, "step": 3278000 }, { "epoch": 26.23, "learning_rate": 3.6886000000000006e-05, "loss": 8.7677, "step": 3278500 }, { "epoch": 26.23, "learning_rate": 3.6884e-05, "loss": 8.7503, "step": 3279000 }, { "epoch": 26.24, "learning_rate": 3.6882e-05, "loss": 8.7618, "step": 3279500 }, { "epoch": 26.24, "learning_rate": 3.6880000000000006e-05, "loss": 8.7781, "step": 3280000 }, { "epoch": 26.24, "learning_rate": 3.6878e-05, "loss": 8.7413, "step": 3280500 }, { "epoch": 26.25, "learning_rate": 3.6876e-05, "loss": 8.7508, "step": 3281000 }, { "epoch": 26.25, "learning_rate": 3.6874e-05, "loss": 8.7516, "step": 3281500 }, { "epoch": 26.26, "learning_rate": 3.6872e-05, "loss": 8.7424, "step": 3282000 }, { "epoch": 26.26, "learning_rate": 3.6870000000000004e-05, "loss": 8.7704, "step": 3282500 }, { "epoch": 26.26, "learning_rate": 3.6868e-05, "loss": 8.7537, "step": 3283000 }, { "epoch": 26.27, "learning_rate": 3.6866e-05, "loss": 8.7604, "step": 3283500 }, { "epoch": 26.27, "learning_rate": 3.6864000000000005e-05, "loss": 8.744, "step": 3284000 }, { "epoch": 26.28, "learning_rate": 3.6862e-05, "loss": 8.7542, "step": 3284500 }, { "epoch": 26.28, "learning_rate": 3.686e-05, "loss": 8.7206, "step": 3285000 }, { "epoch": 26.28, "learning_rate": 3.6858000000000005e-05, "loss": 8.7497, "step": 3285500 }, { "epoch": 26.29, "learning_rate": 3.6856e-05, "loss": 8.7506, "step": 3286000 }, { "epoch": 26.29, "learning_rate": 3.6853999999999996e-05, "loss": 8.7408, "step": 3286500 }, { "epoch": 26.3, "learning_rate": 3.6852000000000006e-05, "loss": 8.7694, "step": 3287000 }, { "epoch": 26.3, "learning_rate": 3.685e-05, "loss": 8.768, "step": 3287500 }, { "epoch": 26.3, "learning_rate": 3.6848e-05, "loss": 8.7555, "step": 3288000 }, { "epoch": 26.31, "learning_rate": 3.6846000000000006e-05, "loss": 8.752, "step": 3288500 }, { "epoch": 26.31, "learning_rate": 3.6844e-05, "loss": 8.759, "step": 3289000 }, { "epoch": 26.32, "learning_rate": 3.6842000000000004e-05, "loss": 8.7277, "step": 3289500 }, { "epoch": 26.32, "learning_rate": 3.684e-05, "loss": 8.7334, "step": 3290000 }, { "epoch": 26.32, "learning_rate": 3.6838e-05, "loss": 8.7535, "step": 3290500 }, { "epoch": 26.33, "learning_rate": 3.6836000000000004e-05, "loss": 8.7408, "step": 3291000 }, { "epoch": 26.33, "learning_rate": 3.6834e-05, "loss": 8.7575, "step": 3291500 }, { "epoch": 26.34, "learning_rate": 3.6832e-05, "loss": 8.7536, "step": 3292000 }, { "epoch": 26.34, "learning_rate": 3.6830000000000005e-05, "loss": 8.7367, "step": 3292500 }, { "epoch": 26.34, "learning_rate": 3.6828e-05, "loss": 8.753, "step": 3293000 }, { "epoch": 26.35, "learning_rate": 3.6825999999999996e-05, "loss": 8.78, "step": 3293500 }, { "epoch": 26.35, "learning_rate": 3.6824000000000005e-05, "loss": 8.7284, "step": 3294000 }, { "epoch": 26.36, "learning_rate": 3.6822e-05, "loss": 8.7607, "step": 3294500 }, { "epoch": 26.36, "learning_rate": 3.682e-05, "loss": 8.7433, "step": 3295000 }, { "epoch": 26.36, "learning_rate": 3.6818000000000006e-05, "loss": 8.7431, "step": 3295500 }, { "epoch": 26.37, "learning_rate": 3.6816e-05, "loss": 8.7335, "step": 3296000 }, { "epoch": 26.37, "learning_rate": 3.6814000000000004e-05, "loss": 8.7502, "step": 3296500 }, { "epoch": 26.38, "learning_rate": 3.6812e-05, "loss": 8.7654, "step": 3297000 }, { "epoch": 26.38, "learning_rate": 3.681e-05, "loss": 8.7386, "step": 3297500 }, { "epoch": 26.38, "learning_rate": 3.6808000000000004e-05, "loss": 8.757, "step": 3298000 }, { "epoch": 26.39, "learning_rate": 3.6806e-05, "loss": 8.7584, "step": 3298500 }, { "epoch": 26.39, "learning_rate": 3.6804e-05, "loss": 8.7412, "step": 3299000 }, { "epoch": 26.4, "learning_rate": 3.6802000000000004e-05, "loss": 8.7364, "step": 3299500 }, { "epoch": 26.4, "learning_rate": 3.68e-05, "loss": 8.7282, "step": 3300000 }, { "epoch": 26.4, "learning_rate": 3.6798e-05, "loss": 8.7656, "step": 3300500 }, { "epoch": 26.41, "learning_rate": 3.6796000000000005e-05, "loss": 8.7479, "step": 3301000 }, { "epoch": 26.41, "learning_rate": 3.6794e-05, "loss": 8.7612, "step": 3301500 }, { "epoch": 26.42, "learning_rate": 3.6792e-05, "loss": 8.756, "step": 3302000 }, { "epoch": 26.42, "learning_rate": 3.6790000000000005e-05, "loss": 8.7474, "step": 3302500 }, { "epoch": 26.42, "learning_rate": 3.6788e-05, "loss": 8.7558, "step": 3303000 }, { "epoch": 26.43, "learning_rate": 3.6786e-05, "loss": 8.7616, "step": 3303500 }, { "epoch": 26.43, "learning_rate": 3.6784e-05, "loss": 8.7801, "step": 3304000 }, { "epoch": 26.44, "learning_rate": 3.6782e-05, "loss": 8.7782, "step": 3304500 }, { "epoch": 26.44, "learning_rate": 3.6780000000000004e-05, "loss": 8.7574, "step": 3305000 }, { "epoch": 26.44, "learning_rate": 3.6778e-05, "loss": 8.7481, "step": 3305500 }, { "epoch": 26.45, "learning_rate": 3.6776e-05, "loss": 8.7442, "step": 3306000 }, { "epoch": 26.45, "learning_rate": 3.6774000000000004e-05, "loss": 8.7625, "step": 3306500 }, { "epoch": 26.46, "learning_rate": 3.6772e-05, "loss": 8.7224, "step": 3307000 }, { "epoch": 26.46, "learning_rate": 3.677e-05, "loss": 8.7618, "step": 3307500 }, { "epoch": 26.46, "learning_rate": 3.6768000000000004e-05, "loss": 8.7534, "step": 3308000 }, { "epoch": 26.47, "learning_rate": 3.6766e-05, "loss": 8.7518, "step": 3308500 }, { "epoch": 26.47, "learning_rate": 3.6764e-05, "loss": 8.7414, "step": 3309000 }, { "epoch": 26.48, "learning_rate": 3.6762000000000005e-05, "loss": 8.7333, "step": 3309500 }, { "epoch": 26.48, "learning_rate": 3.676e-05, "loss": 8.7125, "step": 3310000 }, { "epoch": 26.48, "learning_rate": 3.6758e-05, "loss": 8.7369, "step": 3310500 }, { "epoch": 26.49, "learning_rate": 3.6756e-05, "loss": 8.7384, "step": 3311000 }, { "epoch": 26.49, "learning_rate": 3.6754e-05, "loss": 8.7644, "step": 3311500 }, { "epoch": 26.5, "learning_rate": 3.6752e-05, "loss": 8.7515, "step": 3312000 }, { "epoch": 26.5, "learning_rate": 3.675e-05, "loss": 8.7439, "step": 3312500 }, { "epoch": 26.5, "learning_rate": 3.6748e-05, "loss": 8.7479, "step": 3313000 }, { "epoch": 26.51, "learning_rate": 3.6746000000000004e-05, "loss": 8.7784, "step": 3313500 }, { "epoch": 26.51, "learning_rate": 3.6744e-05, "loss": 8.7408, "step": 3314000 }, { "epoch": 26.52, "learning_rate": 3.6742e-05, "loss": 8.75, "step": 3314500 }, { "epoch": 26.52, "learning_rate": 3.6740000000000004e-05, "loss": 8.7593, "step": 3315000 }, { "epoch": 26.52, "learning_rate": 3.6738e-05, "loss": 8.7493, "step": 3315500 }, { "epoch": 26.53, "learning_rate": 3.6736e-05, "loss": 8.7604, "step": 3316000 }, { "epoch": 26.53, "learning_rate": 3.6734000000000004e-05, "loss": 8.7634, "step": 3316500 }, { "epoch": 26.54, "learning_rate": 3.6732e-05, "loss": 8.7689, "step": 3317000 }, { "epoch": 26.54, "learning_rate": 3.673e-05, "loss": 8.7662, "step": 3317500 }, { "epoch": 26.54, "learning_rate": 3.6728e-05, "loss": 8.7836, "step": 3318000 }, { "epoch": 26.55, "learning_rate": 3.6726e-05, "loss": 8.7417, "step": 3318500 }, { "epoch": 26.55, "learning_rate": 3.6724e-05, "loss": 8.7391, "step": 3319000 }, { "epoch": 26.56, "learning_rate": 3.6722e-05, "loss": 8.746, "step": 3319500 }, { "epoch": 26.56, "learning_rate": 3.672000000000001e-05, "loss": 8.7405, "step": 3320000 }, { "epoch": 26.56, "learning_rate": 3.6718e-05, "loss": 8.7474, "step": 3320500 }, { "epoch": 26.57, "learning_rate": 3.6716e-05, "loss": 8.7437, "step": 3321000 }, { "epoch": 26.57, "learning_rate": 3.6714e-05, "loss": 8.7553, "step": 3321500 }, { "epoch": 26.58, "learning_rate": 3.6712000000000004e-05, "loss": 8.7521, "step": 3322000 }, { "epoch": 26.58, "learning_rate": 3.671e-05, "loss": 8.7577, "step": 3322500 }, { "epoch": 26.58, "learning_rate": 3.6708e-05, "loss": 8.7313, "step": 3323000 }, { "epoch": 26.59, "learning_rate": 3.6706000000000004e-05, "loss": 8.7479, "step": 3323500 }, { "epoch": 26.59, "learning_rate": 3.6704e-05, "loss": 8.758, "step": 3324000 }, { "epoch": 26.6, "learning_rate": 3.6702e-05, "loss": 8.7609, "step": 3324500 }, { "epoch": 26.6, "learning_rate": 3.6700000000000004e-05, "loss": 8.7414, "step": 3325000 }, { "epoch": 26.6, "learning_rate": 3.6698e-05, "loss": 8.7431, "step": 3325500 }, { "epoch": 26.61, "learning_rate": 3.6696e-05, "loss": 8.7298, "step": 3326000 }, { "epoch": 26.61, "learning_rate": 3.6694e-05, "loss": 8.7708, "step": 3326500 }, { "epoch": 26.62, "learning_rate": 3.669200000000001e-05, "loss": 8.7571, "step": 3327000 }, { "epoch": 26.62, "learning_rate": 3.669e-05, "loss": 8.7622, "step": 3327500 }, { "epoch": 26.62, "learning_rate": 3.6688e-05, "loss": 8.7497, "step": 3328000 }, { "epoch": 26.63, "learning_rate": 3.6686e-05, "loss": 8.7646, "step": 3328500 }, { "epoch": 26.63, "learning_rate": 3.6684e-05, "loss": 8.7592, "step": 3329000 }, { "epoch": 26.64, "learning_rate": 3.6682e-05, "loss": 8.7403, "step": 3329500 }, { "epoch": 26.64, "learning_rate": 3.668e-05, "loss": 8.7402, "step": 3330000 }, { "epoch": 26.64, "learning_rate": 3.6678000000000004e-05, "loss": 8.7467, "step": 3330500 }, { "epoch": 26.65, "learning_rate": 3.6676e-05, "loss": 8.7455, "step": 3331000 }, { "epoch": 26.65, "learning_rate": 3.6674e-05, "loss": 8.7417, "step": 3331500 }, { "epoch": 26.66, "learning_rate": 3.6672000000000004e-05, "loss": 8.7369, "step": 3332000 }, { "epoch": 26.66, "learning_rate": 3.6670000000000006e-05, "loss": 8.7582, "step": 3332500 }, { "epoch": 26.66, "learning_rate": 3.6668e-05, "loss": 8.7475, "step": 3333000 }, { "epoch": 26.67, "learning_rate": 3.6666e-05, "loss": 8.7586, "step": 3333500 }, { "epoch": 26.67, "learning_rate": 3.666400000000001e-05, "loss": 8.7478, "step": 3334000 }, { "epoch": 26.68, "learning_rate": 3.6662e-05, "loss": 8.7661, "step": 3334500 }, { "epoch": 26.68, "learning_rate": 3.666e-05, "loss": 8.7589, "step": 3335000 }, { "epoch": 26.68, "learning_rate": 3.6658e-05, "loss": 8.7684, "step": 3335500 }, { "epoch": 26.69, "learning_rate": 3.6656e-05, "loss": 8.7481, "step": 3336000 }, { "epoch": 26.69, "learning_rate": 3.6654e-05, "loss": 8.7563, "step": 3336500 }, { "epoch": 26.7, "learning_rate": 3.6652e-05, "loss": 8.7394, "step": 3337000 }, { "epoch": 26.7, "learning_rate": 3.665e-05, "loss": 8.7546, "step": 3337500 }, { "epoch": 26.7, "learning_rate": 3.6648e-05, "loss": 8.7569, "step": 3338000 }, { "epoch": 26.71, "learning_rate": 3.6646e-05, "loss": 8.7493, "step": 3338500 }, { "epoch": 26.71, "learning_rate": 3.6644000000000004e-05, "loss": 8.7298, "step": 3339000 }, { "epoch": 26.72, "learning_rate": 3.6642000000000006e-05, "loss": 8.7548, "step": 3339500 }, { "epoch": 26.72, "learning_rate": 3.664e-05, "loss": 8.7416, "step": 3340000 }, { "epoch": 26.72, "learning_rate": 3.6638e-05, "loss": 8.7534, "step": 3340500 }, { "epoch": 26.73, "learning_rate": 3.6636000000000006e-05, "loss": 8.7344, "step": 3341000 }, { "epoch": 26.73, "learning_rate": 3.6634e-05, "loss": 8.7411, "step": 3341500 }, { "epoch": 26.74, "learning_rate": 3.6632e-05, "loss": 8.7409, "step": 3342000 }, { "epoch": 26.74, "learning_rate": 3.663e-05, "loss": 8.747, "step": 3342500 }, { "epoch": 26.74, "learning_rate": 3.6628e-05, "loss": 8.7471, "step": 3343000 }, { "epoch": 26.75, "learning_rate": 3.6626e-05, "loss": 8.7288, "step": 3343500 }, { "epoch": 26.75, "learning_rate": 3.6624e-05, "loss": 8.7448, "step": 3344000 }, { "epoch": 26.76, "learning_rate": 3.6622e-05, "loss": 8.756, "step": 3344500 }, { "epoch": 26.76, "learning_rate": 3.6620000000000005e-05, "loss": 8.7403, "step": 3345000 }, { "epoch": 26.76, "learning_rate": 3.6618e-05, "loss": 8.7491, "step": 3345500 }, { "epoch": 26.77, "learning_rate": 3.6616e-05, "loss": 8.7632, "step": 3346000 }, { "epoch": 26.77, "learning_rate": 3.6614000000000006e-05, "loss": 8.7346, "step": 3346500 }, { "epoch": 26.78, "learning_rate": 3.6612e-05, "loss": 8.747, "step": 3347000 }, { "epoch": 26.78, "learning_rate": 3.661e-05, "loss": 8.7485, "step": 3347500 }, { "epoch": 26.78, "learning_rate": 3.6608000000000006e-05, "loss": 8.7489, "step": 3348000 }, { "epoch": 26.79, "learning_rate": 3.6606e-05, "loss": 8.7516, "step": 3348500 }, { "epoch": 26.79, "learning_rate": 3.6604e-05, "loss": 8.7719, "step": 3349000 }, { "epoch": 26.8, "learning_rate": 3.6602000000000006e-05, "loss": 8.7448, "step": 3349500 }, { "epoch": 26.8, "learning_rate": 3.66e-05, "loss": 8.735, "step": 3350000 }, { "epoch": 26.8, "learning_rate": 3.6598000000000004e-05, "loss": 8.7366, "step": 3350500 }, { "epoch": 26.81, "learning_rate": 3.6596e-05, "loss": 8.7554, "step": 3351000 }, { "epoch": 26.81, "learning_rate": 3.6594e-05, "loss": 8.7446, "step": 3351500 }, { "epoch": 26.82, "learning_rate": 3.6592000000000005e-05, "loss": 8.7487, "step": 3352000 }, { "epoch": 26.82, "learning_rate": 3.659e-05, "loss": 8.7463, "step": 3352500 }, { "epoch": 26.82, "learning_rate": 3.6588e-05, "loss": 8.7321, "step": 3353000 }, { "epoch": 26.83, "learning_rate": 3.6586000000000005e-05, "loss": 8.7416, "step": 3353500 }, { "epoch": 26.83, "learning_rate": 3.6584e-05, "loss": 8.7454, "step": 3354000 }, { "epoch": 26.84, "learning_rate": 3.6581999999999996e-05, "loss": 8.7446, "step": 3354500 }, { "epoch": 26.84, "learning_rate": 3.6580000000000006e-05, "loss": 8.7529, "step": 3355000 }, { "epoch": 26.84, "learning_rate": 3.6578e-05, "loss": 8.7503, "step": 3355500 }, { "epoch": 26.85, "learning_rate": 3.6576e-05, "loss": 8.7443, "step": 3356000 }, { "epoch": 26.85, "learning_rate": 3.6574000000000006e-05, "loss": 8.7458, "step": 3356500 }, { "epoch": 26.86, "learning_rate": 3.6572e-05, "loss": 8.7354, "step": 3357000 }, { "epoch": 26.86, "learning_rate": 3.6570000000000004e-05, "loss": 8.7369, "step": 3357500 }, { "epoch": 26.86, "learning_rate": 3.6568e-05, "loss": 8.7677, "step": 3358000 }, { "epoch": 26.87, "learning_rate": 3.6566e-05, "loss": 8.7527, "step": 3358500 }, { "epoch": 26.87, "learning_rate": 3.6564000000000004e-05, "loss": 8.7542, "step": 3359000 }, { "epoch": 26.88, "learning_rate": 3.6562e-05, "loss": 8.74, "step": 3359500 }, { "epoch": 26.88, "learning_rate": 3.656e-05, "loss": 8.7533, "step": 3360000 }, { "epoch": 26.88, "learning_rate": 3.6558000000000005e-05, "loss": 8.7443, "step": 3360500 }, { "epoch": 26.89, "learning_rate": 3.6556e-05, "loss": 8.7478, "step": 3361000 }, { "epoch": 26.89, "learning_rate": 3.6554e-05, "loss": 8.7426, "step": 3361500 }, { "epoch": 26.9, "learning_rate": 3.6552000000000005e-05, "loss": 8.7701, "step": 3362000 }, { "epoch": 26.9, "learning_rate": 3.655e-05, "loss": 8.752, "step": 3362500 }, { "epoch": 26.9, "learning_rate": 3.6548e-05, "loss": 8.7594, "step": 3363000 }, { "epoch": 26.91, "learning_rate": 3.6546000000000006e-05, "loss": 8.7051, "step": 3363500 }, { "epoch": 26.91, "learning_rate": 3.6544e-05, "loss": 8.7306, "step": 3364000 }, { "epoch": 26.92, "learning_rate": 3.6542000000000004e-05, "loss": 8.7512, "step": 3364500 }, { "epoch": 26.92, "learning_rate": 3.654e-05, "loss": 8.7636, "step": 3365000 }, { "epoch": 26.92, "learning_rate": 3.6538e-05, "loss": 8.7567, "step": 3365500 }, { "epoch": 26.93, "learning_rate": 3.6536000000000004e-05, "loss": 8.7385, "step": 3366000 }, { "epoch": 26.93, "learning_rate": 3.6534e-05, "loss": 8.752, "step": 3366500 }, { "epoch": 26.94, "learning_rate": 3.6532e-05, "loss": 8.7622, "step": 3367000 }, { "epoch": 26.94, "learning_rate": 3.6530000000000004e-05, "loss": 8.7313, "step": 3367500 }, { "epoch": 26.94, "learning_rate": 3.6528e-05, "loss": 8.7369, "step": 3368000 }, { "epoch": 26.95, "learning_rate": 3.6526e-05, "loss": 8.7689, "step": 3368500 }, { "epoch": 26.95, "learning_rate": 3.6524000000000005e-05, "loss": 8.7557, "step": 3369000 }, { "epoch": 26.96, "learning_rate": 3.6522e-05, "loss": 8.7636, "step": 3369500 }, { "epoch": 26.96, "learning_rate": 3.652e-05, "loss": 8.7353, "step": 3370000 }, { "epoch": 26.96, "learning_rate": 3.6518000000000005e-05, "loss": 8.7559, "step": 3370500 }, { "epoch": 26.97, "learning_rate": 3.6516e-05, "loss": 8.7357, "step": 3371000 }, { "epoch": 26.97, "learning_rate": 3.6514e-05, "loss": 8.7532, "step": 3371500 }, { "epoch": 26.98, "learning_rate": 3.6512e-05, "loss": 8.7657, "step": 3372000 }, { "epoch": 26.98, "learning_rate": 3.651e-05, "loss": 8.7663, "step": 3372500 }, { "epoch": 26.98, "learning_rate": 3.6508000000000004e-05, "loss": 8.7482, "step": 3373000 }, { "epoch": 26.99, "learning_rate": 3.6506e-05, "loss": 8.7474, "step": 3373500 }, { "epoch": 26.99, "learning_rate": 3.6504e-05, "loss": 8.7341, "step": 3374000 }, { "epoch": 27.0, "learning_rate": 3.6502000000000004e-05, "loss": 8.76, "step": 3374500 }, { "epoch": 27.0, "learning_rate": 3.65e-05, "loss": 8.7437, "step": 3375000 }, { "epoch": 27.0, "learning_rate": 3.6498e-05, "loss": 8.7235, "step": 3375500 }, { "epoch": 27.01, "learning_rate": 3.6496000000000004e-05, "loss": 8.7473, "step": 3376000 }, { "epoch": 27.01, "learning_rate": 3.6494e-05, "loss": 8.7524, "step": 3376500 }, { "epoch": 27.02, "learning_rate": 3.6492e-05, "loss": 8.7229, "step": 3377000 }, { "epoch": 27.02, "learning_rate": 3.6490000000000005e-05, "loss": 8.7679, "step": 3377500 }, { "epoch": 27.02, "learning_rate": 3.6488e-05, "loss": 8.7304, "step": 3378000 }, { "epoch": 27.03, "learning_rate": 3.6486e-05, "loss": 8.7375, "step": 3378500 }, { "epoch": 27.03, "learning_rate": 3.6484e-05, "loss": 8.7758, "step": 3379000 }, { "epoch": 27.04, "learning_rate": 3.6482e-05, "loss": 8.7464, "step": 3379500 }, { "epoch": 27.04, "learning_rate": 3.648e-05, "loss": 8.7566, "step": 3380000 }, { "epoch": 27.04, "learning_rate": 3.6478e-05, "loss": 8.7551, "step": 3380500 }, { "epoch": 27.05, "learning_rate": 3.6476e-05, "loss": 8.7735, "step": 3381000 }, { "epoch": 27.05, "learning_rate": 3.6474000000000003e-05, "loss": 8.7506, "step": 3381500 }, { "epoch": 27.06, "learning_rate": 3.6472e-05, "loss": 8.7245, "step": 3382000 }, { "epoch": 27.06, "learning_rate": 3.647e-05, "loss": 8.7461, "step": 3382500 }, { "epoch": 27.06, "learning_rate": 3.6468000000000004e-05, "loss": 8.7462, "step": 3383000 }, { "epoch": 27.07, "learning_rate": 3.6466e-05, "loss": 8.7377, "step": 3383500 }, { "epoch": 27.07, "learning_rate": 3.6464e-05, "loss": 8.7439, "step": 3384000 }, { "epoch": 27.08, "learning_rate": 3.6462000000000004e-05, "loss": 8.7499, "step": 3384500 }, { "epoch": 27.08, "learning_rate": 3.646e-05, "loss": 8.7617, "step": 3385000 }, { "epoch": 27.08, "learning_rate": 3.6458e-05, "loss": 8.7575, "step": 3385500 }, { "epoch": 27.09, "learning_rate": 3.6456e-05, "loss": 8.762, "step": 3386000 }, { "epoch": 27.09, "learning_rate": 3.6454e-05, "loss": 8.7597, "step": 3386500 }, { "epoch": 27.1, "learning_rate": 3.6452e-05, "loss": 8.7671, "step": 3387000 }, { "epoch": 27.1, "learning_rate": 3.645e-05, "loss": 8.7548, "step": 3387500 }, { "epoch": 27.1, "learning_rate": 3.644800000000001e-05, "loss": 8.7562, "step": 3388000 }, { "epoch": 27.11, "learning_rate": 3.6446e-05, "loss": 8.747, "step": 3388500 }, { "epoch": 27.11, "learning_rate": 3.6444e-05, "loss": 8.7614, "step": 3389000 }, { "epoch": 27.12, "learning_rate": 3.6442e-05, "loss": 8.7423, "step": 3389500 }, { "epoch": 27.12, "learning_rate": 3.6440000000000003e-05, "loss": 8.7454, "step": 3390000 }, { "epoch": 27.12, "learning_rate": 3.6438e-05, "loss": 8.7546, "step": 3390500 }, { "epoch": 27.13, "learning_rate": 3.6436e-05, "loss": 8.757, "step": 3391000 }, { "epoch": 27.13, "learning_rate": 3.6434000000000004e-05, "loss": 8.7468, "step": 3391500 }, { "epoch": 27.14, "learning_rate": 3.6432e-05, "loss": 8.7289, "step": 3392000 }, { "epoch": 27.14, "learning_rate": 3.643e-05, "loss": 8.753, "step": 3392500 }, { "epoch": 27.14, "learning_rate": 3.6428000000000004e-05, "loss": 8.7471, "step": 3393000 }, { "epoch": 27.15, "learning_rate": 3.6426e-05, "loss": 8.7521, "step": 3393500 }, { "epoch": 27.15, "learning_rate": 3.6424e-05, "loss": 8.7563, "step": 3394000 }, { "epoch": 27.16, "learning_rate": 3.6422e-05, "loss": 8.7493, "step": 3394500 }, { "epoch": 27.16, "learning_rate": 3.642000000000001e-05, "loss": 8.7293, "step": 3395000 }, { "epoch": 27.16, "learning_rate": 3.6418e-05, "loss": 8.7659, "step": 3395500 }, { "epoch": 27.17, "learning_rate": 3.6416e-05, "loss": 8.7418, "step": 3396000 }, { "epoch": 27.17, "learning_rate": 3.6414e-05, "loss": 8.7484, "step": 3396500 }, { "epoch": 27.18, "learning_rate": 3.6412e-05, "loss": 8.7489, "step": 3397000 }, { "epoch": 27.18, "learning_rate": 3.641e-05, "loss": 8.7648, "step": 3397500 }, { "epoch": 27.18, "learning_rate": 3.6408e-05, "loss": 8.7473, "step": 3398000 }, { "epoch": 27.19, "learning_rate": 3.6406000000000003e-05, "loss": 8.7575, "step": 3398500 }, { "epoch": 27.19, "learning_rate": 3.6404e-05, "loss": 8.7435, "step": 3399000 }, { "epoch": 27.2, "learning_rate": 3.6402e-05, "loss": 8.7595, "step": 3399500 }, { "epoch": 27.2, "learning_rate": 3.6400000000000004e-05, "loss": 8.7561, "step": 3400000 }, { "epoch": 27.2, "learning_rate": 3.6398000000000006e-05, "loss": 8.7415, "step": 3400500 }, { "epoch": 27.21, "learning_rate": 3.6396e-05, "loss": 8.758, "step": 3401000 }, { "epoch": 27.21, "learning_rate": 3.6394e-05, "loss": 8.7491, "step": 3401500 }, { "epoch": 27.22, "learning_rate": 3.639200000000001e-05, "loss": 8.7604, "step": 3402000 }, { "epoch": 27.22, "learning_rate": 3.639e-05, "loss": 8.7507, "step": 3402500 }, { "epoch": 27.22, "learning_rate": 3.6388e-05, "loss": 8.7315, "step": 3403000 }, { "epoch": 27.23, "learning_rate": 3.6386e-05, "loss": 8.7436, "step": 3403500 }, { "epoch": 27.23, "learning_rate": 3.6384e-05, "loss": 8.7551, "step": 3404000 }, { "epoch": 27.24, "learning_rate": 3.6382e-05, "loss": 8.7487, "step": 3404500 }, { "epoch": 27.24, "learning_rate": 3.638e-05, "loss": 8.7549, "step": 3405000 }, { "epoch": 27.24, "learning_rate": 3.6378e-05, "loss": 8.7577, "step": 3405500 }, { "epoch": 27.25, "learning_rate": 3.6376e-05, "loss": 8.7648, "step": 3406000 }, { "epoch": 27.25, "learning_rate": 3.6374e-05, "loss": 8.7439, "step": 3406500 }, { "epoch": 27.26, "learning_rate": 3.6372000000000003e-05, "loss": 8.7457, "step": 3407000 }, { "epoch": 27.26, "learning_rate": 3.6370000000000006e-05, "loss": 8.7349, "step": 3407500 }, { "epoch": 27.26, "learning_rate": 3.6368e-05, "loss": 8.7692, "step": 3408000 }, { "epoch": 27.27, "learning_rate": 3.6366e-05, "loss": 8.7807, "step": 3408500 }, { "epoch": 27.27, "learning_rate": 3.6364000000000006e-05, "loss": 8.7069, "step": 3409000 }, { "epoch": 27.28, "learning_rate": 3.6362e-05, "loss": 8.7583, "step": 3409500 }, { "epoch": 27.28, "learning_rate": 3.636e-05, "loss": 8.7702, "step": 3410000 }, { "epoch": 27.28, "learning_rate": 3.6358e-05, "loss": 8.7558, "step": 3410500 }, { "epoch": 27.29, "learning_rate": 3.6356e-05, "loss": 8.7518, "step": 3411000 }, { "epoch": 27.29, "learning_rate": 3.6354e-05, "loss": 8.7505, "step": 3411500 }, { "epoch": 27.3, "learning_rate": 3.6352e-05, "loss": 8.7456, "step": 3412000 }, { "epoch": 27.3, "learning_rate": 3.635e-05, "loss": 8.7585, "step": 3412500 }, { "epoch": 27.3, "learning_rate": 3.6348000000000005e-05, "loss": 8.7728, "step": 3413000 }, { "epoch": 27.31, "learning_rate": 3.6346e-05, "loss": 8.759, "step": 3413500 }, { "epoch": 27.31, "learning_rate": 3.6344e-05, "loss": 8.7504, "step": 3414000 }, { "epoch": 27.32, "learning_rate": 3.6342000000000005e-05, "loss": 8.7357, "step": 3414500 }, { "epoch": 27.32, "learning_rate": 3.634e-05, "loss": 8.757, "step": 3415000 }, { "epoch": 27.32, "learning_rate": 3.6338e-05, "loss": 8.7441, "step": 3415500 }, { "epoch": 27.33, "learning_rate": 3.6336000000000006e-05, "loss": 8.7533, "step": 3416000 }, { "epoch": 27.33, "learning_rate": 3.6334e-05, "loss": 8.7544, "step": 3416500 }, { "epoch": 27.34, "learning_rate": 3.6332e-05, "loss": 8.7452, "step": 3417000 }, { "epoch": 27.34, "learning_rate": 3.6330000000000006e-05, "loss": 8.7561, "step": 3417500 }, { "epoch": 27.34, "learning_rate": 3.6328e-05, "loss": 8.7737, "step": 3418000 }, { "epoch": 27.35, "learning_rate": 3.6326000000000004e-05, "loss": 8.7437, "step": 3418500 }, { "epoch": 27.35, "learning_rate": 3.6324e-05, "loss": 8.7554, "step": 3419000 }, { "epoch": 27.36, "learning_rate": 3.6322e-05, "loss": 8.7451, "step": 3419500 }, { "epoch": 27.36, "learning_rate": 3.6320000000000005e-05, "loss": 8.7559, "step": 3420000 }, { "epoch": 27.36, "learning_rate": 3.6318e-05, "loss": 8.7542, "step": 3420500 }, { "epoch": 27.37, "learning_rate": 3.6316e-05, "loss": 8.747, "step": 3421000 }, { "epoch": 27.37, "learning_rate": 3.6314000000000005e-05, "loss": 8.7319, "step": 3421500 }, { "epoch": 27.38, "learning_rate": 3.6312e-05, "loss": 8.7418, "step": 3422000 }, { "epoch": 27.38, "learning_rate": 3.6309999999999996e-05, "loss": 8.7226, "step": 3422500 }, { "epoch": 27.38, "learning_rate": 3.6308000000000005e-05, "loss": 8.7492, "step": 3423000 }, { "epoch": 27.39, "learning_rate": 3.6306e-05, "loss": 8.7439, "step": 3423500 }, { "epoch": 27.39, "learning_rate": 3.6304e-05, "loss": 8.7504, "step": 3424000 }, { "epoch": 27.4, "learning_rate": 3.6302000000000006e-05, "loss": 8.7517, "step": 3424500 }, { "epoch": 27.4, "learning_rate": 3.63e-05, "loss": 8.7547, "step": 3425000 }, { "epoch": 27.4, "learning_rate": 3.6298000000000004e-05, "loss": 8.7444, "step": 3425500 }, { "epoch": 27.41, "learning_rate": 3.6296e-05, "loss": 8.7829, "step": 3426000 }, { "epoch": 27.41, "learning_rate": 3.6294e-05, "loss": 8.7471, "step": 3426500 }, { "epoch": 27.42, "learning_rate": 3.6292000000000004e-05, "loss": 8.7584, "step": 3427000 }, { "epoch": 27.42, "learning_rate": 3.629e-05, "loss": 8.7473, "step": 3427500 }, { "epoch": 27.42, "learning_rate": 3.6288e-05, "loss": 8.7414, "step": 3428000 }, { "epoch": 27.43, "learning_rate": 3.6286000000000005e-05, "loss": 8.7663, "step": 3428500 }, { "epoch": 27.43, "learning_rate": 3.6284e-05, "loss": 8.7499, "step": 3429000 }, { "epoch": 27.44, "learning_rate": 3.6282e-05, "loss": 8.748, "step": 3429500 }, { "epoch": 27.44, "learning_rate": 3.6280000000000005e-05, "loss": 8.7463, "step": 3430000 }, { "epoch": 27.44, "learning_rate": 3.6278e-05, "loss": 8.7557, "step": 3430500 }, { "epoch": 27.45, "learning_rate": 3.6276e-05, "loss": 8.7426, "step": 3431000 }, { "epoch": 27.45, "learning_rate": 3.6274000000000005e-05, "loss": 8.772, "step": 3431500 }, { "epoch": 27.46, "learning_rate": 3.6272e-05, "loss": 8.7633, "step": 3432000 }, { "epoch": 27.46, "learning_rate": 3.6270000000000003e-05, "loss": 8.7609, "step": 3432500 }, { "epoch": 27.46, "learning_rate": 3.6268e-05, "loss": 8.7504, "step": 3433000 }, { "epoch": 27.47, "learning_rate": 3.6266e-05, "loss": 8.7658, "step": 3433500 }, { "epoch": 27.47, "learning_rate": 3.6264000000000004e-05, "loss": 8.7613, "step": 3434000 }, { "epoch": 27.48, "learning_rate": 3.6262e-05, "loss": 8.7506, "step": 3434500 }, { "epoch": 27.48, "learning_rate": 3.626e-05, "loss": 8.7509, "step": 3435000 }, { "epoch": 27.48, "learning_rate": 3.6258000000000004e-05, "loss": 8.7436, "step": 3435500 }, { "epoch": 27.49, "learning_rate": 3.6256e-05, "loss": 8.7459, "step": 3436000 }, { "epoch": 27.49, "learning_rate": 3.6254e-05, "loss": 8.7411, "step": 3436500 }, { "epoch": 27.5, "learning_rate": 3.6252000000000005e-05, "loss": 8.7408, "step": 3437000 }, { "epoch": 27.5, "learning_rate": 3.625e-05, "loss": 8.7559, "step": 3437500 }, { "epoch": 27.5, "learning_rate": 3.6248e-05, "loss": 8.7599, "step": 3438000 }, { "epoch": 27.51, "learning_rate": 3.6246000000000005e-05, "loss": 8.7503, "step": 3438500 }, { "epoch": 27.51, "learning_rate": 3.6244e-05, "loss": 8.7545, "step": 3439000 }, { "epoch": 27.52, "learning_rate": 3.6242e-05, "loss": 8.7537, "step": 3439500 }, { "epoch": 27.52, "learning_rate": 3.624e-05, "loss": 8.761, "step": 3440000 }, { "epoch": 27.52, "learning_rate": 3.6238e-05, "loss": 8.7394, "step": 3440500 }, { "epoch": 27.53, "learning_rate": 3.6236000000000003e-05, "loss": 8.7619, "step": 3441000 }, { "epoch": 27.53, "learning_rate": 3.6234e-05, "loss": 8.7423, "step": 3441500 }, { "epoch": 27.54, "learning_rate": 3.6232e-05, "loss": 8.7675, "step": 3442000 }, { "epoch": 27.54, "learning_rate": 3.6230000000000004e-05, "loss": 8.7325, "step": 3442500 }, { "epoch": 27.54, "learning_rate": 3.6228e-05, "loss": 8.7459, "step": 3443000 }, { "epoch": 27.55, "learning_rate": 3.6226e-05, "loss": 8.7483, "step": 3443500 }, { "epoch": 27.55, "learning_rate": 3.6224000000000004e-05, "loss": 8.7556, "step": 3444000 }, { "epoch": 27.56, "learning_rate": 3.6222e-05, "loss": 8.7547, "step": 3444500 }, { "epoch": 27.56, "learning_rate": 3.622e-05, "loss": 8.7443, "step": 3445000 }, { "epoch": 27.56, "learning_rate": 3.6218000000000005e-05, "loss": 8.7596, "step": 3445500 }, { "epoch": 27.57, "learning_rate": 3.6216e-05, "loss": 8.7249, "step": 3446000 }, { "epoch": 27.57, "learning_rate": 3.6214e-05, "loss": 8.7728, "step": 3446500 }, { "epoch": 27.58, "learning_rate": 3.6212e-05, "loss": 8.7544, "step": 3447000 }, { "epoch": 27.58, "learning_rate": 3.621e-05, "loss": 8.7432, "step": 3447500 }, { "epoch": 27.58, "learning_rate": 3.6208e-05, "loss": 8.768, "step": 3448000 }, { "epoch": 27.59, "learning_rate": 3.6206e-05, "loss": 8.7376, "step": 3448500 }, { "epoch": 27.59, "learning_rate": 3.6204e-05, "loss": 8.7369, "step": 3449000 }, { "epoch": 27.6, "learning_rate": 3.6202e-05, "loss": 8.7513, "step": 3449500 }, { "epoch": 27.6, "learning_rate": 3.62e-05, "loss": 8.7434, "step": 3450000 }, { "epoch": 27.6, "learning_rate": 3.6198e-05, "loss": 8.748, "step": 3450500 }, { "epoch": 27.61, "learning_rate": 3.6196000000000004e-05, "loss": 8.7461, "step": 3451000 }, { "epoch": 27.61, "learning_rate": 3.6194e-05, "loss": 8.7557, "step": 3451500 }, { "epoch": 27.62, "learning_rate": 3.6192e-05, "loss": 8.7702, "step": 3452000 }, { "epoch": 27.62, "learning_rate": 3.6190000000000004e-05, "loss": 8.7494, "step": 3452500 }, { "epoch": 27.62, "learning_rate": 3.6188e-05, "loss": 8.7397, "step": 3453000 }, { "epoch": 27.63, "learning_rate": 3.6186e-05, "loss": 8.7606, "step": 3453500 }, { "epoch": 27.63, "learning_rate": 3.6184000000000005e-05, "loss": 8.7393, "step": 3454000 }, { "epoch": 27.64, "learning_rate": 3.6182e-05, "loss": 8.7431, "step": 3454500 }, { "epoch": 27.64, "learning_rate": 3.618e-05, "loss": 8.7537, "step": 3455000 }, { "epoch": 27.64, "learning_rate": 3.6178e-05, "loss": 8.755, "step": 3455500 }, { "epoch": 27.65, "learning_rate": 3.617600000000001e-05, "loss": 8.7365, "step": 3456000 }, { "epoch": 27.65, "learning_rate": 3.6174e-05, "loss": 8.7463, "step": 3456500 }, { "epoch": 27.66, "learning_rate": 3.6172e-05, "loss": 8.7304, "step": 3457000 }, { "epoch": 27.66, "learning_rate": 3.617e-05, "loss": 8.7449, "step": 3457500 }, { "epoch": 27.66, "learning_rate": 3.6168e-05, "loss": 8.7373, "step": 3458000 }, { "epoch": 27.67, "learning_rate": 3.6166e-05, "loss": 8.7469, "step": 3458500 }, { "epoch": 27.67, "learning_rate": 3.6164e-05, "loss": 8.7624, "step": 3459000 }, { "epoch": 27.68, "learning_rate": 3.6162000000000004e-05, "loss": 8.7505, "step": 3459500 }, { "epoch": 27.68, "learning_rate": 3.616e-05, "loss": 8.7405, "step": 3460000 }, { "epoch": 27.68, "learning_rate": 3.6158e-05, "loss": 8.7319, "step": 3460500 }, { "epoch": 27.69, "learning_rate": 3.6156000000000004e-05, "loss": 8.7672, "step": 3461000 }, { "epoch": 27.69, "learning_rate": 3.6154e-05, "loss": 8.7561, "step": 3461500 }, { "epoch": 27.7, "learning_rate": 3.6152e-05, "loss": 8.7405, "step": 3462000 }, { "epoch": 27.7, "learning_rate": 3.615e-05, "loss": 8.7573, "step": 3462500 }, { "epoch": 27.7, "learning_rate": 3.614800000000001e-05, "loss": 8.7824, "step": 3463000 }, { "epoch": 27.71, "learning_rate": 3.6146e-05, "loss": 8.7631, "step": 3463500 }, { "epoch": 27.71, "learning_rate": 3.6144e-05, "loss": 8.7667, "step": 3464000 }, { "epoch": 27.72, "learning_rate": 3.6142e-05, "loss": 8.7538, "step": 3464500 }, { "epoch": 27.72, "learning_rate": 3.614e-05, "loss": 8.7488, "step": 3465000 }, { "epoch": 27.72, "learning_rate": 3.6138e-05, "loss": 8.761, "step": 3465500 }, { "epoch": 27.73, "learning_rate": 3.6136e-05, "loss": 8.7394, "step": 3466000 }, { "epoch": 27.73, "learning_rate": 3.6134e-05, "loss": 8.7517, "step": 3466500 }, { "epoch": 27.74, "learning_rate": 3.6132e-05, "loss": 8.7497, "step": 3467000 }, { "epoch": 27.74, "learning_rate": 3.613e-05, "loss": 8.7439, "step": 3467500 }, { "epoch": 27.74, "learning_rate": 3.6128000000000004e-05, "loss": 8.7549, "step": 3468000 }, { "epoch": 27.75, "learning_rate": 3.6126000000000006e-05, "loss": 8.7478, "step": 3468500 }, { "epoch": 27.75, "learning_rate": 3.6124e-05, "loss": 8.7708, "step": 3469000 }, { "epoch": 27.76, "learning_rate": 3.6122e-05, "loss": 8.7644, "step": 3469500 }, { "epoch": 27.76, "learning_rate": 3.6120000000000007e-05, "loss": 8.7442, "step": 3470000 }, { "epoch": 27.76, "learning_rate": 3.6118e-05, "loss": 8.7431, "step": 3470500 }, { "epoch": 27.77, "learning_rate": 3.6116e-05, "loss": 8.7595, "step": 3471000 }, { "epoch": 27.77, "learning_rate": 3.6114e-05, "loss": 8.753, "step": 3471500 }, { "epoch": 27.78, "learning_rate": 3.6112e-05, "loss": 8.7563, "step": 3472000 }, { "epoch": 27.78, "learning_rate": 3.611e-05, "loss": 8.7645, "step": 3472500 }, { "epoch": 27.78, "learning_rate": 3.6108e-05, "loss": 8.7241, "step": 3473000 }, { "epoch": 27.79, "learning_rate": 3.6106e-05, "loss": 8.7492, "step": 3473500 }, { "epoch": 27.79, "learning_rate": 3.6104000000000005e-05, "loss": 8.7677, "step": 3474000 }, { "epoch": 27.8, "learning_rate": 3.6102e-05, "loss": 8.7477, "step": 3474500 }, { "epoch": 27.8, "learning_rate": 3.61e-05, "loss": 8.7667, "step": 3475000 }, { "epoch": 27.8, "learning_rate": 3.6098000000000006e-05, "loss": 8.7397, "step": 3475500 }, { "epoch": 27.81, "learning_rate": 3.6096e-05, "loss": 8.7423, "step": 3476000 }, { "epoch": 27.81, "learning_rate": 3.6094e-05, "loss": 8.7572, "step": 3476500 }, { "epoch": 27.82, "learning_rate": 3.6092000000000006e-05, "loss": 8.7393, "step": 3477000 }, { "epoch": 27.82, "learning_rate": 3.609e-05, "loss": 8.7506, "step": 3477500 }, { "epoch": 27.82, "learning_rate": 3.6088e-05, "loss": 8.7385, "step": 3478000 }, { "epoch": 27.83, "learning_rate": 3.6086000000000007e-05, "loss": 8.7484, "step": 3478500 }, { "epoch": 27.83, "learning_rate": 3.6084e-05, "loss": 8.755, "step": 3479000 }, { "epoch": 27.84, "learning_rate": 3.6082e-05, "loss": 8.7565, "step": 3479500 }, { "epoch": 27.84, "learning_rate": 3.608e-05, "loss": 8.7647, "step": 3480000 }, { "epoch": 27.84, "learning_rate": 3.6078e-05, "loss": 8.7842, "step": 3480500 }, { "epoch": 27.85, "learning_rate": 3.6076000000000005e-05, "loss": 8.7591, "step": 3481000 }, { "epoch": 27.85, "learning_rate": 3.6074e-05, "loss": 8.752, "step": 3481500 }, { "epoch": 27.86, "learning_rate": 3.6072e-05, "loss": 8.7341, "step": 3482000 }, { "epoch": 27.86, "learning_rate": 3.6070000000000005e-05, "loss": 8.7648, "step": 3482500 }, { "epoch": 27.86, "learning_rate": 3.6068e-05, "loss": 8.7473, "step": 3483000 }, { "epoch": 27.87, "learning_rate": 3.6065999999999997e-05, "loss": 8.73, "step": 3483500 }, { "epoch": 27.87, "learning_rate": 3.6064000000000006e-05, "loss": 8.7434, "step": 3484000 }, { "epoch": 27.88, "learning_rate": 3.6062e-05, "loss": 8.756, "step": 3484500 }, { "epoch": 27.88, "learning_rate": 3.606e-05, "loss": 8.7477, "step": 3485000 }, { "epoch": 27.88, "learning_rate": 3.6058000000000006e-05, "loss": 8.7324, "step": 3485500 }, { "epoch": 27.89, "learning_rate": 3.6056e-05, "loss": 8.7351, "step": 3486000 }, { "epoch": 27.89, "learning_rate": 3.6054000000000004e-05, "loss": 8.7461, "step": 3486500 }, { "epoch": 27.9, "learning_rate": 3.6052e-05, "loss": 8.7396, "step": 3487000 }, { "epoch": 27.9, "learning_rate": 3.605e-05, "loss": 8.7407, "step": 3487500 }, { "epoch": 27.9, "learning_rate": 3.6048000000000005e-05, "loss": 8.764, "step": 3488000 }, { "epoch": 27.91, "learning_rate": 3.6046e-05, "loss": 8.7348, "step": 3488500 }, { "epoch": 27.91, "learning_rate": 3.6044e-05, "loss": 8.7332, "step": 3489000 }, { "epoch": 27.92, "learning_rate": 3.6042000000000005e-05, "loss": 8.7446, "step": 3489500 }, { "epoch": 27.92, "learning_rate": 3.604e-05, "loss": 8.7779, "step": 3490000 }, { "epoch": 27.92, "learning_rate": 3.6038e-05, "loss": 8.7639, "step": 3490500 }, { "epoch": 27.93, "learning_rate": 3.6036000000000005e-05, "loss": 8.7333, "step": 3491000 }, { "epoch": 27.93, "learning_rate": 3.6034e-05, "loss": 8.7408, "step": 3491500 }, { "epoch": 27.94, "learning_rate": 3.6031999999999997e-05, "loss": 8.7614, "step": 3492000 }, { "epoch": 27.94, "learning_rate": 3.6030000000000006e-05, "loss": 8.763, "step": 3492500 }, { "epoch": 27.94, "learning_rate": 3.6028e-05, "loss": 8.7615, "step": 3493000 }, { "epoch": 27.95, "learning_rate": 3.6026000000000004e-05, "loss": 8.7425, "step": 3493500 }, { "epoch": 27.95, "learning_rate": 3.6024e-05, "loss": 8.7586, "step": 3494000 }, { "epoch": 27.96, "learning_rate": 3.6022e-05, "loss": 8.7295, "step": 3494500 }, { "epoch": 27.96, "learning_rate": 3.6020000000000004e-05, "loss": 8.7525, "step": 3495000 }, { "epoch": 27.96, "learning_rate": 3.6018e-05, "loss": 8.7667, "step": 3495500 }, { "epoch": 27.97, "learning_rate": 3.6016e-05, "loss": 8.739, "step": 3496000 }, { "epoch": 27.97, "learning_rate": 3.6014000000000005e-05, "loss": 8.7305, "step": 3496500 }, { "epoch": 27.98, "learning_rate": 3.6012e-05, "loss": 8.7262, "step": 3497000 }, { "epoch": 27.98, "learning_rate": 3.601e-05, "loss": 8.7577, "step": 3497500 }, { "epoch": 27.98, "learning_rate": 3.6008000000000005e-05, "loss": 8.7668, "step": 3498000 }, { "epoch": 27.99, "learning_rate": 3.6006e-05, "loss": 8.7442, "step": 3498500 }, { "epoch": 27.99, "learning_rate": 3.6004e-05, "loss": 8.7497, "step": 3499000 }, { "epoch": 28.0, "learning_rate": 3.6002000000000005e-05, "loss": 8.7376, "step": 3499500 }, { "epoch": 28.0, "learning_rate": 3.6e-05, "loss": 8.7469, "step": 3500000 }, { "epoch": 28.0, "learning_rate": 3.5998e-05, "loss": 8.7672, "step": 3500500 }, { "epoch": 28.01, "learning_rate": 3.5996e-05, "loss": 8.7589, "step": 3501000 }, { "epoch": 28.01, "learning_rate": 3.5994e-05, "loss": 8.7548, "step": 3501500 }, { "epoch": 28.02, "learning_rate": 3.5992000000000004e-05, "loss": 8.7574, "step": 3502000 }, { "epoch": 28.02, "learning_rate": 3.599e-05, "loss": 8.7643, "step": 3502500 }, { "epoch": 28.02, "learning_rate": 3.5988e-05, "loss": 8.7606, "step": 3503000 }, { "epoch": 28.03, "learning_rate": 3.5986000000000004e-05, "loss": 8.7514, "step": 3503500 }, { "epoch": 28.03, "learning_rate": 3.5984e-05, "loss": 8.7335, "step": 3504000 }, { "epoch": 28.04, "learning_rate": 3.5982e-05, "loss": 8.7575, "step": 3504500 }, { "epoch": 28.04, "learning_rate": 3.5980000000000004e-05, "loss": 8.7514, "step": 3505000 }, { "epoch": 28.04, "learning_rate": 3.5978e-05, "loss": 8.7455, "step": 3505500 }, { "epoch": 28.05, "learning_rate": 3.5976e-05, "loss": 8.75, "step": 3506000 }, { "epoch": 28.05, "learning_rate": 3.5974000000000005e-05, "loss": 8.754, "step": 3506500 }, { "epoch": 28.06, "learning_rate": 3.5972e-05, "loss": 8.7306, "step": 3507000 }, { "epoch": 28.06, "learning_rate": 3.597e-05, "loss": 8.7541, "step": 3507500 }, { "epoch": 28.06, "learning_rate": 3.5968e-05, "loss": 8.7923, "step": 3508000 }, { "epoch": 28.07, "learning_rate": 3.5966e-05, "loss": 8.7522, "step": 3508500 }, { "epoch": 28.07, "learning_rate": 3.5964e-05, "loss": 8.7597, "step": 3509000 }, { "epoch": 28.08, "learning_rate": 3.5962e-05, "loss": 8.7708, "step": 3509500 }, { "epoch": 28.08, "learning_rate": 3.596e-05, "loss": 8.7447, "step": 3510000 }, { "epoch": 28.08, "learning_rate": 3.5958000000000004e-05, "loss": 8.7605, "step": 3510500 }, { "epoch": 28.09, "learning_rate": 3.5956e-05, "loss": 8.7417, "step": 3511000 }, { "epoch": 28.09, "learning_rate": 3.5954e-05, "loss": 8.7375, "step": 3511500 }, { "epoch": 28.1, "learning_rate": 3.5952000000000004e-05, "loss": 8.7592, "step": 3512000 }, { "epoch": 28.1, "learning_rate": 3.595e-05, "loss": 8.7413, "step": 3512500 }, { "epoch": 28.1, "learning_rate": 3.5948e-05, "loss": 8.7595, "step": 3513000 }, { "epoch": 28.11, "learning_rate": 3.5946000000000004e-05, "loss": 8.7621, "step": 3513500 }, { "epoch": 28.11, "learning_rate": 3.5944e-05, "loss": 8.7323, "step": 3514000 }, { "epoch": 28.12, "learning_rate": 3.5942e-05, "loss": 8.7628, "step": 3514500 }, { "epoch": 28.12, "learning_rate": 3.594e-05, "loss": 8.7482, "step": 3515000 }, { "epoch": 28.12, "learning_rate": 3.5938e-05, "loss": 8.7546, "step": 3515500 }, { "epoch": 28.13, "learning_rate": 3.5936e-05, "loss": 8.7641, "step": 3516000 }, { "epoch": 28.13, "learning_rate": 3.5934e-05, "loss": 8.7556, "step": 3516500 }, { "epoch": 28.14, "learning_rate": 3.5932e-05, "loss": 8.7449, "step": 3517000 }, { "epoch": 28.14, "learning_rate": 3.593e-05, "loss": 8.7457, "step": 3517500 }, { "epoch": 28.14, "learning_rate": 3.5928e-05, "loss": 8.751, "step": 3518000 }, { "epoch": 28.15, "learning_rate": 3.5926e-05, "loss": 8.7523, "step": 3518500 }, { "epoch": 28.15, "learning_rate": 3.5924000000000004e-05, "loss": 8.7356, "step": 3519000 }, { "epoch": 28.16, "learning_rate": 3.5922e-05, "loss": 8.7614, "step": 3519500 }, { "epoch": 28.16, "learning_rate": 3.592e-05, "loss": 8.7467, "step": 3520000 }, { "epoch": 28.16, "learning_rate": 3.5918000000000004e-05, "loss": 8.7446, "step": 3520500 }, { "epoch": 28.17, "learning_rate": 3.5916e-05, "loss": 8.7438, "step": 3521000 }, { "epoch": 28.17, "learning_rate": 3.5914e-05, "loss": 8.7503, "step": 3521500 }, { "epoch": 28.18, "learning_rate": 3.5912000000000004e-05, "loss": 8.7377, "step": 3522000 }, { "epoch": 28.18, "learning_rate": 3.591e-05, "loss": 8.7449, "step": 3522500 }, { "epoch": 28.18, "learning_rate": 3.5908e-05, "loss": 8.774, "step": 3523000 }, { "epoch": 28.19, "learning_rate": 3.5906e-05, "loss": 8.7556, "step": 3523500 }, { "epoch": 28.19, "learning_rate": 3.590400000000001e-05, "loss": 8.7568, "step": 3524000 }, { "epoch": 28.2, "learning_rate": 3.5902e-05, "loss": 8.7494, "step": 3524500 }, { "epoch": 28.2, "learning_rate": 3.59e-05, "loss": 8.7505, "step": 3525000 }, { "epoch": 28.2, "learning_rate": 3.5898e-05, "loss": 8.7562, "step": 3525500 }, { "epoch": 28.21, "learning_rate": 3.5896e-05, "loss": 8.7466, "step": 3526000 }, { "epoch": 28.21, "learning_rate": 3.5894e-05, "loss": 8.7464, "step": 3526500 }, { "epoch": 28.22, "learning_rate": 3.5892e-05, "loss": 8.7639, "step": 3527000 }, { "epoch": 28.22, "learning_rate": 3.5890000000000004e-05, "loss": 8.7543, "step": 3527500 }, { "epoch": 28.22, "learning_rate": 3.5888e-05, "loss": 8.7383, "step": 3528000 }, { "epoch": 28.23, "learning_rate": 3.5886e-05, "loss": 8.7453, "step": 3528500 }, { "epoch": 28.23, "learning_rate": 3.5884000000000004e-05, "loss": 8.7681, "step": 3529000 }, { "epoch": 28.24, "learning_rate": 3.5882e-05, "loss": 8.7499, "step": 3529500 }, { "epoch": 28.24, "learning_rate": 3.588e-05, "loss": 8.7381, "step": 3530000 }, { "epoch": 28.24, "learning_rate": 3.5878e-05, "loss": 8.7291, "step": 3530500 }, { "epoch": 28.25, "learning_rate": 3.587600000000001e-05, "loss": 8.7347, "step": 3531000 }, { "epoch": 28.25, "learning_rate": 3.5874e-05, "loss": 8.7335, "step": 3531500 }, { "epoch": 28.26, "learning_rate": 3.5872e-05, "loss": 8.7518, "step": 3532000 }, { "epoch": 28.26, "learning_rate": 3.587e-05, "loss": 8.7443, "step": 3532500 }, { "epoch": 28.26, "learning_rate": 3.5868e-05, "loss": 8.7739, "step": 3533000 }, { "epoch": 28.27, "learning_rate": 3.5866e-05, "loss": 8.7625, "step": 3533500 }, { "epoch": 28.27, "learning_rate": 3.5864e-05, "loss": 8.755, "step": 3534000 }, { "epoch": 28.28, "learning_rate": 3.5862e-05, "loss": 8.751, "step": 3534500 }, { "epoch": 28.28, "learning_rate": 3.586e-05, "loss": 8.7524, "step": 3535000 }, { "epoch": 28.28, "learning_rate": 3.5858e-05, "loss": 8.7627, "step": 3535500 }, { "epoch": 28.29, "learning_rate": 3.5856000000000004e-05, "loss": 8.7593, "step": 3536000 }, { "epoch": 28.29, "learning_rate": 3.5854000000000006e-05, "loss": 8.7552, "step": 3536500 }, { "epoch": 28.3, "learning_rate": 3.5852e-05, "loss": 8.7367, "step": 3537000 }, { "epoch": 28.3, "learning_rate": 3.585e-05, "loss": 8.7592, "step": 3537500 }, { "epoch": 28.3, "learning_rate": 3.5848000000000006e-05, "loss": 8.7482, "step": 3538000 }, { "epoch": 28.31, "learning_rate": 3.5846e-05, "loss": 8.7357, "step": 3538500 }, { "epoch": 28.31, "learning_rate": 3.5844e-05, "loss": 8.7537, "step": 3539000 }, { "epoch": 28.32, "learning_rate": 3.5842e-05, "loss": 8.7557, "step": 3539500 }, { "epoch": 28.32, "learning_rate": 3.584e-05, "loss": 8.7588, "step": 3540000 }, { "epoch": 28.32, "learning_rate": 3.5838e-05, "loss": 8.7518, "step": 3540500 }, { "epoch": 28.33, "learning_rate": 3.5836e-05, "loss": 8.753, "step": 3541000 }, { "epoch": 28.33, "learning_rate": 3.5834e-05, "loss": 8.7439, "step": 3541500 }, { "epoch": 28.34, "learning_rate": 3.5832000000000005e-05, "loss": 8.756, "step": 3542000 }, { "epoch": 28.34, "learning_rate": 3.583e-05, "loss": 8.7586, "step": 3542500 }, { "epoch": 28.34, "learning_rate": 3.5828e-05, "loss": 8.7609, "step": 3543000 }, { "epoch": 28.35, "learning_rate": 3.5826000000000006e-05, "loss": 8.7454, "step": 3543500 }, { "epoch": 28.35, "learning_rate": 3.5824e-05, "loss": 8.7501, "step": 3544000 }, { "epoch": 28.36, "learning_rate": 3.5822e-05, "loss": 8.7687, "step": 3544500 }, { "epoch": 28.36, "learning_rate": 3.5820000000000006e-05, "loss": 8.7685, "step": 3545000 }, { "epoch": 28.36, "learning_rate": 3.5818e-05, "loss": 8.7692, "step": 3545500 }, { "epoch": 28.37, "learning_rate": 3.5816e-05, "loss": 8.7392, "step": 3546000 }, { "epoch": 28.37, "learning_rate": 3.5814000000000006e-05, "loss": 8.7607, "step": 3546500 }, { "epoch": 28.38, "learning_rate": 3.5812e-05, "loss": 8.7346, "step": 3547000 }, { "epoch": 28.38, "learning_rate": 3.581e-05, "loss": 8.7406, "step": 3547500 }, { "epoch": 28.38, "learning_rate": 3.5808e-05, "loss": 8.7442, "step": 3548000 }, { "epoch": 28.39, "learning_rate": 3.5806e-05, "loss": 8.7551, "step": 3548500 }, { "epoch": 28.39, "learning_rate": 3.5804000000000005e-05, "loss": 8.7572, "step": 3549000 }, { "epoch": 28.4, "learning_rate": 3.5802e-05, "loss": 8.7522, "step": 3549500 }, { "epoch": 28.4, "learning_rate": 3.58e-05, "loss": 8.761, "step": 3550000 }, { "epoch": 28.4, "learning_rate": 3.5798000000000005e-05, "loss": 8.7586, "step": 3550500 }, { "epoch": 28.41, "learning_rate": 3.5796e-05, "loss": 8.7717, "step": 3551000 }, { "epoch": 28.41, "learning_rate": 3.5793999999999996e-05, "loss": 8.7658, "step": 3551500 }, { "epoch": 28.42, "learning_rate": 3.5792000000000006e-05, "loss": 8.7369, "step": 3552000 }, { "epoch": 28.42, "learning_rate": 3.579e-05, "loss": 8.7374, "step": 3552500 }, { "epoch": 28.42, "learning_rate": 3.5788e-05, "loss": 8.7741, "step": 3553000 }, { "epoch": 28.43, "learning_rate": 3.5786000000000006e-05, "loss": 8.7614, "step": 3553500 }, { "epoch": 28.43, "learning_rate": 3.5784e-05, "loss": 8.7422, "step": 3554000 }, { "epoch": 28.44, "learning_rate": 3.5782000000000004e-05, "loss": 8.7526, "step": 3554500 }, { "epoch": 28.44, "learning_rate": 3.578e-05, "loss": 8.7572, "step": 3555000 }, { "epoch": 28.44, "learning_rate": 3.5778e-05, "loss": 8.7375, "step": 3555500 }, { "epoch": 28.45, "learning_rate": 3.5776000000000004e-05, "loss": 8.7402, "step": 3556000 }, { "epoch": 28.45, "learning_rate": 3.5774e-05, "loss": 8.7421, "step": 3556500 }, { "epoch": 28.46, "learning_rate": 3.5772e-05, "loss": 8.7566, "step": 3557000 }, { "epoch": 28.46, "learning_rate": 3.5770000000000005e-05, "loss": 8.7189, "step": 3557500 }, { "epoch": 28.46, "learning_rate": 3.5768e-05, "loss": 8.7182, "step": 3558000 }, { "epoch": 28.47, "learning_rate": 3.5766e-05, "loss": 8.7343, "step": 3558500 }, { "epoch": 28.47, "learning_rate": 3.5764000000000005e-05, "loss": 8.7426, "step": 3559000 }, { "epoch": 28.48, "learning_rate": 3.5762e-05, "loss": 8.7563, "step": 3559500 }, { "epoch": 28.48, "learning_rate": 3.5759999999999996e-05, "loss": 8.738, "step": 3560000 }, { "epoch": 28.48, "learning_rate": 3.5758000000000006e-05, "loss": 8.7377, "step": 3560500 }, { "epoch": 28.49, "learning_rate": 3.5756e-05, "loss": 8.7544, "step": 3561000 }, { "epoch": 28.49, "learning_rate": 3.5754000000000004e-05, "loss": 8.7392, "step": 3561500 }, { "epoch": 28.5, "learning_rate": 3.5752e-05, "loss": 8.7398, "step": 3562000 }, { "epoch": 28.5, "learning_rate": 3.575e-05, "loss": 8.7598, "step": 3562500 }, { "epoch": 28.5, "learning_rate": 3.5748000000000004e-05, "loss": 8.7536, "step": 3563000 }, { "epoch": 28.51, "learning_rate": 3.5746e-05, "loss": 8.7343, "step": 3563500 }, { "epoch": 28.51, "learning_rate": 3.5744e-05, "loss": 8.7683, "step": 3564000 }, { "epoch": 28.52, "learning_rate": 3.5742000000000004e-05, "loss": 8.7231, "step": 3564500 }, { "epoch": 28.52, "learning_rate": 3.574e-05, "loss": 8.7634, "step": 3565000 }, { "epoch": 28.52, "learning_rate": 3.5738e-05, "loss": 8.7504, "step": 3565500 }, { "epoch": 28.53, "learning_rate": 3.5736000000000005e-05, "loss": 8.7272, "step": 3566000 }, { "epoch": 28.53, "learning_rate": 3.5734e-05, "loss": 8.747, "step": 3566500 }, { "epoch": 28.54, "learning_rate": 3.5732e-05, "loss": 8.7616, "step": 3567000 }, { "epoch": 28.54, "learning_rate": 3.5730000000000005e-05, "loss": 8.7339, "step": 3567500 }, { "epoch": 28.54, "learning_rate": 3.5728e-05, "loss": 8.7544, "step": 3568000 }, { "epoch": 28.55, "learning_rate": 3.5726e-05, "loss": 8.7517, "step": 3568500 }, { "epoch": 28.55, "learning_rate": 3.5724e-05, "loss": 8.7562, "step": 3569000 }, { "epoch": 28.56, "learning_rate": 3.5722e-05, "loss": 8.742, "step": 3569500 }, { "epoch": 28.56, "learning_rate": 3.5720000000000004e-05, "loss": 8.753, "step": 3570000 }, { "epoch": 28.56, "learning_rate": 3.5718e-05, "loss": 8.7743, "step": 3570500 }, { "epoch": 28.57, "learning_rate": 3.5716e-05, "loss": 8.748, "step": 3571000 }, { "epoch": 28.57, "learning_rate": 3.5714000000000004e-05, "loss": 8.7648, "step": 3571500 }, { "epoch": 28.58, "learning_rate": 3.5712e-05, "loss": 8.7581, "step": 3572000 }, { "epoch": 28.58, "learning_rate": 3.571e-05, "loss": 8.7571, "step": 3572500 }, { "epoch": 28.58, "learning_rate": 3.5708000000000004e-05, "loss": 8.7347, "step": 3573000 }, { "epoch": 28.59, "learning_rate": 3.5706e-05, "loss": 8.7575, "step": 3573500 }, { "epoch": 28.59, "learning_rate": 3.5704e-05, "loss": 8.7599, "step": 3574000 }, { "epoch": 28.6, "learning_rate": 3.5702000000000005e-05, "loss": 8.766, "step": 3574500 }, { "epoch": 28.6, "learning_rate": 3.57e-05, "loss": 8.749, "step": 3575000 }, { "epoch": 28.6, "learning_rate": 3.5698e-05, "loss": 8.7464, "step": 3575500 }, { "epoch": 28.61, "learning_rate": 3.5696e-05, "loss": 8.771, "step": 3576000 }, { "epoch": 28.61, "learning_rate": 3.5694e-05, "loss": 8.7487, "step": 3576500 }, { "epoch": 28.62, "learning_rate": 3.5692e-05, "loss": 8.7506, "step": 3577000 }, { "epoch": 28.62, "learning_rate": 3.569e-05, "loss": 8.7473, "step": 3577500 }, { "epoch": 28.62, "learning_rate": 3.5688e-05, "loss": 8.7501, "step": 3578000 }, { "epoch": 28.63, "learning_rate": 3.5686000000000004e-05, "loss": 8.757, "step": 3578500 }, { "epoch": 28.63, "learning_rate": 3.5684e-05, "loss": 8.7464, "step": 3579000 }, { "epoch": 28.64, "learning_rate": 3.5682e-05, "loss": 8.7456, "step": 3579500 }, { "epoch": 28.64, "learning_rate": 3.5680000000000004e-05, "loss": 8.7586, "step": 3580000 }, { "epoch": 28.64, "learning_rate": 3.5678e-05, "loss": 8.7911, "step": 3580500 }, { "epoch": 28.65, "learning_rate": 3.5676e-05, "loss": 8.7284, "step": 3581000 }, { "epoch": 28.65, "learning_rate": 3.5674000000000004e-05, "loss": 8.7395, "step": 3581500 }, { "epoch": 28.66, "learning_rate": 3.5672e-05, "loss": 8.7501, "step": 3582000 }, { "epoch": 28.66, "learning_rate": 3.567e-05, "loss": 8.7684, "step": 3582500 }, { "epoch": 28.66, "learning_rate": 3.5668000000000005e-05, "loss": 8.7644, "step": 3583000 }, { "epoch": 28.67, "learning_rate": 3.5666e-05, "loss": 8.7587, "step": 3583500 }, { "epoch": 28.67, "learning_rate": 3.5664e-05, "loss": 8.7604, "step": 3584000 }, { "epoch": 28.68, "learning_rate": 3.5662e-05, "loss": 8.7334, "step": 3584500 }, { "epoch": 28.68, "learning_rate": 3.566e-05, "loss": 8.7644, "step": 3585000 }, { "epoch": 28.68, "learning_rate": 3.5658e-05, "loss": 8.7427, "step": 3585500 }, { "epoch": 28.69, "learning_rate": 3.5656e-05, "loss": 8.7252, "step": 3586000 }, { "epoch": 28.69, "learning_rate": 3.5654e-05, "loss": 8.7612, "step": 3586500 }, { "epoch": 28.7, "learning_rate": 3.5652000000000004e-05, "loss": 8.7744, "step": 3587000 }, { "epoch": 28.7, "learning_rate": 3.565e-05, "loss": 8.763, "step": 3587500 }, { "epoch": 28.7, "learning_rate": 3.5648e-05, "loss": 8.752, "step": 3588000 }, { "epoch": 28.71, "learning_rate": 3.5646000000000004e-05, "loss": 8.7354, "step": 3588500 }, { "epoch": 28.71, "learning_rate": 3.5644e-05, "loss": 8.7355, "step": 3589000 }, { "epoch": 28.72, "learning_rate": 3.5642e-05, "loss": 8.7541, "step": 3589500 }, { "epoch": 28.72, "learning_rate": 3.5640000000000004e-05, "loss": 8.7545, "step": 3590000 }, { "epoch": 28.72, "learning_rate": 3.5638e-05, "loss": 8.7486, "step": 3590500 }, { "epoch": 28.73, "learning_rate": 3.5636e-05, "loss": 8.7698, "step": 3591000 }, { "epoch": 28.73, "learning_rate": 3.5634e-05, "loss": 8.7531, "step": 3591500 }, { "epoch": 28.74, "learning_rate": 3.563200000000001e-05, "loss": 8.749, "step": 3592000 }, { "epoch": 28.74, "learning_rate": 3.563e-05, "loss": 8.735, "step": 3592500 }, { "epoch": 28.74, "learning_rate": 3.5628e-05, "loss": 8.7196, "step": 3593000 }, { "epoch": 28.75, "learning_rate": 3.5626e-05, "loss": 8.7548, "step": 3593500 }, { "epoch": 28.75, "learning_rate": 3.5624e-05, "loss": 8.7527, "step": 3594000 }, { "epoch": 28.76, "learning_rate": 3.5622e-05, "loss": 8.7512, "step": 3594500 }, { "epoch": 28.76, "learning_rate": 3.562e-05, "loss": 8.7363, "step": 3595000 }, { "epoch": 28.76, "learning_rate": 3.5618000000000004e-05, "loss": 8.7849, "step": 3595500 }, { "epoch": 28.77, "learning_rate": 3.5616e-05, "loss": 8.7435, "step": 3596000 }, { "epoch": 28.77, "learning_rate": 3.5614e-05, "loss": 8.7474, "step": 3596500 }, { "epoch": 28.78, "learning_rate": 3.5612000000000004e-05, "loss": 8.7599, "step": 3597000 }, { "epoch": 28.78, "learning_rate": 3.5610000000000006e-05, "loss": 8.7577, "step": 3597500 }, { "epoch": 28.78, "learning_rate": 3.5608e-05, "loss": 8.7537, "step": 3598000 }, { "epoch": 28.79, "learning_rate": 3.5606e-05, "loss": 8.7663, "step": 3598500 }, { "epoch": 28.79, "learning_rate": 3.560400000000001e-05, "loss": 8.7743, "step": 3599000 }, { "epoch": 28.8, "learning_rate": 3.5602e-05, "loss": 8.7532, "step": 3599500 }, { "epoch": 28.8, "learning_rate": 3.56e-05, "loss": 8.7381, "step": 3600000 }, { "epoch": 28.8, "learning_rate": 3.5598e-05, "loss": 8.7343, "step": 3600500 }, { "epoch": 28.81, "learning_rate": 3.5596e-05, "loss": 8.7415, "step": 3601000 }, { "epoch": 28.81, "learning_rate": 3.5594e-05, "loss": 8.7492, "step": 3601500 }, { "epoch": 28.82, "learning_rate": 3.5592e-05, "loss": 8.7377, "step": 3602000 }, { "epoch": 28.82, "learning_rate": 3.559e-05, "loss": 8.7408, "step": 3602500 }, { "epoch": 28.82, "learning_rate": 3.5588e-05, "loss": 8.7335, "step": 3603000 }, { "epoch": 28.83, "learning_rate": 3.5586e-05, "loss": 8.737, "step": 3603500 }, { "epoch": 28.83, "learning_rate": 3.5584000000000004e-05, "loss": 8.7477, "step": 3604000 }, { "epoch": 28.84, "learning_rate": 3.5582000000000006e-05, "loss": 8.7353, "step": 3604500 }, { "epoch": 28.84, "learning_rate": 3.558e-05, "loss": 8.7313, "step": 3605000 }, { "epoch": 28.84, "learning_rate": 3.5578e-05, "loss": 8.728, "step": 3605500 }, { "epoch": 28.85, "learning_rate": 3.5576000000000006e-05, "loss": 8.7555, "step": 3606000 }, { "epoch": 28.85, "learning_rate": 3.5574e-05, "loss": 8.7597, "step": 3606500 }, { "epoch": 28.86, "learning_rate": 3.5572e-05, "loss": 8.7433, "step": 3607000 }, { "epoch": 28.86, "learning_rate": 3.557e-05, "loss": 8.7205, "step": 3607500 }, { "epoch": 28.86, "learning_rate": 3.5568e-05, "loss": 8.745, "step": 3608000 }, { "epoch": 28.87, "learning_rate": 3.5566e-05, "loss": 8.7459, "step": 3608500 }, { "epoch": 28.87, "learning_rate": 3.5564e-05, "loss": 8.7601, "step": 3609000 }, { "epoch": 28.88, "learning_rate": 3.5562e-05, "loss": 8.7508, "step": 3609500 }, { "epoch": 28.88, "learning_rate": 3.5560000000000005e-05, "loss": 8.7371, "step": 3610000 }, { "epoch": 28.88, "learning_rate": 3.5558e-05, "loss": 8.7268, "step": 3610500 }, { "epoch": 28.89, "learning_rate": 3.5556e-05, "loss": 8.748, "step": 3611000 }, { "epoch": 28.89, "learning_rate": 3.5554000000000006e-05, "loss": 8.7678, "step": 3611500 }, { "epoch": 28.9, "learning_rate": 3.5552e-05, "loss": 8.7448, "step": 3612000 }, { "epoch": 28.9, "learning_rate": 3.555e-05, "loss": 8.7555, "step": 3612500 }, { "epoch": 28.9, "learning_rate": 3.5548000000000006e-05, "loss": 8.7494, "step": 3613000 }, { "epoch": 28.91, "learning_rate": 3.5546e-05, "loss": 8.749, "step": 3613500 }, { "epoch": 28.91, "learning_rate": 3.5544e-05, "loss": 8.7602, "step": 3614000 }, { "epoch": 28.92, "learning_rate": 3.5542000000000006e-05, "loss": 8.7521, "step": 3614500 }, { "epoch": 28.92, "learning_rate": 3.554e-05, "loss": 8.7623, "step": 3615000 }, { "epoch": 28.92, "learning_rate": 3.5538e-05, "loss": 8.7518, "step": 3615500 }, { "epoch": 28.93, "learning_rate": 3.5536e-05, "loss": 8.7479, "step": 3616000 }, { "epoch": 28.93, "learning_rate": 3.5534e-05, "loss": 8.7524, "step": 3616500 }, { "epoch": 28.94, "learning_rate": 3.5532000000000005e-05, "loss": 8.7477, "step": 3617000 }, { "epoch": 28.94, "learning_rate": 3.553e-05, "loss": 8.7455, "step": 3617500 }, { "epoch": 28.94, "learning_rate": 3.5528e-05, "loss": 8.7573, "step": 3618000 }, { "epoch": 28.95, "learning_rate": 3.5526000000000005e-05, "loss": 8.7521, "step": 3618500 }, { "epoch": 28.95, "learning_rate": 3.5524e-05, "loss": 8.749, "step": 3619000 }, { "epoch": 28.96, "learning_rate": 3.5521999999999996e-05, "loss": 8.7564, "step": 3619500 }, { "epoch": 28.96, "learning_rate": 3.5520000000000006e-05, "loss": 8.7584, "step": 3620000 }, { "epoch": 28.96, "learning_rate": 3.5518e-05, "loss": 8.7355, "step": 3620500 }, { "epoch": 28.97, "learning_rate": 3.5516e-05, "loss": 8.743, "step": 3621000 }, { "epoch": 28.97, "learning_rate": 3.5514000000000006e-05, "loss": 8.7469, "step": 3621500 }, { "epoch": 28.98, "learning_rate": 3.5512e-05, "loss": 8.766, "step": 3622000 }, { "epoch": 28.98, "learning_rate": 3.5510000000000004e-05, "loss": 8.7616, "step": 3622500 }, { "epoch": 28.98, "learning_rate": 3.5508e-05, "loss": 8.7499, "step": 3623000 }, { "epoch": 28.99, "learning_rate": 3.5506e-05, "loss": 8.7613, "step": 3623500 }, { "epoch": 28.99, "learning_rate": 3.5504000000000004e-05, "loss": 8.7515, "step": 3624000 }, { "epoch": 29.0, "learning_rate": 3.5502e-05, "loss": 8.7602, "step": 3624500 }, { "epoch": 29.0, "learning_rate": 3.55e-05, "loss": 8.7322, "step": 3625000 }, { "epoch": 29.0, "learning_rate": 3.5498000000000005e-05, "loss": 8.7516, "step": 3625500 }, { "epoch": 29.01, "learning_rate": 3.5496e-05, "loss": 8.7568, "step": 3626000 }, { "epoch": 29.01, "learning_rate": 3.5494e-05, "loss": 8.7481, "step": 3626500 }, { "epoch": 29.02, "learning_rate": 3.5492000000000005e-05, "loss": 8.743, "step": 3627000 }, { "epoch": 29.02, "learning_rate": 3.549e-05, "loss": 8.7349, "step": 3627500 }, { "epoch": 29.02, "learning_rate": 3.5487999999999996e-05, "loss": 8.7553, "step": 3628000 }, { "epoch": 29.03, "learning_rate": 3.5486000000000005e-05, "loss": 8.7508, "step": 3628500 }, { "epoch": 29.03, "learning_rate": 3.5484e-05, "loss": 8.7438, "step": 3629000 }, { "epoch": 29.04, "learning_rate": 3.5482000000000003e-05, "loss": 8.7378, "step": 3629500 }, { "epoch": 29.04, "learning_rate": 3.548e-05, "loss": 8.7603, "step": 3630000 }, { "epoch": 29.04, "learning_rate": 3.5478e-05, "loss": 8.7342, "step": 3630500 }, { "epoch": 29.05, "learning_rate": 3.5476000000000004e-05, "loss": 8.7441, "step": 3631000 }, { "epoch": 29.05, "learning_rate": 3.5474e-05, "loss": 8.7609, "step": 3631500 }, { "epoch": 29.06, "learning_rate": 3.5472e-05, "loss": 8.7302, "step": 3632000 }, { "epoch": 29.06, "learning_rate": 3.5470000000000004e-05, "loss": 8.7514, "step": 3632500 }, { "epoch": 29.06, "learning_rate": 3.5468e-05, "loss": 8.747, "step": 3633000 }, { "epoch": 29.07, "learning_rate": 3.5466e-05, "loss": 8.7527, "step": 3633500 }, { "epoch": 29.07, "learning_rate": 3.5464000000000005e-05, "loss": 8.7546, "step": 3634000 }, { "epoch": 29.08, "learning_rate": 3.5462e-05, "loss": 8.7501, "step": 3634500 }, { "epoch": 29.08, "learning_rate": 3.546e-05, "loss": 8.7528, "step": 3635000 }, { "epoch": 29.08, "learning_rate": 3.5458000000000005e-05, "loss": 8.7615, "step": 3635500 }, { "epoch": 29.09, "learning_rate": 3.5456e-05, "loss": 8.7785, "step": 3636000 }, { "epoch": 29.09, "learning_rate": 3.5454e-05, "loss": 8.7588, "step": 3636500 }, { "epoch": 29.1, "learning_rate": 3.5452e-05, "loss": 8.7554, "step": 3637000 }, { "epoch": 29.1, "learning_rate": 3.545e-05, "loss": 8.7399, "step": 3637500 }, { "epoch": 29.1, "learning_rate": 3.5448000000000003e-05, "loss": 8.759, "step": 3638000 }, { "epoch": 29.11, "learning_rate": 3.5446e-05, "loss": 8.7445, "step": 3638500 }, { "epoch": 29.11, "learning_rate": 3.5444e-05, "loss": 8.7408, "step": 3639000 }, { "epoch": 29.12, "learning_rate": 3.5442000000000004e-05, "loss": 8.7652, "step": 3639500 }, { "epoch": 29.12, "learning_rate": 3.544e-05, "loss": 8.7587, "step": 3640000 }, { "epoch": 29.12, "learning_rate": 3.5438e-05, "loss": 8.7437, "step": 3640500 }, { "epoch": 29.13, "learning_rate": 3.5436000000000004e-05, "loss": 8.7451, "step": 3641000 }, { "epoch": 29.13, "learning_rate": 3.5434e-05, "loss": 8.7399, "step": 3641500 }, { "epoch": 29.14, "learning_rate": 3.5432e-05, "loss": 8.7574, "step": 3642000 }, { "epoch": 29.14, "learning_rate": 3.5430000000000005e-05, "loss": 8.7514, "step": 3642500 }, { "epoch": 29.14, "learning_rate": 3.5428e-05, "loss": 8.7448, "step": 3643000 }, { "epoch": 29.15, "learning_rate": 3.5426e-05, "loss": 8.7303, "step": 3643500 }, { "epoch": 29.15, "learning_rate": 3.5424e-05, "loss": 8.7415, "step": 3644000 }, { "epoch": 29.16, "learning_rate": 3.5422e-05, "loss": 8.7593, "step": 3644500 }, { "epoch": 29.16, "learning_rate": 3.542e-05, "loss": 8.7472, "step": 3645000 }, { "epoch": 29.16, "learning_rate": 3.5418e-05, "loss": 8.742, "step": 3645500 }, { "epoch": 29.17, "learning_rate": 3.5416e-05, "loss": 8.7476, "step": 3646000 }, { "epoch": 29.17, "learning_rate": 3.5414000000000003e-05, "loss": 8.7463, "step": 3646500 }, { "epoch": 29.18, "learning_rate": 3.5412e-05, "loss": 8.7403, "step": 3647000 }, { "epoch": 29.18, "learning_rate": 3.541e-05, "loss": 8.7236, "step": 3647500 }, { "epoch": 29.18, "learning_rate": 3.5408000000000004e-05, "loss": 8.75, "step": 3648000 }, { "epoch": 29.19, "learning_rate": 3.5406e-05, "loss": 8.7573, "step": 3648500 }, { "epoch": 29.19, "learning_rate": 3.5404e-05, "loss": 8.7454, "step": 3649000 }, { "epoch": 29.2, "learning_rate": 3.5402000000000004e-05, "loss": 8.7518, "step": 3649500 }, { "epoch": 29.2, "learning_rate": 3.54e-05, "loss": 8.7573, "step": 3650000 }, { "epoch": 29.2, "learning_rate": 3.5398e-05, "loss": 8.752, "step": 3650500 }, { "epoch": 29.21, "learning_rate": 3.5396000000000005e-05, "loss": 8.7295, "step": 3651000 }, { "epoch": 29.21, "learning_rate": 3.5394e-05, "loss": 8.752, "step": 3651500 }, { "epoch": 29.22, "learning_rate": 3.5392e-05, "loss": 8.7418, "step": 3652000 }, { "epoch": 29.22, "learning_rate": 3.539e-05, "loss": 8.7536, "step": 3652500 }, { "epoch": 29.22, "learning_rate": 3.5388e-05, "loss": 8.7454, "step": 3653000 }, { "epoch": 29.23, "learning_rate": 3.5386e-05, "loss": 8.734, "step": 3653500 }, { "epoch": 29.23, "learning_rate": 3.5384e-05, "loss": 8.7385, "step": 3654000 }, { "epoch": 29.24, "learning_rate": 3.5382e-05, "loss": 8.7565, "step": 3654500 }, { "epoch": 29.24, "learning_rate": 3.5380000000000003e-05, "loss": 8.7509, "step": 3655000 }, { "epoch": 29.24, "learning_rate": 3.5378e-05, "loss": 8.7614, "step": 3655500 }, { "epoch": 29.25, "learning_rate": 3.5376e-05, "loss": 8.7504, "step": 3656000 }, { "epoch": 29.25, "learning_rate": 3.5374000000000004e-05, "loss": 8.7307, "step": 3656500 }, { "epoch": 29.26, "learning_rate": 3.5372e-05, "loss": 8.7657, "step": 3657000 }, { "epoch": 29.26, "learning_rate": 3.537e-05, "loss": 8.7303, "step": 3657500 }, { "epoch": 29.26, "learning_rate": 3.5368000000000004e-05, "loss": 8.7611, "step": 3658000 }, { "epoch": 29.27, "learning_rate": 3.5366e-05, "loss": 8.7531, "step": 3658500 }, { "epoch": 29.27, "learning_rate": 3.5364e-05, "loss": 8.7556, "step": 3659000 }, { "epoch": 29.28, "learning_rate": 3.5362e-05, "loss": 8.7395, "step": 3659500 }, { "epoch": 29.28, "learning_rate": 3.536000000000001e-05, "loss": 8.7479, "step": 3660000 }, { "epoch": 29.28, "learning_rate": 3.5358e-05, "loss": 8.7182, "step": 3660500 }, { "epoch": 29.29, "learning_rate": 3.5356e-05, "loss": 8.7679, "step": 3661000 }, { "epoch": 29.29, "learning_rate": 3.5354e-05, "loss": 8.7526, "step": 3661500 }, { "epoch": 29.3, "learning_rate": 3.5352e-05, "loss": 8.7603, "step": 3662000 }, { "epoch": 29.3, "learning_rate": 3.535e-05, "loss": 8.7449, "step": 3662500 }, { "epoch": 29.3, "learning_rate": 3.5348e-05, "loss": 8.747, "step": 3663000 }, { "epoch": 29.31, "learning_rate": 3.5346000000000003e-05, "loss": 8.7625, "step": 3663500 }, { "epoch": 29.31, "learning_rate": 3.5344e-05, "loss": 8.7396, "step": 3664000 }, { "epoch": 29.32, "learning_rate": 3.5342e-05, "loss": 8.7421, "step": 3664500 }, { "epoch": 29.32, "learning_rate": 3.5340000000000004e-05, "loss": 8.7611, "step": 3665000 }, { "epoch": 29.32, "learning_rate": 3.5338000000000006e-05, "loss": 8.7503, "step": 3665500 }, { "epoch": 29.33, "learning_rate": 3.5336e-05, "loss": 8.7643, "step": 3666000 }, { "epoch": 29.33, "learning_rate": 3.5334e-05, "loss": 8.7342, "step": 3666500 }, { "epoch": 29.34, "learning_rate": 3.533200000000001e-05, "loss": 8.7548, "step": 3667000 }, { "epoch": 29.34, "learning_rate": 3.533e-05, "loss": 8.7473, "step": 3667500 }, { "epoch": 29.34, "learning_rate": 3.5328e-05, "loss": 8.7608, "step": 3668000 }, { "epoch": 29.35, "learning_rate": 3.5326e-05, "loss": 8.7213, "step": 3668500 }, { "epoch": 29.35, "learning_rate": 3.5324e-05, "loss": 8.7545, "step": 3669000 }, { "epoch": 29.36, "learning_rate": 3.5322e-05, "loss": 8.7725, "step": 3669500 }, { "epoch": 29.36, "learning_rate": 3.532e-05, "loss": 8.7631, "step": 3670000 }, { "epoch": 29.36, "learning_rate": 3.5318e-05, "loss": 8.7446, "step": 3670500 }, { "epoch": 29.37, "learning_rate": 3.5316e-05, "loss": 8.7514, "step": 3671000 }, { "epoch": 29.37, "learning_rate": 3.5314e-05, "loss": 8.7641, "step": 3671500 }, { "epoch": 29.38, "learning_rate": 3.5312000000000003e-05, "loss": 8.7468, "step": 3672000 }, { "epoch": 29.38, "learning_rate": 3.5310000000000006e-05, "loss": 8.7591, "step": 3672500 }, { "epoch": 29.38, "learning_rate": 3.5308e-05, "loss": 8.7508, "step": 3673000 }, { "epoch": 29.39, "learning_rate": 3.5306e-05, "loss": 8.7594, "step": 3673500 }, { "epoch": 29.39, "learning_rate": 3.5304000000000006e-05, "loss": 8.7629, "step": 3674000 }, { "epoch": 29.4, "learning_rate": 3.5302e-05, "loss": 8.755, "step": 3674500 }, { "epoch": 29.4, "learning_rate": 3.53e-05, "loss": 8.7588, "step": 3675000 }, { "epoch": 29.4, "learning_rate": 3.529800000000001e-05, "loss": 8.7437, "step": 3675500 }, { "epoch": 29.41, "learning_rate": 3.5296e-05, "loss": 8.7637, "step": 3676000 }, { "epoch": 29.41, "learning_rate": 3.5294e-05, "loss": 8.7506, "step": 3676500 }, { "epoch": 29.42, "learning_rate": 3.5292e-05, "loss": 8.7389, "step": 3677000 }, { "epoch": 29.42, "learning_rate": 3.529e-05, "loss": 8.7348, "step": 3677500 }, { "epoch": 29.42, "learning_rate": 3.5288000000000005e-05, "loss": 8.7528, "step": 3678000 }, { "epoch": 29.43, "learning_rate": 3.5286e-05, "loss": 8.7449, "step": 3678500 }, { "epoch": 29.43, "learning_rate": 3.5284e-05, "loss": 8.7344, "step": 3679000 }, { "epoch": 29.44, "learning_rate": 3.5282000000000005e-05, "loss": 8.76, "step": 3679500 }, { "epoch": 29.44, "learning_rate": 3.528e-05, "loss": 8.7537, "step": 3680000 }, { "epoch": 29.44, "learning_rate": 3.5278e-05, "loss": 8.7381, "step": 3680500 }, { "epoch": 29.45, "learning_rate": 3.5276000000000006e-05, "loss": 8.747, "step": 3681000 }, { "epoch": 29.45, "learning_rate": 3.5274e-05, "loss": 8.7685, "step": 3681500 }, { "epoch": 29.46, "learning_rate": 3.5272e-05, "loss": 8.7383, "step": 3682000 }, { "epoch": 29.46, "learning_rate": 3.5270000000000006e-05, "loss": 8.7306, "step": 3682500 }, { "epoch": 29.46, "learning_rate": 3.5268e-05, "loss": 8.7588, "step": 3683000 }, { "epoch": 29.47, "learning_rate": 3.5266e-05, "loss": 8.7652, "step": 3683500 }, { "epoch": 29.47, "learning_rate": 3.5264e-05, "loss": 8.7317, "step": 3684000 }, { "epoch": 29.48, "learning_rate": 3.5262e-05, "loss": 8.7531, "step": 3684500 }, { "epoch": 29.48, "learning_rate": 3.5260000000000005e-05, "loss": 8.7446, "step": 3685000 }, { "epoch": 29.48, "learning_rate": 3.5258e-05, "loss": 8.7448, "step": 3685500 }, { "epoch": 29.49, "learning_rate": 3.5256e-05, "loss": 8.7472, "step": 3686000 }, { "epoch": 29.49, "learning_rate": 3.5254000000000005e-05, "loss": 8.7577, "step": 3686500 }, { "epoch": 29.5, "learning_rate": 3.5252e-05, "loss": 8.7481, "step": 3687000 }, { "epoch": 29.5, "learning_rate": 3.525e-05, "loss": 8.7469, "step": 3687500 }, { "epoch": 29.5, "learning_rate": 3.5248000000000005e-05, "loss": 8.7498, "step": 3688000 }, { "epoch": 29.51, "learning_rate": 3.5246e-05, "loss": 8.7466, "step": 3688500 }, { "epoch": 29.51, "learning_rate": 3.5244e-05, "loss": 8.7552, "step": 3689000 }, { "epoch": 29.52, "learning_rate": 3.5242000000000006e-05, "loss": 8.7585, "step": 3689500 }, { "epoch": 29.52, "learning_rate": 3.524e-05, "loss": 8.7506, "step": 3690000 }, { "epoch": 29.52, "learning_rate": 3.5238000000000004e-05, "loss": 8.735, "step": 3690500 }, { "epoch": 29.53, "learning_rate": 3.5236e-05, "loss": 8.7647, "step": 3691000 }, { "epoch": 29.53, "learning_rate": 3.5234e-05, "loss": 8.7416, "step": 3691500 }, { "epoch": 29.54, "learning_rate": 3.5232000000000004e-05, "loss": 8.7725, "step": 3692000 }, { "epoch": 29.54, "learning_rate": 3.523e-05, "loss": 8.7679, "step": 3692500 }, { "epoch": 29.54, "learning_rate": 3.5228e-05, "loss": 8.755, "step": 3693000 }, { "epoch": 29.55, "learning_rate": 3.5226000000000005e-05, "loss": 8.7565, "step": 3693500 }, { "epoch": 29.55, "learning_rate": 3.5224e-05, "loss": 8.7513, "step": 3694000 }, { "epoch": 29.56, "learning_rate": 3.5222e-05, "loss": 8.7536, "step": 3694500 }, { "epoch": 29.56, "learning_rate": 3.5220000000000005e-05, "loss": 8.7523, "step": 3695000 }, { "epoch": 29.56, "learning_rate": 3.5218e-05, "loss": 8.7396, "step": 3695500 }, { "epoch": 29.57, "learning_rate": 3.5215999999999996e-05, "loss": 8.7631, "step": 3696000 }, { "epoch": 29.57, "learning_rate": 3.5214000000000005e-05, "loss": 8.7341, "step": 3696500 }, { "epoch": 29.58, "learning_rate": 3.5212e-05, "loss": 8.7365, "step": 3697000 }, { "epoch": 29.58, "learning_rate": 3.5210000000000003e-05, "loss": 8.7637, "step": 3697500 }, { "epoch": 29.58, "learning_rate": 3.5208e-05, "loss": 8.7515, "step": 3698000 }, { "epoch": 29.59, "learning_rate": 3.5206e-05, "loss": 8.7639, "step": 3698500 }, { "epoch": 29.59, "learning_rate": 3.5204000000000004e-05, "loss": 8.7529, "step": 3699000 }, { "epoch": 29.6, "learning_rate": 3.5202e-05, "loss": 8.7586, "step": 3699500 }, { "epoch": 29.6, "learning_rate": 3.52e-05, "loss": 8.7494, "step": 3700000 }, { "epoch": 29.6, "learning_rate": 3.5198000000000004e-05, "loss": 8.7558, "step": 3700500 }, { "epoch": 29.61, "learning_rate": 3.5196e-05, "loss": 8.7636, "step": 3701000 }, { "epoch": 29.61, "learning_rate": 3.5194e-05, "loss": 8.7421, "step": 3701500 }, { "epoch": 29.62, "learning_rate": 3.5192000000000005e-05, "loss": 8.7461, "step": 3702000 }, { "epoch": 29.62, "learning_rate": 3.519e-05, "loss": 8.7502, "step": 3702500 }, { "epoch": 29.62, "learning_rate": 3.5188e-05, "loss": 8.7375, "step": 3703000 }, { "epoch": 29.63, "learning_rate": 3.5186000000000005e-05, "loss": 8.7496, "step": 3703500 }, { "epoch": 29.63, "learning_rate": 3.5184e-05, "loss": 8.7587, "step": 3704000 }, { "epoch": 29.64, "learning_rate": 3.5182e-05, "loss": 8.7621, "step": 3704500 }, { "epoch": 29.64, "learning_rate": 3.518e-05, "loss": 8.7452, "step": 3705000 }, { "epoch": 29.64, "learning_rate": 3.5178e-05, "loss": 8.7321, "step": 3705500 }, { "epoch": 29.65, "learning_rate": 3.5176e-05, "loss": 8.7569, "step": 3706000 }, { "epoch": 29.65, "learning_rate": 3.5174e-05, "loss": 8.7658, "step": 3706500 }, { "epoch": 29.66, "learning_rate": 3.5172e-05, "loss": 8.7396, "step": 3707000 }, { "epoch": 29.66, "learning_rate": 3.5170000000000004e-05, "loss": 8.7479, "step": 3707500 }, { "epoch": 29.66, "learning_rate": 3.5168e-05, "loss": 8.738, "step": 3708000 }, { "epoch": 29.67, "learning_rate": 3.5166e-05, "loss": 8.7479, "step": 3708500 }, { "epoch": 29.67, "learning_rate": 3.5164000000000004e-05, "loss": 8.7434, "step": 3709000 }, { "epoch": 29.68, "learning_rate": 3.5162e-05, "loss": 8.7661, "step": 3709500 }, { "epoch": 29.68, "learning_rate": 3.516e-05, "loss": 8.739, "step": 3710000 }, { "epoch": 29.68, "learning_rate": 3.5158000000000005e-05, "loss": 8.7642, "step": 3710500 }, { "epoch": 29.69, "learning_rate": 3.5156e-05, "loss": 8.7505, "step": 3711000 }, { "epoch": 29.69, "learning_rate": 3.5154e-05, "loss": 8.7476, "step": 3711500 }, { "epoch": 29.7, "learning_rate": 3.5152000000000005e-05, "loss": 8.7611, "step": 3712000 }, { "epoch": 29.7, "learning_rate": 3.515e-05, "loss": 8.7253, "step": 3712500 }, { "epoch": 29.7, "learning_rate": 3.5148e-05, "loss": 8.7544, "step": 3713000 }, { "epoch": 29.71, "learning_rate": 3.5146e-05, "loss": 8.7469, "step": 3713500 }, { "epoch": 29.71, "learning_rate": 3.5144e-05, "loss": 8.754, "step": 3714000 }, { "epoch": 29.72, "learning_rate": 3.5142e-05, "loss": 8.7486, "step": 3714500 }, { "epoch": 29.72, "learning_rate": 3.514e-05, "loss": 8.7437, "step": 3715000 }, { "epoch": 29.72, "learning_rate": 3.5138e-05, "loss": 8.7413, "step": 3715500 }, { "epoch": 29.73, "learning_rate": 3.5136000000000004e-05, "loss": 8.7473, "step": 3716000 }, { "epoch": 29.73, "learning_rate": 3.5134e-05, "loss": 8.7404, "step": 3716500 }, { "epoch": 29.74, "learning_rate": 3.5132e-05, "loss": 8.7531, "step": 3717000 }, { "epoch": 29.74, "learning_rate": 3.5130000000000004e-05, "loss": 8.739, "step": 3717500 }, { "epoch": 29.74, "learning_rate": 3.5128e-05, "loss": 8.7302, "step": 3718000 }, { "epoch": 29.75, "learning_rate": 3.5126e-05, "loss": 8.7688, "step": 3718500 }, { "epoch": 29.75, "learning_rate": 3.5124000000000005e-05, "loss": 8.7317, "step": 3719000 }, { "epoch": 29.76, "learning_rate": 3.5122e-05, "loss": 8.7506, "step": 3719500 }, { "epoch": 29.76, "learning_rate": 3.512e-05, "loss": 8.7421, "step": 3720000 }, { "epoch": 29.76, "learning_rate": 3.5118e-05, "loss": 8.7517, "step": 3720500 }, { "epoch": 29.77, "learning_rate": 3.511600000000001e-05, "loss": 8.7456, "step": 3721000 }, { "epoch": 29.77, "learning_rate": 3.5114e-05, "loss": 8.7577, "step": 3721500 }, { "epoch": 29.78, "learning_rate": 3.5112e-05, "loss": 8.7532, "step": 3722000 }, { "epoch": 29.78, "learning_rate": 3.511e-05, "loss": 8.7648, "step": 3722500 }, { "epoch": 29.78, "learning_rate": 3.5108e-05, "loss": 8.7394, "step": 3723000 }, { "epoch": 29.79, "learning_rate": 3.5106e-05, "loss": 8.7549, "step": 3723500 }, { "epoch": 29.79, "learning_rate": 3.5104e-05, "loss": 8.7423, "step": 3724000 }, { "epoch": 29.8, "learning_rate": 3.5102000000000004e-05, "loss": 8.7504, "step": 3724500 }, { "epoch": 29.8, "learning_rate": 3.51e-05, "loss": 8.7425, "step": 3725000 }, { "epoch": 29.8, "learning_rate": 3.5098e-05, "loss": 8.7656, "step": 3725500 }, { "epoch": 29.81, "learning_rate": 3.5096000000000004e-05, "loss": 8.7403, "step": 3726000 }, { "epoch": 29.81, "learning_rate": 3.5094e-05, "loss": 8.7559, "step": 3726500 }, { "epoch": 29.82, "learning_rate": 3.5092e-05, "loss": 8.7609, "step": 3727000 }, { "epoch": 29.82, "learning_rate": 3.509e-05, "loss": 8.7218, "step": 3727500 }, { "epoch": 29.82, "learning_rate": 3.508800000000001e-05, "loss": 8.7559, "step": 3728000 }, { "epoch": 29.83, "learning_rate": 3.5086e-05, "loss": 8.7432, "step": 3728500 }, { "epoch": 29.83, "learning_rate": 3.5084e-05, "loss": 8.7391, "step": 3729000 }, { "epoch": 29.84, "learning_rate": 3.5082e-05, "loss": 8.76, "step": 3729500 }, { "epoch": 29.84, "learning_rate": 3.508e-05, "loss": 8.7575, "step": 3730000 }, { "epoch": 29.84, "learning_rate": 3.5078e-05, "loss": 8.7398, "step": 3730500 }, { "epoch": 29.85, "learning_rate": 3.5076e-05, "loss": 8.756, "step": 3731000 }, { "epoch": 29.85, "learning_rate": 3.5074e-05, "loss": 8.7567, "step": 3731500 }, { "epoch": 29.86, "learning_rate": 3.5072e-05, "loss": 8.7245, "step": 3732000 }, { "epoch": 29.86, "learning_rate": 3.507e-05, "loss": 8.7388, "step": 3732500 }, { "epoch": 29.86, "learning_rate": 3.5068000000000004e-05, "loss": 8.7645, "step": 3733000 }, { "epoch": 29.87, "learning_rate": 3.5066000000000006e-05, "loss": 8.7478, "step": 3733500 }, { "epoch": 29.87, "learning_rate": 3.5064e-05, "loss": 8.7338, "step": 3734000 }, { "epoch": 29.88, "learning_rate": 3.5062e-05, "loss": 8.7428, "step": 3734500 }, { "epoch": 29.88, "learning_rate": 3.5060000000000007e-05, "loss": 8.7491, "step": 3735000 }, { "epoch": 29.88, "learning_rate": 3.5058e-05, "loss": 8.7521, "step": 3735500 }, { "epoch": 29.89, "learning_rate": 3.5056e-05, "loss": 8.7287, "step": 3736000 }, { "epoch": 29.89, "learning_rate": 3.5054e-05, "loss": 8.7488, "step": 3736500 }, { "epoch": 29.9, "learning_rate": 3.5052e-05, "loss": 8.7296, "step": 3737000 }, { "epoch": 29.9, "learning_rate": 3.505e-05, "loss": 8.7487, "step": 3737500 }, { "epoch": 29.9, "learning_rate": 3.5048e-05, "loss": 8.7497, "step": 3738000 }, { "epoch": 29.91, "learning_rate": 3.5046e-05, "loss": 8.7512, "step": 3738500 }, { "epoch": 29.91, "learning_rate": 3.5044e-05, "loss": 8.7363, "step": 3739000 }, { "epoch": 29.92, "learning_rate": 3.5042e-05, "loss": 8.7414, "step": 3739500 }, { "epoch": 29.92, "learning_rate": 3.504e-05, "loss": 8.7396, "step": 3740000 }, { "epoch": 29.92, "learning_rate": 3.5038000000000006e-05, "loss": 8.7533, "step": 3740500 }, { "epoch": 29.93, "learning_rate": 3.5036e-05, "loss": 8.7555, "step": 3741000 }, { "epoch": 29.93, "learning_rate": 3.5034e-05, "loss": 8.7303, "step": 3741500 }, { "epoch": 29.94, "learning_rate": 3.5032000000000006e-05, "loss": 8.7283, "step": 3742000 }, { "epoch": 29.94, "learning_rate": 3.503e-05, "loss": 8.7411, "step": 3742500 }, { "epoch": 29.94, "learning_rate": 3.5028e-05, "loss": 8.7739, "step": 3743000 }, { "epoch": 29.95, "learning_rate": 3.5026000000000007e-05, "loss": 8.7601, "step": 3743500 }, { "epoch": 29.95, "learning_rate": 3.5024e-05, "loss": 8.7667, "step": 3744000 }, { "epoch": 29.96, "learning_rate": 3.5022e-05, "loss": 8.7285, "step": 3744500 }, { "epoch": 29.96, "learning_rate": 3.502e-05, "loss": 8.7485, "step": 3745000 }, { "epoch": 29.96, "learning_rate": 3.5018e-05, "loss": 8.7413, "step": 3745500 }, { "epoch": 29.97, "learning_rate": 3.5016000000000005e-05, "loss": 8.751, "step": 3746000 }, { "epoch": 29.97, "learning_rate": 3.5014e-05, "loss": 8.746, "step": 3746500 }, { "epoch": 29.98, "learning_rate": 3.5012e-05, "loss": 8.7437, "step": 3747000 }, { "epoch": 29.98, "learning_rate": 3.5010000000000005e-05, "loss": 8.7527, "step": 3747500 }, { "epoch": 29.98, "learning_rate": 3.5008e-05, "loss": 8.7705, "step": 3748000 }, { "epoch": 29.99, "learning_rate": 3.5005999999999997e-05, "loss": 8.749, "step": 3748500 }, { "epoch": 29.99, "learning_rate": 3.5004000000000006e-05, "loss": 8.7547, "step": 3749000 }, { "epoch": 30.0, "learning_rate": 3.5002e-05, "loss": 8.7389, "step": 3749500 }, { "epoch": 30.0, "learning_rate": 3.5e-05, "loss": 8.7517, "step": 3750000 }, { "epoch": 30.0, "learning_rate": 3.4998000000000006e-05, "loss": 8.7528, "step": 3750500 }, { "epoch": 30.01, "learning_rate": 3.4996e-05, "loss": 8.7381, "step": 3751000 }, { "epoch": 30.01, "learning_rate": 3.4994e-05, "loss": 8.7413, "step": 3751500 }, { "epoch": 30.02, "learning_rate": 3.4992e-05, "loss": 8.7113, "step": 3752000 }, { "epoch": 30.02, "learning_rate": 3.499e-05, "loss": 8.7561, "step": 3752500 }, { "epoch": 30.02, "learning_rate": 3.4988000000000005e-05, "loss": 8.7536, "step": 3753000 }, { "epoch": 30.03, "learning_rate": 3.4986e-05, "loss": 8.7486, "step": 3753500 }, { "epoch": 30.03, "learning_rate": 3.4984e-05, "loss": 8.7359, "step": 3754000 }, { "epoch": 30.04, "learning_rate": 3.4982000000000005e-05, "loss": 8.7302, "step": 3754500 }, { "epoch": 30.04, "learning_rate": 3.498e-05, "loss": 8.7427, "step": 3755000 }, { "epoch": 30.04, "learning_rate": 3.4978e-05, "loss": 8.7532, "step": 3755500 }, { "epoch": 30.05, "learning_rate": 3.4976000000000005e-05, "loss": 8.7318, "step": 3756000 }, { "epoch": 30.05, "learning_rate": 3.4974e-05, "loss": 8.7613, "step": 3756500 }, { "epoch": 30.06, "learning_rate": 3.4971999999999997e-05, "loss": 8.738, "step": 3757000 }, { "epoch": 30.06, "learning_rate": 3.4970000000000006e-05, "loss": 8.763, "step": 3757500 }, { "epoch": 30.06, "learning_rate": 3.4968e-05, "loss": 8.772, "step": 3758000 }, { "epoch": 30.07, "learning_rate": 3.4966000000000004e-05, "loss": 8.7358, "step": 3758500 }, { "epoch": 30.07, "learning_rate": 3.4964e-05, "loss": 8.7728, "step": 3759000 }, { "epoch": 30.08, "learning_rate": 3.4962e-05, "loss": 8.7471, "step": 3759500 }, { "epoch": 30.08, "learning_rate": 3.4960000000000004e-05, "loss": 8.7456, "step": 3760000 }, { "epoch": 30.08, "learning_rate": 3.4958e-05, "loss": 8.7487, "step": 3760500 }, { "epoch": 30.09, "learning_rate": 3.4956e-05, "loss": 8.754, "step": 3761000 }, { "epoch": 30.09, "learning_rate": 3.4954000000000004e-05, "loss": 8.7352, "step": 3761500 }, { "epoch": 30.1, "learning_rate": 3.4952e-05, "loss": 8.7655, "step": 3762000 }, { "epoch": 30.1, "learning_rate": 3.495e-05, "loss": 8.7522, "step": 3762500 }, { "epoch": 30.1, "learning_rate": 3.4948000000000005e-05, "loss": 8.7536, "step": 3763000 }, { "epoch": 30.11, "learning_rate": 3.4946e-05, "loss": 8.7438, "step": 3763500 }, { "epoch": 30.11, "learning_rate": 3.4943999999999996e-05, "loss": 8.7319, "step": 3764000 }, { "epoch": 30.12, "learning_rate": 3.4942000000000005e-05, "loss": 8.742, "step": 3764500 }, { "epoch": 30.12, "learning_rate": 3.494e-05, "loss": 8.7445, "step": 3765000 }, { "epoch": 30.12, "learning_rate": 3.4938e-05, "loss": 8.7388, "step": 3765500 }, { "epoch": 30.13, "learning_rate": 3.4936e-05, "loss": 8.7642, "step": 3766000 }, { "epoch": 30.13, "learning_rate": 3.4934e-05, "loss": 8.7728, "step": 3766500 }, { "epoch": 30.14, "learning_rate": 3.4932000000000004e-05, "loss": 8.757, "step": 3767000 }, { "epoch": 30.14, "learning_rate": 3.493e-05, "loss": 8.7415, "step": 3767500 }, { "epoch": 30.14, "learning_rate": 3.4928e-05, "loss": 8.745, "step": 3768000 }, { "epoch": 30.15, "learning_rate": 3.4926000000000004e-05, "loss": 8.7346, "step": 3768500 }, { "epoch": 30.15, "learning_rate": 3.4924e-05, "loss": 8.7662, "step": 3769000 }, { "epoch": 30.16, "learning_rate": 3.4922e-05, "loss": 8.7558, "step": 3769500 }, { "epoch": 30.16, "learning_rate": 3.4920000000000004e-05, "loss": 8.7589, "step": 3770000 }, { "epoch": 30.16, "learning_rate": 3.4918e-05, "loss": 8.7495, "step": 3770500 }, { "epoch": 30.17, "learning_rate": 3.4916e-05, "loss": 8.735, "step": 3771000 }, { "epoch": 30.17, "learning_rate": 3.4914000000000005e-05, "loss": 8.7454, "step": 3771500 }, { "epoch": 30.18, "learning_rate": 3.4912e-05, "loss": 8.7654, "step": 3772000 }, { "epoch": 30.18, "learning_rate": 3.491e-05, "loss": 8.7566, "step": 3772500 }, { "epoch": 30.18, "learning_rate": 3.4908e-05, "loss": 8.7607, "step": 3773000 }, { "epoch": 30.19, "learning_rate": 3.4906e-05, "loss": 8.7726, "step": 3773500 }, { "epoch": 30.19, "learning_rate": 3.4904e-05, "loss": 8.7591, "step": 3774000 }, { "epoch": 30.2, "learning_rate": 3.4902e-05, "loss": 8.7373, "step": 3774500 }, { "epoch": 30.2, "learning_rate": 3.49e-05, "loss": 8.77, "step": 3775000 }, { "epoch": 30.2, "learning_rate": 3.4898000000000004e-05, "loss": 8.7655, "step": 3775500 }, { "epoch": 30.21, "learning_rate": 3.4896e-05, "loss": 8.7536, "step": 3776000 }, { "epoch": 30.21, "learning_rate": 3.4894e-05, "loss": 8.7579, "step": 3776500 }, { "epoch": 30.22, "learning_rate": 3.4892000000000004e-05, "loss": 8.7521, "step": 3777000 }, { "epoch": 30.22, "learning_rate": 3.489e-05, "loss": 8.7575, "step": 3777500 }, { "epoch": 30.22, "learning_rate": 3.4888e-05, "loss": 8.7658, "step": 3778000 }, { "epoch": 30.23, "learning_rate": 3.4886000000000004e-05, "loss": 8.7551, "step": 3778500 }, { "epoch": 30.23, "learning_rate": 3.4884e-05, "loss": 8.7455, "step": 3779000 }, { "epoch": 30.24, "learning_rate": 3.4882e-05, "loss": 8.756, "step": 3779500 }, { "epoch": 30.24, "learning_rate": 3.4880000000000005e-05, "loss": 8.7562, "step": 3780000 }, { "epoch": 30.24, "learning_rate": 3.4878e-05, "loss": 8.7558, "step": 3780500 }, { "epoch": 30.25, "learning_rate": 3.4876e-05, "loss": 8.7476, "step": 3781000 }, { "epoch": 30.25, "learning_rate": 3.4874e-05, "loss": 8.7325, "step": 3781500 }, { "epoch": 30.26, "learning_rate": 3.4872e-05, "loss": 8.7605, "step": 3782000 }, { "epoch": 30.26, "learning_rate": 3.487e-05, "loss": 8.7507, "step": 3782500 }, { "epoch": 30.26, "learning_rate": 3.4868e-05, "loss": 8.757, "step": 3783000 }, { "epoch": 30.27, "learning_rate": 3.4866e-05, "loss": 8.7341, "step": 3783500 }, { "epoch": 30.27, "learning_rate": 3.4864000000000004e-05, "loss": 8.7593, "step": 3784000 }, { "epoch": 30.28, "learning_rate": 3.4862e-05, "loss": 8.7598, "step": 3784500 }, { "epoch": 30.28, "learning_rate": 3.486e-05, "loss": 8.7326, "step": 3785000 }, { "epoch": 30.28, "learning_rate": 3.4858000000000004e-05, "loss": 8.746, "step": 3785500 }, { "epoch": 30.29, "learning_rate": 3.4856e-05, "loss": 8.7587, "step": 3786000 }, { "epoch": 30.29, "learning_rate": 3.4854e-05, "loss": 8.7432, "step": 3786500 }, { "epoch": 30.3, "learning_rate": 3.4852000000000004e-05, "loss": 8.7376, "step": 3787000 }, { "epoch": 30.3, "learning_rate": 3.485e-05, "loss": 8.7402, "step": 3787500 }, { "epoch": 30.3, "learning_rate": 3.4848e-05, "loss": 8.7496, "step": 3788000 }, { "epoch": 30.31, "learning_rate": 3.4846e-05, "loss": 8.7247, "step": 3788500 }, { "epoch": 30.31, "learning_rate": 3.484400000000001e-05, "loss": 8.7626, "step": 3789000 }, { "epoch": 30.32, "learning_rate": 3.4842e-05, "loss": 8.7246, "step": 3789500 }, { "epoch": 30.32, "learning_rate": 3.484e-05, "loss": 8.7395, "step": 3790000 }, { "epoch": 30.32, "learning_rate": 3.4838e-05, "loss": 8.7386, "step": 3790500 }, { "epoch": 30.33, "learning_rate": 3.4836e-05, "loss": 8.7421, "step": 3791000 }, { "epoch": 30.33, "learning_rate": 3.4834e-05, "loss": 8.7463, "step": 3791500 }, { "epoch": 30.34, "learning_rate": 3.4832e-05, "loss": 8.749, "step": 3792000 }, { "epoch": 30.34, "learning_rate": 3.4830000000000004e-05, "loss": 8.7385, "step": 3792500 }, { "epoch": 30.34, "learning_rate": 3.4828e-05, "loss": 8.7577, "step": 3793000 }, { "epoch": 30.35, "learning_rate": 3.4826e-05, "loss": 8.7521, "step": 3793500 }, { "epoch": 30.35, "learning_rate": 3.4824000000000004e-05, "loss": 8.7504, "step": 3794000 }, { "epoch": 30.36, "learning_rate": 3.4822e-05, "loss": 8.7572, "step": 3794500 }, { "epoch": 30.36, "learning_rate": 3.482e-05, "loss": 8.7664, "step": 3795000 }, { "epoch": 30.36, "learning_rate": 3.4818e-05, "loss": 8.7453, "step": 3795500 }, { "epoch": 30.37, "learning_rate": 3.481600000000001e-05, "loss": 8.7809, "step": 3796000 }, { "epoch": 30.37, "learning_rate": 3.4814e-05, "loss": 8.7492, "step": 3796500 }, { "epoch": 30.38, "learning_rate": 3.4812e-05, "loss": 8.7415, "step": 3797000 }, { "epoch": 30.38, "learning_rate": 3.481e-05, "loss": 8.7286, "step": 3797500 }, { "epoch": 30.38, "learning_rate": 3.4808e-05, "loss": 8.7417, "step": 3798000 }, { "epoch": 30.39, "learning_rate": 3.4806e-05, "loss": 8.7385, "step": 3798500 }, { "epoch": 30.39, "learning_rate": 3.4804e-05, "loss": 8.7417, "step": 3799000 }, { "epoch": 30.4, "learning_rate": 3.4802e-05, "loss": 8.7423, "step": 3799500 }, { "epoch": 30.4, "learning_rate": 3.48e-05, "loss": 8.7415, "step": 3800000 }, { "epoch": 30.4, "learning_rate": 3.4798e-05, "loss": 8.7435, "step": 3800500 }, { "epoch": 30.41, "learning_rate": 3.4796000000000004e-05, "loss": 8.7612, "step": 3801000 }, { "epoch": 30.41, "learning_rate": 3.4794000000000006e-05, "loss": 8.7511, "step": 3801500 }, { "epoch": 30.42, "learning_rate": 3.4792e-05, "loss": 8.7501, "step": 3802000 }, { "epoch": 30.42, "learning_rate": 3.479e-05, "loss": 8.7576, "step": 3802500 }, { "epoch": 30.42, "learning_rate": 3.4788000000000006e-05, "loss": 8.7477, "step": 3803000 }, { "epoch": 30.43, "learning_rate": 3.4786e-05, "loss": 8.7619, "step": 3803500 }, { "epoch": 30.43, "learning_rate": 3.4784e-05, "loss": 8.7425, "step": 3804000 }, { "epoch": 30.44, "learning_rate": 3.478200000000001e-05, "loss": 8.7671, "step": 3804500 }, { "epoch": 30.44, "learning_rate": 3.478e-05, "loss": 8.7656, "step": 3805000 }, { "epoch": 30.44, "learning_rate": 3.4778e-05, "loss": 8.7397, "step": 3805500 }, { "epoch": 30.45, "learning_rate": 3.4776e-05, "loss": 8.7568, "step": 3806000 }, { "epoch": 30.45, "learning_rate": 3.4774e-05, "loss": 8.7602, "step": 3806500 }, { "epoch": 30.46, "learning_rate": 3.4772e-05, "loss": 8.7578, "step": 3807000 }, { "epoch": 30.46, "learning_rate": 3.477e-05, "loss": 8.757, "step": 3807500 }, { "epoch": 30.46, "learning_rate": 3.4768e-05, "loss": 8.7448, "step": 3808000 }, { "epoch": 30.47, "learning_rate": 3.4766000000000006e-05, "loss": 8.7657, "step": 3808500 }, { "epoch": 30.47, "learning_rate": 3.4764e-05, "loss": 8.7537, "step": 3809000 }, { "epoch": 30.48, "learning_rate": 3.4762e-05, "loss": 8.7452, "step": 3809500 }, { "epoch": 30.48, "learning_rate": 3.4760000000000006e-05, "loss": 8.7613, "step": 3810000 }, { "epoch": 30.48, "learning_rate": 3.4758e-05, "loss": 8.754, "step": 3810500 }, { "epoch": 30.49, "learning_rate": 3.4756e-05, "loss": 8.7521, "step": 3811000 }, { "epoch": 30.49, "learning_rate": 3.4754000000000006e-05, "loss": 8.744, "step": 3811500 }, { "epoch": 30.5, "learning_rate": 3.4752e-05, "loss": 8.7535, "step": 3812000 }, { "epoch": 30.5, "learning_rate": 3.475e-05, "loss": 8.746, "step": 3812500 }, { "epoch": 30.5, "learning_rate": 3.4748e-05, "loss": 8.7227, "step": 3813000 }, { "epoch": 30.51, "learning_rate": 3.4746e-05, "loss": 8.7618, "step": 3813500 }, { "epoch": 30.51, "learning_rate": 3.4744000000000005e-05, "loss": 8.7503, "step": 3814000 }, { "epoch": 30.52, "learning_rate": 3.4742e-05, "loss": 8.7408, "step": 3814500 }, { "epoch": 30.52, "learning_rate": 3.474e-05, "loss": 8.7512, "step": 3815000 }, { "epoch": 30.52, "learning_rate": 3.4738000000000005e-05, "loss": 8.7347, "step": 3815500 }, { "epoch": 30.53, "learning_rate": 3.4736e-05, "loss": 8.734, "step": 3816000 }, { "epoch": 30.53, "learning_rate": 3.4734e-05, "loss": 8.7642, "step": 3816500 }, { "epoch": 30.54, "learning_rate": 3.4732000000000006e-05, "loss": 8.7366, "step": 3817000 }, { "epoch": 30.54, "learning_rate": 3.473e-05, "loss": 8.7465, "step": 3817500 }, { "epoch": 30.54, "learning_rate": 3.4728e-05, "loss": 8.7351, "step": 3818000 }, { "epoch": 30.55, "learning_rate": 3.4726000000000006e-05, "loss": 8.7652, "step": 3818500 }, { "epoch": 30.55, "learning_rate": 3.4724e-05, "loss": 8.7563, "step": 3819000 }, { "epoch": 30.56, "learning_rate": 3.4722e-05, "loss": 8.7424, "step": 3819500 }, { "epoch": 30.56, "learning_rate": 3.472e-05, "loss": 8.7314, "step": 3820000 }, { "epoch": 30.56, "learning_rate": 3.4718e-05, "loss": 8.7532, "step": 3820500 }, { "epoch": 30.57, "learning_rate": 3.4716000000000004e-05, "loss": 8.7536, "step": 3821000 }, { "epoch": 30.57, "learning_rate": 3.4714e-05, "loss": 8.7341, "step": 3821500 }, { "epoch": 30.58, "learning_rate": 3.4712e-05, "loss": 8.7604, "step": 3822000 }, { "epoch": 30.58, "learning_rate": 3.4710000000000005e-05, "loss": 8.7391, "step": 3822500 }, { "epoch": 30.58, "learning_rate": 3.4708e-05, "loss": 8.7649, "step": 3823000 }, { "epoch": 30.59, "learning_rate": 3.4706e-05, "loss": 8.7241, "step": 3823500 }, { "epoch": 30.59, "learning_rate": 3.4704000000000005e-05, "loss": 8.7668, "step": 3824000 }, { "epoch": 30.6, "learning_rate": 3.4702e-05, "loss": 8.7641, "step": 3824500 }, { "epoch": 30.6, "learning_rate": 3.4699999999999996e-05, "loss": 8.7443, "step": 3825000 }, { "epoch": 30.6, "learning_rate": 3.4698000000000006e-05, "loss": 8.7631, "step": 3825500 }, { "epoch": 30.61, "learning_rate": 3.4696e-05, "loss": 8.7341, "step": 3826000 }, { "epoch": 30.61, "learning_rate": 3.4694000000000004e-05, "loss": 8.726, "step": 3826500 }, { "epoch": 30.62, "learning_rate": 3.4692e-05, "loss": 8.7457, "step": 3827000 }, { "epoch": 30.62, "learning_rate": 3.469e-05, "loss": 8.7571, "step": 3827500 }, { "epoch": 30.62, "learning_rate": 3.4688000000000004e-05, "loss": 8.7739, "step": 3828000 }, { "epoch": 30.63, "learning_rate": 3.4686e-05, "loss": 8.7564, "step": 3828500 }, { "epoch": 30.63, "learning_rate": 3.4684e-05, "loss": 8.733, "step": 3829000 }, { "epoch": 30.64, "learning_rate": 3.4682000000000004e-05, "loss": 8.7534, "step": 3829500 }, { "epoch": 30.64, "learning_rate": 3.468e-05, "loss": 8.7603, "step": 3830000 }, { "epoch": 30.64, "learning_rate": 3.4678e-05, "loss": 8.7847, "step": 3830500 }, { "epoch": 30.65, "learning_rate": 3.4676000000000005e-05, "loss": 8.7614, "step": 3831000 }, { "epoch": 30.65, "learning_rate": 3.4674e-05, "loss": 8.7343, "step": 3831500 }, { "epoch": 30.66, "learning_rate": 3.4671999999999996e-05, "loss": 8.75, "step": 3832000 }, { "epoch": 30.66, "learning_rate": 3.4670000000000005e-05, "loss": 8.7544, "step": 3832500 }, { "epoch": 30.66, "learning_rate": 3.4668e-05, "loss": 8.7481, "step": 3833000 }, { "epoch": 30.67, "learning_rate": 3.4666e-05, "loss": 8.758, "step": 3833500 }, { "epoch": 30.67, "learning_rate": 3.4664e-05, "loss": 8.7512, "step": 3834000 }, { "epoch": 30.68, "learning_rate": 3.4662e-05, "loss": 8.7491, "step": 3834500 }, { "epoch": 30.68, "learning_rate": 3.4660000000000004e-05, "loss": 8.7519, "step": 3835000 }, { "epoch": 30.68, "learning_rate": 3.4658e-05, "loss": 8.733, "step": 3835500 }, { "epoch": 30.69, "learning_rate": 3.4656e-05, "loss": 8.7495, "step": 3836000 }, { "epoch": 30.69, "learning_rate": 3.4654000000000004e-05, "loss": 8.7608, "step": 3836500 }, { "epoch": 30.7, "learning_rate": 3.4652e-05, "loss": 8.7483, "step": 3837000 }, { "epoch": 30.7, "learning_rate": 3.465e-05, "loss": 8.7338, "step": 3837500 }, { "epoch": 30.7, "learning_rate": 3.4648000000000004e-05, "loss": 8.7604, "step": 3838000 }, { "epoch": 30.71, "learning_rate": 3.4646e-05, "loss": 8.7604, "step": 3838500 }, { "epoch": 30.71, "learning_rate": 3.4644e-05, "loss": 8.7482, "step": 3839000 }, { "epoch": 30.72, "learning_rate": 3.4642000000000005e-05, "loss": 8.7481, "step": 3839500 }, { "epoch": 30.72, "learning_rate": 3.464e-05, "loss": 8.7478, "step": 3840000 }, { "epoch": 30.72, "learning_rate": 3.4638e-05, "loss": 8.7414, "step": 3840500 }, { "epoch": 30.73, "learning_rate": 3.4636000000000005e-05, "loss": 8.7605, "step": 3841000 }, { "epoch": 30.73, "learning_rate": 3.4634e-05, "loss": 8.7537, "step": 3841500 }, { "epoch": 30.74, "learning_rate": 3.4632e-05, "loss": 8.7509, "step": 3842000 }, { "epoch": 30.74, "learning_rate": 3.463e-05, "loss": 8.7502, "step": 3842500 }, { "epoch": 30.74, "learning_rate": 3.4628e-05, "loss": 8.743, "step": 3843000 }, { "epoch": 30.75, "learning_rate": 3.4626000000000004e-05, "loss": 8.7564, "step": 3843500 }, { "epoch": 30.75, "learning_rate": 3.4624e-05, "loss": 8.7764, "step": 3844000 }, { "epoch": 30.76, "learning_rate": 3.4622e-05, "loss": 8.7385, "step": 3844500 }, { "epoch": 30.76, "learning_rate": 3.4620000000000004e-05, "loss": 8.7488, "step": 3845000 }, { "epoch": 30.76, "learning_rate": 3.4618e-05, "loss": 8.75, "step": 3845500 }, { "epoch": 30.77, "learning_rate": 3.4616e-05, "loss": 8.7613, "step": 3846000 }, { "epoch": 30.77, "learning_rate": 3.4614000000000004e-05, "loss": 8.7594, "step": 3846500 }, { "epoch": 30.78, "learning_rate": 3.4612e-05, "loss": 8.7355, "step": 3847000 }, { "epoch": 30.78, "learning_rate": 3.461e-05, "loss": 8.7637, "step": 3847500 }, { "epoch": 30.78, "learning_rate": 3.4608000000000005e-05, "loss": 8.7493, "step": 3848000 }, { "epoch": 30.79, "learning_rate": 3.4606e-05, "loss": 8.75, "step": 3848500 }, { "epoch": 30.79, "learning_rate": 3.4604e-05, "loss": 8.7422, "step": 3849000 }, { "epoch": 30.8, "learning_rate": 3.4602e-05, "loss": 8.7543, "step": 3849500 }, { "epoch": 30.8, "learning_rate": 3.46e-05, "loss": 8.7732, "step": 3850000 }, { "epoch": 30.8, "learning_rate": 3.4598e-05, "loss": 8.7381, "step": 3850500 }, { "epoch": 30.81, "learning_rate": 3.4596e-05, "loss": 8.7653, "step": 3851000 }, { "epoch": 30.81, "learning_rate": 3.4594e-05, "loss": 8.7498, "step": 3851500 }, { "epoch": 30.82, "learning_rate": 3.4592000000000004e-05, "loss": 8.7583, "step": 3852000 }, { "epoch": 30.82, "learning_rate": 3.459e-05, "loss": 8.7373, "step": 3852500 }, { "epoch": 30.82, "learning_rate": 3.4588e-05, "loss": 8.7492, "step": 3853000 }, { "epoch": 30.83, "learning_rate": 3.4586000000000004e-05, "loss": 8.7504, "step": 3853500 }, { "epoch": 30.83, "learning_rate": 3.4584e-05, "loss": 8.7502, "step": 3854000 }, { "epoch": 30.84, "learning_rate": 3.4582e-05, "loss": 8.7638, "step": 3854500 }, { "epoch": 30.84, "learning_rate": 3.4580000000000004e-05, "loss": 8.7445, "step": 3855000 }, { "epoch": 30.84, "learning_rate": 3.4578e-05, "loss": 8.7586, "step": 3855500 }, { "epoch": 30.85, "learning_rate": 3.4576e-05, "loss": 8.7607, "step": 3856000 }, { "epoch": 30.85, "learning_rate": 3.4574e-05, "loss": 8.7519, "step": 3856500 }, { "epoch": 30.86, "learning_rate": 3.457200000000001e-05, "loss": 8.7687, "step": 3857000 }, { "epoch": 30.86, "learning_rate": 3.457e-05, "loss": 8.7349, "step": 3857500 }, { "epoch": 30.86, "learning_rate": 3.4568e-05, "loss": 8.7438, "step": 3858000 }, { "epoch": 30.87, "learning_rate": 3.4566e-05, "loss": 8.7452, "step": 3858500 }, { "epoch": 30.87, "learning_rate": 3.4564e-05, "loss": 8.7384, "step": 3859000 }, { "epoch": 30.88, "learning_rate": 3.4562e-05, "loss": 8.76, "step": 3859500 }, { "epoch": 30.88, "learning_rate": 3.456e-05, "loss": 8.7605, "step": 3860000 }, { "epoch": 30.88, "learning_rate": 3.4558000000000004e-05, "loss": 8.7507, "step": 3860500 }, { "epoch": 30.89, "learning_rate": 3.4556e-05, "loss": 8.7247, "step": 3861000 }, { "epoch": 30.89, "learning_rate": 3.4554e-05, "loss": 8.7501, "step": 3861500 }, { "epoch": 30.9, "learning_rate": 3.4552000000000004e-05, "loss": 8.7449, "step": 3862000 }, { "epoch": 30.9, "learning_rate": 3.455e-05, "loss": 8.7633, "step": 3862500 }, { "epoch": 30.9, "learning_rate": 3.4548e-05, "loss": 8.7466, "step": 3863000 }, { "epoch": 30.91, "learning_rate": 3.4546e-05, "loss": 8.7711, "step": 3863500 }, { "epoch": 30.91, "learning_rate": 3.454400000000001e-05, "loss": 8.7487, "step": 3864000 }, { "epoch": 30.92, "learning_rate": 3.4542e-05, "loss": 8.7394, "step": 3864500 }, { "epoch": 30.92, "learning_rate": 3.454e-05, "loss": 8.7638, "step": 3865000 }, { "epoch": 30.92, "learning_rate": 3.4538e-05, "loss": 8.7332, "step": 3865500 }, { "epoch": 30.93, "learning_rate": 3.4536e-05, "loss": 8.7563, "step": 3866000 }, { "epoch": 30.93, "learning_rate": 3.4534e-05, "loss": 8.743, "step": 3866500 }, { "epoch": 30.94, "learning_rate": 3.4532e-05, "loss": 8.7443, "step": 3867000 }, { "epoch": 30.94, "learning_rate": 3.453e-05, "loss": 8.7466, "step": 3867500 }, { "epoch": 30.94, "learning_rate": 3.4528e-05, "loss": 8.762, "step": 3868000 }, { "epoch": 30.95, "learning_rate": 3.4526e-05, "loss": 8.7634, "step": 3868500 }, { "epoch": 30.95, "learning_rate": 3.4524000000000004e-05, "loss": 8.7494, "step": 3869000 }, { "epoch": 30.96, "learning_rate": 3.4522000000000006e-05, "loss": 8.7302, "step": 3869500 }, { "epoch": 30.96, "learning_rate": 3.452e-05, "loss": 8.7694, "step": 3870000 }, { "epoch": 30.96, "learning_rate": 3.4518e-05, "loss": 8.7542, "step": 3870500 }, { "epoch": 30.97, "learning_rate": 3.4516000000000006e-05, "loss": 8.744, "step": 3871000 }, { "epoch": 30.97, "learning_rate": 3.4514e-05, "loss": 8.7628, "step": 3871500 }, { "epoch": 30.98, "learning_rate": 3.4512e-05, "loss": 8.7657, "step": 3872000 }, { "epoch": 30.98, "learning_rate": 3.451000000000001e-05, "loss": 8.7422, "step": 3872500 }, { "epoch": 30.98, "learning_rate": 3.4508e-05, "loss": 8.7791, "step": 3873000 }, { "epoch": 30.99, "learning_rate": 3.4506e-05, "loss": 8.74, "step": 3873500 }, { "epoch": 30.99, "learning_rate": 3.4504e-05, "loss": 8.7574, "step": 3874000 }, { "epoch": 31.0, "learning_rate": 3.4502e-05, "loss": 8.7538, "step": 3874500 }, { "epoch": 31.0, "learning_rate": 3.45e-05, "loss": 8.7549, "step": 3875000 }, { "epoch": 31.0, "learning_rate": 3.4498e-05, "loss": 8.7405, "step": 3875500 }, { "epoch": 31.01, "learning_rate": 3.4496e-05, "loss": 8.7441, "step": 3876000 }, { "epoch": 31.01, "learning_rate": 3.4494000000000006e-05, "loss": 8.7436, "step": 3876500 }, { "epoch": 31.02, "learning_rate": 3.4492e-05, "loss": 8.7452, "step": 3877000 }, { "epoch": 31.02, "learning_rate": 3.449e-05, "loss": 8.737, "step": 3877500 }, { "epoch": 31.02, "learning_rate": 3.4488000000000006e-05, "loss": 8.7545, "step": 3878000 }, { "epoch": 31.03, "learning_rate": 3.4486e-05, "loss": 8.7445, "step": 3878500 }, { "epoch": 31.03, "learning_rate": 3.4484e-05, "loss": 8.7583, "step": 3879000 }, { "epoch": 31.04, "learning_rate": 3.4482000000000006e-05, "loss": 8.7253, "step": 3879500 }, { "epoch": 31.04, "learning_rate": 3.448e-05, "loss": 8.7525, "step": 3880000 }, { "epoch": 31.04, "learning_rate": 3.4478e-05, "loss": 8.7497, "step": 3880500 }, { "epoch": 31.05, "learning_rate": 3.4476e-05, "loss": 8.7638, "step": 3881000 }, { "epoch": 31.05, "learning_rate": 3.4474e-05, "loss": 8.7427, "step": 3881500 }, { "epoch": 31.06, "learning_rate": 3.4472000000000005e-05, "loss": 8.7559, "step": 3882000 }, { "epoch": 31.06, "learning_rate": 3.447e-05, "loss": 8.7555, "step": 3882500 }, { "epoch": 31.06, "learning_rate": 3.4468e-05, "loss": 8.7598, "step": 3883000 }, { "epoch": 31.07, "learning_rate": 3.4466000000000005e-05, "loss": 8.7646, "step": 3883500 }, { "epoch": 31.07, "learning_rate": 3.4464e-05, "loss": 8.7569, "step": 3884000 }, { "epoch": 31.08, "learning_rate": 3.4462e-05, "loss": 8.7725, "step": 3884500 }, { "epoch": 31.08, "learning_rate": 3.4460000000000005e-05, "loss": 8.7757, "step": 3885000 }, { "epoch": 31.08, "learning_rate": 3.4458e-05, "loss": 8.7425, "step": 3885500 }, { "epoch": 31.09, "learning_rate": 3.4456e-05, "loss": 8.7498, "step": 3886000 }, { "epoch": 31.09, "learning_rate": 3.4454000000000006e-05, "loss": 8.7533, "step": 3886500 }, { "epoch": 31.1, "learning_rate": 3.4452e-05, "loss": 8.7679, "step": 3887000 }, { "epoch": 31.1, "learning_rate": 3.445e-05, "loss": 8.7563, "step": 3887500 }, { "epoch": 31.1, "learning_rate": 3.4448e-05, "loss": 8.7526, "step": 3888000 }, { "epoch": 31.11, "learning_rate": 3.4446e-05, "loss": 8.7533, "step": 3888500 }, { "epoch": 31.11, "learning_rate": 3.4444000000000004e-05, "loss": 8.7485, "step": 3889000 }, { "epoch": 31.12, "learning_rate": 3.4442e-05, "loss": 8.7318, "step": 3889500 }, { "epoch": 31.12, "learning_rate": 3.444e-05, "loss": 8.7526, "step": 3890000 }, { "epoch": 31.12, "learning_rate": 3.4438000000000005e-05, "loss": 8.748, "step": 3890500 }, { "epoch": 31.13, "learning_rate": 3.4436e-05, "loss": 8.7407, "step": 3891000 }, { "epoch": 31.13, "learning_rate": 3.4434e-05, "loss": 8.7683, "step": 3891500 }, { "epoch": 31.14, "learning_rate": 3.4432000000000005e-05, "loss": 8.7503, "step": 3892000 }, { "epoch": 31.14, "learning_rate": 3.443e-05, "loss": 8.7686, "step": 3892500 }, { "epoch": 31.14, "learning_rate": 3.4427999999999996e-05, "loss": 8.7457, "step": 3893000 }, { "epoch": 31.15, "learning_rate": 3.4426000000000005e-05, "loss": 8.7216, "step": 3893500 }, { "epoch": 31.15, "learning_rate": 3.4424e-05, "loss": 8.7418, "step": 3894000 }, { "epoch": 31.16, "learning_rate": 3.4422000000000003e-05, "loss": 8.7493, "step": 3894500 }, { "epoch": 31.16, "learning_rate": 3.442e-05, "loss": 8.7533, "step": 3895000 }, { "epoch": 31.16, "learning_rate": 3.4418e-05, "loss": 8.7491, "step": 3895500 }, { "epoch": 31.17, "learning_rate": 3.4416000000000004e-05, "loss": 8.7575, "step": 3896000 }, { "epoch": 31.17, "learning_rate": 3.4414e-05, "loss": 8.7307, "step": 3896500 }, { "epoch": 31.18, "learning_rate": 3.4412e-05, "loss": 8.7488, "step": 3897000 }, { "epoch": 31.18, "learning_rate": 3.4410000000000004e-05, "loss": 8.7511, "step": 3897500 }, { "epoch": 31.18, "learning_rate": 3.4408e-05, "loss": 8.7528, "step": 3898000 }, { "epoch": 31.19, "learning_rate": 3.4406e-05, "loss": 8.736, "step": 3898500 }, { "epoch": 31.19, "learning_rate": 3.4404000000000005e-05, "loss": 8.7608, "step": 3899000 }, { "epoch": 31.2, "learning_rate": 3.4402e-05, "loss": 8.7431, "step": 3899500 }, { "epoch": 31.2, "learning_rate": 3.4399999999999996e-05, "loss": 8.7554, "step": 3900000 }, { "epoch": 31.2, "learning_rate": 3.4398000000000005e-05, "loss": 8.7522, "step": 3900500 }, { "epoch": 31.21, "learning_rate": 3.4396e-05, "loss": 8.7338, "step": 3901000 }, { "epoch": 31.21, "learning_rate": 3.4394e-05, "loss": 8.7689, "step": 3901500 }, { "epoch": 31.22, "learning_rate": 3.4392e-05, "loss": 8.7361, "step": 3902000 }, { "epoch": 31.22, "learning_rate": 3.439e-05, "loss": 8.7631, "step": 3902500 }, { "epoch": 31.22, "learning_rate": 3.4388000000000003e-05, "loss": 8.7405, "step": 3903000 }, { "epoch": 31.23, "learning_rate": 3.4386e-05, "loss": 8.7664, "step": 3903500 }, { "epoch": 31.23, "learning_rate": 3.4384e-05, "loss": 8.7468, "step": 3904000 }, { "epoch": 31.24, "learning_rate": 3.4382000000000004e-05, "loss": 8.7565, "step": 3904500 }, { "epoch": 31.24, "learning_rate": 3.438e-05, "loss": 8.743, "step": 3905000 }, { "epoch": 31.24, "learning_rate": 3.4378e-05, "loss": 8.7645, "step": 3905500 }, { "epoch": 31.25, "learning_rate": 3.4376000000000004e-05, "loss": 8.7401, "step": 3906000 }, { "epoch": 31.25, "learning_rate": 3.4374e-05, "loss": 8.7544, "step": 3906500 }, { "epoch": 31.26, "learning_rate": 3.4372e-05, "loss": 8.7411, "step": 3907000 }, { "epoch": 31.26, "learning_rate": 3.4370000000000005e-05, "loss": 8.7466, "step": 3907500 }, { "epoch": 31.26, "learning_rate": 3.4368e-05, "loss": 8.7458, "step": 3908000 }, { "epoch": 31.27, "learning_rate": 3.4366e-05, "loss": 8.7388, "step": 3908500 }, { "epoch": 31.27, "learning_rate": 3.4364000000000005e-05, "loss": 8.7428, "step": 3909000 }, { "epoch": 31.28, "learning_rate": 3.4362e-05, "loss": 8.7622, "step": 3909500 }, { "epoch": 31.28, "learning_rate": 3.436e-05, "loss": 8.7499, "step": 3910000 }, { "epoch": 31.28, "learning_rate": 3.4358e-05, "loss": 8.7496, "step": 3910500 }, { "epoch": 31.29, "learning_rate": 3.4356e-05, "loss": 8.7671, "step": 3911000 }, { "epoch": 31.29, "learning_rate": 3.4354000000000003e-05, "loss": 8.7677, "step": 3911500 }, { "epoch": 31.3, "learning_rate": 3.4352e-05, "loss": 8.7383, "step": 3912000 }, { "epoch": 31.3, "learning_rate": 3.435e-05, "loss": 8.7288, "step": 3912500 }, { "epoch": 31.3, "learning_rate": 3.4348000000000004e-05, "loss": 8.744, "step": 3913000 }, { "epoch": 31.31, "learning_rate": 3.4346e-05, "loss": 8.7561, "step": 3913500 }, { "epoch": 31.31, "learning_rate": 3.4344e-05, "loss": 8.7633, "step": 3914000 }, { "epoch": 31.32, "learning_rate": 3.4342000000000004e-05, "loss": 8.7341, "step": 3914500 }, { "epoch": 31.32, "learning_rate": 3.434e-05, "loss": 8.7704, "step": 3915000 }, { "epoch": 31.32, "learning_rate": 3.4338e-05, "loss": 8.7427, "step": 3915500 }, { "epoch": 31.33, "learning_rate": 3.4336000000000005e-05, "loss": 8.7374, "step": 3916000 }, { "epoch": 31.33, "learning_rate": 3.4334e-05, "loss": 8.7576, "step": 3916500 }, { "epoch": 31.34, "learning_rate": 3.4332e-05, "loss": 8.7364, "step": 3917000 }, { "epoch": 31.34, "learning_rate": 3.433e-05, "loss": 8.7448, "step": 3917500 }, { "epoch": 31.34, "learning_rate": 3.4328e-05, "loss": 8.7315, "step": 3918000 }, { "epoch": 31.35, "learning_rate": 3.4326e-05, "loss": 8.7528, "step": 3918500 }, { "epoch": 31.35, "learning_rate": 3.4324e-05, "loss": 8.7601, "step": 3919000 }, { "epoch": 31.36, "learning_rate": 3.4322e-05, "loss": 8.755, "step": 3919500 }, { "epoch": 31.36, "learning_rate": 3.4320000000000003e-05, "loss": 8.7467, "step": 3920000 }, { "epoch": 31.36, "learning_rate": 3.4318e-05, "loss": 8.7576, "step": 3920500 }, { "epoch": 31.37, "learning_rate": 3.4316e-05, "loss": 8.7381, "step": 3921000 }, { "epoch": 31.37, "learning_rate": 3.4314000000000004e-05, "loss": 8.7373, "step": 3921500 }, { "epoch": 31.38, "learning_rate": 3.4312e-05, "loss": 8.7587, "step": 3922000 }, { "epoch": 31.38, "learning_rate": 3.431e-05, "loss": 8.7554, "step": 3922500 }, { "epoch": 31.38, "learning_rate": 3.4308000000000004e-05, "loss": 8.7552, "step": 3923000 }, { "epoch": 31.39, "learning_rate": 3.4306e-05, "loss": 8.7349, "step": 3923500 }, { "epoch": 31.39, "learning_rate": 3.4304e-05, "loss": 8.762, "step": 3924000 }, { "epoch": 31.4, "learning_rate": 3.4302e-05, "loss": 8.759, "step": 3924500 }, { "epoch": 31.4, "learning_rate": 3.430000000000001e-05, "loss": 8.7675, "step": 3925000 }, { "epoch": 31.4, "learning_rate": 3.4298e-05, "loss": 8.7533, "step": 3925500 }, { "epoch": 31.41, "learning_rate": 3.4296e-05, "loss": 8.7331, "step": 3926000 }, { "epoch": 31.41, "learning_rate": 3.4294e-05, "loss": 8.7384, "step": 3926500 }, { "epoch": 31.42, "learning_rate": 3.4292e-05, "loss": 8.7418, "step": 3927000 }, { "epoch": 31.42, "learning_rate": 3.429e-05, "loss": 8.7654, "step": 3927500 }, { "epoch": 31.42, "learning_rate": 3.4288e-05, "loss": 8.7534, "step": 3928000 }, { "epoch": 31.43, "learning_rate": 3.4286000000000003e-05, "loss": 8.7554, "step": 3928500 }, { "epoch": 31.43, "learning_rate": 3.4284e-05, "loss": 8.7454, "step": 3929000 }, { "epoch": 31.44, "learning_rate": 3.4282e-05, "loss": 8.7406, "step": 3929500 }, { "epoch": 31.44, "learning_rate": 3.4280000000000004e-05, "loss": 8.7306, "step": 3930000 }, { "epoch": 31.44, "learning_rate": 3.4278e-05, "loss": 8.7534, "step": 3930500 }, { "epoch": 31.45, "learning_rate": 3.4276e-05, "loss": 8.7321, "step": 3931000 }, { "epoch": 31.45, "learning_rate": 3.4274e-05, "loss": 8.7634, "step": 3931500 }, { "epoch": 31.46, "learning_rate": 3.427200000000001e-05, "loss": 8.7351, "step": 3932000 }, { "epoch": 31.46, "learning_rate": 3.427e-05, "loss": 8.7535, "step": 3932500 }, { "epoch": 31.46, "learning_rate": 3.4268e-05, "loss": 8.7608, "step": 3933000 }, { "epoch": 31.47, "learning_rate": 3.426600000000001e-05, "loss": 8.7568, "step": 3933500 }, { "epoch": 31.47, "learning_rate": 3.4264e-05, "loss": 8.7536, "step": 3934000 }, { "epoch": 31.48, "learning_rate": 3.4262e-05, "loss": 8.7319, "step": 3934500 }, { "epoch": 31.48, "learning_rate": 3.426e-05, "loss": 8.7455, "step": 3935000 }, { "epoch": 31.48, "learning_rate": 3.4258e-05, "loss": 8.7463, "step": 3935500 }, { "epoch": 31.49, "learning_rate": 3.4256e-05, "loss": 8.7362, "step": 3936000 }, { "epoch": 31.49, "learning_rate": 3.4254e-05, "loss": 8.7341, "step": 3936500 }, { "epoch": 31.5, "learning_rate": 3.4252000000000003e-05, "loss": 8.7512, "step": 3937000 }, { "epoch": 31.5, "learning_rate": 3.4250000000000006e-05, "loss": 8.7543, "step": 3937500 }, { "epoch": 31.5, "learning_rate": 3.4248e-05, "loss": 8.7425, "step": 3938000 }, { "epoch": 31.51, "learning_rate": 3.4246e-05, "loss": 8.7401, "step": 3938500 }, { "epoch": 31.51, "learning_rate": 3.4244000000000006e-05, "loss": 8.7637, "step": 3939000 }, { "epoch": 31.52, "learning_rate": 3.4242e-05, "loss": 8.7551, "step": 3939500 }, { "epoch": 31.52, "learning_rate": 3.424e-05, "loss": 8.7313, "step": 3940000 }, { "epoch": 31.52, "learning_rate": 3.4238000000000007e-05, "loss": 8.7649, "step": 3940500 }, { "epoch": 31.53, "learning_rate": 3.4236e-05, "loss": 8.745, "step": 3941000 }, { "epoch": 31.53, "learning_rate": 3.4234e-05, "loss": 8.7479, "step": 3941500 }, { "epoch": 31.54, "learning_rate": 3.4232e-05, "loss": 8.7519, "step": 3942000 }, { "epoch": 31.54, "learning_rate": 3.423e-05, "loss": 8.7386, "step": 3942500 }, { "epoch": 31.54, "learning_rate": 3.4228e-05, "loss": 8.7636, "step": 3943000 }, { "epoch": 31.55, "learning_rate": 3.4226e-05, "loss": 8.7428, "step": 3943500 }, { "epoch": 31.55, "learning_rate": 3.4224e-05, "loss": 8.7464, "step": 3944000 }, { "epoch": 31.56, "learning_rate": 3.4222000000000005e-05, "loss": 8.7431, "step": 3944500 }, { "epoch": 31.56, "learning_rate": 3.422e-05, "loss": 8.744, "step": 3945000 }, { "epoch": 31.56, "learning_rate": 3.4218000000000003e-05, "loss": 8.7819, "step": 3945500 }, { "epoch": 31.57, "learning_rate": 3.4216000000000006e-05, "loss": 8.7512, "step": 3946000 }, { "epoch": 31.57, "learning_rate": 3.4214e-05, "loss": 8.7425, "step": 3946500 }, { "epoch": 31.58, "learning_rate": 3.4212e-05, "loss": 8.7459, "step": 3947000 }, { "epoch": 31.58, "learning_rate": 3.4210000000000006e-05, "loss": 8.7536, "step": 3947500 }, { "epoch": 31.58, "learning_rate": 3.4208e-05, "loss": 8.7488, "step": 3948000 }, { "epoch": 31.59, "learning_rate": 3.4206e-05, "loss": 8.7504, "step": 3948500 }, { "epoch": 31.59, "learning_rate": 3.4204e-05, "loss": 8.742, "step": 3949000 }, { "epoch": 31.6, "learning_rate": 3.4202e-05, "loss": 8.7551, "step": 3949500 }, { "epoch": 31.6, "learning_rate": 3.4200000000000005e-05, "loss": 8.739, "step": 3950000 }, { "epoch": 31.6, "learning_rate": 3.4198e-05, "loss": 8.7375, "step": 3950500 }, { "epoch": 31.61, "learning_rate": 3.4196e-05, "loss": 8.7408, "step": 3951000 }, { "epoch": 31.61, "learning_rate": 3.4194000000000005e-05, "loss": 8.7469, "step": 3951500 }, { "epoch": 31.62, "learning_rate": 3.4192e-05, "loss": 8.7476, "step": 3952000 }, { "epoch": 31.62, "learning_rate": 3.419e-05, "loss": 8.7664, "step": 3952500 }, { "epoch": 31.62, "learning_rate": 3.4188000000000005e-05, "loss": 8.7596, "step": 3953000 }, { "epoch": 31.63, "learning_rate": 3.4186e-05, "loss": 8.7514, "step": 3953500 }, { "epoch": 31.63, "learning_rate": 3.4184e-05, "loss": 8.7442, "step": 3954000 }, { "epoch": 31.64, "learning_rate": 3.4182000000000006e-05, "loss": 8.7535, "step": 3954500 }, { "epoch": 31.64, "learning_rate": 3.418e-05, "loss": 8.751, "step": 3955000 }, { "epoch": 31.64, "learning_rate": 3.4178e-05, "loss": 8.7488, "step": 3955500 }, { "epoch": 31.65, "learning_rate": 3.4176e-05, "loss": 8.7592, "step": 3956000 }, { "epoch": 31.65, "learning_rate": 3.4174e-05, "loss": 8.7361, "step": 3956500 }, { "epoch": 31.66, "learning_rate": 3.4172000000000004e-05, "loss": 8.7465, "step": 3957000 }, { "epoch": 31.66, "learning_rate": 3.417e-05, "loss": 8.7409, "step": 3957500 }, { "epoch": 31.66, "learning_rate": 3.4168e-05, "loss": 8.7622, "step": 3958000 }, { "epoch": 31.67, "learning_rate": 3.4166000000000005e-05, "loss": 8.7692, "step": 3958500 }, { "epoch": 31.67, "learning_rate": 3.4164e-05, "loss": 8.7471, "step": 3959000 }, { "epoch": 31.68, "learning_rate": 3.4162e-05, "loss": 8.7504, "step": 3959500 }, { "epoch": 31.68, "learning_rate": 3.4160000000000005e-05, "loss": 8.7707, "step": 3960000 }, { "epoch": 31.68, "learning_rate": 3.4158e-05, "loss": 8.7626, "step": 3960500 }, { "epoch": 31.69, "learning_rate": 3.4155999999999996e-05, "loss": 8.7508, "step": 3961000 }, { "epoch": 31.69, "learning_rate": 3.4154000000000005e-05, "loss": 8.7543, "step": 3961500 }, { "epoch": 31.7, "learning_rate": 3.4152e-05, "loss": 8.7376, "step": 3962000 }, { "epoch": 31.7, "learning_rate": 3.415e-05, "loss": 8.7347, "step": 3962500 }, { "epoch": 31.7, "learning_rate": 3.4148e-05, "loss": 8.7412, "step": 3963000 }, { "epoch": 31.71, "learning_rate": 3.4146e-05, "loss": 8.749, "step": 3963500 }, { "epoch": 31.71, "learning_rate": 3.4144000000000004e-05, "loss": 8.7489, "step": 3964000 }, { "epoch": 31.72, "learning_rate": 3.4142e-05, "loss": 8.7223, "step": 3964500 }, { "epoch": 31.72, "learning_rate": 3.414e-05, "loss": 8.7428, "step": 3965000 }, { "epoch": 31.72, "learning_rate": 3.4138000000000004e-05, "loss": 8.7515, "step": 3965500 }, { "epoch": 31.73, "learning_rate": 3.4136e-05, "loss": 8.7329, "step": 3966000 }, { "epoch": 31.73, "learning_rate": 3.4134e-05, "loss": 8.7414, "step": 3966500 }, { "epoch": 31.74, "learning_rate": 3.4132000000000005e-05, "loss": 8.7586, "step": 3967000 }, { "epoch": 31.74, "learning_rate": 3.413e-05, "loss": 8.759, "step": 3967500 }, { "epoch": 31.74, "learning_rate": 3.4127999999999996e-05, "loss": 8.7242, "step": 3968000 }, { "epoch": 31.75, "learning_rate": 3.4126000000000005e-05, "loss": 8.7621, "step": 3968500 }, { "epoch": 31.75, "learning_rate": 3.4124e-05, "loss": 8.7655, "step": 3969000 }, { "epoch": 31.76, "learning_rate": 3.4122e-05, "loss": 8.7624, "step": 3969500 }, { "epoch": 31.76, "learning_rate": 3.412e-05, "loss": 8.7525, "step": 3970000 }, { "epoch": 31.76, "learning_rate": 3.4118e-05, "loss": 8.735, "step": 3970500 }, { "epoch": 31.77, "learning_rate": 3.4116e-05, "loss": 8.7652, "step": 3971000 }, { "epoch": 31.77, "learning_rate": 3.4114e-05, "loss": 8.7372, "step": 3971500 }, { "epoch": 31.78, "learning_rate": 3.4112e-05, "loss": 8.7604, "step": 3972000 }, { "epoch": 31.78, "learning_rate": 3.4110000000000004e-05, "loss": 8.7745, "step": 3972500 }, { "epoch": 31.78, "learning_rate": 3.4108e-05, "loss": 8.7388, "step": 3973000 }, { "epoch": 31.79, "learning_rate": 3.4106e-05, "loss": 8.7464, "step": 3973500 }, { "epoch": 31.79, "learning_rate": 3.4104000000000004e-05, "loss": 8.733, "step": 3974000 }, { "epoch": 31.8, "learning_rate": 3.4102e-05, "loss": 8.7452, "step": 3974500 }, { "epoch": 31.8, "learning_rate": 3.41e-05, "loss": 8.7312, "step": 3975000 }, { "epoch": 31.8, "learning_rate": 3.4098000000000005e-05, "loss": 8.7344, "step": 3975500 }, { "epoch": 31.81, "learning_rate": 3.4096e-05, "loss": 8.7404, "step": 3976000 }, { "epoch": 31.81, "learning_rate": 3.4094e-05, "loss": 8.7467, "step": 3976500 }, { "epoch": 31.82, "learning_rate": 3.4092000000000005e-05, "loss": 8.7397, "step": 3977000 }, { "epoch": 31.82, "learning_rate": 3.409e-05, "loss": 8.7473, "step": 3977500 }, { "epoch": 31.82, "learning_rate": 3.4088e-05, "loss": 8.75, "step": 3978000 }, { "epoch": 31.83, "learning_rate": 3.4086e-05, "loss": 8.7464, "step": 3978500 }, { "epoch": 31.83, "learning_rate": 3.4084e-05, "loss": 8.7444, "step": 3979000 }, { "epoch": 31.84, "learning_rate": 3.4082e-05, "loss": 8.75, "step": 3979500 }, { "epoch": 31.84, "learning_rate": 3.408e-05, "loss": 8.776, "step": 3980000 }, { "epoch": 31.84, "learning_rate": 3.4078e-05, "loss": 8.7603, "step": 3980500 }, { "epoch": 31.85, "learning_rate": 3.4076000000000004e-05, "loss": 8.764, "step": 3981000 }, { "epoch": 31.85, "learning_rate": 3.4074e-05, "loss": 8.7192, "step": 3981500 }, { "epoch": 31.86, "learning_rate": 3.4072e-05, "loss": 8.7552, "step": 3982000 }, { "epoch": 31.86, "learning_rate": 3.4070000000000004e-05, "loss": 8.752, "step": 3982500 }, { "epoch": 31.86, "learning_rate": 3.4068e-05, "loss": 8.7294, "step": 3983000 }, { "epoch": 31.87, "learning_rate": 3.4066e-05, "loss": 8.7503, "step": 3983500 }, { "epoch": 31.87, "learning_rate": 3.4064000000000005e-05, "loss": 8.7556, "step": 3984000 }, { "epoch": 31.88, "learning_rate": 3.4062e-05, "loss": 8.7618, "step": 3984500 }, { "epoch": 31.88, "learning_rate": 3.406e-05, "loss": 8.7732, "step": 3985000 }, { "epoch": 31.88, "learning_rate": 3.4058e-05, "loss": 8.7644, "step": 3985500 }, { "epoch": 31.89, "learning_rate": 3.4056e-05, "loss": 8.7546, "step": 3986000 }, { "epoch": 31.89, "learning_rate": 3.4054e-05, "loss": 8.7538, "step": 3986500 }, { "epoch": 31.9, "learning_rate": 3.4052e-05, "loss": 8.7564, "step": 3987000 }, { "epoch": 31.9, "learning_rate": 3.405e-05, "loss": 8.7406, "step": 3987500 }, { "epoch": 31.9, "learning_rate": 3.4048e-05, "loss": 8.7361, "step": 3988000 }, { "epoch": 31.91, "learning_rate": 3.4046e-05, "loss": 8.7265, "step": 3988500 }, { "epoch": 31.91, "learning_rate": 3.4044e-05, "loss": 8.7352, "step": 3989000 }, { "epoch": 31.92, "learning_rate": 3.4042000000000004e-05, "loss": 8.7474, "step": 3989500 }, { "epoch": 31.92, "learning_rate": 3.404e-05, "loss": 8.7549, "step": 3990000 }, { "epoch": 31.92, "learning_rate": 3.4038e-05, "loss": 8.7549, "step": 3990500 }, { "epoch": 31.93, "learning_rate": 3.4036000000000004e-05, "loss": 8.7251, "step": 3991000 }, { "epoch": 31.93, "learning_rate": 3.4034e-05, "loss": 8.7553, "step": 3991500 }, { "epoch": 31.94, "learning_rate": 3.4032e-05, "loss": 8.7486, "step": 3992000 }, { "epoch": 31.94, "learning_rate": 3.403e-05, "loss": 8.7333, "step": 3992500 }, { "epoch": 31.94, "learning_rate": 3.402800000000001e-05, "loss": 8.7596, "step": 3993000 }, { "epoch": 31.95, "learning_rate": 3.4026e-05, "loss": 8.7536, "step": 3993500 }, { "epoch": 31.95, "learning_rate": 3.4024e-05, "loss": 8.7494, "step": 3994000 }, { "epoch": 31.96, "learning_rate": 3.4022e-05, "loss": 8.7536, "step": 3994500 }, { "epoch": 31.96, "learning_rate": 3.402e-05, "loss": 8.7355, "step": 3995000 }, { "epoch": 31.96, "learning_rate": 3.4018e-05, "loss": 8.7502, "step": 3995500 }, { "epoch": 31.97, "learning_rate": 3.4016e-05, "loss": 8.7354, "step": 3996000 }, { "epoch": 31.97, "learning_rate": 3.4014e-05, "loss": 8.7583, "step": 3996500 }, { "epoch": 31.98, "learning_rate": 3.4012e-05, "loss": 8.7434, "step": 3997000 }, { "epoch": 31.98, "learning_rate": 3.401e-05, "loss": 8.7437, "step": 3997500 }, { "epoch": 31.98, "learning_rate": 3.4008000000000004e-05, "loss": 8.7256, "step": 3998000 }, { "epoch": 31.99, "learning_rate": 3.4006e-05, "loss": 8.7521, "step": 3998500 }, { "epoch": 31.99, "learning_rate": 3.4004e-05, "loss": 8.7571, "step": 3999000 }, { "epoch": 32.0, "learning_rate": 3.4002e-05, "loss": 8.7593, "step": 3999500 }, { "epoch": 32.0, "learning_rate": 3.4000000000000007e-05, "loss": 8.7462, "step": 4000000 }, { "epoch": 32.0, "learning_rate": 3.3998e-05, "loss": 8.7484, "step": 4000500 }, { "epoch": 32.01, "learning_rate": 3.3996e-05, "loss": 8.7437, "step": 4001000 }, { "epoch": 32.01, "learning_rate": 3.399400000000001e-05, "loss": 8.7706, "step": 4001500 }, { "epoch": 32.02, "learning_rate": 3.3992e-05, "loss": 8.7564, "step": 4002000 }, { "epoch": 32.02, "learning_rate": 3.399e-05, "loss": 8.7345, "step": 4002500 }, { "epoch": 32.02, "learning_rate": 3.3988e-05, "loss": 8.7671, "step": 4003000 }, { "epoch": 32.03, "learning_rate": 3.3986e-05, "loss": 8.7348, "step": 4003500 }, { "epoch": 32.03, "learning_rate": 3.3984e-05, "loss": 8.7537, "step": 4004000 }, { "epoch": 32.04, "learning_rate": 3.3982e-05, "loss": 8.7802, "step": 4004500 }, { "epoch": 32.04, "learning_rate": 3.398e-05, "loss": 8.7583, "step": 4005000 }, { "epoch": 32.04, "learning_rate": 3.3978000000000006e-05, "loss": 8.7541, "step": 4005500 }, { "epoch": 32.05, "learning_rate": 3.3976e-05, "loss": 8.7464, "step": 4006000 }, { "epoch": 32.05, "learning_rate": 3.3974e-05, "loss": 8.7506, "step": 4006500 }, { "epoch": 32.06, "learning_rate": 3.3972000000000006e-05, "loss": 8.7436, "step": 4007000 }, { "epoch": 32.06, "learning_rate": 3.397e-05, "loss": 8.7423, "step": 4007500 }, { "epoch": 32.06, "learning_rate": 3.3968e-05, "loss": 8.7493, "step": 4008000 }, { "epoch": 32.07, "learning_rate": 3.3966000000000006e-05, "loss": 8.7593, "step": 4008500 }, { "epoch": 32.07, "learning_rate": 3.3964e-05, "loss": 8.7635, "step": 4009000 }, { "epoch": 32.08, "learning_rate": 3.3962e-05, "loss": 8.7575, "step": 4009500 }, { "epoch": 32.08, "learning_rate": 3.396e-05, "loss": 8.728, "step": 4010000 }, { "epoch": 32.08, "learning_rate": 3.3958e-05, "loss": 8.7381, "step": 4010500 }, { "epoch": 32.09, "learning_rate": 3.3956e-05, "loss": 8.7537, "step": 4011000 }, { "epoch": 32.09, "learning_rate": 3.3954e-05, "loss": 8.7487, "step": 4011500 }, { "epoch": 32.1, "learning_rate": 3.3952e-05, "loss": 8.7279, "step": 4012000 }, { "epoch": 32.1, "learning_rate": 3.3950000000000005e-05, "loss": 8.7442, "step": 4012500 }, { "epoch": 32.1, "learning_rate": 3.3948e-05, "loss": 8.7389, "step": 4013000 }, { "epoch": 32.11, "learning_rate": 3.3946e-05, "loss": 8.7593, "step": 4013500 }, { "epoch": 32.11, "learning_rate": 3.3944000000000006e-05, "loss": 8.7802, "step": 4014000 }, { "epoch": 32.12, "learning_rate": 3.3942e-05, "loss": 8.7408, "step": 4014500 }, { "epoch": 32.12, "learning_rate": 3.394e-05, "loss": 8.7607, "step": 4015000 }, { "epoch": 32.12, "learning_rate": 3.3938000000000006e-05, "loss": 8.7635, "step": 4015500 }, { "epoch": 32.13, "learning_rate": 3.3936e-05, "loss": 8.7706, "step": 4016000 }, { "epoch": 32.13, "learning_rate": 3.3934e-05, "loss": 8.755, "step": 4016500 }, { "epoch": 32.14, "learning_rate": 3.3932e-05, "loss": 8.7584, "step": 4017000 }, { "epoch": 32.14, "learning_rate": 3.393e-05, "loss": 8.7582, "step": 4017500 }, { "epoch": 32.14, "learning_rate": 3.3928000000000004e-05, "loss": 8.7433, "step": 4018000 }, { "epoch": 32.15, "learning_rate": 3.3926e-05, "loss": 8.7244, "step": 4018500 }, { "epoch": 32.15, "learning_rate": 3.3924e-05, "loss": 8.7349, "step": 4019000 }, { "epoch": 32.16, "learning_rate": 3.3922000000000005e-05, "loss": 8.7316, "step": 4019500 }, { "epoch": 32.16, "learning_rate": 3.392e-05, "loss": 8.7301, "step": 4020000 }, { "epoch": 32.16, "learning_rate": 3.3918e-05, "loss": 8.7398, "step": 4020500 }, { "epoch": 32.17, "learning_rate": 3.3916000000000005e-05, "loss": 8.7772, "step": 4021000 }, { "epoch": 32.17, "learning_rate": 3.3914e-05, "loss": 8.741, "step": 4021500 }, { "epoch": 32.18, "learning_rate": 3.3911999999999997e-05, "loss": 8.7669, "step": 4022000 }, { "epoch": 32.18, "learning_rate": 3.3910000000000006e-05, "loss": 8.7638, "step": 4022500 }, { "epoch": 32.18, "learning_rate": 3.3908e-05, "loss": 8.7472, "step": 4023000 }, { "epoch": 32.19, "learning_rate": 3.3906e-05, "loss": 8.753, "step": 4023500 }, { "epoch": 32.19, "learning_rate": 3.3904e-05, "loss": 8.7602, "step": 4024000 }, { "epoch": 32.2, "learning_rate": 3.3902e-05, "loss": 8.7471, "step": 4024500 }, { "epoch": 32.2, "learning_rate": 3.3900000000000004e-05, "loss": 8.7222, "step": 4025000 }, { "epoch": 32.2, "learning_rate": 3.3898e-05, "loss": 8.7487, "step": 4025500 }, { "epoch": 32.21, "learning_rate": 3.3896e-05, "loss": 8.7482, "step": 4026000 }, { "epoch": 32.21, "learning_rate": 3.3894000000000004e-05, "loss": 8.7592, "step": 4026500 }, { "epoch": 32.22, "learning_rate": 3.3892e-05, "loss": 8.7415, "step": 4027000 }, { "epoch": 32.22, "learning_rate": 3.389e-05, "loss": 8.7455, "step": 4027500 }, { "epoch": 32.22, "learning_rate": 3.3888000000000005e-05, "loss": 8.7634, "step": 4028000 }, { "epoch": 32.23, "learning_rate": 3.3886e-05, "loss": 8.729, "step": 4028500 }, { "epoch": 32.23, "learning_rate": 3.3883999999999996e-05, "loss": 8.7461, "step": 4029000 }, { "epoch": 32.24, "learning_rate": 3.3882000000000005e-05, "loss": 8.7552, "step": 4029500 }, { "epoch": 32.24, "learning_rate": 3.388e-05, "loss": 8.7407, "step": 4030000 }, { "epoch": 32.24, "learning_rate": 3.3878e-05, "loss": 8.7728, "step": 4030500 }, { "epoch": 32.25, "learning_rate": 3.3876e-05, "loss": 8.7441, "step": 4031000 }, { "epoch": 32.25, "learning_rate": 3.3874e-05, "loss": 8.7512, "step": 4031500 }, { "epoch": 32.26, "learning_rate": 3.3872000000000004e-05, "loss": 8.7574, "step": 4032000 }, { "epoch": 32.26, "learning_rate": 3.387e-05, "loss": 8.7505, "step": 4032500 }, { "epoch": 32.26, "learning_rate": 3.3868e-05, "loss": 8.7429, "step": 4033000 }, { "epoch": 32.27, "learning_rate": 3.3866000000000004e-05, "loss": 8.7574, "step": 4033500 }, { "epoch": 32.27, "learning_rate": 3.3864e-05, "loss": 8.7694, "step": 4034000 }, { "epoch": 32.28, "learning_rate": 3.3862e-05, "loss": 8.7439, "step": 4034500 }, { "epoch": 32.28, "learning_rate": 3.3860000000000004e-05, "loss": 8.746, "step": 4035000 }, { "epoch": 32.28, "learning_rate": 3.3858e-05, "loss": 8.7574, "step": 4035500 }, { "epoch": 32.29, "learning_rate": 3.3856e-05, "loss": 8.7607, "step": 4036000 }, { "epoch": 32.29, "learning_rate": 3.3854000000000005e-05, "loss": 8.7444, "step": 4036500 }, { "epoch": 32.3, "learning_rate": 3.3852e-05, "loss": 8.7542, "step": 4037000 }, { "epoch": 32.3, "learning_rate": 3.385e-05, "loss": 8.7519, "step": 4037500 }, { "epoch": 32.3, "learning_rate": 3.3848000000000005e-05, "loss": 8.7384, "step": 4038000 }, { "epoch": 32.31, "learning_rate": 3.3846e-05, "loss": 8.7435, "step": 4038500 }, { "epoch": 32.31, "learning_rate": 3.3844e-05, "loss": 8.7704, "step": 4039000 }, { "epoch": 32.32, "learning_rate": 3.3842e-05, "loss": 8.7747, "step": 4039500 }, { "epoch": 32.32, "learning_rate": 3.384e-05, "loss": 8.7257, "step": 4040000 }, { "epoch": 32.32, "learning_rate": 3.3838000000000004e-05, "loss": 8.7362, "step": 4040500 }, { "epoch": 32.33, "learning_rate": 3.3836e-05, "loss": 8.7565, "step": 4041000 }, { "epoch": 32.33, "learning_rate": 3.3834e-05, "loss": 8.7477, "step": 4041500 }, { "epoch": 32.34, "learning_rate": 3.3832000000000004e-05, "loss": 8.738, "step": 4042000 }, { "epoch": 32.34, "learning_rate": 3.383e-05, "loss": 8.7626, "step": 4042500 }, { "epoch": 32.34, "learning_rate": 3.3828e-05, "loss": 8.7592, "step": 4043000 }, { "epoch": 32.35, "learning_rate": 3.3826000000000004e-05, "loss": 8.7485, "step": 4043500 }, { "epoch": 32.35, "learning_rate": 3.3824e-05, "loss": 8.7421, "step": 4044000 }, { "epoch": 32.36, "learning_rate": 3.3822e-05, "loss": 8.7761, "step": 4044500 }, { "epoch": 32.36, "learning_rate": 3.3820000000000005e-05, "loss": 8.7366, "step": 4045000 }, { "epoch": 32.36, "learning_rate": 3.3818e-05, "loss": 8.753, "step": 4045500 }, { "epoch": 32.37, "learning_rate": 3.3816e-05, "loss": 8.7521, "step": 4046000 }, { "epoch": 32.37, "learning_rate": 3.3814e-05, "loss": 8.7294, "step": 4046500 }, { "epoch": 32.38, "learning_rate": 3.3812e-05, "loss": 8.7328, "step": 4047000 }, { "epoch": 32.38, "learning_rate": 3.381e-05, "loss": 8.7472, "step": 4047500 }, { "epoch": 32.38, "learning_rate": 3.3808e-05, "loss": 8.7613, "step": 4048000 }, { "epoch": 32.39, "learning_rate": 3.3806e-05, "loss": 8.7361, "step": 4048500 }, { "epoch": 32.39, "learning_rate": 3.3804000000000004e-05, "loss": 8.7501, "step": 4049000 }, { "epoch": 32.4, "learning_rate": 3.3802e-05, "loss": 8.7411, "step": 4049500 }, { "epoch": 32.4, "learning_rate": 3.38e-05, "loss": 8.7708, "step": 4050000 }, { "epoch": 32.4, "learning_rate": 3.3798000000000004e-05, "loss": 8.7555, "step": 4050500 }, { "epoch": 32.41, "learning_rate": 3.3796e-05, "loss": 8.7354, "step": 4051000 }, { "epoch": 32.41, "learning_rate": 3.3794e-05, "loss": 8.7553, "step": 4051500 }, { "epoch": 32.42, "learning_rate": 3.3792000000000004e-05, "loss": 8.761, "step": 4052000 }, { "epoch": 32.42, "learning_rate": 3.379e-05, "loss": 8.7333, "step": 4052500 }, { "epoch": 32.42, "learning_rate": 3.3788e-05, "loss": 8.7641, "step": 4053000 }, { "epoch": 32.43, "learning_rate": 3.3786e-05, "loss": 8.7667, "step": 4053500 }, { "epoch": 32.43, "learning_rate": 3.3784e-05, "loss": 8.743, "step": 4054000 }, { "epoch": 32.44, "learning_rate": 3.3782e-05, "loss": 8.7424, "step": 4054500 }, { "epoch": 32.44, "learning_rate": 3.378e-05, "loss": 8.7581, "step": 4055000 }, { "epoch": 32.44, "learning_rate": 3.3778e-05, "loss": 8.7483, "step": 4055500 }, { "epoch": 32.45, "learning_rate": 3.3776e-05, "loss": 8.7707, "step": 4056000 }, { "epoch": 32.45, "learning_rate": 3.3774e-05, "loss": 8.7635, "step": 4056500 }, { "epoch": 32.46, "learning_rate": 3.3772e-05, "loss": 8.7409, "step": 4057000 }, { "epoch": 32.46, "learning_rate": 3.3770000000000004e-05, "loss": 8.7402, "step": 4057500 }, { "epoch": 32.46, "learning_rate": 3.3768e-05, "loss": 8.7471, "step": 4058000 }, { "epoch": 32.47, "learning_rate": 3.3766e-05, "loss": 8.7475, "step": 4058500 }, { "epoch": 32.47, "learning_rate": 3.3764000000000004e-05, "loss": 8.724, "step": 4059000 }, { "epoch": 32.48, "learning_rate": 3.3762e-05, "loss": 8.7417, "step": 4059500 }, { "epoch": 32.48, "learning_rate": 3.376e-05, "loss": 8.7405, "step": 4060000 }, { "epoch": 32.48, "learning_rate": 3.3758e-05, "loss": 8.7352, "step": 4060500 }, { "epoch": 32.49, "learning_rate": 3.375600000000001e-05, "loss": 8.7613, "step": 4061000 }, { "epoch": 32.49, "learning_rate": 3.3754e-05, "loss": 8.7244, "step": 4061500 }, { "epoch": 32.5, "learning_rate": 3.3752e-05, "loss": 8.7591, "step": 4062000 }, { "epoch": 32.5, "learning_rate": 3.375000000000001e-05, "loss": 8.7484, "step": 4062500 }, { "epoch": 32.5, "learning_rate": 3.3748e-05, "loss": 8.7453, "step": 4063000 }, { "epoch": 32.51, "learning_rate": 3.3746e-05, "loss": 8.7664, "step": 4063500 }, { "epoch": 32.51, "learning_rate": 3.3744e-05, "loss": 8.7443, "step": 4064000 }, { "epoch": 32.52, "learning_rate": 3.3742e-05, "loss": 8.7599, "step": 4064500 }, { "epoch": 32.52, "learning_rate": 3.374e-05, "loss": 8.7281, "step": 4065000 }, { "epoch": 32.52, "learning_rate": 3.3738e-05, "loss": 8.7371, "step": 4065500 }, { "epoch": 32.53, "learning_rate": 3.3736000000000004e-05, "loss": 8.7329, "step": 4066000 }, { "epoch": 32.53, "learning_rate": 3.3734e-05, "loss": 8.7509, "step": 4066500 }, { "epoch": 32.54, "learning_rate": 3.3732e-05, "loss": 8.7328, "step": 4067000 }, { "epoch": 32.54, "learning_rate": 3.373e-05, "loss": 8.7563, "step": 4067500 }, { "epoch": 32.54, "learning_rate": 3.3728000000000006e-05, "loss": 8.7419, "step": 4068000 }, { "epoch": 32.55, "learning_rate": 3.3726e-05, "loss": 8.7357, "step": 4068500 }, { "epoch": 32.55, "learning_rate": 3.3724e-05, "loss": 8.7318, "step": 4069000 }, { "epoch": 32.56, "learning_rate": 3.372200000000001e-05, "loss": 8.7585, "step": 4069500 }, { "epoch": 32.56, "learning_rate": 3.372e-05, "loss": 8.7661, "step": 4070000 }, { "epoch": 32.56, "learning_rate": 3.3718e-05, "loss": 8.7262, "step": 4070500 }, { "epoch": 32.57, "learning_rate": 3.3716e-05, "loss": 8.7391, "step": 4071000 }, { "epoch": 32.57, "learning_rate": 3.3714e-05, "loss": 8.7606, "step": 4071500 }, { "epoch": 32.58, "learning_rate": 3.3712e-05, "loss": 8.7558, "step": 4072000 }, { "epoch": 32.58, "learning_rate": 3.371e-05, "loss": 8.7531, "step": 4072500 }, { "epoch": 32.58, "learning_rate": 3.3708e-05, "loss": 8.7626, "step": 4073000 }, { "epoch": 32.59, "learning_rate": 3.3706000000000006e-05, "loss": 8.7541, "step": 4073500 }, { "epoch": 32.59, "learning_rate": 3.3704e-05, "loss": 8.752, "step": 4074000 }, { "epoch": 32.6, "learning_rate": 3.3702000000000004e-05, "loss": 8.7611, "step": 4074500 }, { "epoch": 32.6, "learning_rate": 3.3700000000000006e-05, "loss": 8.7369, "step": 4075000 }, { "epoch": 32.6, "learning_rate": 3.3698e-05, "loss": 8.7498, "step": 4075500 }, { "epoch": 32.61, "learning_rate": 3.3696e-05, "loss": 8.7452, "step": 4076000 }, { "epoch": 32.61, "learning_rate": 3.3694000000000006e-05, "loss": 8.7308, "step": 4076500 }, { "epoch": 32.62, "learning_rate": 3.3692e-05, "loss": 8.7665, "step": 4077000 }, { "epoch": 32.62, "learning_rate": 3.369e-05, "loss": 8.7421, "step": 4077500 }, { "epoch": 32.62, "learning_rate": 3.3688e-05, "loss": 8.7526, "step": 4078000 }, { "epoch": 32.63, "learning_rate": 3.3686e-05, "loss": 8.7358, "step": 4078500 }, { "epoch": 32.63, "learning_rate": 3.3684e-05, "loss": 8.7333, "step": 4079000 }, { "epoch": 32.64, "learning_rate": 3.3682e-05, "loss": 8.7491, "step": 4079500 }, { "epoch": 32.64, "learning_rate": 3.368e-05, "loss": 8.7633, "step": 4080000 }, { "epoch": 32.64, "learning_rate": 3.3678000000000005e-05, "loss": 8.7508, "step": 4080500 }, { "epoch": 32.65, "learning_rate": 3.3676e-05, "loss": 8.7606, "step": 4081000 }, { "epoch": 32.65, "learning_rate": 3.3674e-05, "loss": 8.7504, "step": 4081500 }, { "epoch": 32.66, "learning_rate": 3.3672000000000006e-05, "loss": 8.7418, "step": 4082000 }, { "epoch": 32.66, "learning_rate": 3.367e-05, "loss": 8.7522, "step": 4082500 }, { "epoch": 32.66, "learning_rate": 3.3668e-05, "loss": 8.757, "step": 4083000 }, { "epoch": 32.67, "learning_rate": 3.3666000000000006e-05, "loss": 8.7589, "step": 4083500 }, { "epoch": 32.67, "learning_rate": 3.3664e-05, "loss": 8.7485, "step": 4084000 }, { "epoch": 32.68, "learning_rate": 3.3662e-05, "loss": 8.7707, "step": 4084500 }, { "epoch": 32.68, "learning_rate": 3.366e-05, "loss": 8.7512, "step": 4085000 }, { "epoch": 32.68, "learning_rate": 3.3658e-05, "loss": 8.7502, "step": 4085500 }, { "epoch": 32.69, "learning_rate": 3.3656000000000004e-05, "loss": 8.7533, "step": 4086000 }, { "epoch": 32.69, "learning_rate": 3.3654e-05, "loss": 8.7375, "step": 4086500 }, { "epoch": 32.7, "learning_rate": 3.3652e-05, "loss": 8.7379, "step": 4087000 }, { "epoch": 32.7, "learning_rate": 3.3650000000000005e-05, "loss": 8.7403, "step": 4087500 }, { "epoch": 32.7, "learning_rate": 3.3648e-05, "loss": 8.748, "step": 4088000 }, { "epoch": 32.71, "learning_rate": 3.3646e-05, "loss": 8.7427, "step": 4088500 }, { "epoch": 32.71, "learning_rate": 3.3644000000000005e-05, "loss": 8.7372, "step": 4089000 }, { "epoch": 32.72, "learning_rate": 3.3642e-05, "loss": 8.7528, "step": 4089500 }, { "epoch": 32.72, "learning_rate": 3.3639999999999996e-05, "loss": 8.7673, "step": 4090000 }, { "epoch": 32.72, "learning_rate": 3.3638000000000006e-05, "loss": 8.7397, "step": 4090500 }, { "epoch": 32.73, "learning_rate": 3.3636e-05, "loss": 8.7607, "step": 4091000 }, { "epoch": 32.73, "learning_rate": 3.3634e-05, "loss": 8.7483, "step": 4091500 }, { "epoch": 32.74, "learning_rate": 3.3632e-05, "loss": 8.7538, "step": 4092000 }, { "epoch": 32.74, "learning_rate": 3.363e-05, "loss": 8.7556, "step": 4092500 }, { "epoch": 32.74, "learning_rate": 3.3628000000000004e-05, "loss": 8.768, "step": 4093000 }, { "epoch": 32.75, "learning_rate": 3.3626e-05, "loss": 8.7756, "step": 4093500 }, { "epoch": 32.75, "learning_rate": 3.3624e-05, "loss": 8.7497, "step": 4094000 }, { "epoch": 32.76, "learning_rate": 3.3622000000000004e-05, "loss": 8.757, "step": 4094500 }, { "epoch": 32.76, "learning_rate": 3.362e-05, "loss": 8.7737, "step": 4095000 }, { "epoch": 32.76, "learning_rate": 3.3618e-05, "loss": 8.7542, "step": 4095500 }, { "epoch": 32.77, "learning_rate": 3.3616000000000005e-05, "loss": 8.7593, "step": 4096000 }, { "epoch": 32.77, "learning_rate": 3.3614e-05, "loss": 8.7612, "step": 4096500 }, { "epoch": 32.78, "learning_rate": 3.3611999999999996e-05, "loss": 8.749, "step": 4097000 }, { "epoch": 32.78, "learning_rate": 3.3610000000000005e-05, "loss": 8.7684, "step": 4097500 }, { "epoch": 32.78, "learning_rate": 3.3608e-05, "loss": 8.767, "step": 4098000 }, { "epoch": 32.79, "learning_rate": 3.3606e-05, "loss": 8.768, "step": 4098500 }, { "epoch": 32.79, "learning_rate": 3.3604e-05, "loss": 8.7398, "step": 4099000 }, { "epoch": 32.8, "learning_rate": 3.3602e-05, "loss": 8.7376, "step": 4099500 }, { "epoch": 32.8, "learning_rate": 3.3600000000000004e-05, "loss": 8.7506, "step": 4100000 }, { "epoch": 32.8, "learning_rate": 3.3598e-05, "loss": 8.7499, "step": 4100500 }, { "epoch": 32.81, "learning_rate": 3.3596e-05, "loss": 8.7726, "step": 4101000 }, { "epoch": 32.81, "learning_rate": 3.3594000000000004e-05, "loss": 8.7631, "step": 4101500 }, { "epoch": 32.82, "learning_rate": 3.3592e-05, "loss": 8.7462, "step": 4102000 }, { "epoch": 32.82, "learning_rate": 3.359e-05, "loss": 8.763, "step": 4102500 }, { "epoch": 32.82, "learning_rate": 3.3588000000000004e-05, "loss": 8.7168, "step": 4103000 }, { "epoch": 32.83, "learning_rate": 3.3586e-05, "loss": 8.7554, "step": 4103500 }, { "epoch": 32.83, "learning_rate": 3.3584e-05, "loss": 8.7563, "step": 4104000 }, { "epoch": 32.84, "learning_rate": 3.3582000000000005e-05, "loss": 8.7608, "step": 4104500 }, { "epoch": 32.84, "learning_rate": 3.358e-05, "loss": 8.7516, "step": 4105000 }, { "epoch": 32.84, "learning_rate": 3.3578e-05, "loss": 8.7404, "step": 4105500 }, { "epoch": 32.85, "learning_rate": 3.3576000000000005e-05, "loss": 8.7536, "step": 4106000 }, { "epoch": 32.85, "learning_rate": 3.3574e-05, "loss": 8.7352, "step": 4106500 }, { "epoch": 32.86, "learning_rate": 3.3572e-05, "loss": 8.7305, "step": 4107000 }, { "epoch": 32.86, "learning_rate": 3.357e-05, "loss": 8.76, "step": 4107500 }, { "epoch": 32.86, "learning_rate": 3.3568e-05, "loss": 8.7794, "step": 4108000 }, { "epoch": 32.87, "learning_rate": 3.3566000000000004e-05, "loss": 8.7626, "step": 4108500 }, { "epoch": 32.87, "learning_rate": 3.3564e-05, "loss": 8.7634, "step": 4109000 }, { "epoch": 32.88, "learning_rate": 3.3562e-05, "loss": 8.7424, "step": 4109500 }, { "epoch": 32.88, "learning_rate": 3.3560000000000004e-05, "loss": 8.7384, "step": 4110000 }, { "epoch": 32.88, "learning_rate": 3.3558e-05, "loss": 8.7373, "step": 4110500 }, { "epoch": 32.89, "learning_rate": 3.3556e-05, "loss": 8.7502, "step": 4111000 }, { "epoch": 32.89, "learning_rate": 3.3554000000000004e-05, "loss": 8.7529, "step": 4111500 }, { "epoch": 32.9, "learning_rate": 3.3552e-05, "loss": 8.7616, "step": 4112000 }, { "epoch": 32.9, "learning_rate": 3.355e-05, "loss": 8.7599, "step": 4112500 }, { "epoch": 32.9, "learning_rate": 3.3548000000000005e-05, "loss": 8.7595, "step": 4113000 }, { "epoch": 32.91, "learning_rate": 3.3546e-05, "loss": 8.7788, "step": 4113500 }, { "epoch": 32.91, "learning_rate": 3.3544e-05, "loss": 8.7382, "step": 4114000 }, { "epoch": 32.92, "learning_rate": 3.3542e-05, "loss": 8.7376, "step": 4114500 }, { "epoch": 32.92, "learning_rate": 3.354e-05, "loss": 8.7332, "step": 4115000 }, { "epoch": 32.92, "learning_rate": 3.3538e-05, "loss": 8.7468, "step": 4115500 }, { "epoch": 32.93, "learning_rate": 3.3536e-05, "loss": 8.747, "step": 4116000 }, { "epoch": 32.93, "learning_rate": 3.3534e-05, "loss": 8.7368, "step": 4116500 }, { "epoch": 32.94, "learning_rate": 3.3532000000000004e-05, "loss": 8.7473, "step": 4117000 }, { "epoch": 32.94, "learning_rate": 3.353e-05, "loss": 8.7419, "step": 4117500 }, { "epoch": 32.94, "learning_rate": 3.3528e-05, "loss": 8.7606, "step": 4118000 }, { "epoch": 32.95, "learning_rate": 3.3526000000000004e-05, "loss": 8.7469, "step": 4118500 }, { "epoch": 32.95, "learning_rate": 3.3524e-05, "loss": 8.7457, "step": 4119000 }, { "epoch": 32.96, "learning_rate": 3.3522e-05, "loss": 8.7685, "step": 4119500 }, { "epoch": 32.96, "learning_rate": 3.3520000000000004e-05, "loss": 8.7489, "step": 4120000 }, { "epoch": 32.96, "learning_rate": 3.3518e-05, "loss": 8.753, "step": 4120500 }, { "epoch": 32.97, "learning_rate": 3.3516e-05, "loss": 8.7389, "step": 4121000 }, { "epoch": 32.97, "learning_rate": 3.3514e-05, "loss": 8.7128, "step": 4121500 }, { "epoch": 32.98, "learning_rate": 3.3512e-05, "loss": 8.7503, "step": 4122000 }, { "epoch": 32.98, "learning_rate": 3.351e-05, "loss": 8.763, "step": 4122500 }, { "epoch": 32.98, "learning_rate": 3.3508e-05, "loss": 8.7389, "step": 4123000 }, { "epoch": 32.99, "learning_rate": 3.3506e-05, "loss": 8.7426, "step": 4123500 }, { "epoch": 32.99, "learning_rate": 3.3504e-05, "loss": 8.7535, "step": 4124000 }, { "epoch": 33.0, "learning_rate": 3.3502e-05, "loss": 8.7156, "step": 4124500 }, { "epoch": 33.0, "learning_rate": 3.35e-05, "loss": 8.7303, "step": 4125000 }, { "epoch": 33.0, "learning_rate": 3.3498000000000004e-05, "loss": 8.7541, "step": 4125500 }, { "epoch": 33.01, "learning_rate": 3.3496e-05, "loss": 8.7396, "step": 4126000 }, { "epoch": 33.01, "learning_rate": 3.3494e-05, "loss": 8.7615, "step": 4126500 }, { "epoch": 33.02, "learning_rate": 3.3492000000000004e-05, "loss": 8.7449, "step": 4127000 }, { "epoch": 33.02, "learning_rate": 3.349e-05, "loss": 8.7463, "step": 4127500 }, { "epoch": 33.02, "learning_rate": 3.3488e-05, "loss": 8.7531, "step": 4128000 }, { "epoch": 33.03, "learning_rate": 3.3486e-05, "loss": 8.7454, "step": 4128500 }, { "epoch": 33.03, "learning_rate": 3.348400000000001e-05, "loss": 8.7535, "step": 4129000 }, { "epoch": 33.04, "learning_rate": 3.3482e-05, "loss": 8.7551, "step": 4129500 }, { "epoch": 33.04, "learning_rate": 3.348e-05, "loss": 8.7514, "step": 4130000 }, { "epoch": 33.04, "learning_rate": 3.347800000000001e-05, "loss": 8.7637, "step": 4130500 }, { "epoch": 33.05, "learning_rate": 3.3476e-05, "loss": 8.7948, "step": 4131000 }, { "epoch": 33.05, "learning_rate": 3.3474e-05, "loss": 8.7575, "step": 4131500 }, { "epoch": 33.06, "learning_rate": 3.3472e-05, "loss": 8.7535, "step": 4132000 }, { "epoch": 33.06, "learning_rate": 3.347e-05, "loss": 8.7727, "step": 4132500 }, { "epoch": 33.06, "learning_rate": 3.3468e-05, "loss": 8.7443, "step": 4133000 }, { "epoch": 33.07, "learning_rate": 3.3466e-05, "loss": 8.7515, "step": 4133500 }, { "epoch": 33.07, "learning_rate": 3.3464000000000004e-05, "loss": 8.757, "step": 4134000 }, { "epoch": 33.08, "learning_rate": 3.3462e-05, "loss": 8.7728, "step": 4134500 }, { "epoch": 33.08, "learning_rate": 3.346e-05, "loss": 8.7524, "step": 4135000 }, { "epoch": 33.08, "learning_rate": 3.3458e-05, "loss": 8.7503, "step": 4135500 }, { "epoch": 33.09, "learning_rate": 3.3456000000000006e-05, "loss": 8.7608, "step": 4136000 }, { "epoch": 33.09, "learning_rate": 3.3454e-05, "loss": 8.7329, "step": 4136500 }, { "epoch": 33.1, "learning_rate": 3.3452e-05, "loss": 8.7339, "step": 4137000 }, { "epoch": 33.1, "learning_rate": 3.345000000000001e-05, "loss": 8.7417, "step": 4137500 }, { "epoch": 33.1, "learning_rate": 3.3448e-05, "loss": 8.7416, "step": 4138000 }, { "epoch": 33.11, "learning_rate": 3.3446e-05, "loss": 8.7682, "step": 4138500 }, { "epoch": 33.11, "learning_rate": 3.3444e-05, "loss": 8.7404, "step": 4139000 }, { "epoch": 33.12, "learning_rate": 3.3442e-05, "loss": 8.7337, "step": 4139500 }, { "epoch": 33.12, "learning_rate": 3.344e-05, "loss": 8.758, "step": 4140000 }, { "epoch": 33.12, "learning_rate": 3.3438e-05, "loss": 8.7495, "step": 4140500 }, { "epoch": 33.13, "learning_rate": 3.3436e-05, "loss": 8.7622, "step": 4141000 }, { "epoch": 33.13, "learning_rate": 3.3434000000000005e-05, "loss": 8.7651, "step": 4141500 }, { "epoch": 33.14, "learning_rate": 3.3432e-05, "loss": 8.7401, "step": 4142000 }, { "epoch": 33.14, "learning_rate": 3.3430000000000003e-05, "loss": 8.7598, "step": 4142500 }, { "epoch": 33.14, "learning_rate": 3.3428000000000006e-05, "loss": 8.7594, "step": 4143000 }, { "epoch": 33.15, "learning_rate": 3.3426e-05, "loss": 8.7315, "step": 4143500 }, { "epoch": 33.15, "learning_rate": 3.3424e-05, "loss": 8.7512, "step": 4144000 }, { "epoch": 33.16, "learning_rate": 3.3422000000000006e-05, "loss": 8.7293, "step": 4144500 }, { "epoch": 33.16, "learning_rate": 3.342e-05, "loss": 8.7402, "step": 4145000 }, { "epoch": 33.16, "learning_rate": 3.3418e-05, "loss": 8.7597, "step": 4145500 }, { "epoch": 33.17, "learning_rate": 3.3416e-05, "loss": 8.7462, "step": 4146000 }, { "epoch": 33.17, "learning_rate": 3.3414e-05, "loss": 8.7547, "step": 4146500 }, { "epoch": 33.18, "learning_rate": 3.3412e-05, "loss": 8.7639, "step": 4147000 }, { "epoch": 33.18, "learning_rate": 3.341e-05, "loss": 8.7465, "step": 4147500 }, { "epoch": 33.18, "learning_rate": 3.3408e-05, "loss": 8.7275, "step": 4148000 }, { "epoch": 33.19, "learning_rate": 3.3406000000000005e-05, "loss": 8.7558, "step": 4148500 }, { "epoch": 33.19, "learning_rate": 3.3404e-05, "loss": 8.7506, "step": 4149000 }, { "epoch": 33.2, "learning_rate": 3.3402e-05, "loss": 8.7475, "step": 4149500 }, { "epoch": 33.2, "learning_rate": 3.3400000000000005e-05, "loss": 8.7438, "step": 4150000 }, { "epoch": 33.2, "learning_rate": 3.3398e-05, "loss": 8.7268, "step": 4150500 }, { "epoch": 33.21, "learning_rate": 3.3396e-05, "loss": 8.7702, "step": 4151000 }, { "epoch": 33.21, "learning_rate": 3.3394000000000006e-05, "loss": 8.7418, "step": 4151500 }, { "epoch": 33.22, "learning_rate": 3.3392e-05, "loss": 8.7481, "step": 4152000 }, { "epoch": 33.22, "learning_rate": 3.339e-05, "loss": 8.7452, "step": 4152500 }, { "epoch": 33.22, "learning_rate": 3.3388e-05, "loss": 8.7521, "step": 4153000 }, { "epoch": 33.23, "learning_rate": 3.3386e-05, "loss": 8.7526, "step": 4153500 }, { "epoch": 33.23, "learning_rate": 3.3384000000000004e-05, "loss": 8.7546, "step": 4154000 }, { "epoch": 33.24, "learning_rate": 3.3382e-05, "loss": 8.7537, "step": 4154500 }, { "epoch": 33.24, "learning_rate": 3.338e-05, "loss": 8.7463, "step": 4155000 }, { "epoch": 33.24, "learning_rate": 3.3378000000000005e-05, "loss": 8.7448, "step": 4155500 }, { "epoch": 33.25, "learning_rate": 3.3376e-05, "loss": 8.7554, "step": 4156000 }, { "epoch": 33.25, "learning_rate": 3.3374e-05, "loss": 8.751, "step": 4156500 }, { "epoch": 33.26, "learning_rate": 3.3372000000000005e-05, "loss": 8.7388, "step": 4157000 }, { "epoch": 33.26, "learning_rate": 3.337e-05, "loss": 8.7433, "step": 4157500 }, { "epoch": 33.26, "learning_rate": 3.3367999999999996e-05, "loss": 8.7418, "step": 4158000 }, { "epoch": 33.27, "learning_rate": 3.3366000000000005e-05, "loss": 8.759, "step": 4158500 }, { "epoch": 33.27, "learning_rate": 3.3364e-05, "loss": 8.7472, "step": 4159000 }, { "epoch": 33.28, "learning_rate": 3.3362000000000003e-05, "loss": 8.7527, "step": 4159500 }, { "epoch": 33.28, "learning_rate": 3.336e-05, "loss": 8.7521, "step": 4160000 }, { "epoch": 33.28, "learning_rate": 3.3358e-05, "loss": 8.7527, "step": 4160500 }, { "epoch": 33.29, "learning_rate": 3.3356000000000004e-05, "loss": 8.734, "step": 4161000 }, { "epoch": 33.29, "learning_rate": 3.3354e-05, "loss": 8.7324, "step": 4161500 }, { "epoch": 33.3, "learning_rate": 3.3352e-05, "loss": 8.7469, "step": 4162000 }, { "epoch": 33.3, "learning_rate": 3.3350000000000004e-05, "loss": 8.755, "step": 4162500 }, { "epoch": 33.3, "learning_rate": 3.3348e-05, "loss": 8.7368, "step": 4163000 }, { "epoch": 33.31, "learning_rate": 3.3346e-05, "loss": 8.7366, "step": 4163500 }, { "epoch": 33.31, "learning_rate": 3.3344000000000005e-05, "loss": 8.7532, "step": 4164000 }, { "epoch": 33.32, "learning_rate": 3.3342e-05, "loss": 8.7479, "step": 4164500 }, { "epoch": 33.32, "learning_rate": 3.3339999999999996e-05, "loss": 8.7475, "step": 4165000 }, { "epoch": 33.32, "learning_rate": 3.3338000000000005e-05, "loss": 8.7292, "step": 4165500 }, { "epoch": 33.33, "learning_rate": 3.3336e-05, "loss": 8.7444, "step": 4166000 }, { "epoch": 33.33, "learning_rate": 3.3334e-05, "loss": 8.7505, "step": 4166500 }, { "epoch": 33.34, "learning_rate": 3.3332000000000005e-05, "loss": 8.7459, "step": 4167000 }, { "epoch": 33.34, "learning_rate": 3.333e-05, "loss": 8.7585, "step": 4167500 }, { "epoch": 33.34, "learning_rate": 3.3328000000000003e-05, "loss": 8.7435, "step": 4168000 }, { "epoch": 33.35, "learning_rate": 3.3326e-05, "loss": 8.7621, "step": 4168500 }, { "epoch": 33.35, "learning_rate": 3.3324e-05, "loss": 8.7337, "step": 4169000 }, { "epoch": 33.36, "learning_rate": 3.3322000000000004e-05, "loss": 8.7514, "step": 4169500 }, { "epoch": 33.36, "learning_rate": 3.332e-05, "loss": 8.7542, "step": 4170000 }, { "epoch": 33.36, "learning_rate": 3.3318e-05, "loss": 8.7559, "step": 4170500 }, { "epoch": 33.37, "learning_rate": 3.3316000000000004e-05, "loss": 8.7517, "step": 4171000 }, { "epoch": 33.37, "learning_rate": 3.3314e-05, "loss": 8.75, "step": 4171500 }, { "epoch": 33.38, "learning_rate": 3.3312e-05, "loss": 8.7455, "step": 4172000 }, { "epoch": 33.38, "learning_rate": 3.3310000000000005e-05, "loss": 8.7454, "step": 4172500 }, { "epoch": 33.38, "learning_rate": 3.3308e-05, "loss": 8.7633, "step": 4173000 }, { "epoch": 33.39, "learning_rate": 3.3306e-05, "loss": 8.7463, "step": 4173500 }, { "epoch": 33.39, "learning_rate": 3.3304000000000005e-05, "loss": 8.7314, "step": 4174000 }, { "epoch": 33.4, "learning_rate": 3.3302e-05, "loss": 8.7451, "step": 4174500 }, { "epoch": 33.4, "learning_rate": 3.33e-05, "loss": 8.7379, "step": 4175000 }, { "epoch": 33.4, "learning_rate": 3.3298e-05, "loss": 8.7309, "step": 4175500 }, { "epoch": 33.41, "learning_rate": 3.3296e-05, "loss": 8.7596, "step": 4176000 }, { "epoch": 33.41, "learning_rate": 3.3294000000000003e-05, "loss": 8.7276, "step": 4176500 }, { "epoch": 33.42, "learning_rate": 3.3292e-05, "loss": 8.7302, "step": 4177000 }, { "epoch": 33.42, "learning_rate": 3.329e-05, "loss": 8.7286, "step": 4177500 }, { "epoch": 33.42, "learning_rate": 3.3288000000000004e-05, "loss": 8.7399, "step": 4178000 }, { "epoch": 33.43, "learning_rate": 3.3286e-05, "loss": 8.7441, "step": 4178500 }, { "epoch": 33.43, "learning_rate": 3.3284e-05, "loss": 8.7469, "step": 4179000 }, { "epoch": 33.44, "learning_rate": 3.3282000000000004e-05, "loss": 8.733, "step": 4179500 }, { "epoch": 33.44, "learning_rate": 3.328e-05, "loss": 8.7552, "step": 4180000 }, { "epoch": 33.44, "learning_rate": 3.3278e-05, "loss": 8.747, "step": 4180500 }, { "epoch": 33.45, "learning_rate": 3.3276000000000005e-05, "loss": 8.7557, "step": 4181000 }, { "epoch": 33.45, "learning_rate": 3.3274e-05, "loss": 8.7426, "step": 4181500 }, { "epoch": 33.46, "learning_rate": 3.3272e-05, "loss": 8.7594, "step": 4182000 }, { "epoch": 33.46, "learning_rate": 3.327e-05, "loss": 8.7343, "step": 4182500 }, { "epoch": 33.46, "learning_rate": 3.3268e-05, "loss": 8.7603, "step": 4183000 }, { "epoch": 33.47, "learning_rate": 3.3266e-05, "loss": 8.7309, "step": 4183500 }, { "epoch": 33.47, "learning_rate": 3.3264e-05, "loss": 8.7556, "step": 4184000 }, { "epoch": 33.48, "learning_rate": 3.3262e-05, "loss": 8.7437, "step": 4184500 }, { "epoch": 33.48, "learning_rate": 3.3260000000000003e-05, "loss": 8.7558, "step": 4185000 }, { "epoch": 33.48, "learning_rate": 3.3258e-05, "loss": 8.7599, "step": 4185500 }, { "epoch": 33.49, "learning_rate": 3.3256e-05, "loss": 8.7457, "step": 4186000 }, { "epoch": 33.49, "learning_rate": 3.3254000000000004e-05, "loss": 8.7402, "step": 4186500 }, { "epoch": 33.5, "learning_rate": 3.3252e-05, "loss": 8.7625, "step": 4187000 }, { "epoch": 33.5, "learning_rate": 3.325e-05, "loss": 8.7531, "step": 4187500 }, { "epoch": 33.5, "learning_rate": 3.3248000000000004e-05, "loss": 8.7375, "step": 4188000 }, { "epoch": 33.51, "learning_rate": 3.3246e-05, "loss": 8.7385, "step": 4188500 }, { "epoch": 33.51, "learning_rate": 3.3244e-05, "loss": 8.7398, "step": 4189000 }, { "epoch": 33.52, "learning_rate": 3.3242e-05, "loss": 8.7635, "step": 4189500 }, { "epoch": 33.52, "learning_rate": 3.324e-05, "loss": 8.751, "step": 4190000 }, { "epoch": 33.52, "learning_rate": 3.3238e-05, "loss": 8.7608, "step": 4190500 }, { "epoch": 33.53, "learning_rate": 3.3236e-05, "loss": 8.739, "step": 4191000 }, { "epoch": 33.53, "learning_rate": 3.323400000000001e-05, "loss": 8.7518, "step": 4191500 }, { "epoch": 33.54, "learning_rate": 3.3232e-05, "loss": 8.7259, "step": 4192000 }, { "epoch": 33.54, "learning_rate": 3.323e-05, "loss": 8.7628, "step": 4192500 }, { "epoch": 33.54, "learning_rate": 3.3228e-05, "loss": 8.7517, "step": 4193000 }, { "epoch": 33.55, "learning_rate": 3.3226000000000003e-05, "loss": 8.7619, "step": 4193500 }, { "epoch": 33.55, "learning_rate": 3.3224e-05, "loss": 8.7475, "step": 4194000 }, { "epoch": 33.56, "learning_rate": 3.3222e-05, "loss": 8.7357, "step": 4194500 }, { "epoch": 33.56, "learning_rate": 3.3220000000000004e-05, "loss": 8.7465, "step": 4195000 }, { "epoch": 33.56, "learning_rate": 3.3218e-05, "loss": 8.7461, "step": 4195500 }, { "epoch": 33.57, "learning_rate": 3.3216e-05, "loss": 8.7648, "step": 4196000 }, { "epoch": 33.57, "learning_rate": 3.3214e-05, "loss": 8.7468, "step": 4196500 }, { "epoch": 33.58, "learning_rate": 3.3212000000000007e-05, "loss": 8.7589, "step": 4197000 }, { "epoch": 33.58, "learning_rate": 3.321e-05, "loss": 8.756, "step": 4197500 }, { "epoch": 33.58, "learning_rate": 3.3208e-05, "loss": 8.756, "step": 4198000 }, { "epoch": 33.59, "learning_rate": 3.320600000000001e-05, "loss": 8.7569, "step": 4198500 }, { "epoch": 33.59, "learning_rate": 3.3204e-05, "loss": 8.7457, "step": 4199000 }, { "epoch": 33.6, "learning_rate": 3.3202e-05, "loss": 8.7483, "step": 4199500 }, { "epoch": 33.6, "learning_rate": 3.32e-05, "loss": 8.7501, "step": 4200000 }, { "epoch": 33.6, "learning_rate": 3.3198e-05, "loss": 8.7635, "step": 4200500 }, { "epoch": 33.61, "learning_rate": 3.3196e-05, "loss": 8.7577, "step": 4201000 }, { "epoch": 33.61, "learning_rate": 3.3194e-05, "loss": 8.7245, "step": 4201500 }, { "epoch": 33.62, "learning_rate": 3.3192000000000003e-05, "loss": 8.7609, "step": 4202000 }, { "epoch": 33.62, "learning_rate": 3.319e-05, "loss": 8.742, "step": 4202500 }, { "epoch": 33.62, "learning_rate": 3.3188e-05, "loss": 8.74, "step": 4203000 }, { "epoch": 33.63, "learning_rate": 3.3186000000000004e-05, "loss": 8.7562, "step": 4203500 }, { "epoch": 33.63, "learning_rate": 3.3184000000000006e-05, "loss": 8.7543, "step": 4204000 }, { "epoch": 33.64, "learning_rate": 3.3182e-05, "loss": 8.7549, "step": 4204500 }, { "epoch": 33.64, "learning_rate": 3.318e-05, "loss": 8.7388, "step": 4205000 }, { "epoch": 33.64, "learning_rate": 3.3178000000000007e-05, "loss": 8.7312, "step": 4205500 }, { "epoch": 33.65, "learning_rate": 3.3176e-05, "loss": 8.7496, "step": 4206000 }, { "epoch": 33.65, "learning_rate": 3.3174e-05, "loss": 8.7641, "step": 4206500 }, { "epoch": 33.66, "learning_rate": 3.3172e-05, "loss": 8.746, "step": 4207000 }, { "epoch": 33.66, "learning_rate": 3.317e-05, "loss": 8.7426, "step": 4207500 }, { "epoch": 33.66, "learning_rate": 3.3168e-05, "loss": 8.7444, "step": 4208000 }, { "epoch": 33.67, "learning_rate": 3.3166e-05, "loss": 8.7416, "step": 4208500 }, { "epoch": 33.67, "learning_rate": 3.3164e-05, "loss": 8.7486, "step": 4209000 }, { "epoch": 33.68, "learning_rate": 3.3162000000000005e-05, "loss": 8.748, "step": 4209500 }, { "epoch": 33.68, "learning_rate": 3.316e-05, "loss": 8.7518, "step": 4210000 }, { "epoch": 33.68, "learning_rate": 3.3158000000000003e-05, "loss": 8.7449, "step": 4210500 }, { "epoch": 33.69, "learning_rate": 3.3156000000000006e-05, "loss": 8.7338, "step": 4211000 }, { "epoch": 33.69, "learning_rate": 3.3154e-05, "loss": 8.765, "step": 4211500 }, { "epoch": 33.7, "learning_rate": 3.3152e-05, "loss": 8.7648, "step": 4212000 }, { "epoch": 33.7, "learning_rate": 3.3150000000000006e-05, "loss": 8.7581, "step": 4212500 }, { "epoch": 33.7, "learning_rate": 3.3148e-05, "loss": 8.7376, "step": 4213000 }, { "epoch": 33.71, "learning_rate": 3.3146e-05, "loss": 8.7454, "step": 4213500 }, { "epoch": 33.71, "learning_rate": 3.3144e-05, "loss": 8.7511, "step": 4214000 }, { "epoch": 33.72, "learning_rate": 3.3142e-05, "loss": 8.7323, "step": 4214500 }, { "epoch": 33.72, "learning_rate": 3.314e-05, "loss": 8.7531, "step": 4215000 }, { "epoch": 33.72, "learning_rate": 3.3138e-05, "loss": 8.746, "step": 4215500 }, { "epoch": 33.73, "learning_rate": 3.3136e-05, "loss": 8.7404, "step": 4216000 }, { "epoch": 33.73, "learning_rate": 3.3134000000000005e-05, "loss": 8.7254, "step": 4216500 }, { "epoch": 33.74, "learning_rate": 3.3132e-05, "loss": 8.741, "step": 4217000 }, { "epoch": 33.74, "learning_rate": 3.313e-05, "loss": 8.7599, "step": 4217500 }, { "epoch": 33.74, "learning_rate": 3.3128000000000005e-05, "loss": 8.7471, "step": 4218000 }, { "epoch": 33.75, "learning_rate": 3.3126e-05, "loss": 8.7478, "step": 4218500 }, { "epoch": 33.75, "learning_rate": 3.3123999999999997e-05, "loss": 8.7488, "step": 4219000 }, { "epoch": 33.76, "learning_rate": 3.3122000000000006e-05, "loss": 8.7563, "step": 4219500 }, { "epoch": 33.76, "learning_rate": 3.312e-05, "loss": 8.7559, "step": 4220000 }, { "epoch": 33.76, "learning_rate": 3.3118e-05, "loss": 8.7554, "step": 4220500 }, { "epoch": 33.77, "learning_rate": 3.3116e-05, "loss": 8.7452, "step": 4221000 }, { "epoch": 33.77, "learning_rate": 3.3114e-05, "loss": 8.7591, "step": 4221500 }, { "epoch": 33.78, "learning_rate": 3.3112000000000004e-05, "loss": 8.7626, "step": 4222000 }, { "epoch": 33.78, "learning_rate": 3.311e-05, "loss": 8.7396, "step": 4222500 }, { "epoch": 33.78, "learning_rate": 3.3108e-05, "loss": 8.7398, "step": 4223000 }, { "epoch": 33.79, "learning_rate": 3.3106000000000005e-05, "loss": 8.7253, "step": 4223500 }, { "epoch": 33.79, "learning_rate": 3.3104e-05, "loss": 8.7321, "step": 4224000 }, { "epoch": 33.8, "learning_rate": 3.3102e-05, "loss": 8.7477, "step": 4224500 }, { "epoch": 33.8, "learning_rate": 3.3100000000000005e-05, "loss": 8.7519, "step": 4225000 }, { "epoch": 33.8, "learning_rate": 3.3098e-05, "loss": 8.7613, "step": 4225500 }, { "epoch": 33.81, "learning_rate": 3.3095999999999996e-05, "loss": 8.749, "step": 4226000 }, { "epoch": 33.81, "learning_rate": 3.3094000000000005e-05, "loss": 8.7236, "step": 4226500 }, { "epoch": 33.82, "learning_rate": 3.3092e-05, "loss": 8.7652, "step": 4227000 }, { "epoch": 33.82, "learning_rate": 3.309e-05, "loss": 8.738, "step": 4227500 }, { "epoch": 33.82, "learning_rate": 3.3088e-05, "loss": 8.7473, "step": 4228000 }, { "epoch": 33.83, "learning_rate": 3.3086e-05, "loss": 8.7694, "step": 4228500 }, { "epoch": 33.83, "learning_rate": 3.3084000000000004e-05, "loss": 8.7531, "step": 4229000 }, { "epoch": 33.84, "learning_rate": 3.3082e-05, "loss": 8.7603, "step": 4229500 }, { "epoch": 33.84, "learning_rate": 3.308e-05, "loss": 8.7551, "step": 4230000 }, { "epoch": 33.84, "learning_rate": 3.3078000000000004e-05, "loss": 8.7501, "step": 4230500 }, { "epoch": 33.85, "learning_rate": 3.3076e-05, "loss": 8.744, "step": 4231000 }, { "epoch": 33.85, "learning_rate": 3.3074e-05, "loss": 8.7366, "step": 4231500 }, { "epoch": 33.86, "learning_rate": 3.3072000000000005e-05, "loss": 8.7532, "step": 4232000 }, { "epoch": 33.86, "learning_rate": 3.307e-05, "loss": 8.7433, "step": 4232500 }, { "epoch": 33.86, "learning_rate": 3.3067999999999996e-05, "loss": 8.7577, "step": 4233000 }, { "epoch": 33.87, "learning_rate": 3.3066000000000005e-05, "loss": 8.7524, "step": 4233500 }, { "epoch": 33.87, "learning_rate": 3.3064e-05, "loss": 8.7427, "step": 4234000 }, { "epoch": 33.88, "learning_rate": 3.3062e-05, "loss": 8.7433, "step": 4234500 }, { "epoch": 33.88, "learning_rate": 3.3060000000000005e-05, "loss": 8.7388, "step": 4235000 }, { "epoch": 33.88, "learning_rate": 3.3058e-05, "loss": 8.7515, "step": 4235500 }, { "epoch": 33.89, "learning_rate": 3.3056e-05, "loss": 8.7653, "step": 4236000 }, { "epoch": 33.89, "learning_rate": 3.3054e-05, "loss": 8.7599, "step": 4236500 }, { "epoch": 33.9, "learning_rate": 3.3052e-05, "loss": 8.7622, "step": 4237000 }, { "epoch": 33.9, "learning_rate": 3.3050000000000004e-05, "loss": 8.7487, "step": 4237500 }, { "epoch": 33.9, "learning_rate": 3.3048e-05, "loss": 8.7584, "step": 4238000 }, { "epoch": 33.91, "learning_rate": 3.3046e-05, "loss": 8.7626, "step": 4238500 }, { "epoch": 33.91, "learning_rate": 3.3044000000000004e-05, "loss": 8.7447, "step": 4239000 }, { "epoch": 33.92, "learning_rate": 3.3042e-05, "loss": 8.7691, "step": 4239500 }, { "epoch": 33.92, "learning_rate": 3.304e-05, "loss": 8.7589, "step": 4240000 }, { "epoch": 33.92, "learning_rate": 3.3038000000000005e-05, "loss": 8.7359, "step": 4240500 }, { "epoch": 33.93, "learning_rate": 3.3036e-05, "loss": 8.7642, "step": 4241000 }, { "epoch": 33.93, "learning_rate": 3.3034e-05, "loss": 8.768, "step": 4241500 }, { "epoch": 33.94, "learning_rate": 3.3032000000000005e-05, "loss": 8.7382, "step": 4242000 }, { "epoch": 33.94, "learning_rate": 3.303e-05, "loss": 8.7323, "step": 4242500 }, { "epoch": 33.94, "learning_rate": 3.3028e-05, "loss": 8.7743, "step": 4243000 }, { "epoch": 33.95, "learning_rate": 3.3026e-05, "loss": 8.72, "step": 4243500 }, { "epoch": 33.95, "learning_rate": 3.3024e-05, "loss": 8.7492, "step": 4244000 }, { "epoch": 33.96, "learning_rate": 3.3022e-05, "loss": 8.7516, "step": 4244500 }, { "epoch": 33.96, "learning_rate": 3.302e-05, "loss": 8.7537, "step": 4245000 }, { "epoch": 33.96, "learning_rate": 3.3018e-05, "loss": 8.7601, "step": 4245500 }, { "epoch": 33.97, "learning_rate": 3.3016000000000004e-05, "loss": 8.7353, "step": 4246000 }, { "epoch": 33.97, "learning_rate": 3.3014e-05, "loss": 8.756, "step": 4246500 }, { "epoch": 33.98, "learning_rate": 3.3012e-05, "loss": 8.7541, "step": 4247000 }, { "epoch": 33.98, "learning_rate": 3.3010000000000004e-05, "loss": 8.7709, "step": 4247500 }, { "epoch": 33.98, "learning_rate": 3.3008e-05, "loss": 8.7501, "step": 4248000 }, { "epoch": 33.99, "learning_rate": 3.3006e-05, "loss": 8.735, "step": 4248500 }, { "epoch": 33.99, "learning_rate": 3.3004000000000005e-05, "loss": 8.7349, "step": 4249000 }, { "epoch": 34.0, "learning_rate": 3.3002e-05, "loss": 8.7454, "step": 4249500 }, { "epoch": 34.0, "learning_rate": 3.3e-05, "loss": 8.7587, "step": 4250000 }, { "epoch": 34.0, "learning_rate": 3.2998e-05, "loss": 8.7467, "step": 4250500 }, { "epoch": 34.01, "learning_rate": 3.2996e-05, "loss": 8.7706, "step": 4251000 }, { "epoch": 34.01, "learning_rate": 3.2994e-05, "loss": 8.7301, "step": 4251500 }, { "epoch": 34.02, "learning_rate": 3.2992e-05, "loss": 8.7365, "step": 4252000 }, { "epoch": 34.02, "learning_rate": 3.299e-05, "loss": 8.7551, "step": 4252500 }, { "epoch": 34.02, "learning_rate": 3.2988e-05, "loss": 8.7381, "step": 4253000 }, { "epoch": 34.03, "learning_rate": 3.2986e-05, "loss": 8.7267, "step": 4253500 }, { "epoch": 34.03, "learning_rate": 3.2984e-05, "loss": 8.7443, "step": 4254000 }, { "epoch": 34.04, "learning_rate": 3.2982000000000004e-05, "loss": 8.7832, "step": 4254500 }, { "epoch": 34.04, "learning_rate": 3.298e-05, "loss": 8.7565, "step": 4255000 }, { "epoch": 34.04, "learning_rate": 3.2978e-05, "loss": 8.7684, "step": 4255500 }, { "epoch": 34.05, "learning_rate": 3.2976000000000004e-05, "loss": 8.7393, "step": 4256000 }, { "epoch": 34.05, "learning_rate": 3.2974e-05, "loss": 8.7504, "step": 4256500 }, { "epoch": 34.06, "learning_rate": 3.2972e-05, "loss": 8.7419, "step": 4257000 }, { "epoch": 34.06, "learning_rate": 3.297e-05, "loss": 8.7553, "step": 4257500 }, { "epoch": 34.06, "learning_rate": 3.2968e-05, "loss": 8.7329, "step": 4258000 }, { "epoch": 34.07, "learning_rate": 3.2966e-05, "loss": 8.7166, "step": 4258500 }, { "epoch": 34.07, "learning_rate": 3.2964e-05, "loss": 8.7599, "step": 4259000 }, { "epoch": 34.08, "learning_rate": 3.296200000000001e-05, "loss": 8.7377, "step": 4259500 }, { "epoch": 34.08, "learning_rate": 3.296e-05, "loss": 8.7538, "step": 4260000 }, { "epoch": 34.08, "learning_rate": 3.2958e-05, "loss": 8.7467, "step": 4260500 }, { "epoch": 34.09, "learning_rate": 3.2956e-05, "loss": 8.7537, "step": 4261000 }, { "epoch": 34.09, "learning_rate": 3.2954e-05, "loss": 8.752, "step": 4261500 }, { "epoch": 34.1, "learning_rate": 3.2952e-05, "loss": 8.758, "step": 4262000 }, { "epoch": 34.1, "learning_rate": 3.295e-05, "loss": 8.7507, "step": 4262500 }, { "epoch": 34.1, "learning_rate": 3.2948000000000004e-05, "loss": 8.7439, "step": 4263000 }, { "epoch": 34.11, "learning_rate": 3.2946e-05, "loss": 8.7374, "step": 4263500 }, { "epoch": 34.11, "learning_rate": 3.2944e-05, "loss": 8.7423, "step": 4264000 }, { "epoch": 34.12, "learning_rate": 3.2942e-05, "loss": 8.7546, "step": 4264500 }, { "epoch": 34.12, "learning_rate": 3.2940000000000006e-05, "loss": 8.7543, "step": 4265000 }, { "epoch": 34.12, "learning_rate": 3.2938e-05, "loss": 8.7521, "step": 4265500 }, { "epoch": 34.13, "learning_rate": 3.2936e-05, "loss": 8.7548, "step": 4266000 }, { "epoch": 34.13, "learning_rate": 3.293400000000001e-05, "loss": 8.7453, "step": 4266500 }, { "epoch": 34.14, "learning_rate": 3.2932e-05, "loss": 8.7424, "step": 4267000 }, { "epoch": 34.14, "learning_rate": 3.293e-05, "loss": 8.7509, "step": 4267500 }, { "epoch": 34.14, "learning_rate": 3.2928e-05, "loss": 8.7592, "step": 4268000 }, { "epoch": 34.15, "learning_rate": 3.2926e-05, "loss": 8.7666, "step": 4268500 }, { "epoch": 34.15, "learning_rate": 3.2924e-05, "loss": 8.7492, "step": 4269000 }, { "epoch": 34.16, "learning_rate": 3.2922e-05, "loss": 8.7576, "step": 4269500 }, { "epoch": 34.16, "learning_rate": 3.292e-05, "loss": 8.7701, "step": 4270000 }, { "epoch": 34.16, "learning_rate": 3.2918e-05, "loss": 8.7504, "step": 4270500 }, { "epoch": 34.17, "learning_rate": 3.2916e-05, "loss": 8.7518, "step": 4271000 }, { "epoch": 34.17, "learning_rate": 3.2914000000000004e-05, "loss": 8.752, "step": 4271500 }, { "epoch": 34.18, "learning_rate": 3.2912000000000006e-05, "loss": 8.7398, "step": 4272000 }, { "epoch": 34.18, "learning_rate": 3.291e-05, "loss": 8.7563, "step": 4272500 }, { "epoch": 34.18, "learning_rate": 3.2908e-05, "loss": 8.7517, "step": 4273000 }, { "epoch": 34.19, "learning_rate": 3.2906000000000006e-05, "loss": 8.7478, "step": 4273500 }, { "epoch": 34.19, "learning_rate": 3.2904e-05, "loss": 8.7411, "step": 4274000 }, { "epoch": 34.2, "learning_rate": 3.2902e-05, "loss": 8.7585, "step": 4274500 }, { "epoch": 34.2, "learning_rate": 3.29e-05, "loss": 8.7537, "step": 4275000 }, { "epoch": 34.2, "learning_rate": 3.2898e-05, "loss": 8.7466, "step": 4275500 }, { "epoch": 34.21, "learning_rate": 3.2896e-05, "loss": 8.7426, "step": 4276000 }, { "epoch": 34.21, "learning_rate": 3.2894e-05, "loss": 8.7532, "step": 4276500 }, { "epoch": 34.22, "learning_rate": 3.2892e-05, "loss": 8.7502, "step": 4277000 }, { "epoch": 34.22, "learning_rate": 3.2890000000000005e-05, "loss": 8.7529, "step": 4277500 }, { "epoch": 34.22, "learning_rate": 3.2888e-05, "loss": 8.7329, "step": 4278000 }, { "epoch": 34.23, "learning_rate": 3.2886e-05, "loss": 8.7651, "step": 4278500 }, { "epoch": 34.23, "learning_rate": 3.2884000000000006e-05, "loss": 8.749, "step": 4279000 }, { "epoch": 34.24, "learning_rate": 3.2882e-05, "loss": 8.7396, "step": 4279500 }, { "epoch": 34.24, "learning_rate": 3.288e-05, "loss": 8.7445, "step": 4280000 }, { "epoch": 34.24, "learning_rate": 3.2878000000000006e-05, "loss": 8.7498, "step": 4280500 }, { "epoch": 34.25, "learning_rate": 3.2876e-05, "loss": 8.7418, "step": 4281000 }, { "epoch": 34.25, "learning_rate": 3.2874e-05, "loss": 8.7382, "step": 4281500 }, { "epoch": 34.26, "learning_rate": 3.2872e-05, "loss": 8.7513, "step": 4282000 }, { "epoch": 34.26, "learning_rate": 3.287e-05, "loss": 8.7522, "step": 4282500 }, { "epoch": 34.26, "learning_rate": 3.2868000000000004e-05, "loss": 8.7478, "step": 4283000 }, { "epoch": 34.27, "learning_rate": 3.2866e-05, "loss": 8.7479, "step": 4283500 }, { "epoch": 34.27, "learning_rate": 3.2864e-05, "loss": 8.7531, "step": 4284000 }, { "epoch": 34.28, "learning_rate": 3.2862000000000005e-05, "loss": 8.7532, "step": 4284500 }, { "epoch": 34.28, "learning_rate": 3.286e-05, "loss": 8.7818, "step": 4285000 }, { "epoch": 34.28, "learning_rate": 3.2858e-05, "loss": 8.7741, "step": 4285500 }, { "epoch": 34.29, "learning_rate": 3.2856000000000005e-05, "loss": 8.7412, "step": 4286000 }, { "epoch": 34.29, "learning_rate": 3.2854e-05, "loss": 8.7645, "step": 4286500 }, { "epoch": 34.3, "learning_rate": 3.2851999999999996e-05, "loss": 8.7604, "step": 4287000 }, { "epoch": 34.3, "learning_rate": 3.2850000000000006e-05, "loss": 8.7517, "step": 4287500 }, { "epoch": 34.3, "learning_rate": 3.2848e-05, "loss": 8.7592, "step": 4288000 }, { "epoch": 34.31, "learning_rate": 3.2846e-05, "loss": 8.7311, "step": 4288500 }, { "epoch": 34.31, "learning_rate": 3.2844e-05, "loss": 8.7652, "step": 4289000 }, { "epoch": 34.32, "learning_rate": 3.2842e-05, "loss": 8.7611, "step": 4289500 }, { "epoch": 34.32, "learning_rate": 3.2840000000000004e-05, "loss": 8.7634, "step": 4290000 }, { "epoch": 34.32, "learning_rate": 3.2838e-05, "loss": 8.7538, "step": 4290500 }, { "epoch": 34.33, "learning_rate": 3.2836e-05, "loss": 8.767, "step": 4291000 }, { "epoch": 34.33, "learning_rate": 3.2834000000000004e-05, "loss": 8.7435, "step": 4291500 }, { "epoch": 34.34, "learning_rate": 3.2832e-05, "loss": 8.7677, "step": 4292000 }, { "epoch": 34.34, "learning_rate": 3.283e-05, "loss": 8.7586, "step": 4292500 }, { "epoch": 34.34, "learning_rate": 3.2828000000000005e-05, "loss": 8.7457, "step": 4293000 }, { "epoch": 34.35, "learning_rate": 3.2826e-05, "loss": 8.7658, "step": 4293500 }, { "epoch": 34.35, "learning_rate": 3.2823999999999996e-05, "loss": 8.7585, "step": 4294000 }, { "epoch": 34.36, "learning_rate": 3.2822000000000005e-05, "loss": 8.7458, "step": 4294500 }, { "epoch": 34.36, "learning_rate": 3.282e-05, "loss": 8.743, "step": 4295000 }, { "epoch": 34.36, "learning_rate": 3.2818e-05, "loss": 8.7371, "step": 4295500 }, { "epoch": 34.37, "learning_rate": 3.2816000000000006e-05, "loss": 8.7469, "step": 4296000 }, { "epoch": 34.37, "learning_rate": 3.2814e-05, "loss": 8.7526, "step": 4296500 }, { "epoch": 34.38, "learning_rate": 3.2812000000000004e-05, "loss": 8.7418, "step": 4297000 }, { "epoch": 34.38, "learning_rate": 3.281e-05, "loss": 8.7485, "step": 4297500 }, { "epoch": 34.38, "learning_rate": 3.2808e-05, "loss": 8.7651, "step": 4298000 }, { "epoch": 34.39, "learning_rate": 3.2806000000000004e-05, "loss": 8.7511, "step": 4298500 }, { "epoch": 34.39, "learning_rate": 3.2804e-05, "loss": 8.7383, "step": 4299000 }, { "epoch": 34.4, "learning_rate": 3.2802e-05, "loss": 8.7478, "step": 4299500 }, { "epoch": 34.4, "learning_rate": 3.2800000000000004e-05, "loss": 8.7589, "step": 4300000 }, { "epoch": 34.4, "learning_rate": 3.2798e-05, "loss": 8.7482, "step": 4300500 }, { "epoch": 34.41, "learning_rate": 3.2795999999999996e-05, "loss": 8.7426, "step": 4301000 }, { "epoch": 34.41, "learning_rate": 3.2794000000000005e-05, "loss": 8.7537, "step": 4301500 }, { "epoch": 34.42, "learning_rate": 3.2792e-05, "loss": 8.7477, "step": 4302000 }, { "epoch": 34.42, "learning_rate": 3.279e-05, "loss": 8.7592, "step": 4302500 }, { "epoch": 34.42, "learning_rate": 3.2788000000000005e-05, "loss": 8.7521, "step": 4303000 }, { "epoch": 34.43, "learning_rate": 3.2786e-05, "loss": 8.7365, "step": 4303500 }, { "epoch": 34.43, "learning_rate": 3.2784e-05, "loss": 8.7202, "step": 4304000 }, { "epoch": 34.44, "learning_rate": 3.2782e-05, "loss": 8.7635, "step": 4304500 }, { "epoch": 34.44, "learning_rate": 3.278e-05, "loss": 8.7538, "step": 4305000 }, { "epoch": 34.44, "learning_rate": 3.2778000000000004e-05, "loss": 8.7509, "step": 4305500 }, { "epoch": 34.45, "learning_rate": 3.2776e-05, "loss": 8.7557, "step": 4306000 }, { "epoch": 34.45, "learning_rate": 3.2774e-05, "loss": 8.7639, "step": 4306500 }, { "epoch": 34.46, "learning_rate": 3.2772000000000004e-05, "loss": 8.7387, "step": 4307000 }, { "epoch": 34.46, "learning_rate": 3.277e-05, "loss": 8.7457, "step": 4307500 }, { "epoch": 34.46, "learning_rate": 3.2768e-05, "loss": 8.7514, "step": 4308000 }, { "epoch": 34.47, "learning_rate": 3.2766000000000004e-05, "loss": 8.7636, "step": 4308500 }, { "epoch": 34.47, "learning_rate": 3.2764e-05, "loss": 8.7467, "step": 4309000 }, { "epoch": 34.48, "learning_rate": 3.2762e-05, "loss": 8.7737, "step": 4309500 }, { "epoch": 34.48, "learning_rate": 3.2760000000000005e-05, "loss": 8.7564, "step": 4310000 }, { "epoch": 34.48, "learning_rate": 3.2758e-05, "loss": 8.7435, "step": 4310500 }, { "epoch": 34.49, "learning_rate": 3.2756e-05, "loss": 8.7457, "step": 4311000 }, { "epoch": 34.49, "learning_rate": 3.2754e-05, "loss": 8.7423, "step": 4311500 }, { "epoch": 34.5, "learning_rate": 3.2752e-05, "loss": 8.7558, "step": 4312000 }, { "epoch": 34.5, "learning_rate": 3.275e-05, "loss": 8.753, "step": 4312500 }, { "epoch": 34.5, "learning_rate": 3.2748e-05, "loss": 8.7493, "step": 4313000 }, { "epoch": 34.51, "learning_rate": 3.2746e-05, "loss": 8.7677, "step": 4313500 }, { "epoch": 34.51, "learning_rate": 3.2744000000000004e-05, "loss": 8.7497, "step": 4314000 }, { "epoch": 34.52, "learning_rate": 3.2742e-05, "loss": 8.7555, "step": 4314500 }, { "epoch": 34.52, "learning_rate": 3.274e-05, "loss": 8.7618, "step": 4315000 }, { "epoch": 34.52, "learning_rate": 3.2738000000000004e-05, "loss": 8.7438, "step": 4315500 }, { "epoch": 34.53, "learning_rate": 3.2736e-05, "loss": 8.7375, "step": 4316000 }, { "epoch": 34.53, "learning_rate": 3.2734e-05, "loss": 8.7449, "step": 4316500 }, { "epoch": 34.54, "learning_rate": 3.2732000000000004e-05, "loss": 8.7421, "step": 4317000 }, { "epoch": 34.54, "learning_rate": 3.273e-05, "loss": 8.7457, "step": 4317500 }, { "epoch": 34.54, "learning_rate": 3.2728e-05, "loss": 8.7589, "step": 4318000 }, { "epoch": 34.55, "learning_rate": 3.2726e-05, "loss": 8.7562, "step": 4318500 }, { "epoch": 34.55, "learning_rate": 3.2724e-05, "loss": 8.7751, "step": 4319000 }, { "epoch": 34.56, "learning_rate": 3.2722e-05, "loss": 8.748, "step": 4319500 }, { "epoch": 34.56, "learning_rate": 3.272e-05, "loss": 8.7693, "step": 4320000 }, { "epoch": 34.56, "learning_rate": 3.2718e-05, "loss": 8.7537, "step": 4320500 }, { "epoch": 34.57, "learning_rate": 3.2716e-05, "loss": 8.7451, "step": 4321000 }, { "epoch": 34.57, "learning_rate": 3.2714e-05, "loss": 8.7459, "step": 4321500 }, { "epoch": 34.58, "learning_rate": 3.2712e-05, "loss": 8.7592, "step": 4322000 }, { "epoch": 34.58, "learning_rate": 3.2710000000000004e-05, "loss": 8.7523, "step": 4322500 }, { "epoch": 34.58, "learning_rate": 3.2708e-05, "loss": 8.7371, "step": 4323000 }, { "epoch": 34.59, "learning_rate": 3.2706e-05, "loss": 8.7728, "step": 4323500 }, { "epoch": 34.59, "learning_rate": 3.2704000000000004e-05, "loss": 8.7494, "step": 4324000 }, { "epoch": 34.6, "learning_rate": 3.2702e-05, "loss": 8.7433, "step": 4324500 }, { "epoch": 34.6, "learning_rate": 3.27e-05, "loss": 8.7626, "step": 4325000 }, { "epoch": 34.6, "learning_rate": 3.2698e-05, "loss": 8.7391, "step": 4325500 }, { "epoch": 34.61, "learning_rate": 3.2696e-05, "loss": 8.7521, "step": 4326000 }, { "epoch": 34.61, "learning_rate": 3.2694e-05, "loss": 8.7618, "step": 4326500 }, { "epoch": 34.62, "learning_rate": 3.2692e-05, "loss": 8.767, "step": 4327000 }, { "epoch": 34.62, "learning_rate": 3.269000000000001e-05, "loss": 8.7607, "step": 4327500 }, { "epoch": 34.62, "learning_rate": 3.2688e-05, "loss": 8.717, "step": 4328000 }, { "epoch": 34.63, "learning_rate": 3.2686e-05, "loss": 8.7298, "step": 4328500 }, { "epoch": 34.63, "learning_rate": 3.2684e-05, "loss": 8.7494, "step": 4329000 }, { "epoch": 34.64, "learning_rate": 3.2682e-05, "loss": 8.7586, "step": 4329500 }, { "epoch": 34.64, "learning_rate": 3.268e-05, "loss": 8.7459, "step": 4330000 }, { "epoch": 34.64, "learning_rate": 3.2678e-05, "loss": 8.751, "step": 4330500 }, { "epoch": 34.65, "learning_rate": 3.2676000000000004e-05, "loss": 8.7482, "step": 4331000 }, { "epoch": 34.65, "learning_rate": 3.2674e-05, "loss": 8.7535, "step": 4331500 }, { "epoch": 34.66, "learning_rate": 3.2672e-05, "loss": 8.7396, "step": 4332000 }, { "epoch": 34.66, "learning_rate": 3.267e-05, "loss": 8.7486, "step": 4332500 }, { "epoch": 34.66, "learning_rate": 3.2668000000000006e-05, "loss": 8.7567, "step": 4333000 }, { "epoch": 34.67, "learning_rate": 3.2666e-05, "loss": 8.7599, "step": 4333500 }, { "epoch": 34.67, "learning_rate": 3.2664e-05, "loss": 8.7426, "step": 4334000 }, { "epoch": 34.68, "learning_rate": 3.266200000000001e-05, "loss": 8.7701, "step": 4334500 }, { "epoch": 34.68, "learning_rate": 3.266e-05, "loss": 8.7561, "step": 4335000 }, { "epoch": 34.68, "learning_rate": 3.2658e-05, "loss": 8.7512, "step": 4335500 }, { "epoch": 34.69, "learning_rate": 3.2656e-05, "loss": 8.7428, "step": 4336000 }, { "epoch": 34.69, "learning_rate": 3.2654e-05, "loss": 8.7559, "step": 4336500 }, { "epoch": 34.7, "learning_rate": 3.2652e-05, "loss": 8.7459, "step": 4337000 }, { "epoch": 34.7, "learning_rate": 3.265e-05, "loss": 8.7442, "step": 4337500 }, { "epoch": 34.7, "learning_rate": 3.2648e-05, "loss": 8.7339, "step": 4338000 }, { "epoch": 34.71, "learning_rate": 3.2646e-05, "loss": 8.7455, "step": 4338500 }, { "epoch": 34.71, "learning_rate": 3.2644e-05, "loss": 8.7393, "step": 4339000 }, { "epoch": 34.72, "learning_rate": 3.2642000000000004e-05, "loss": 8.7572, "step": 4339500 }, { "epoch": 34.72, "learning_rate": 3.2640000000000006e-05, "loss": 8.7451, "step": 4340000 }, { "epoch": 34.72, "learning_rate": 3.2638e-05, "loss": 8.7508, "step": 4340500 }, { "epoch": 34.73, "learning_rate": 3.2636e-05, "loss": 8.7487, "step": 4341000 }, { "epoch": 34.73, "learning_rate": 3.2634000000000006e-05, "loss": 8.7235, "step": 4341500 }, { "epoch": 34.74, "learning_rate": 3.2632e-05, "loss": 8.7538, "step": 4342000 }, { "epoch": 34.74, "learning_rate": 3.263e-05, "loss": 8.7475, "step": 4342500 }, { "epoch": 34.74, "learning_rate": 3.2628e-05, "loss": 8.7515, "step": 4343000 }, { "epoch": 34.75, "learning_rate": 3.2626e-05, "loss": 8.7592, "step": 4343500 }, { "epoch": 34.75, "learning_rate": 3.2624e-05, "loss": 8.754, "step": 4344000 }, { "epoch": 34.76, "learning_rate": 3.2622e-05, "loss": 8.7434, "step": 4344500 }, { "epoch": 34.76, "learning_rate": 3.262e-05, "loss": 8.7432, "step": 4345000 }, { "epoch": 34.76, "learning_rate": 3.2618000000000005e-05, "loss": 8.7542, "step": 4345500 }, { "epoch": 34.77, "learning_rate": 3.2616e-05, "loss": 8.7667, "step": 4346000 }, { "epoch": 34.77, "learning_rate": 3.2614e-05, "loss": 8.7404, "step": 4346500 }, { "epoch": 34.78, "learning_rate": 3.2612000000000006e-05, "loss": 8.7539, "step": 4347000 }, { "epoch": 34.78, "learning_rate": 3.261e-05, "loss": 8.7563, "step": 4347500 }, { "epoch": 34.78, "learning_rate": 3.2608e-05, "loss": 8.7531, "step": 4348000 }, { "epoch": 34.79, "learning_rate": 3.2606000000000006e-05, "loss": 8.7653, "step": 4348500 }, { "epoch": 34.79, "learning_rate": 3.2604e-05, "loss": 8.7539, "step": 4349000 }, { "epoch": 34.8, "learning_rate": 3.2602e-05, "loss": 8.7627, "step": 4349500 }, { "epoch": 34.8, "learning_rate": 3.26e-05, "loss": 8.7468, "step": 4350000 }, { "epoch": 34.8, "learning_rate": 3.2598e-05, "loss": 8.7643, "step": 4350500 }, { "epoch": 34.81, "learning_rate": 3.2596000000000004e-05, "loss": 8.7394, "step": 4351000 }, { "epoch": 34.81, "learning_rate": 3.2594e-05, "loss": 8.7494, "step": 4351500 }, { "epoch": 34.82, "learning_rate": 3.2592e-05, "loss": 8.7329, "step": 4352000 }, { "epoch": 34.82, "learning_rate": 3.2590000000000005e-05, "loss": 8.7489, "step": 4352500 }, { "epoch": 34.82, "learning_rate": 3.2588e-05, "loss": 8.7392, "step": 4353000 }, { "epoch": 34.83, "learning_rate": 3.2586e-05, "loss": 8.7401, "step": 4353500 }, { "epoch": 34.83, "learning_rate": 3.2584000000000005e-05, "loss": 8.7407, "step": 4354000 }, { "epoch": 34.84, "learning_rate": 3.2582e-05, "loss": 8.7621, "step": 4354500 }, { "epoch": 34.84, "learning_rate": 3.2579999999999996e-05, "loss": 8.7541, "step": 4355000 }, { "epoch": 34.84, "learning_rate": 3.2578000000000006e-05, "loss": 8.7385, "step": 4355500 }, { "epoch": 34.85, "learning_rate": 3.2576e-05, "loss": 8.7679, "step": 4356000 }, { "epoch": 34.85, "learning_rate": 3.2574e-05, "loss": 8.7281, "step": 4356500 }, { "epoch": 34.86, "learning_rate": 3.2572e-05, "loss": 8.7389, "step": 4357000 }, { "epoch": 34.86, "learning_rate": 3.257e-05, "loss": 8.767, "step": 4357500 }, { "epoch": 34.86, "learning_rate": 3.2568000000000004e-05, "loss": 8.7461, "step": 4358000 }, { "epoch": 34.87, "learning_rate": 3.2566e-05, "loss": 8.7514, "step": 4358500 }, { "epoch": 34.87, "learning_rate": 3.2564e-05, "loss": 8.7469, "step": 4359000 }, { "epoch": 34.88, "learning_rate": 3.2562000000000004e-05, "loss": 8.7494, "step": 4359500 }, { "epoch": 34.88, "learning_rate": 3.256e-05, "loss": 8.7553, "step": 4360000 }, { "epoch": 34.88, "learning_rate": 3.2558e-05, "loss": 8.745, "step": 4360500 }, { "epoch": 34.89, "learning_rate": 3.2556000000000005e-05, "loss": 8.7494, "step": 4361000 }, { "epoch": 34.89, "learning_rate": 3.2554e-05, "loss": 8.7481, "step": 4361500 }, { "epoch": 34.9, "learning_rate": 3.2551999999999996e-05, "loss": 8.7705, "step": 4362000 }, { "epoch": 34.9, "learning_rate": 3.2550000000000005e-05, "loss": 8.7418, "step": 4362500 }, { "epoch": 34.9, "learning_rate": 3.2548e-05, "loss": 8.7438, "step": 4363000 }, { "epoch": 34.91, "learning_rate": 3.2546e-05, "loss": 8.7473, "step": 4363500 }, { "epoch": 34.91, "learning_rate": 3.2544000000000006e-05, "loss": 8.7505, "step": 4364000 }, { "epoch": 34.92, "learning_rate": 3.2542e-05, "loss": 8.7597, "step": 4364500 }, { "epoch": 34.92, "learning_rate": 3.2540000000000004e-05, "loss": 8.7404, "step": 4365000 }, { "epoch": 34.92, "learning_rate": 3.2538e-05, "loss": 8.7459, "step": 4365500 }, { "epoch": 34.93, "learning_rate": 3.2536e-05, "loss": 8.7732, "step": 4366000 }, { "epoch": 34.93, "learning_rate": 3.2534000000000004e-05, "loss": 8.7713, "step": 4366500 }, { "epoch": 34.94, "learning_rate": 3.2532e-05, "loss": 8.749, "step": 4367000 }, { "epoch": 34.94, "learning_rate": 3.253e-05, "loss": 8.7556, "step": 4367500 }, { "epoch": 34.94, "learning_rate": 3.2528000000000004e-05, "loss": 8.7513, "step": 4368000 }, { "epoch": 34.95, "learning_rate": 3.2526e-05, "loss": 8.7398, "step": 4368500 }, { "epoch": 34.95, "learning_rate": 3.2523999999999996e-05, "loss": 8.7609, "step": 4369000 }, { "epoch": 34.96, "learning_rate": 3.2522000000000005e-05, "loss": 8.7625, "step": 4369500 }, { "epoch": 34.96, "learning_rate": 3.252e-05, "loss": 8.7434, "step": 4370000 }, { "epoch": 34.96, "learning_rate": 3.2518e-05, "loss": 8.7509, "step": 4370500 }, { "epoch": 34.97, "learning_rate": 3.2516000000000005e-05, "loss": 8.7596, "step": 4371000 }, { "epoch": 34.97, "learning_rate": 3.2514e-05, "loss": 8.7464, "step": 4371500 }, { "epoch": 34.98, "learning_rate": 3.2512e-05, "loss": 8.747, "step": 4372000 }, { "epoch": 34.98, "learning_rate": 3.251e-05, "loss": 8.7421, "step": 4372500 }, { "epoch": 34.98, "learning_rate": 3.2508e-05, "loss": 8.7588, "step": 4373000 }, { "epoch": 34.99, "learning_rate": 3.2506000000000004e-05, "loss": 8.7484, "step": 4373500 }, { "epoch": 34.99, "learning_rate": 3.2504e-05, "loss": 8.7613, "step": 4374000 }, { "epoch": 35.0, "learning_rate": 3.2502e-05, "loss": 8.7368, "step": 4374500 }, { "epoch": 35.0, "learning_rate": 3.2500000000000004e-05, "loss": 8.756, "step": 4375000 }, { "epoch": 35.0, "learning_rate": 3.2498e-05, "loss": 8.7386, "step": 4375500 }, { "epoch": 35.01, "learning_rate": 3.2496e-05, "loss": 8.7594, "step": 4376000 }, { "epoch": 35.01, "learning_rate": 3.2494000000000004e-05, "loss": 8.7593, "step": 4376500 }, { "epoch": 35.02, "learning_rate": 3.2492e-05, "loss": 8.7351, "step": 4377000 }, { "epoch": 35.02, "learning_rate": 3.249e-05, "loss": 8.7496, "step": 4377500 }, { "epoch": 35.02, "learning_rate": 3.2488000000000005e-05, "loss": 8.7652, "step": 4378000 }, { "epoch": 35.03, "learning_rate": 3.2486e-05, "loss": 8.7625, "step": 4378500 }, { "epoch": 35.03, "learning_rate": 3.2484e-05, "loss": 8.7276, "step": 4379000 }, { "epoch": 35.04, "learning_rate": 3.2482e-05, "loss": 8.7393, "step": 4379500 }, { "epoch": 35.04, "learning_rate": 3.248e-05, "loss": 8.7682, "step": 4380000 }, { "epoch": 35.04, "learning_rate": 3.2478e-05, "loss": 8.7505, "step": 4380500 }, { "epoch": 35.05, "learning_rate": 3.2476e-05, "loss": 8.7563, "step": 4381000 }, { "epoch": 35.05, "learning_rate": 3.2474e-05, "loss": 8.7452, "step": 4381500 }, { "epoch": 35.06, "learning_rate": 3.2472000000000004e-05, "loss": 8.7656, "step": 4382000 }, { "epoch": 35.06, "learning_rate": 3.247e-05, "loss": 8.7602, "step": 4382500 }, { "epoch": 35.06, "learning_rate": 3.2468e-05, "loss": 8.7408, "step": 4383000 }, { "epoch": 35.07, "learning_rate": 3.2466000000000004e-05, "loss": 8.7633, "step": 4383500 }, { "epoch": 35.07, "learning_rate": 3.2464e-05, "loss": 8.7399, "step": 4384000 }, { "epoch": 35.08, "learning_rate": 3.2462e-05, "loss": 8.7329, "step": 4384500 }, { "epoch": 35.08, "learning_rate": 3.2460000000000004e-05, "loss": 8.7636, "step": 4385000 }, { "epoch": 35.08, "learning_rate": 3.2458e-05, "loss": 8.7447, "step": 4385500 }, { "epoch": 35.09, "learning_rate": 3.2456e-05, "loss": 8.7738, "step": 4386000 }, { "epoch": 35.09, "learning_rate": 3.2454e-05, "loss": 8.7577, "step": 4386500 }, { "epoch": 35.1, "learning_rate": 3.2452e-05, "loss": 8.7434, "step": 4387000 }, { "epoch": 35.1, "learning_rate": 3.245e-05, "loss": 8.7555, "step": 4387500 }, { "epoch": 35.1, "learning_rate": 3.2448e-05, "loss": 8.7391, "step": 4388000 }, { "epoch": 35.11, "learning_rate": 3.244600000000001e-05, "loss": 8.747, "step": 4388500 }, { "epoch": 35.11, "learning_rate": 3.2444e-05, "loss": 8.7545, "step": 4389000 }, { "epoch": 35.12, "learning_rate": 3.2442e-05, "loss": 8.7256, "step": 4389500 }, { "epoch": 35.12, "learning_rate": 3.244e-05, "loss": 8.756, "step": 4390000 }, { "epoch": 35.12, "learning_rate": 3.2438000000000004e-05, "loss": 8.7463, "step": 4390500 }, { "epoch": 35.13, "learning_rate": 3.2436e-05, "loss": 8.7501, "step": 4391000 }, { "epoch": 35.13, "learning_rate": 3.2434e-05, "loss": 8.7487, "step": 4391500 }, { "epoch": 35.14, "learning_rate": 3.2432000000000004e-05, "loss": 8.7626, "step": 4392000 }, { "epoch": 35.14, "learning_rate": 3.243e-05, "loss": 8.7532, "step": 4392500 }, { "epoch": 35.14, "learning_rate": 3.2428e-05, "loss": 8.7659, "step": 4393000 }, { "epoch": 35.15, "learning_rate": 3.2426e-05, "loss": 8.7618, "step": 4393500 }, { "epoch": 35.15, "learning_rate": 3.2424e-05, "loss": 8.7651, "step": 4394000 }, { "epoch": 35.16, "learning_rate": 3.2422e-05, "loss": 8.7547, "step": 4394500 }, { "epoch": 35.16, "learning_rate": 3.242e-05, "loss": 8.751, "step": 4395000 }, { "epoch": 35.16, "learning_rate": 3.241800000000001e-05, "loss": 8.7625, "step": 4395500 }, { "epoch": 35.17, "learning_rate": 3.2416e-05, "loss": 8.7533, "step": 4396000 }, { "epoch": 35.17, "learning_rate": 3.2414e-05, "loss": 8.744, "step": 4396500 }, { "epoch": 35.18, "learning_rate": 3.2412e-05, "loss": 8.7537, "step": 4397000 }, { "epoch": 35.18, "learning_rate": 3.241e-05, "loss": 8.7453, "step": 4397500 }, { "epoch": 35.18, "learning_rate": 3.2408e-05, "loss": 8.7468, "step": 4398000 }, { "epoch": 35.19, "learning_rate": 3.2406e-05, "loss": 8.757, "step": 4398500 }, { "epoch": 35.19, "learning_rate": 3.2404000000000003e-05, "loss": 8.7503, "step": 4399000 }, { "epoch": 35.2, "learning_rate": 3.2402e-05, "loss": 8.7574, "step": 4399500 }, { "epoch": 35.2, "learning_rate": 3.24e-05, "loss": 8.7522, "step": 4400000 }, { "epoch": 35.2, "learning_rate": 3.2398000000000004e-05, "loss": 8.7445, "step": 4400500 }, { "epoch": 35.21, "learning_rate": 3.2396000000000006e-05, "loss": 8.7463, "step": 4401000 }, { "epoch": 35.21, "learning_rate": 3.2394e-05, "loss": 8.766, "step": 4401500 }, { "epoch": 35.22, "learning_rate": 3.2392e-05, "loss": 8.7609, "step": 4402000 }, { "epoch": 35.22, "learning_rate": 3.239000000000001e-05, "loss": 8.7559, "step": 4402500 }, { "epoch": 35.22, "learning_rate": 3.2388e-05, "loss": 8.7752, "step": 4403000 }, { "epoch": 35.23, "learning_rate": 3.2386e-05, "loss": 8.759, "step": 4403500 }, { "epoch": 35.23, "learning_rate": 3.2384e-05, "loss": 8.7475, "step": 4404000 }, { "epoch": 35.24, "learning_rate": 3.2382e-05, "loss": 8.7482, "step": 4404500 }, { "epoch": 35.24, "learning_rate": 3.238e-05, "loss": 8.7505, "step": 4405000 }, { "epoch": 35.24, "learning_rate": 3.2378e-05, "loss": 8.7602, "step": 4405500 }, { "epoch": 35.25, "learning_rate": 3.2376e-05, "loss": 8.7393, "step": 4406000 }, { "epoch": 35.25, "learning_rate": 3.2374000000000005e-05, "loss": 8.735, "step": 4406500 }, { "epoch": 35.26, "learning_rate": 3.2372e-05, "loss": 8.749, "step": 4407000 }, { "epoch": 35.26, "learning_rate": 3.2370000000000003e-05, "loss": 8.7428, "step": 4407500 }, { "epoch": 35.26, "learning_rate": 3.2368000000000006e-05, "loss": 8.7512, "step": 4408000 }, { "epoch": 35.27, "learning_rate": 3.2366e-05, "loss": 8.7742, "step": 4408500 }, { "epoch": 35.27, "learning_rate": 3.2364e-05, "loss": 8.7648, "step": 4409000 }, { "epoch": 35.28, "learning_rate": 3.2362000000000006e-05, "loss": 8.7504, "step": 4409500 }, { "epoch": 35.28, "learning_rate": 3.236e-05, "loss": 8.742, "step": 4410000 }, { "epoch": 35.28, "learning_rate": 3.2358e-05, "loss": 8.7408, "step": 4410500 }, { "epoch": 35.29, "learning_rate": 3.2356e-05, "loss": 8.7662, "step": 4411000 }, { "epoch": 35.29, "learning_rate": 3.2354e-05, "loss": 8.7488, "step": 4411500 }, { "epoch": 35.3, "learning_rate": 3.2352e-05, "loss": 8.7577, "step": 4412000 }, { "epoch": 35.3, "learning_rate": 3.235e-05, "loss": 8.7467, "step": 4412500 }, { "epoch": 35.3, "learning_rate": 3.2348e-05, "loss": 8.7516, "step": 4413000 }, { "epoch": 35.31, "learning_rate": 3.2346000000000005e-05, "loss": 8.7413, "step": 4413500 }, { "epoch": 35.31, "learning_rate": 3.2344e-05, "loss": 8.7482, "step": 4414000 }, { "epoch": 35.32, "learning_rate": 3.2342e-05, "loss": 8.75, "step": 4414500 }, { "epoch": 35.32, "learning_rate": 3.2340000000000005e-05, "loss": 8.7385, "step": 4415000 }, { "epoch": 35.32, "learning_rate": 3.2338e-05, "loss": 8.7441, "step": 4415500 }, { "epoch": 35.33, "learning_rate": 3.2336e-05, "loss": 8.7419, "step": 4416000 }, { "epoch": 35.33, "learning_rate": 3.2334000000000006e-05, "loss": 8.7533, "step": 4416500 }, { "epoch": 35.34, "learning_rate": 3.2332e-05, "loss": 8.7691, "step": 4417000 }, { "epoch": 35.34, "learning_rate": 3.233e-05, "loss": 8.7583, "step": 4417500 }, { "epoch": 35.34, "learning_rate": 3.2328e-05, "loss": 8.7411, "step": 4418000 }, { "epoch": 35.35, "learning_rate": 3.2326e-05, "loss": 8.7439, "step": 4418500 }, { "epoch": 35.35, "learning_rate": 3.2324000000000004e-05, "loss": 8.7503, "step": 4419000 }, { "epoch": 35.36, "learning_rate": 3.2322e-05, "loss": 8.7522, "step": 4419500 }, { "epoch": 35.36, "learning_rate": 3.232e-05, "loss": 8.7613, "step": 4420000 }, { "epoch": 35.36, "learning_rate": 3.2318000000000005e-05, "loss": 8.7296, "step": 4420500 }, { "epoch": 35.37, "learning_rate": 3.2316e-05, "loss": 8.7791, "step": 4421000 }, { "epoch": 35.37, "learning_rate": 3.2314e-05, "loss": 8.7439, "step": 4421500 }, { "epoch": 35.38, "learning_rate": 3.2312000000000005e-05, "loss": 8.7583, "step": 4422000 }, { "epoch": 35.38, "learning_rate": 3.231e-05, "loss": 8.7768, "step": 4422500 }, { "epoch": 35.38, "learning_rate": 3.2307999999999996e-05, "loss": 8.7581, "step": 4423000 }, { "epoch": 35.39, "learning_rate": 3.2306000000000005e-05, "loss": 8.7396, "step": 4423500 }, { "epoch": 35.39, "learning_rate": 3.2304e-05, "loss": 8.7657, "step": 4424000 }, { "epoch": 35.4, "learning_rate": 3.2302e-05, "loss": 8.7505, "step": 4424500 }, { "epoch": 35.4, "learning_rate": 3.2300000000000006e-05, "loss": 8.7673, "step": 4425000 }, { "epoch": 35.4, "learning_rate": 3.2298e-05, "loss": 8.7461, "step": 4425500 }, { "epoch": 35.41, "learning_rate": 3.2296000000000004e-05, "loss": 8.761, "step": 4426000 }, { "epoch": 35.41, "learning_rate": 3.2294e-05, "loss": 8.7554, "step": 4426500 }, { "epoch": 35.42, "learning_rate": 3.2292e-05, "loss": 8.7511, "step": 4427000 }, { "epoch": 35.42, "learning_rate": 3.2290000000000004e-05, "loss": 8.758, "step": 4427500 }, { "epoch": 35.42, "learning_rate": 3.2288e-05, "loss": 8.7481, "step": 4428000 }, { "epoch": 35.43, "learning_rate": 3.2286e-05, "loss": 8.7565, "step": 4428500 }, { "epoch": 35.43, "learning_rate": 3.2284000000000005e-05, "loss": 8.774, "step": 4429000 }, { "epoch": 35.44, "learning_rate": 3.2282e-05, "loss": 8.7515, "step": 4429500 }, { "epoch": 35.44, "learning_rate": 3.2279999999999996e-05, "loss": 8.7424, "step": 4430000 }, { "epoch": 35.44, "learning_rate": 3.2278000000000005e-05, "loss": 8.7528, "step": 4430500 }, { "epoch": 35.45, "learning_rate": 3.2276e-05, "loss": 8.754, "step": 4431000 }, { "epoch": 35.45, "learning_rate": 3.2274e-05, "loss": 8.7692, "step": 4431500 }, { "epoch": 35.46, "learning_rate": 3.2272000000000005e-05, "loss": 8.7596, "step": 4432000 }, { "epoch": 35.46, "learning_rate": 3.227e-05, "loss": 8.7511, "step": 4432500 }, { "epoch": 35.46, "learning_rate": 3.2268000000000003e-05, "loss": 8.738, "step": 4433000 }, { "epoch": 35.47, "learning_rate": 3.2266e-05, "loss": 8.7609, "step": 4433500 }, { "epoch": 35.47, "learning_rate": 3.2264e-05, "loss": 8.7358, "step": 4434000 }, { "epoch": 35.48, "learning_rate": 3.2262000000000004e-05, "loss": 8.7451, "step": 4434500 }, { "epoch": 35.48, "learning_rate": 3.226e-05, "loss": 8.745, "step": 4435000 }, { "epoch": 35.48, "learning_rate": 3.2258e-05, "loss": 8.7414, "step": 4435500 }, { "epoch": 35.49, "learning_rate": 3.2256000000000004e-05, "loss": 8.7546, "step": 4436000 }, { "epoch": 35.49, "learning_rate": 3.2254e-05, "loss": 8.7341, "step": 4436500 }, { "epoch": 35.5, "learning_rate": 3.2252e-05, "loss": 8.7402, "step": 4437000 }, { "epoch": 35.5, "learning_rate": 3.2250000000000005e-05, "loss": 8.7389, "step": 4437500 }, { "epoch": 35.5, "learning_rate": 3.2248e-05, "loss": 8.7631, "step": 4438000 }, { "epoch": 35.51, "learning_rate": 3.2246e-05, "loss": 8.7508, "step": 4438500 }, { "epoch": 35.51, "learning_rate": 3.2244000000000005e-05, "loss": 8.7441, "step": 4439000 }, { "epoch": 35.52, "learning_rate": 3.2242e-05, "loss": 8.7558, "step": 4439500 }, { "epoch": 35.52, "learning_rate": 3.224e-05, "loss": 8.7507, "step": 4440000 }, { "epoch": 35.52, "learning_rate": 3.2238e-05, "loss": 8.7532, "step": 4440500 }, { "epoch": 35.53, "learning_rate": 3.2236e-05, "loss": 8.7564, "step": 4441000 }, { "epoch": 35.53, "learning_rate": 3.2234000000000003e-05, "loss": 8.7566, "step": 4441500 }, { "epoch": 35.54, "learning_rate": 3.2232e-05, "loss": 8.7515, "step": 4442000 }, { "epoch": 35.54, "learning_rate": 3.223e-05, "loss": 8.7602, "step": 4442500 }, { "epoch": 35.54, "learning_rate": 3.2228000000000004e-05, "loss": 8.7579, "step": 4443000 }, { "epoch": 35.55, "learning_rate": 3.2226e-05, "loss": 8.757, "step": 4443500 }, { "epoch": 35.55, "learning_rate": 3.2224e-05, "loss": 8.732, "step": 4444000 }, { "epoch": 35.56, "learning_rate": 3.2222000000000004e-05, "loss": 8.7633, "step": 4444500 }, { "epoch": 35.56, "learning_rate": 3.222e-05, "loss": 8.7612, "step": 4445000 }, { "epoch": 35.56, "learning_rate": 3.2218e-05, "loss": 8.7501, "step": 4445500 }, { "epoch": 35.57, "learning_rate": 3.2216000000000005e-05, "loss": 8.7467, "step": 4446000 }, { "epoch": 35.57, "learning_rate": 3.2214e-05, "loss": 8.7656, "step": 4446500 }, { "epoch": 35.58, "learning_rate": 3.2212e-05, "loss": 8.7494, "step": 4447000 }, { "epoch": 35.58, "learning_rate": 3.221e-05, "loss": 8.7477, "step": 4447500 }, { "epoch": 35.58, "learning_rate": 3.2208e-05, "loss": 8.739, "step": 4448000 }, { "epoch": 35.59, "learning_rate": 3.2206e-05, "loss": 8.7496, "step": 4448500 }, { "epoch": 35.59, "learning_rate": 3.2204e-05, "loss": 8.7535, "step": 4449000 }, { "epoch": 35.6, "learning_rate": 3.2202e-05, "loss": 8.7518, "step": 4449500 }, { "epoch": 35.6, "learning_rate": 3.2200000000000003e-05, "loss": 8.7284, "step": 4450000 }, { "epoch": 35.6, "learning_rate": 3.2198e-05, "loss": 8.7396, "step": 4450500 }, { "epoch": 35.61, "learning_rate": 3.2196e-05, "loss": 8.7645, "step": 4451000 }, { "epoch": 35.61, "learning_rate": 3.2194000000000004e-05, "loss": 8.7572, "step": 4451500 }, { "epoch": 35.62, "learning_rate": 3.2192e-05, "loss": 8.7939, "step": 4452000 }, { "epoch": 35.62, "learning_rate": 3.219e-05, "loss": 8.7705, "step": 4452500 }, { "epoch": 35.62, "learning_rate": 3.2188000000000004e-05, "loss": 8.7419, "step": 4453000 }, { "epoch": 35.63, "learning_rate": 3.2186e-05, "loss": 8.7457, "step": 4453500 }, { "epoch": 35.63, "learning_rate": 3.2184e-05, "loss": 8.7376, "step": 4454000 }, { "epoch": 35.64, "learning_rate": 3.2182e-05, "loss": 8.7337, "step": 4454500 }, { "epoch": 35.64, "learning_rate": 3.218e-05, "loss": 8.7423, "step": 4455000 }, { "epoch": 35.64, "learning_rate": 3.2178e-05, "loss": 8.7557, "step": 4455500 }, { "epoch": 35.65, "learning_rate": 3.2176e-05, "loss": 8.7479, "step": 4456000 }, { "epoch": 35.65, "learning_rate": 3.217400000000001e-05, "loss": 8.7645, "step": 4456500 }, { "epoch": 35.66, "learning_rate": 3.2172e-05, "loss": 8.7296, "step": 4457000 }, { "epoch": 35.66, "learning_rate": 3.217e-05, "loss": 8.733, "step": 4457500 }, { "epoch": 35.66, "learning_rate": 3.2168e-05, "loss": 8.7472, "step": 4458000 }, { "epoch": 35.67, "learning_rate": 3.2166000000000003e-05, "loss": 8.7204, "step": 4458500 }, { "epoch": 35.67, "learning_rate": 3.2164e-05, "loss": 8.7566, "step": 4459000 }, { "epoch": 35.68, "learning_rate": 3.2162e-05, "loss": 8.7498, "step": 4459500 }, { "epoch": 35.68, "learning_rate": 3.2160000000000004e-05, "loss": 8.7753, "step": 4460000 }, { "epoch": 35.68, "learning_rate": 3.2158e-05, "loss": 8.7552, "step": 4460500 }, { "epoch": 35.69, "learning_rate": 3.2156e-05, "loss": 8.7426, "step": 4461000 }, { "epoch": 35.69, "learning_rate": 3.2154e-05, "loss": 8.7446, "step": 4461500 }, { "epoch": 35.7, "learning_rate": 3.2152e-05, "loss": 8.7496, "step": 4462000 }, { "epoch": 35.7, "learning_rate": 3.215e-05, "loss": 8.7465, "step": 4462500 }, { "epoch": 35.7, "learning_rate": 3.2148e-05, "loss": 8.7599, "step": 4463000 }, { "epoch": 35.71, "learning_rate": 3.214600000000001e-05, "loss": 8.7541, "step": 4463500 }, { "epoch": 35.71, "learning_rate": 3.2144e-05, "loss": 8.7808, "step": 4464000 }, { "epoch": 35.72, "learning_rate": 3.2142e-05, "loss": 8.7537, "step": 4464500 }, { "epoch": 35.72, "learning_rate": 3.214e-05, "loss": 8.7348, "step": 4465000 }, { "epoch": 35.72, "learning_rate": 3.2138e-05, "loss": 8.7426, "step": 4465500 }, { "epoch": 35.73, "learning_rate": 3.2136e-05, "loss": 8.7484, "step": 4466000 }, { "epoch": 35.73, "learning_rate": 3.2134e-05, "loss": 8.755, "step": 4466500 }, { "epoch": 35.74, "learning_rate": 3.2132e-05, "loss": 8.7602, "step": 4467000 }, { "epoch": 35.74, "learning_rate": 3.213e-05, "loss": 8.756, "step": 4467500 }, { "epoch": 35.74, "learning_rate": 3.2128e-05, "loss": 8.7389, "step": 4468000 }, { "epoch": 35.75, "learning_rate": 3.2126000000000004e-05, "loss": 8.7483, "step": 4468500 }, { "epoch": 35.75, "learning_rate": 3.2124000000000006e-05, "loss": 8.7453, "step": 4469000 }, { "epoch": 35.76, "learning_rate": 3.2122e-05, "loss": 8.7572, "step": 4469500 }, { "epoch": 35.76, "learning_rate": 3.212e-05, "loss": 8.746, "step": 4470000 }, { "epoch": 35.76, "learning_rate": 3.2118000000000007e-05, "loss": 8.7529, "step": 4470500 }, { "epoch": 35.77, "learning_rate": 3.2116e-05, "loss": 8.754, "step": 4471000 }, { "epoch": 35.77, "learning_rate": 3.2114e-05, "loss": 8.7375, "step": 4471500 }, { "epoch": 35.78, "learning_rate": 3.2112e-05, "loss": 8.7476, "step": 4472000 }, { "epoch": 35.78, "learning_rate": 3.211e-05, "loss": 8.735, "step": 4472500 }, { "epoch": 35.78, "learning_rate": 3.2108e-05, "loss": 8.7509, "step": 4473000 }, { "epoch": 35.79, "learning_rate": 3.2106e-05, "loss": 8.7638, "step": 4473500 }, { "epoch": 35.79, "learning_rate": 3.2104e-05, "loss": 8.7308, "step": 4474000 }, { "epoch": 35.8, "learning_rate": 3.2102000000000005e-05, "loss": 8.7471, "step": 4474500 }, { "epoch": 35.8, "learning_rate": 3.21e-05, "loss": 8.7422, "step": 4475000 }, { "epoch": 35.8, "learning_rate": 3.2098e-05, "loss": 8.7531, "step": 4475500 }, { "epoch": 35.81, "learning_rate": 3.2096000000000006e-05, "loss": 8.7478, "step": 4476000 }, { "epoch": 35.81, "learning_rate": 3.2094e-05, "loss": 8.7582, "step": 4476500 }, { "epoch": 35.82, "learning_rate": 3.2092e-05, "loss": 8.7474, "step": 4477000 }, { "epoch": 35.82, "learning_rate": 3.2090000000000006e-05, "loss": 8.7649, "step": 4477500 }, { "epoch": 35.82, "learning_rate": 3.2088e-05, "loss": 8.7636, "step": 4478000 }, { "epoch": 35.83, "learning_rate": 3.2086e-05, "loss": 8.7378, "step": 4478500 }, { "epoch": 35.83, "learning_rate": 3.2084e-05, "loss": 8.7522, "step": 4479000 }, { "epoch": 35.84, "learning_rate": 3.2082e-05, "loss": 8.737, "step": 4479500 }, { "epoch": 35.84, "learning_rate": 3.208e-05, "loss": 8.7467, "step": 4480000 }, { "epoch": 35.84, "learning_rate": 3.2078e-05, "loss": 8.7223, "step": 4480500 }, { "epoch": 35.85, "learning_rate": 3.2076e-05, "loss": 8.7278, "step": 4481000 }, { "epoch": 35.85, "learning_rate": 3.2074000000000005e-05, "loss": 8.7475, "step": 4481500 }, { "epoch": 35.86, "learning_rate": 3.2072e-05, "loss": 8.7714, "step": 4482000 }, { "epoch": 35.86, "learning_rate": 3.207e-05, "loss": 8.7547, "step": 4482500 }, { "epoch": 35.86, "learning_rate": 3.2068000000000005e-05, "loss": 8.723, "step": 4483000 }, { "epoch": 35.87, "learning_rate": 3.2066e-05, "loss": 8.7704, "step": 4483500 }, { "epoch": 35.87, "learning_rate": 3.2063999999999997e-05, "loss": 8.7402, "step": 4484000 }, { "epoch": 35.88, "learning_rate": 3.2062000000000006e-05, "loss": 8.757, "step": 4484500 }, { "epoch": 35.88, "learning_rate": 3.206e-05, "loss": 8.7697, "step": 4485000 }, { "epoch": 35.88, "learning_rate": 3.2058e-05, "loss": 8.7548, "step": 4485500 }, { "epoch": 35.89, "learning_rate": 3.2056e-05, "loss": 8.7521, "step": 4486000 }, { "epoch": 35.89, "learning_rate": 3.2054e-05, "loss": 8.7557, "step": 4486500 }, { "epoch": 35.9, "learning_rate": 3.2052000000000004e-05, "loss": 8.7638, "step": 4487000 }, { "epoch": 35.9, "learning_rate": 3.205e-05, "loss": 8.7442, "step": 4487500 }, { "epoch": 35.9, "learning_rate": 3.2048e-05, "loss": 8.7296, "step": 4488000 }, { "epoch": 35.91, "learning_rate": 3.2046000000000005e-05, "loss": 8.7336, "step": 4488500 }, { "epoch": 35.91, "learning_rate": 3.2044e-05, "loss": 8.7388, "step": 4489000 }, { "epoch": 35.92, "learning_rate": 3.2042e-05, "loss": 8.7448, "step": 4489500 }, { "epoch": 35.92, "learning_rate": 3.2040000000000005e-05, "loss": 8.7533, "step": 4490000 }, { "epoch": 35.92, "learning_rate": 3.2038e-05, "loss": 8.7453, "step": 4490500 }, { "epoch": 35.93, "learning_rate": 3.2035999999999996e-05, "loss": 8.7428, "step": 4491000 }, { "epoch": 35.93, "learning_rate": 3.2034000000000005e-05, "loss": 8.7473, "step": 4491500 }, { "epoch": 35.94, "learning_rate": 3.2032e-05, "loss": 8.7529, "step": 4492000 }, { "epoch": 35.94, "learning_rate": 3.2029999999999997e-05, "loss": 8.7598, "step": 4492500 }, { "epoch": 35.94, "learning_rate": 3.2028000000000006e-05, "loss": 8.7328, "step": 4493000 }, { "epoch": 35.95, "learning_rate": 3.2026e-05, "loss": 8.7453, "step": 4493500 }, { "epoch": 35.95, "learning_rate": 3.2024000000000004e-05, "loss": 8.7473, "step": 4494000 }, { "epoch": 35.96, "learning_rate": 3.2022e-05, "loss": 8.75, "step": 4494500 }, { "epoch": 35.96, "learning_rate": 3.202e-05, "loss": 8.7543, "step": 4495000 }, { "epoch": 35.96, "learning_rate": 3.2018000000000004e-05, "loss": 8.7519, "step": 4495500 }, { "epoch": 35.97, "learning_rate": 3.2016e-05, "loss": 8.7586, "step": 4496000 }, { "epoch": 35.97, "learning_rate": 3.2014e-05, "loss": 8.7645, "step": 4496500 }, { "epoch": 35.98, "learning_rate": 3.2012000000000005e-05, "loss": 8.7659, "step": 4497000 }, { "epoch": 35.98, "learning_rate": 3.201e-05, "loss": 8.7405, "step": 4497500 }, { "epoch": 35.98, "learning_rate": 3.2007999999999996e-05, "loss": 8.737, "step": 4498000 }, { "epoch": 35.99, "learning_rate": 3.2006000000000005e-05, "loss": 8.753, "step": 4498500 }, { "epoch": 35.99, "learning_rate": 3.2004e-05, "loss": 8.7359, "step": 4499000 }, { "epoch": 36.0, "learning_rate": 3.2002e-05, "loss": 8.7413, "step": 4499500 }, { "epoch": 36.0, "learning_rate": 3.2000000000000005e-05, "loss": 8.7343, "step": 4500000 }, { "epoch": 36.0, "learning_rate": 3.1998e-05, "loss": 8.7588, "step": 4500500 }, { "epoch": 36.01, "learning_rate": 3.1996e-05, "loss": 8.7379, "step": 4501000 }, { "epoch": 36.01, "learning_rate": 3.1994e-05, "loss": 8.7521, "step": 4501500 }, { "epoch": 36.02, "learning_rate": 3.1992e-05, "loss": 8.7407, "step": 4502000 }, { "epoch": 36.02, "learning_rate": 3.1990000000000004e-05, "loss": 8.7416, "step": 4502500 }, { "epoch": 36.02, "learning_rate": 3.1988e-05, "loss": 8.7374, "step": 4503000 }, { "epoch": 36.03, "learning_rate": 3.1986e-05, "loss": 8.7439, "step": 4503500 }, { "epoch": 36.03, "learning_rate": 3.1984000000000004e-05, "loss": 8.755, "step": 4504000 }, { "epoch": 36.04, "learning_rate": 3.1982e-05, "loss": 8.7241, "step": 4504500 }, { "epoch": 36.04, "learning_rate": 3.198e-05, "loss": 8.7597, "step": 4505000 }, { "epoch": 36.04, "learning_rate": 3.1978000000000005e-05, "loss": 8.7343, "step": 4505500 }, { "epoch": 36.05, "learning_rate": 3.1976e-05, "loss": 8.7539, "step": 4506000 }, { "epoch": 36.05, "learning_rate": 3.1974e-05, "loss": 8.7423, "step": 4506500 }, { "epoch": 36.06, "learning_rate": 3.1972000000000005e-05, "loss": 8.7539, "step": 4507000 }, { "epoch": 36.06, "learning_rate": 3.197e-05, "loss": 8.7257, "step": 4507500 }, { "epoch": 36.06, "learning_rate": 3.1968e-05, "loss": 8.7393, "step": 4508000 }, { "epoch": 36.07, "learning_rate": 3.1966e-05, "loss": 8.7611, "step": 4508500 }, { "epoch": 36.07, "learning_rate": 3.1964e-05, "loss": 8.7454, "step": 4509000 }, { "epoch": 36.08, "learning_rate": 3.1962e-05, "loss": 8.7566, "step": 4509500 }, { "epoch": 36.08, "learning_rate": 3.196e-05, "loss": 8.7415, "step": 4510000 }, { "epoch": 36.08, "learning_rate": 3.1958e-05, "loss": 8.7607, "step": 4510500 }, { "epoch": 36.09, "learning_rate": 3.1956000000000004e-05, "loss": 8.7419, "step": 4511000 }, { "epoch": 36.09, "learning_rate": 3.1954e-05, "loss": 8.7451, "step": 4511500 }, { "epoch": 36.1, "learning_rate": 3.1952e-05, "loss": 8.7339, "step": 4512000 }, { "epoch": 36.1, "learning_rate": 3.1950000000000004e-05, "loss": 8.7614, "step": 4512500 }, { "epoch": 36.1, "learning_rate": 3.1948e-05, "loss": 8.7571, "step": 4513000 }, { "epoch": 36.11, "learning_rate": 3.1946e-05, "loss": 8.6912, "step": 4513500 }, { "epoch": 36.11, "learning_rate": 3.1944000000000005e-05, "loss": 8.7455, "step": 4514000 }, { "epoch": 36.12, "learning_rate": 3.1942e-05, "loss": 8.75, "step": 4514500 }, { "epoch": 36.12, "learning_rate": 3.194e-05, "loss": 8.7512, "step": 4515000 }, { "epoch": 36.12, "learning_rate": 3.1938e-05, "loss": 8.754, "step": 4515500 }, { "epoch": 36.13, "learning_rate": 3.1936e-05, "loss": 8.7483, "step": 4516000 }, { "epoch": 36.13, "learning_rate": 3.1934e-05, "loss": 8.7346, "step": 4516500 }, { "epoch": 36.14, "learning_rate": 3.1932e-05, "loss": 8.7346, "step": 4517000 }, { "epoch": 36.14, "learning_rate": 3.193e-05, "loss": 8.7547, "step": 4517500 }, { "epoch": 36.14, "learning_rate": 3.1928e-05, "loss": 8.7373, "step": 4518000 }, { "epoch": 36.15, "learning_rate": 3.1926e-05, "loss": 8.749, "step": 4518500 }, { "epoch": 36.15, "learning_rate": 3.1924e-05, "loss": 8.7502, "step": 4519000 }, { "epoch": 36.16, "learning_rate": 3.1922000000000004e-05, "loss": 8.7543, "step": 4519500 }, { "epoch": 36.16, "learning_rate": 3.192e-05, "loss": 8.7308, "step": 4520000 }, { "epoch": 36.16, "learning_rate": 3.1918e-05, "loss": 8.7575, "step": 4520500 }, { "epoch": 36.17, "learning_rate": 3.1916000000000004e-05, "loss": 8.7376, "step": 4521000 }, { "epoch": 36.17, "learning_rate": 3.1914e-05, "loss": 8.7618, "step": 4521500 }, { "epoch": 36.18, "learning_rate": 3.1912e-05, "loss": 8.7494, "step": 4522000 }, { "epoch": 36.18, "learning_rate": 3.191e-05, "loss": 8.7383, "step": 4522500 }, { "epoch": 36.18, "learning_rate": 3.1908e-05, "loss": 8.7499, "step": 4523000 }, { "epoch": 36.19, "learning_rate": 3.1906e-05, "loss": 8.7479, "step": 4523500 }, { "epoch": 36.19, "learning_rate": 3.1904e-05, "loss": 8.7627, "step": 4524000 }, { "epoch": 36.2, "learning_rate": 3.190200000000001e-05, "loss": 8.7366, "step": 4524500 }, { "epoch": 36.2, "learning_rate": 3.19e-05, "loss": 8.7593, "step": 4525000 }, { "epoch": 36.2, "learning_rate": 3.1898e-05, "loss": 8.7593, "step": 4525500 }, { "epoch": 36.21, "learning_rate": 3.1896e-05, "loss": 8.7607, "step": 4526000 }, { "epoch": 36.21, "learning_rate": 3.1894e-05, "loss": 8.7405, "step": 4526500 }, { "epoch": 36.22, "learning_rate": 3.1892e-05, "loss": 8.7456, "step": 4527000 }, { "epoch": 36.22, "learning_rate": 3.189e-05, "loss": 8.7361, "step": 4527500 }, { "epoch": 36.22, "learning_rate": 3.1888000000000004e-05, "loss": 8.7576, "step": 4528000 }, { "epoch": 36.23, "learning_rate": 3.1886e-05, "loss": 8.7617, "step": 4528500 }, { "epoch": 36.23, "learning_rate": 3.1884e-05, "loss": 8.733, "step": 4529000 }, { "epoch": 36.24, "learning_rate": 3.1882000000000004e-05, "loss": 8.7567, "step": 4529500 }, { "epoch": 36.24, "learning_rate": 3.188e-05, "loss": 8.7495, "step": 4530000 }, { "epoch": 36.24, "learning_rate": 3.1878e-05, "loss": 8.764, "step": 4530500 }, { "epoch": 36.25, "learning_rate": 3.1876e-05, "loss": 8.7342, "step": 4531000 }, { "epoch": 36.25, "learning_rate": 3.187400000000001e-05, "loss": 8.7393, "step": 4531500 }, { "epoch": 36.26, "learning_rate": 3.1872e-05, "loss": 8.7495, "step": 4532000 }, { "epoch": 36.26, "learning_rate": 3.187e-05, "loss": 8.7588, "step": 4532500 }, { "epoch": 36.26, "learning_rate": 3.1868e-05, "loss": 8.7578, "step": 4533000 }, { "epoch": 36.27, "learning_rate": 3.1866e-05, "loss": 8.7401, "step": 4533500 }, { "epoch": 36.27, "learning_rate": 3.1864e-05, "loss": 8.7158, "step": 4534000 }, { "epoch": 36.28, "learning_rate": 3.1862e-05, "loss": 8.7437, "step": 4534500 }, { "epoch": 36.28, "learning_rate": 3.186e-05, "loss": 8.7566, "step": 4535000 }, { "epoch": 36.28, "learning_rate": 3.1858e-05, "loss": 8.7525, "step": 4535500 }, { "epoch": 36.29, "learning_rate": 3.1856e-05, "loss": 8.7492, "step": 4536000 }, { "epoch": 36.29, "learning_rate": 3.1854000000000004e-05, "loss": 8.765, "step": 4536500 }, { "epoch": 36.3, "learning_rate": 3.1852000000000006e-05, "loss": 8.7548, "step": 4537000 }, { "epoch": 36.3, "learning_rate": 3.185e-05, "loss": 8.7388, "step": 4537500 }, { "epoch": 36.3, "learning_rate": 3.1848e-05, "loss": 8.7568, "step": 4538000 }, { "epoch": 36.31, "learning_rate": 3.1846000000000006e-05, "loss": 8.7609, "step": 4538500 }, { "epoch": 36.31, "learning_rate": 3.1844e-05, "loss": 8.7655, "step": 4539000 }, { "epoch": 36.32, "learning_rate": 3.1842e-05, "loss": 8.7317, "step": 4539500 }, { "epoch": 36.32, "learning_rate": 3.184e-05, "loss": 8.745, "step": 4540000 }, { "epoch": 36.32, "learning_rate": 3.1838e-05, "loss": 8.7578, "step": 4540500 }, { "epoch": 36.33, "learning_rate": 3.1836e-05, "loss": 8.7339, "step": 4541000 }, { "epoch": 36.33, "learning_rate": 3.1834e-05, "loss": 8.7698, "step": 4541500 }, { "epoch": 36.34, "learning_rate": 3.1832e-05, "loss": 8.7444, "step": 4542000 }, { "epoch": 36.34, "learning_rate": 3.1830000000000005e-05, "loss": 8.7549, "step": 4542500 }, { "epoch": 36.34, "learning_rate": 3.1828e-05, "loss": 8.7523, "step": 4543000 }, { "epoch": 36.35, "learning_rate": 3.1826e-05, "loss": 8.7696, "step": 4543500 }, { "epoch": 36.35, "learning_rate": 3.1824000000000006e-05, "loss": 8.7584, "step": 4544000 }, { "epoch": 36.36, "learning_rate": 3.1822e-05, "loss": 8.7608, "step": 4544500 }, { "epoch": 36.36, "learning_rate": 3.182e-05, "loss": 8.7348, "step": 4545000 }, { "epoch": 36.36, "learning_rate": 3.1818000000000006e-05, "loss": 8.7446, "step": 4545500 }, { "epoch": 36.37, "learning_rate": 3.1816e-05, "loss": 8.745, "step": 4546000 }, { "epoch": 36.37, "learning_rate": 3.1814e-05, "loss": 8.7601, "step": 4546500 }, { "epoch": 36.38, "learning_rate": 3.1812e-05, "loss": 8.7616, "step": 4547000 }, { "epoch": 36.38, "learning_rate": 3.181e-05, "loss": 8.7514, "step": 4547500 }, { "epoch": 36.38, "learning_rate": 3.1808e-05, "loss": 8.7536, "step": 4548000 }, { "epoch": 36.39, "learning_rate": 3.1806e-05, "loss": 8.7529, "step": 4548500 }, { "epoch": 36.39, "learning_rate": 3.1804e-05, "loss": 8.7529, "step": 4549000 }, { "epoch": 36.4, "learning_rate": 3.1802000000000005e-05, "loss": 8.7572, "step": 4549500 }, { "epoch": 36.4, "learning_rate": 3.18e-05, "loss": 8.7812, "step": 4550000 }, { "epoch": 36.4, "learning_rate": 3.1798e-05, "loss": 8.7306, "step": 4550500 }, { "epoch": 36.41, "learning_rate": 3.1796000000000005e-05, "loss": 8.7433, "step": 4551000 }, { "epoch": 36.41, "learning_rate": 3.1794e-05, "loss": 8.7389, "step": 4551500 }, { "epoch": 36.42, "learning_rate": 3.1791999999999996e-05, "loss": 8.7396, "step": 4552000 }, { "epoch": 36.42, "learning_rate": 3.1790000000000006e-05, "loss": 8.7439, "step": 4552500 }, { "epoch": 36.42, "learning_rate": 3.1788e-05, "loss": 8.7154, "step": 4553000 }, { "epoch": 36.43, "learning_rate": 3.1786e-05, "loss": 8.7358, "step": 4553500 }, { "epoch": 36.43, "learning_rate": 3.1784000000000006e-05, "loss": 8.7486, "step": 4554000 }, { "epoch": 36.44, "learning_rate": 3.1782e-05, "loss": 8.739, "step": 4554500 }, { "epoch": 36.44, "learning_rate": 3.1780000000000004e-05, "loss": 8.7586, "step": 4555000 }, { "epoch": 36.44, "learning_rate": 3.1778e-05, "loss": 8.7492, "step": 4555500 }, { "epoch": 36.45, "learning_rate": 3.1776e-05, "loss": 8.757, "step": 4556000 }, { "epoch": 36.45, "learning_rate": 3.1774000000000004e-05, "loss": 8.7508, "step": 4556500 }, { "epoch": 36.46, "learning_rate": 3.1772e-05, "loss": 8.7709, "step": 4557000 }, { "epoch": 36.46, "learning_rate": 3.177e-05, "loss": 8.73, "step": 4557500 }, { "epoch": 36.46, "learning_rate": 3.1768000000000005e-05, "loss": 8.7384, "step": 4558000 }, { "epoch": 36.47, "learning_rate": 3.1766e-05, "loss": 8.7542, "step": 4558500 }, { "epoch": 36.47, "learning_rate": 3.1763999999999996e-05, "loss": 8.7619, "step": 4559000 }, { "epoch": 36.48, "learning_rate": 3.1762000000000005e-05, "loss": 8.7604, "step": 4559500 }, { "epoch": 36.48, "learning_rate": 3.176e-05, "loss": 8.7635, "step": 4560000 }, { "epoch": 36.48, "learning_rate": 3.1757999999999996e-05, "loss": 8.7348, "step": 4560500 }, { "epoch": 36.49, "learning_rate": 3.1756000000000006e-05, "loss": 8.7514, "step": 4561000 }, { "epoch": 36.49, "learning_rate": 3.1754e-05, "loss": 8.766, "step": 4561500 }, { "epoch": 36.5, "learning_rate": 3.1752000000000004e-05, "loss": 8.7637, "step": 4562000 }, { "epoch": 36.5, "learning_rate": 3.175e-05, "loss": 8.7396, "step": 4562500 }, { "epoch": 36.5, "learning_rate": 3.1748e-05, "loss": 8.7587, "step": 4563000 }, { "epoch": 36.51, "learning_rate": 3.1746000000000004e-05, "loss": 8.7631, "step": 4563500 }, { "epoch": 36.51, "learning_rate": 3.1744e-05, "loss": 8.7594, "step": 4564000 }, { "epoch": 36.52, "learning_rate": 3.1742e-05, "loss": 8.7426, "step": 4564500 }, { "epoch": 36.52, "learning_rate": 3.1740000000000004e-05, "loss": 8.7637, "step": 4565000 }, { "epoch": 36.52, "learning_rate": 3.1738e-05, "loss": 8.7516, "step": 4565500 }, { "epoch": 36.53, "learning_rate": 3.1736e-05, "loss": 8.7366, "step": 4566000 }, { "epoch": 36.53, "learning_rate": 3.1734000000000005e-05, "loss": 8.746, "step": 4566500 }, { "epoch": 36.54, "learning_rate": 3.1732e-05, "loss": 8.7512, "step": 4567000 }, { "epoch": 36.54, "learning_rate": 3.173e-05, "loss": 8.7453, "step": 4567500 }, { "epoch": 36.54, "learning_rate": 3.1728000000000005e-05, "loss": 8.7512, "step": 4568000 }, { "epoch": 36.55, "learning_rate": 3.1726e-05, "loss": 8.7488, "step": 4568500 }, { "epoch": 36.55, "learning_rate": 3.1724e-05, "loss": 8.7325, "step": 4569000 }, { "epoch": 36.56, "learning_rate": 3.1722e-05, "loss": 8.7369, "step": 4569500 }, { "epoch": 36.56, "learning_rate": 3.172e-05, "loss": 8.7484, "step": 4570000 }, { "epoch": 36.56, "learning_rate": 3.1718000000000004e-05, "loss": 8.7294, "step": 4570500 }, { "epoch": 36.57, "learning_rate": 3.1716e-05, "loss": 8.7472, "step": 4571000 }, { "epoch": 36.57, "learning_rate": 3.1714e-05, "loss": 8.7405, "step": 4571500 }, { "epoch": 36.58, "learning_rate": 3.1712000000000004e-05, "loss": 8.7442, "step": 4572000 }, { "epoch": 36.58, "learning_rate": 3.171e-05, "loss": 8.7447, "step": 4572500 }, { "epoch": 36.58, "learning_rate": 3.1708e-05, "loss": 8.7404, "step": 4573000 }, { "epoch": 36.59, "learning_rate": 3.1706000000000004e-05, "loss": 8.7684, "step": 4573500 }, { "epoch": 36.59, "learning_rate": 3.1704e-05, "loss": 8.7663, "step": 4574000 }, { "epoch": 36.6, "learning_rate": 3.1702e-05, "loss": 8.7506, "step": 4574500 }, { "epoch": 36.6, "learning_rate": 3.1700000000000005e-05, "loss": 8.7281, "step": 4575000 }, { "epoch": 36.6, "learning_rate": 3.1698e-05, "loss": 8.7611, "step": 4575500 }, { "epoch": 36.61, "learning_rate": 3.1696e-05, "loss": 8.7424, "step": 4576000 }, { "epoch": 36.61, "learning_rate": 3.1694e-05, "loss": 8.755, "step": 4576500 }, { "epoch": 36.62, "learning_rate": 3.1692e-05, "loss": 8.7483, "step": 4577000 }, { "epoch": 36.62, "learning_rate": 3.169e-05, "loss": 8.7481, "step": 4577500 }, { "epoch": 36.62, "learning_rate": 3.1688e-05, "loss": 8.7313, "step": 4578000 }, { "epoch": 36.63, "learning_rate": 3.1686e-05, "loss": 8.7157, "step": 4578500 }, { "epoch": 36.63, "learning_rate": 3.1684000000000004e-05, "loss": 8.751, "step": 4579000 }, { "epoch": 36.64, "learning_rate": 3.1682e-05, "loss": 8.7515, "step": 4579500 }, { "epoch": 36.64, "learning_rate": 3.168e-05, "loss": 8.7585, "step": 4580000 }, { "epoch": 36.64, "learning_rate": 3.1678000000000004e-05, "loss": 8.7597, "step": 4580500 }, { "epoch": 36.65, "learning_rate": 3.1676e-05, "loss": 8.7637, "step": 4581000 }, { "epoch": 36.65, "learning_rate": 3.1674e-05, "loss": 8.7448, "step": 4581500 }, { "epoch": 36.66, "learning_rate": 3.1672000000000004e-05, "loss": 8.7376, "step": 4582000 }, { "epoch": 36.66, "learning_rate": 3.167e-05, "loss": 8.7671, "step": 4582500 }, { "epoch": 36.66, "learning_rate": 3.1668e-05, "loss": 8.743, "step": 4583000 }, { "epoch": 36.67, "learning_rate": 3.1666e-05, "loss": 8.7508, "step": 4583500 }, { "epoch": 36.67, "learning_rate": 3.1664e-05, "loss": 8.7508, "step": 4584000 }, { "epoch": 36.68, "learning_rate": 3.1662e-05, "loss": 8.7457, "step": 4584500 }, { "epoch": 36.68, "learning_rate": 3.166e-05, "loss": 8.738, "step": 4585000 }, { "epoch": 36.68, "learning_rate": 3.1658e-05, "loss": 8.7674, "step": 4585500 }, { "epoch": 36.69, "learning_rate": 3.1656e-05, "loss": 8.7561, "step": 4586000 }, { "epoch": 36.69, "learning_rate": 3.1654e-05, "loss": 8.736, "step": 4586500 }, { "epoch": 36.7, "learning_rate": 3.1652e-05, "loss": 8.7665, "step": 4587000 }, { "epoch": 36.7, "learning_rate": 3.1650000000000004e-05, "loss": 8.7532, "step": 4587500 }, { "epoch": 36.7, "learning_rate": 3.1648e-05, "loss": 8.7482, "step": 4588000 }, { "epoch": 36.71, "learning_rate": 3.1646e-05, "loss": 8.7557, "step": 4588500 }, { "epoch": 36.71, "learning_rate": 3.1644000000000004e-05, "loss": 8.7677, "step": 4589000 }, { "epoch": 36.72, "learning_rate": 3.1642e-05, "loss": 8.7636, "step": 4589500 }, { "epoch": 36.72, "learning_rate": 3.164e-05, "loss": 8.7425, "step": 4590000 }, { "epoch": 36.72, "learning_rate": 3.1638e-05, "loss": 8.7703, "step": 4590500 }, { "epoch": 36.73, "learning_rate": 3.1636e-05, "loss": 8.7473, "step": 4591000 }, { "epoch": 36.73, "learning_rate": 3.1634e-05, "loss": 8.7371, "step": 4591500 }, { "epoch": 36.74, "learning_rate": 3.1632e-05, "loss": 8.7776, "step": 4592000 }, { "epoch": 36.74, "learning_rate": 3.163000000000001e-05, "loss": 8.7421, "step": 4592500 }, { "epoch": 36.74, "learning_rate": 3.1628e-05, "loss": 8.7398, "step": 4593000 }, { "epoch": 36.75, "learning_rate": 3.1626e-05, "loss": 8.7466, "step": 4593500 }, { "epoch": 36.75, "learning_rate": 3.1624e-05, "loss": 8.7292, "step": 4594000 }, { "epoch": 36.76, "learning_rate": 3.1622e-05, "loss": 8.7487, "step": 4594500 }, { "epoch": 36.76, "learning_rate": 3.162e-05, "loss": 8.7504, "step": 4595000 }, { "epoch": 36.76, "learning_rate": 3.1618e-05, "loss": 8.7373, "step": 4595500 }, { "epoch": 36.77, "learning_rate": 3.1616000000000004e-05, "loss": 8.7369, "step": 4596000 }, { "epoch": 36.77, "learning_rate": 3.1614e-05, "loss": 8.7407, "step": 4596500 }, { "epoch": 36.78, "learning_rate": 3.1612e-05, "loss": 8.7525, "step": 4597000 }, { "epoch": 36.78, "learning_rate": 3.1610000000000004e-05, "loss": 8.7507, "step": 4597500 }, { "epoch": 36.78, "learning_rate": 3.1608000000000006e-05, "loss": 8.7528, "step": 4598000 }, { "epoch": 36.79, "learning_rate": 3.1606e-05, "loss": 8.7605, "step": 4598500 }, { "epoch": 36.79, "learning_rate": 3.1604e-05, "loss": 8.7494, "step": 4599000 }, { "epoch": 36.8, "learning_rate": 3.160200000000001e-05, "loss": 8.7623, "step": 4599500 }, { "epoch": 36.8, "learning_rate": 3.16e-05, "loss": 8.7492, "step": 4600000 }, { "epoch": 36.8, "learning_rate": 3.1598e-05, "loss": 8.7589, "step": 4600500 }, { "epoch": 36.81, "learning_rate": 3.1596e-05, "loss": 8.74, "step": 4601000 }, { "epoch": 36.81, "learning_rate": 3.1594e-05, "loss": 8.7378, "step": 4601500 }, { "epoch": 36.82, "learning_rate": 3.1592e-05, "loss": 8.7477, "step": 4602000 }, { "epoch": 36.82, "learning_rate": 3.159e-05, "loss": 8.7529, "step": 4602500 }, { "epoch": 36.82, "learning_rate": 3.1588e-05, "loss": 8.7577, "step": 4603000 }, { "epoch": 36.83, "learning_rate": 3.1586e-05, "loss": 8.7326, "step": 4603500 }, { "epoch": 36.83, "learning_rate": 3.1584e-05, "loss": 8.735, "step": 4604000 }, { "epoch": 36.84, "learning_rate": 3.1582000000000004e-05, "loss": 8.7435, "step": 4604500 }, { "epoch": 36.84, "learning_rate": 3.1580000000000006e-05, "loss": 8.7727, "step": 4605000 }, { "epoch": 36.84, "learning_rate": 3.1578e-05, "loss": 8.7549, "step": 4605500 }, { "epoch": 36.85, "learning_rate": 3.1576e-05, "loss": 8.7507, "step": 4606000 }, { "epoch": 36.85, "learning_rate": 3.1574000000000006e-05, "loss": 8.7628, "step": 4606500 }, { "epoch": 36.86, "learning_rate": 3.1572e-05, "loss": 8.752, "step": 4607000 }, { "epoch": 36.86, "learning_rate": 3.157e-05, "loss": 8.7569, "step": 4607500 }, { "epoch": 36.86, "learning_rate": 3.1568e-05, "loss": 8.7602, "step": 4608000 }, { "epoch": 36.87, "learning_rate": 3.1566e-05, "loss": 8.7622, "step": 4608500 }, { "epoch": 36.87, "learning_rate": 3.1564e-05, "loss": 8.7447, "step": 4609000 }, { "epoch": 36.88, "learning_rate": 3.1562e-05, "loss": 8.7343, "step": 4609500 }, { "epoch": 36.88, "learning_rate": 3.156e-05, "loss": 8.7439, "step": 4610000 }, { "epoch": 36.88, "learning_rate": 3.1558000000000005e-05, "loss": 8.7627, "step": 4610500 }, { "epoch": 36.89, "learning_rate": 3.1556e-05, "loss": 8.7382, "step": 4611000 }, { "epoch": 36.89, "learning_rate": 3.1554e-05, "loss": 8.7512, "step": 4611500 }, { "epoch": 36.9, "learning_rate": 3.1552000000000006e-05, "loss": 8.7354, "step": 4612000 }, { "epoch": 36.9, "learning_rate": 3.155e-05, "loss": 8.7408, "step": 4612500 }, { "epoch": 36.9, "learning_rate": 3.1548e-05, "loss": 8.7509, "step": 4613000 }, { "epoch": 36.91, "learning_rate": 3.1546000000000006e-05, "loss": 8.7386, "step": 4613500 }, { "epoch": 36.91, "learning_rate": 3.1544e-05, "loss": 8.7631, "step": 4614000 }, { "epoch": 36.92, "learning_rate": 3.1542e-05, "loss": 8.7312, "step": 4614500 }, { "epoch": 36.92, "learning_rate": 3.154e-05, "loss": 8.7522, "step": 4615000 }, { "epoch": 36.92, "learning_rate": 3.1538e-05, "loss": 8.7615, "step": 4615500 }, { "epoch": 36.93, "learning_rate": 3.1536e-05, "loss": 8.7516, "step": 4616000 }, { "epoch": 36.93, "learning_rate": 3.1534e-05, "loss": 8.7466, "step": 4616500 }, { "epoch": 36.94, "learning_rate": 3.1532e-05, "loss": 8.7519, "step": 4617000 }, { "epoch": 36.94, "learning_rate": 3.1530000000000005e-05, "loss": 8.7388, "step": 4617500 }, { "epoch": 36.94, "learning_rate": 3.1528e-05, "loss": 8.7388, "step": 4618000 }, { "epoch": 36.95, "learning_rate": 3.1526e-05, "loss": 8.7417, "step": 4618500 }, { "epoch": 36.95, "learning_rate": 3.1524000000000005e-05, "loss": 8.7401, "step": 4619000 }, { "epoch": 36.96, "learning_rate": 3.1522e-05, "loss": 8.7516, "step": 4619500 }, { "epoch": 36.96, "learning_rate": 3.1519999999999996e-05, "loss": 8.7555, "step": 4620000 }, { "epoch": 36.96, "learning_rate": 3.1518000000000006e-05, "loss": 8.747, "step": 4620500 }, { "epoch": 36.97, "learning_rate": 3.1516e-05, "loss": 8.7619, "step": 4621000 }, { "epoch": 36.97, "learning_rate": 3.1514e-05, "loss": 8.7408, "step": 4621500 }, { "epoch": 36.98, "learning_rate": 3.1512000000000006e-05, "loss": 8.753, "step": 4622000 }, { "epoch": 36.98, "learning_rate": 3.151e-05, "loss": 8.7541, "step": 4622500 }, { "epoch": 36.98, "learning_rate": 3.1508000000000004e-05, "loss": 8.7755, "step": 4623000 }, { "epoch": 36.99, "learning_rate": 3.1506e-05, "loss": 8.7512, "step": 4623500 }, { "epoch": 36.99, "learning_rate": 3.1504e-05, "loss": 8.7401, "step": 4624000 }, { "epoch": 37.0, "learning_rate": 3.1502000000000004e-05, "loss": 8.7422, "step": 4624500 }, { "epoch": 37.0, "learning_rate": 3.15e-05, "loss": 8.7561, "step": 4625000 }, { "epoch": 37.0, "learning_rate": 3.1498e-05, "loss": 8.7727, "step": 4625500 }, { "epoch": 37.01, "learning_rate": 3.1496000000000005e-05, "loss": 8.7483, "step": 4626000 }, { "epoch": 37.01, "learning_rate": 3.1494e-05, "loss": 8.746, "step": 4626500 }, { "epoch": 37.02, "learning_rate": 3.1491999999999996e-05, "loss": 8.7397, "step": 4627000 }, { "epoch": 37.02, "learning_rate": 3.1490000000000005e-05, "loss": 8.7481, "step": 4627500 }, { "epoch": 37.02, "learning_rate": 3.1488e-05, "loss": 8.7707, "step": 4628000 }, { "epoch": 37.03, "learning_rate": 3.1485999999999996e-05, "loss": 8.7569, "step": 4628500 }, { "epoch": 37.03, "learning_rate": 3.1484000000000006e-05, "loss": 8.7595, "step": 4629000 }, { "epoch": 37.04, "learning_rate": 3.1482e-05, "loss": 8.7434, "step": 4629500 }, { "epoch": 37.04, "learning_rate": 3.1480000000000004e-05, "loss": 8.7475, "step": 4630000 }, { "epoch": 37.04, "learning_rate": 3.1478e-05, "loss": 8.7422, "step": 4630500 }, { "epoch": 37.05, "learning_rate": 3.1476e-05, "loss": 8.7391, "step": 4631000 }, { "epoch": 37.05, "learning_rate": 3.1474000000000004e-05, "loss": 8.7525, "step": 4631500 }, { "epoch": 37.06, "learning_rate": 3.1472e-05, "loss": 8.765, "step": 4632000 }, { "epoch": 37.06, "learning_rate": 3.147e-05, "loss": 8.7653, "step": 4632500 }, { "epoch": 37.06, "learning_rate": 3.1468000000000004e-05, "loss": 8.7248, "step": 4633000 }, { "epoch": 37.07, "learning_rate": 3.1466e-05, "loss": 8.7419, "step": 4633500 }, { "epoch": 37.07, "learning_rate": 3.1464e-05, "loss": 8.7668, "step": 4634000 }, { "epoch": 37.08, "learning_rate": 3.1462000000000005e-05, "loss": 8.7436, "step": 4634500 }, { "epoch": 37.08, "learning_rate": 3.146e-05, "loss": 8.7631, "step": 4635000 }, { "epoch": 37.08, "learning_rate": 3.1458e-05, "loss": 8.7285, "step": 4635500 }, { "epoch": 37.09, "learning_rate": 3.1456000000000005e-05, "loss": 8.7663, "step": 4636000 }, { "epoch": 37.09, "learning_rate": 3.1454e-05, "loss": 8.7459, "step": 4636500 }, { "epoch": 37.1, "learning_rate": 3.1452e-05, "loss": 8.7595, "step": 4637000 }, { "epoch": 37.1, "learning_rate": 3.145e-05, "loss": 8.7564, "step": 4637500 }, { "epoch": 37.1, "learning_rate": 3.1448e-05, "loss": 8.7562, "step": 4638000 }, { "epoch": 37.11, "learning_rate": 3.1446000000000004e-05, "loss": 8.7432, "step": 4638500 }, { "epoch": 37.11, "learning_rate": 3.1444e-05, "loss": 8.746, "step": 4639000 }, { "epoch": 37.12, "learning_rate": 3.1442e-05, "loss": 8.7339, "step": 4639500 }, { "epoch": 37.12, "learning_rate": 3.1440000000000004e-05, "loss": 8.7547, "step": 4640000 }, { "epoch": 37.12, "learning_rate": 3.1438e-05, "loss": 8.7411, "step": 4640500 }, { "epoch": 37.13, "learning_rate": 3.1436e-05, "loss": 8.7724, "step": 4641000 }, { "epoch": 37.13, "learning_rate": 3.1434000000000004e-05, "loss": 8.7358, "step": 4641500 }, { "epoch": 37.14, "learning_rate": 3.1432e-05, "loss": 8.7592, "step": 4642000 }, { "epoch": 37.14, "learning_rate": 3.143e-05, "loss": 8.7505, "step": 4642500 }, { "epoch": 37.14, "learning_rate": 3.1428000000000005e-05, "loss": 8.7533, "step": 4643000 }, { "epoch": 37.15, "learning_rate": 3.1426e-05, "loss": 8.7387, "step": 4643500 }, { "epoch": 37.15, "learning_rate": 3.1424e-05, "loss": 8.7285, "step": 4644000 }, { "epoch": 37.16, "learning_rate": 3.1422e-05, "loss": 8.7479, "step": 4644500 }, { "epoch": 37.16, "learning_rate": 3.142e-05, "loss": 8.7624, "step": 4645000 }, { "epoch": 37.16, "learning_rate": 3.1418e-05, "loss": 8.75, "step": 4645500 }, { "epoch": 37.17, "learning_rate": 3.1416e-05, "loss": 8.7556, "step": 4646000 }, { "epoch": 37.17, "learning_rate": 3.1414e-05, "loss": 8.736, "step": 4646500 }, { "epoch": 37.18, "learning_rate": 3.1412000000000004e-05, "loss": 8.7555, "step": 4647000 }, { "epoch": 37.18, "learning_rate": 3.141e-05, "loss": 8.7494, "step": 4647500 }, { "epoch": 37.18, "learning_rate": 3.1408e-05, "loss": 8.7581, "step": 4648000 }, { "epoch": 37.19, "learning_rate": 3.1406000000000004e-05, "loss": 8.7715, "step": 4648500 }, { "epoch": 37.19, "learning_rate": 3.1404e-05, "loss": 8.7564, "step": 4649000 }, { "epoch": 37.2, "learning_rate": 3.1402e-05, "loss": 8.7655, "step": 4649500 }, { "epoch": 37.2, "learning_rate": 3.1400000000000004e-05, "loss": 8.7591, "step": 4650000 }, { "epoch": 37.2, "learning_rate": 3.1398e-05, "loss": 8.7408, "step": 4650500 }, { "epoch": 37.21, "learning_rate": 3.1396e-05, "loss": 8.7374, "step": 4651000 }, { "epoch": 37.21, "learning_rate": 3.1394e-05, "loss": 8.7446, "step": 4651500 }, { "epoch": 37.22, "learning_rate": 3.1392e-05, "loss": 8.7532, "step": 4652000 }, { "epoch": 37.22, "learning_rate": 3.139e-05, "loss": 8.7552, "step": 4652500 }, { "epoch": 37.22, "learning_rate": 3.1388e-05, "loss": 8.7642, "step": 4653000 }, { "epoch": 37.23, "learning_rate": 3.1386e-05, "loss": 8.7524, "step": 4653500 }, { "epoch": 37.23, "learning_rate": 3.1384e-05, "loss": 8.7561, "step": 4654000 }, { "epoch": 37.24, "learning_rate": 3.1382e-05, "loss": 8.7627, "step": 4654500 }, { "epoch": 37.24, "learning_rate": 3.138e-05, "loss": 8.7471, "step": 4655000 }, { "epoch": 37.24, "learning_rate": 3.1378000000000003e-05, "loss": 8.7483, "step": 4655500 }, { "epoch": 37.25, "learning_rate": 3.1376e-05, "loss": 8.7638, "step": 4656000 }, { "epoch": 37.25, "learning_rate": 3.1374e-05, "loss": 8.7511, "step": 4656500 }, { "epoch": 37.26, "learning_rate": 3.1372000000000004e-05, "loss": 8.7575, "step": 4657000 }, { "epoch": 37.26, "learning_rate": 3.137e-05, "loss": 8.7465, "step": 4657500 }, { "epoch": 37.26, "learning_rate": 3.1368e-05, "loss": 8.7515, "step": 4658000 }, { "epoch": 37.27, "learning_rate": 3.1366000000000004e-05, "loss": 8.7485, "step": 4658500 }, { "epoch": 37.27, "learning_rate": 3.1364e-05, "loss": 8.7658, "step": 4659000 }, { "epoch": 37.28, "learning_rate": 3.1362e-05, "loss": 8.7508, "step": 4659500 }, { "epoch": 37.28, "learning_rate": 3.136e-05, "loss": 8.7599, "step": 4660000 }, { "epoch": 37.28, "learning_rate": 3.135800000000001e-05, "loss": 8.7468, "step": 4660500 }, { "epoch": 37.29, "learning_rate": 3.1356e-05, "loss": 8.7414, "step": 4661000 }, { "epoch": 37.29, "learning_rate": 3.1354e-05, "loss": 8.7584, "step": 4661500 }, { "epoch": 37.3, "learning_rate": 3.1352e-05, "loss": 8.7626, "step": 4662000 }, { "epoch": 37.3, "learning_rate": 3.135e-05, "loss": 8.753, "step": 4662500 }, { "epoch": 37.3, "learning_rate": 3.1348e-05, "loss": 8.7581, "step": 4663000 }, { "epoch": 37.31, "learning_rate": 3.1346e-05, "loss": 8.7419, "step": 4663500 }, { "epoch": 37.31, "learning_rate": 3.1344000000000003e-05, "loss": 8.7649, "step": 4664000 }, { "epoch": 37.32, "learning_rate": 3.1342e-05, "loss": 8.7531, "step": 4664500 }, { "epoch": 37.32, "learning_rate": 3.134e-05, "loss": 8.7277, "step": 4665000 }, { "epoch": 37.32, "learning_rate": 3.1338000000000004e-05, "loss": 8.7527, "step": 4665500 }, { "epoch": 37.33, "learning_rate": 3.1336000000000006e-05, "loss": 8.7448, "step": 4666000 }, { "epoch": 37.33, "learning_rate": 3.1334e-05, "loss": 8.7367, "step": 4666500 }, { "epoch": 37.34, "learning_rate": 3.1332e-05, "loss": 8.748, "step": 4667000 }, { "epoch": 37.34, "learning_rate": 3.133000000000001e-05, "loss": 8.7604, "step": 4667500 }, { "epoch": 37.34, "learning_rate": 3.1328e-05, "loss": 8.7517, "step": 4668000 }, { "epoch": 37.35, "learning_rate": 3.1326e-05, "loss": 8.7541, "step": 4668500 }, { "epoch": 37.35, "learning_rate": 3.1324e-05, "loss": 8.7149, "step": 4669000 }, { "epoch": 37.36, "learning_rate": 3.1322e-05, "loss": 8.7322, "step": 4669500 }, { "epoch": 37.36, "learning_rate": 3.132e-05, "loss": 8.7647, "step": 4670000 }, { "epoch": 37.36, "learning_rate": 3.1318e-05, "loss": 8.7444, "step": 4670500 }, { "epoch": 37.37, "learning_rate": 3.1316e-05, "loss": 8.7395, "step": 4671000 }, { "epoch": 37.37, "learning_rate": 3.1314e-05, "loss": 8.7423, "step": 4671500 }, { "epoch": 37.38, "learning_rate": 3.1312e-05, "loss": 8.7412, "step": 4672000 }, { "epoch": 37.38, "learning_rate": 3.1310000000000003e-05, "loss": 8.7464, "step": 4672500 }, { "epoch": 37.38, "learning_rate": 3.1308000000000006e-05, "loss": 8.7594, "step": 4673000 }, { "epoch": 37.39, "learning_rate": 3.1306e-05, "loss": 8.7503, "step": 4673500 }, { "epoch": 37.39, "learning_rate": 3.1304e-05, "loss": 8.7491, "step": 4674000 }, { "epoch": 37.4, "learning_rate": 3.1302000000000006e-05, "loss": 8.7468, "step": 4674500 }, { "epoch": 37.4, "learning_rate": 3.13e-05, "loss": 8.7507, "step": 4675000 }, { "epoch": 37.4, "learning_rate": 3.1298e-05, "loss": 8.7394, "step": 4675500 }, { "epoch": 37.41, "learning_rate": 3.1296e-05, "loss": 8.7351, "step": 4676000 }, { "epoch": 37.41, "learning_rate": 3.1294e-05, "loss": 8.7535, "step": 4676500 }, { "epoch": 37.42, "learning_rate": 3.1292e-05, "loss": 8.7575, "step": 4677000 }, { "epoch": 37.42, "learning_rate": 3.129e-05, "loss": 8.7486, "step": 4677500 }, { "epoch": 37.42, "learning_rate": 3.1288e-05, "loss": 8.7576, "step": 4678000 }, { "epoch": 37.43, "learning_rate": 3.1286000000000005e-05, "loss": 8.7562, "step": 4678500 }, { "epoch": 37.43, "learning_rate": 3.1284e-05, "loss": 8.7517, "step": 4679000 }, { "epoch": 37.44, "learning_rate": 3.1282e-05, "loss": 8.7522, "step": 4679500 }, { "epoch": 37.44, "learning_rate": 3.1280000000000005e-05, "loss": 8.7694, "step": 4680000 }, { "epoch": 37.44, "learning_rate": 3.1278e-05, "loss": 8.7324, "step": 4680500 }, { "epoch": 37.45, "learning_rate": 3.1276e-05, "loss": 8.772, "step": 4681000 }, { "epoch": 37.45, "learning_rate": 3.1274000000000006e-05, "loss": 8.7415, "step": 4681500 }, { "epoch": 37.46, "learning_rate": 3.1272e-05, "loss": 8.7522, "step": 4682000 }, { "epoch": 37.46, "learning_rate": 3.127e-05, "loss": 8.7527, "step": 4682500 }, { "epoch": 37.46, "learning_rate": 3.1268e-05, "loss": 8.7557, "step": 4683000 }, { "epoch": 37.47, "learning_rate": 3.1266e-05, "loss": 8.7614, "step": 4683500 }, { "epoch": 37.47, "learning_rate": 3.1264e-05, "loss": 8.7507, "step": 4684000 }, { "epoch": 37.48, "learning_rate": 3.1262e-05, "loss": 8.7511, "step": 4684500 }, { "epoch": 37.48, "learning_rate": 3.126e-05, "loss": 8.7574, "step": 4685000 }, { "epoch": 37.48, "learning_rate": 3.1258000000000005e-05, "loss": 8.7403, "step": 4685500 }, { "epoch": 37.49, "learning_rate": 3.1256e-05, "loss": 8.7706, "step": 4686000 }, { "epoch": 37.49, "learning_rate": 3.1254e-05, "loss": 8.744, "step": 4686500 }, { "epoch": 37.5, "learning_rate": 3.1252000000000005e-05, "loss": 8.7591, "step": 4687000 }, { "epoch": 37.5, "learning_rate": 3.125e-05, "loss": 8.7358, "step": 4687500 }, { "epoch": 37.5, "learning_rate": 3.1247999999999996e-05, "loss": 8.738, "step": 4688000 }, { "epoch": 37.51, "learning_rate": 3.1246000000000005e-05, "loss": 8.7581, "step": 4688500 }, { "epoch": 37.51, "learning_rate": 3.1244e-05, "loss": 8.7356, "step": 4689000 }, { "epoch": 37.52, "learning_rate": 3.1242e-05, "loss": 8.7556, "step": 4689500 }, { "epoch": 37.52, "learning_rate": 3.1240000000000006e-05, "loss": 8.7471, "step": 4690000 }, { "epoch": 37.52, "learning_rate": 3.1238e-05, "loss": 8.744, "step": 4690500 }, { "epoch": 37.53, "learning_rate": 3.1236000000000004e-05, "loss": 8.7629, "step": 4691000 }, { "epoch": 37.53, "learning_rate": 3.1234e-05, "loss": 8.7434, "step": 4691500 }, { "epoch": 37.54, "learning_rate": 3.1232e-05, "loss": 8.7516, "step": 4692000 }, { "epoch": 37.54, "learning_rate": 3.1230000000000004e-05, "loss": 8.7475, "step": 4692500 }, { "epoch": 37.54, "learning_rate": 3.1228e-05, "loss": 8.7485, "step": 4693000 }, { "epoch": 37.55, "learning_rate": 3.1226e-05, "loss": 8.7555, "step": 4693500 }, { "epoch": 37.55, "learning_rate": 3.1224000000000005e-05, "loss": 8.7587, "step": 4694000 }, { "epoch": 37.56, "learning_rate": 3.1222e-05, "loss": 8.7524, "step": 4694500 }, { "epoch": 37.56, "learning_rate": 3.122e-05, "loss": 8.7753, "step": 4695000 }, { "epoch": 37.56, "learning_rate": 3.1218000000000005e-05, "loss": 8.7755, "step": 4695500 }, { "epoch": 37.57, "learning_rate": 3.1216e-05, "loss": 8.7625, "step": 4696000 }, { "epoch": 37.57, "learning_rate": 3.1213999999999996e-05, "loss": 8.7559, "step": 4696500 }, { "epoch": 37.58, "learning_rate": 3.1212000000000005e-05, "loss": 8.7626, "step": 4697000 }, { "epoch": 37.58, "learning_rate": 3.121e-05, "loss": 8.7274, "step": 4697500 }, { "epoch": 37.58, "learning_rate": 3.1208000000000003e-05, "loss": 8.7523, "step": 4698000 }, { "epoch": 37.59, "learning_rate": 3.1206e-05, "loss": 8.7583, "step": 4698500 }, { "epoch": 37.59, "learning_rate": 3.1204e-05, "loss": 8.7596, "step": 4699000 }, { "epoch": 37.6, "learning_rate": 3.1202000000000004e-05, "loss": 8.7332, "step": 4699500 }, { "epoch": 37.6, "learning_rate": 3.12e-05, "loss": 8.7538, "step": 4700000 }, { "epoch": 37.6, "learning_rate": 3.1198e-05, "loss": 8.739, "step": 4700500 }, { "epoch": 37.61, "learning_rate": 3.1196000000000004e-05, "loss": 8.7464, "step": 4701000 }, { "epoch": 37.61, "learning_rate": 3.1194e-05, "loss": 8.7409, "step": 4701500 }, { "epoch": 37.62, "learning_rate": 3.1192e-05, "loss": 8.7601, "step": 4702000 }, { "epoch": 37.62, "learning_rate": 3.1190000000000005e-05, "loss": 8.7434, "step": 4702500 }, { "epoch": 37.62, "learning_rate": 3.1188e-05, "loss": 8.7581, "step": 4703000 }, { "epoch": 37.63, "learning_rate": 3.1186e-05, "loss": 8.7625, "step": 4703500 }, { "epoch": 37.63, "learning_rate": 3.1184000000000005e-05, "loss": 8.7586, "step": 4704000 }, { "epoch": 37.64, "learning_rate": 3.1182e-05, "loss": 8.7596, "step": 4704500 }, { "epoch": 37.64, "learning_rate": 3.118e-05, "loss": 8.7402, "step": 4705000 }, { "epoch": 37.64, "learning_rate": 3.1178e-05, "loss": 8.7387, "step": 4705500 }, { "epoch": 37.65, "learning_rate": 3.1176e-05, "loss": 8.7404, "step": 4706000 }, { "epoch": 37.65, "learning_rate": 3.1174000000000003e-05, "loss": 8.7487, "step": 4706500 }, { "epoch": 37.66, "learning_rate": 3.1172e-05, "loss": 8.7647, "step": 4707000 }, { "epoch": 37.66, "learning_rate": 3.117e-05, "loss": 8.7711, "step": 4707500 }, { "epoch": 37.66, "learning_rate": 3.1168000000000004e-05, "loss": 8.7484, "step": 4708000 }, { "epoch": 37.67, "learning_rate": 3.1166e-05, "loss": 8.7371, "step": 4708500 }, { "epoch": 37.67, "learning_rate": 3.1164e-05, "loss": 8.7457, "step": 4709000 }, { "epoch": 37.68, "learning_rate": 3.1162000000000004e-05, "loss": 8.7479, "step": 4709500 }, { "epoch": 37.68, "learning_rate": 3.116e-05, "loss": 8.7592, "step": 4710000 }, { "epoch": 37.68, "learning_rate": 3.1158e-05, "loss": 8.7489, "step": 4710500 }, { "epoch": 37.69, "learning_rate": 3.1156000000000005e-05, "loss": 8.7551, "step": 4711000 }, { "epoch": 37.69, "learning_rate": 3.1154e-05, "loss": 8.747, "step": 4711500 }, { "epoch": 37.7, "learning_rate": 3.1152e-05, "loss": 8.7493, "step": 4712000 }, { "epoch": 37.7, "learning_rate": 3.115e-05, "loss": 8.745, "step": 4712500 }, { "epoch": 37.7, "learning_rate": 3.1148e-05, "loss": 8.7421, "step": 4713000 }, { "epoch": 37.71, "learning_rate": 3.1146e-05, "loss": 8.7387, "step": 4713500 }, { "epoch": 37.71, "learning_rate": 3.1144e-05, "loss": 8.7631, "step": 4714000 }, { "epoch": 37.72, "learning_rate": 3.1142e-05, "loss": 8.7468, "step": 4714500 }, { "epoch": 37.72, "learning_rate": 3.1140000000000003e-05, "loss": 8.7492, "step": 4715000 }, { "epoch": 37.72, "learning_rate": 3.1138e-05, "loss": 8.7464, "step": 4715500 }, { "epoch": 37.73, "learning_rate": 3.1136e-05, "loss": 8.7561, "step": 4716000 }, { "epoch": 37.73, "learning_rate": 3.1134000000000004e-05, "loss": 8.747, "step": 4716500 }, { "epoch": 37.74, "learning_rate": 3.1132e-05, "loss": 8.7505, "step": 4717000 }, { "epoch": 37.74, "learning_rate": 3.113e-05, "loss": 8.7407, "step": 4717500 }, { "epoch": 37.74, "learning_rate": 3.1128000000000004e-05, "loss": 8.7352, "step": 4718000 }, { "epoch": 37.75, "learning_rate": 3.1126e-05, "loss": 8.7503, "step": 4718500 }, { "epoch": 37.75, "learning_rate": 3.1124e-05, "loss": 8.7593, "step": 4719000 }, { "epoch": 37.76, "learning_rate": 3.1122e-05, "loss": 8.7476, "step": 4719500 }, { "epoch": 37.76, "learning_rate": 3.112e-05, "loss": 8.7465, "step": 4720000 }, { "epoch": 37.76, "learning_rate": 3.1118e-05, "loss": 8.7424, "step": 4720500 }, { "epoch": 37.77, "learning_rate": 3.1116e-05, "loss": 8.7313, "step": 4721000 }, { "epoch": 37.77, "learning_rate": 3.111400000000001e-05, "loss": 8.7372, "step": 4721500 }, { "epoch": 37.78, "learning_rate": 3.1112e-05, "loss": 8.7519, "step": 4722000 }, { "epoch": 37.78, "learning_rate": 3.111e-05, "loss": 8.7517, "step": 4722500 }, { "epoch": 37.78, "learning_rate": 3.1108e-05, "loss": 8.7734, "step": 4723000 }, { "epoch": 37.79, "learning_rate": 3.1106e-05, "loss": 8.7461, "step": 4723500 }, { "epoch": 37.79, "learning_rate": 3.1104e-05, "loss": 8.7525, "step": 4724000 }, { "epoch": 37.8, "learning_rate": 3.1102e-05, "loss": 8.7446, "step": 4724500 }, { "epoch": 37.8, "learning_rate": 3.1100000000000004e-05, "loss": 8.7364, "step": 4725000 }, { "epoch": 37.8, "learning_rate": 3.1098e-05, "loss": 8.758, "step": 4725500 }, { "epoch": 37.81, "learning_rate": 3.1096e-05, "loss": 8.7318, "step": 4726000 }, { "epoch": 37.81, "learning_rate": 3.1094000000000004e-05, "loss": 8.7325, "step": 4726500 }, { "epoch": 37.82, "learning_rate": 3.1092e-05, "loss": 8.7505, "step": 4727000 }, { "epoch": 37.82, "learning_rate": 3.109e-05, "loss": 8.7608, "step": 4727500 }, { "epoch": 37.82, "learning_rate": 3.1088e-05, "loss": 8.7365, "step": 4728000 }, { "epoch": 37.83, "learning_rate": 3.108600000000001e-05, "loss": 8.7575, "step": 4728500 }, { "epoch": 37.83, "learning_rate": 3.1084e-05, "loss": 8.7624, "step": 4729000 }, { "epoch": 37.84, "learning_rate": 3.1082e-05, "loss": 8.7474, "step": 4729500 }, { "epoch": 37.84, "learning_rate": 3.108e-05, "loss": 8.763, "step": 4730000 }, { "epoch": 37.84, "learning_rate": 3.1078e-05, "loss": 8.7458, "step": 4730500 }, { "epoch": 37.85, "learning_rate": 3.1076e-05, "loss": 8.748, "step": 4731000 }, { "epoch": 37.85, "learning_rate": 3.1074e-05, "loss": 8.7441, "step": 4731500 }, { "epoch": 37.86, "learning_rate": 3.1072e-05, "loss": 8.7486, "step": 4732000 }, { "epoch": 37.86, "learning_rate": 3.107e-05, "loss": 8.7561, "step": 4732500 }, { "epoch": 37.86, "learning_rate": 3.1068e-05, "loss": 8.7625, "step": 4733000 }, { "epoch": 37.87, "learning_rate": 3.1066000000000004e-05, "loss": 8.7505, "step": 4733500 }, { "epoch": 37.87, "learning_rate": 3.1064000000000006e-05, "loss": 8.7422, "step": 4734000 }, { "epoch": 37.88, "learning_rate": 3.1062e-05, "loss": 8.7559, "step": 4734500 }, { "epoch": 37.88, "learning_rate": 3.106e-05, "loss": 8.7531, "step": 4735000 }, { "epoch": 37.88, "learning_rate": 3.1058000000000007e-05, "loss": 8.7355, "step": 4735500 }, { "epoch": 37.89, "learning_rate": 3.1056e-05, "loss": 8.7574, "step": 4736000 }, { "epoch": 37.89, "learning_rate": 3.1054e-05, "loss": 8.7631, "step": 4736500 }, { "epoch": 37.9, "learning_rate": 3.1052e-05, "loss": 8.7553, "step": 4737000 }, { "epoch": 37.9, "learning_rate": 3.105e-05, "loss": 8.7735, "step": 4737500 }, { "epoch": 37.9, "learning_rate": 3.1048e-05, "loss": 8.7472, "step": 4738000 }, { "epoch": 37.91, "learning_rate": 3.1046e-05, "loss": 8.7666, "step": 4738500 }, { "epoch": 37.91, "learning_rate": 3.1044e-05, "loss": 8.7272, "step": 4739000 }, { "epoch": 37.92, "learning_rate": 3.1042e-05, "loss": 8.7584, "step": 4739500 }, { "epoch": 37.92, "learning_rate": 3.104e-05, "loss": 8.7692, "step": 4740000 }, { "epoch": 37.92, "learning_rate": 3.1038e-05, "loss": 8.7272, "step": 4740500 }, { "epoch": 37.93, "learning_rate": 3.1036000000000006e-05, "loss": 8.7385, "step": 4741000 }, { "epoch": 37.93, "learning_rate": 3.1034e-05, "loss": 8.7519, "step": 4741500 }, { "epoch": 37.94, "learning_rate": 3.1032e-05, "loss": 8.7479, "step": 4742000 }, { "epoch": 37.94, "learning_rate": 3.1030000000000006e-05, "loss": 8.7498, "step": 4742500 }, { "epoch": 37.94, "learning_rate": 3.1028e-05, "loss": 8.7432, "step": 4743000 }, { "epoch": 37.95, "learning_rate": 3.1026e-05, "loss": 8.7548, "step": 4743500 }, { "epoch": 37.95, "learning_rate": 3.1024e-05, "loss": 8.7355, "step": 4744000 }, { "epoch": 37.96, "learning_rate": 3.1022e-05, "loss": 8.7541, "step": 4744500 }, { "epoch": 37.96, "learning_rate": 3.102e-05, "loss": 8.754, "step": 4745000 }, { "epoch": 37.96, "learning_rate": 3.1018e-05, "loss": 8.7499, "step": 4745500 }, { "epoch": 37.97, "learning_rate": 3.1016e-05, "loss": 8.7326, "step": 4746000 }, { "epoch": 37.97, "learning_rate": 3.1014000000000005e-05, "loss": 8.7332, "step": 4746500 }, { "epoch": 37.98, "learning_rate": 3.1012e-05, "loss": 8.7316, "step": 4747000 }, { "epoch": 37.98, "learning_rate": 3.101e-05, "loss": 8.7401, "step": 4747500 }, { "epoch": 37.98, "learning_rate": 3.1008000000000005e-05, "loss": 8.7303, "step": 4748000 }, { "epoch": 37.99, "learning_rate": 3.1006e-05, "loss": 8.763, "step": 4748500 }, { "epoch": 37.99, "learning_rate": 3.1003999999999997e-05, "loss": 8.7662, "step": 4749000 }, { "epoch": 38.0, "learning_rate": 3.1002000000000006e-05, "loss": 8.7457, "step": 4749500 }, { "epoch": 38.0, "learning_rate": 3.1e-05, "loss": 8.7494, "step": 4750000 }, { "epoch": 38.0, "learning_rate": 3.0998e-05, "loss": 8.7514, "step": 4750500 }, { "epoch": 38.01, "learning_rate": 3.0996000000000006e-05, "loss": 8.7416, "step": 4751000 }, { "epoch": 38.01, "learning_rate": 3.0994e-05, "loss": 8.7645, "step": 4751500 }, { "epoch": 38.02, "learning_rate": 3.0992e-05, "loss": 8.7539, "step": 4752000 }, { "epoch": 38.02, "learning_rate": 3.099e-05, "loss": 8.7451, "step": 4752500 }, { "epoch": 38.02, "learning_rate": 3.0988e-05, "loss": 8.7387, "step": 4753000 }, { "epoch": 38.03, "learning_rate": 3.0986000000000005e-05, "loss": 8.7509, "step": 4753500 }, { "epoch": 38.03, "learning_rate": 3.0984e-05, "loss": 8.7506, "step": 4754000 }, { "epoch": 38.04, "learning_rate": 3.0982e-05, "loss": 8.742, "step": 4754500 }, { "epoch": 38.04, "learning_rate": 3.0980000000000005e-05, "loss": 8.7425, "step": 4755000 }, { "epoch": 38.04, "learning_rate": 3.0978e-05, "loss": 8.7412, "step": 4755500 }, { "epoch": 38.05, "learning_rate": 3.0975999999999996e-05, "loss": 8.7623, "step": 4756000 }, { "epoch": 38.05, "learning_rate": 3.0974000000000005e-05, "loss": 8.7371, "step": 4756500 }, { "epoch": 38.06, "learning_rate": 3.0972e-05, "loss": 8.7373, "step": 4757000 }, { "epoch": 38.06, "learning_rate": 3.0969999999999997e-05, "loss": 8.7493, "step": 4757500 }, { "epoch": 38.06, "learning_rate": 3.0968000000000006e-05, "loss": 8.7502, "step": 4758000 }, { "epoch": 38.07, "learning_rate": 3.0966e-05, "loss": 8.7494, "step": 4758500 }, { "epoch": 38.07, "learning_rate": 3.0964000000000004e-05, "loss": 8.7515, "step": 4759000 }, { "epoch": 38.08, "learning_rate": 3.0962e-05, "loss": 8.7393, "step": 4759500 }, { "epoch": 38.08, "learning_rate": 3.096e-05, "loss": 8.7373, "step": 4760000 }, { "epoch": 38.08, "learning_rate": 3.0958000000000004e-05, "loss": 8.7321, "step": 4760500 }, { "epoch": 38.09, "learning_rate": 3.0956e-05, "loss": 8.7636, "step": 4761000 }, { "epoch": 38.09, "learning_rate": 3.0954e-05, "loss": 8.7445, "step": 4761500 }, { "epoch": 38.1, "learning_rate": 3.0952000000000005e-05, "loss": 8.7319, "step": 4762000 }, { "epoch": 38.1, "learning_rate": 3.095e-05, "loss": 8.7367, "step": 4762500 }, { "epoch": 38.1, "learning_rate": 3.0948e-05, "loss": 8.7401, "step": 4763000 }, { "epoch": 38.11, "learning_rate": 3.0946000000000005e-05, "loss": 8.755, "step": 4763500 }, { "epoch": 38.11, "learning_rate": 3.0944e-05, "loss": 8.7597, "step": 4764000 }, { "epoch": 38.12, "learning_rate": 3.0941999999999996e-05, "loss": 8.7345, "step": 4764500 }, { "epoch": 38.12, "learning_rate": 3.0940000000000005e-05, "loss": 8.78, "step": 4765000 }, { "epoch": 38.12, "learning_rate": 3.0938e-05, "loss": 8.764, "step": 4765500 }, { "epoch": 38.13, "learning_rate": 3.0936e-05, "loss": 8.7312, "step": 4766000 }, { "epoch": 38.13, "learning_rate": 3.0934e-05, "loss": 8.7569, "step": 4766500 }, { "epoch": 38.14, "learning_rate": 3.0932e-05, "loss": 8.7535, "step": 4767000 }, { "epoch": 38.14, "learning_rate": 3.0930000000000004e-05, "loss": 8.7378, "step": 4767500 }, { "epoch": 38.14, "learning_rate": 3.0928e-05, "loss": 8.7463, "step": 4768000 }, { "epoch": 38.15, "learning_rate": 3.0926e-05, "loss": 8.7573, "step": 4768500 }, { "epoch": 38.15, "learning_rate": 3.0924000000000004e-05, "loss": 8.7431, "step": 4769000 }, { "epoch": 38.16, "learning_rate": 3.0922e-05, "loss": 8.7356, "step": 4769500 }, { "epoch": 38.16, "learning_rate": 3.092e-05, "loss": 8.7505, "step": 4770000 }, { "epoch": 38.16, "learning_rate": 3.0918000000000005e-05, "loss": 8.7532, "step": 4770500 }, { "epoch": 38.17, "learning_rate": 3.0916e-05, "loss": 8.7279, "step": 4771000 }, { "epoch": 38.17, "learning_rate": 3.0914e-05, "loss": 8.7519, "step": 4771500 }, { "epoch": 38.18, "learning_rate": 3.0912000000000005e-05, "loss": 8.7588, "step": 4772000 }, { "epoch": 38.18, "learning_rate": 3.091e-05, "loss": 8.7313, "step": 4772500 }, { "epoch": 38.18, "learning_rate": 3.0908e-05, "loss": 8.7456, "step": 4773000 }, { "epoch": 38.19, "learning_rate": 3.0906e-05, "loss": 8.7582, "step": 4773500 }, { "epoch": 38.19, "learning_rate": 3.0904e-05, "loss": 8.7391, "step": 4774000 }, { "epoch": 38.2, "learning_rate": 3.0902e-05, "loss": 8.7535, "step": 4774500 }, { "epoch": 38.2, "learning_rate": 3.09e-05, "loss": 8.7598, "step": 4775000 }, { "epoch": 38.2, "learning_rate": 3.0898e-05, "loss": 8.7425, "step": 4775500 }, { "epoch": 38.21, "learning_rate": 3.0896000000000004e-05, "loss": 8.7514, "step": 4776000 }, { "epoch": 38.21, "learning_rate": 3.0894e-05, "loss": 8.7551, "step": 4776500 }, { "epoch": 38.22, "learning_rate": 3.0892e-05, "loss": 8.7416, "step": 4777000 }, { "epoch": 38.22, "learning_rate": 3.0890000000000004e-05, "loss": 8.7519, "step": 4777500 }, { "epoch": 38.22, "learning_rate": 3.0888e-05, "loss": 8.7666, "step": 4778000 }, { "epoch": 38.23, "learning_rate": 3.0886e-05, "loss": 8.7584, "step": 4778500 }, { "epoch": 38.23, "learning_rate": 3.0884000000000004e-05, "loss": 8.7427, "step": 4779000 }, { "epoch": 38.24, "learning_rate": 3.0882e-05, "loss": 8.7626, "step": 4779500 }, { "epoch": 38.24, "learning_rate": 3.088e-05, "loss": 8.7566, "step": 4780000 }, { "epoch": 38.24, "learning_rate": 3.0878e-05, "loss": 8.7604, "step": 4780500 }, { "epoch": 38.25, "learning_rate": 3.0876e-05, "loss": 8.7477, "step": 4781000 }, { "epoch": 38.25, "learning_rate": 3.0874e-05, "loss": 8.7591, "step": 4781500 }, { "epoch": 38.26, "learning_rate": 3.0872e-05, "loss": 8.7473, "step": 4782000 }, { "epoch": 38.26, "learning_rate": 3.087e-05, "loss": 8.742, "step": 4782500 }, { "epoch": 38.26, "learning_rate": 3.0868e-05, "loss": 8.7682, "step": 4783000 }, { "epoch": 38.27, "learning_rate": 3.0866e-05, "loss": 8.7643, "step": 4783500 }, { "epoch": 38.27, "learning_rate": 3.0864e-05, "loss": 8.749, "step": 4784000 }, { "epoch": 38.28, "learning_rate": 3.0862000000000004e-05, "loss": 8.7532, "step": 4784500 }, { "epoch": 38.28, "learning_rate": 3.086e-05, "loss": 8.7521, "step": 4785000 }, { "epoch": 38.28, "learning_rate": 3.0858e-05, "loss": 8.7487, "step": 4785500 }, { "epoch": 38.29, "learning_rate": 3.0856000000000004e-05, "loss": 8.7452, "step": 4786000 }, { "epoch": 38.29, "learning_rate": 3.0854e-05, "loss": 8.7508, "step": 4786500 }, { "epoch": 38.3, "learning_rate": 3.0852e-05, "loss": 8.7551, "step": 4787000 }, { "epoch": 38.3, "learning_rate": 3.0850000000000004e-05, "loss": 8.7591, "step": 4787500 }, { "epoch": 38.3, "learning_rate": 3.0848e-05, "loss": 8.7391, "step": 4788000 }, { "epoch": 38.31, "learning_rate": 3.0846e-05, "loss": 8.7502, "step": 4788500 }, { "epoch": 38.31, "learning_rate": 3.0844e-05, "loss": 8.7574, "step": 4789000 }, { "epoch": 38.32, "learning_rate": 3.084200000000001e-05, "loss": 8.7664, "step": 4789500 }, { "epoch": 38.32, "learning_rate": 3.084e-05, "loss": 8.7421, "step": 4790000 }, { "epoch": 38.32, "learning_rate": 3.0838e-05, "loss": 8.7551, "step": 4790500 }, { "epoch": 38.33, "learning_rate": 3.0836e-05, "loss": 8.7564, "step": 4791000 }, { "epoch": 38.33, "learning_rate": 3.0834e-05, "loss": 8.7405, "step": 4791500 }, { "epoch": 38.34, "learning_rate": 3.0832e-05, "loss": 8.7468, "step": 4792000 }, { "epoch": 38.34, "learning_rate": 3.083e-05, "loss": 8.7528, "step": 4792500 }, { "epoch": 38.34, "learning_rate": 3.0828000000000004e-05, "loss": 8.7312, "step": 4793000 }, { "epoch": 38.35, "learning_rate": 3.0826e-05, "loss": 8.744, "step": 4793500 }, { "epoch": 38.35, "learning_rate": 3.0824e-05, "loss": 8.7422, "step": 4794000 }, { "epoch": 38.36, "learning_rate": 3.0822000000000004e-05, "loss": 8.7476, "step": 4794500 }, { "epoch": 38.36, "learning_rate": 3.082e-05, "loss": 8.7449, "step": 4795000 }, { "epoch": 38.36, "learning_rate": 3.0818e-05, "loss": 8.7613, "step": 4795500 }, { "epoch": 38.37, "learning_rate": 3.0816e-05, "loss": 8.7696, "step": 4796000 }, { "epoch": 38.37, "learning_rate": 3.081400000000001e-05, "loss": 8.748, "step": 4796500 }, { "epoch": 38.38, "learning_rate": 3.0812e-05, "loss": 8.7399, "step": 4797000 }, { "epoch": 38.38, "learning_rate": 3.081e-05, "loss": 8.7692, "step": 4797500 }, { "epoch": 38.38, "learning_rate": 3.0808e-05, "loss": 8.7385, "step": 4798000 }, { "epoch": 38.39, "learning_rate": 3.0806e-05, "loss": 8.7593, "step": 4798500 }, { "epoch": 38.39, "learning_rate": 3.0804e-05, "loss": 8.7384, "step": 4799000 }, { "epoch": 38.4, "learning_rate": 3.0802e-05, "loss": 8.7388, "step": 4799500 }, { "epoch": 38.4, "learning_rate": 3.08e-05, "loss": 8.7586, "step": 4800000 }, { "epoch": 38.4, "learning_rate": 3.0798e-05, "loss": 8.7526, "step": 4800500 }, { "epoch": 38.41, "learning_rate": 3.0796e-05, "loss": 8.7469, "step": 4801000 }, { "epoch": 38.41, "learning_rate": 3.0794000000000004e-05, "loss": 8.7594, "step": 4801500 }, { "epoch": 38.42, "learning_rate": 3.0792000000000006e-05, "loss": 8.7448, "step": 4802000 }, { "epoch": 38.42, "learning_rate": 3.079e-05, "loss": 8.741, "step": 4802500 }, { "epoch": 38.42, "learning_rate": 3.0788e-05, "loss": 8.7236, "step": 4803000 }, { "epoch": 38.43, "learning_rate": 3.0786000000000006e-05, "loss": 8.7534, "step": 4803500 }, { "epoch": 38.43, "learning_rate": 3.0784e-05, "loss": 8.738, "step": 4804000 }, { "epoch": 38.44, "learning_rate": 3.0782e-05, "loss": 8.7584, "step": 4804500 }, { "epoch": 38.44, "learning_rate": 3.078e-05, "loss": 8.7602, "step": 4805000 }, { "epoch": 38.44, "learning_rate": 3.0778e-05, "loss": 8.7439, "step": 4805500 }, { "epoch": 38.45, "learning_rate": 3.0776e-05, "loss": 8.7339, "step": 4806000 }, { "epoch": 38.45, "learning_rate": 3.0774e-05, "loss": 8.7407, "step": 4806500 }, { "epoch": 38.46, "learning_rate": 3.0772e-05, "loss": 8.7435, "step": 4807000 }, { "epoch": 38.46, "learning_rate": 3.077e-05, "loss": 8.7431, "step": 4807500 }, { "epoch": 38.46, "learning_rate": 3.0768e-05, "loss": 8.7704, "step": 4808000 }, { "epoch": 38.47, "learning_rate": 3.0766e-05, "loss": 8.7257, "step": 4808500 }, { "epoch": 38.47, "learning_rate": 3.0764000000000006e-05, "loss": 8.7461, "step": 4809000 }, { "epoch": 38.48, "learning_rate": 3.0762e-05, "loss": 8.741, "step": 4809500 }, { "epoch": 38.48, "learning_rate": 3.076e-05, "loss": 8.7554, "step": 4810000 }, { "epoch": 38.48, "learning_rate": 3.0758000000000006e-05, "loss": 8.7445, "step": 4810500 }, { "epoch": 38.49, "learning_rate": 3.0756e-05, "loss": 8.7494, "step": 4811000 }, { "epoch": 38.49, "learning_rate": 3.0754e-05, "loss": 8.7267, "step": 4811500 }, { "epoch": 38.5, "learning_rate": 3.0752e-05, "loss": 8.7541, "step": 4812000 }, { "epoch": 38.5, "learning_rate": 3.075e-05, "loss": 8.7633, "step": 4812500 }, { "epoch": 38.5, "learning_rate": 3.0748e-05, "loss": 8.7608, "step": 4813000 }, { "epoch": 38.51, "learning_rate": 3.0746e-05, "loss": 8.7334, "step": 4813500 }, { "epoch": 38.51, "learning_rate": 3.0744e-05, "loss": 8.7434, "step": 4814000 }, { "epoch": 38.52, "learning_rate": 3.0742000000000005e-05, "loss": 8.7376, "step": 4814500 }, { "epoch": 38.52, "learning_rate": 3.074e-05, "loss": 8.743, "step": 4815000 }, { "epoch": 38.52, "learning_rate": 3.0738e-05, "loss": 8.7649, "step": 4815500 }, { "epoch": 38.53, "learning_rate": 3.0736000000000005e-05, "loss": 8.7386, "step": 4816000 }, { "epoch": 38.53, "learning_rate": 3.0734e-05, "loss": 8.7524, "step": 4816500 }, { "epoch": 38.54, "learning_rate": 3.0731999999999996e-05, "loss": 8.7455, "step": 4817000 }, { "epoch": 38.54, "learning_rate": 3.0730000000000006e-05, "loss": 8.7366, "step": 4817500 }, { "epoch": 38.54, "learning_rate": 3.0728e-05, "loss": 8.746, "step": 4818000 }, { "epoch": 38.55, "learning_rate": 3.0726e-05, "loss": 8.7649, "step": 4818500 }, { "epoch": 38.55, "learning_rate": 3.0724000000000006e-05, "loss": 8.7356, "step": 4819000 }, { "epoch": 38.56, "learning_rate": 3.0722e-05, "loss": 8.767, "step": 4819500 }, { "epoch": 38.56, "learning_rate": 3.072e-05, "loss": 8.7642, "step": 4820000 }, { "epoch": 38.56, "learning_rate": 3.0718e-05, "loss": 8.75, "step": 4820500 }, { "epoch": 38.57, "learning_rate": 3.0716e-05, "loss": 8.7359, "step": 4821000 }, { "epoch": 38.57, "learning_rate": 3.0714000000000004e-05, "loss": 8.7486, "step": 4821500 }, { "epoch": 38.58, "learning_rate": 3.0712e-05, "loss": 8.7472, "step": 4822000 }, { "epoch": 38.58, "learning_rate": 3.071e-05, "loss": 8.7303, "step": 4822500 }, { "epoch": 38.58, "learning_rate": 3.0708000000000005e-05, "loss": 8.7415, "step": 4823000 }, { "epoch": 38.59, "learning_rate": 3.0706e-05, "loss": 8.7376, "step": 4823500 }, { "epoch": 38.59, "learning_rate": 3.0703999999999996e-05, "loss": 8.7662, "step": 4824000 }, { "epoch": 38.6, "learning_rate": 3.0702000000000005e-05, "loss": 8.7397, "step": 4824500 }, { "epoch": 38.6, "learning_rate": 3.07e-05, "loss": 8.7485, "step": 4825000 }, { "epoch": 38.6, "learning_rate": 3.0697999999999996e-05, "loss": 8.7556, "step": 4825500 }, { "epoch": 38.61, "learning_rate": 3.0696000000000006e-05, "loss": 8.7236, "step": 4826000 }, { "epoch": 38.61, "learning_rate": 3.0694e-05, "loss": 8.7582, "step": 4826500 }, { "epoch": 38.62, "learning_rate": 3.0692000000000004e-05, "loss": 8.7513, "step": 4827000 }, { "epoch": 38.62, "learning_rate": 3.069e-05, "loss": 8.7323, "step": 4827500 }, { "epoch": 38.62, "learning_rate": 3.0688e-05, "loss": 8.7515, "step": 4828000 }, { "epoch": 38.63, "learning_rate": 3.0686000000000004e-05, "loss": 8.7425, "step": 4828500 }, { "epoch": 38.63, "learning_rate": 3.0684e-05, "loss": 8.7489, "step": 4829000 }, { "epoch": 38.64, "learning_rate": 3.0682e-05, "loss": 8.7683, "step": 4829500 }, { "epoch": 38.64, "learning_rate": 3.0680000000000004e-05, "loss": 8.757, "step": 4830000 }, { "epoch": 38.64, "learning_rate": 3.0678e-05, "loss": 8.7314, "step": 4830500 }, { "epoch": 38.65, "learning_rate": 3.0676e-05, "loss": 8.7357, "step": 4831000 }, { "epoch": 38.65, "learning_rate": 3.0674000000000005e-05, "loss": 8.7615, "step": 4831500 }, { "epoch": 38.66, "learning_rate": 3.0672e-05, "loss": 8.7334, "step": 4832000 }, { "epoch": 38.66, "learning_rate": 3.0669999999999996e-05, "loss": 8.7368, "step": 4832500 }, { "epoch": 38.66, "learning_rate": 3.0668000000000005e-05, "loss": 8.7482, "step": 4833000 }, { "epoch": 38.67, "learning_rate": 3.0666e-05, "loss": 8.7464, "step": 4833500 }, { "epoch": 38.67, "learning_rate": 3.0664e-05, "loss": 8.753, "step": 4834000 }, { "epoch": 38.68, "learning_rate": 3.0662e-05, "loss": 8.7433, "step": 4834500 }, { "epoch": 38.68, "learning_rate": 3.066e-05, "loss": 8.7437, "step": 4835000 }, { "epoch": 38.68, "learning_rate": 3.0658000000000004e-05, "loss": 8.7392, "step": 4835500 }, { "epoch": 38.69, "learning_rate": 3.0656e-05, "loss": 8.7575, "step": 4836000 }, { "epoch": 38.69, "learning_rate": 3.0654e-05, "loss": 8.7588, "step": 4836500 }, { "epoch": 38.7, "learning_rate": 3.0652000000000004e-05, "loss": 8.7486, "step": 4837000 }, { "epoch": 38.7, "learning_rate": 3.065e-05, "loss": 8.7534, "step": 4837500 }, { "epoch": 38.7, "learning_rate": 3.0648e-05, "loss": 8.7605, "step": 4838000 }, { "epoch": 38.71, "learning_rate": 3.0646000000000004e-05, "loss": 8.7408, "step": 4838500 }, { "epoch": 38.71, "learning_rate": 3.0644e-05, "loss": 8.757, "step": 4839000 }, { "epoch": 38.72, "learning_rate": 3.0642e-05, "loss": 8.7579, "step": 4839500 }, { "epoch": 38.72, "learning_rate": 3.0640000000000005e-05, "loss": 8.7422, "step": 4840000 }, { "epoch": 38.72, "learning_rate": 3.0638e-05, "loss": 8.7475, "step": 4840500 }, { "epoch": 38.73, "learning_rate": 3.0636e-05, "loss": 8.7463, "step": 4841000 }, { "epoch": 38.73, "learning_rate": 3.0634e-05, "loss": 8.7533, "step": 4841500 }, { "epoch": 38.74, "learning_rate": 3.0632e-05, "loss": 8.7262, "step": 4842000 }, { "epoch": 38.74, "learning_rate": 3.063e-05, "loss": 8.7549, "step": 4842500 }, { "epoch": 38.74, "learning_rate": 3.0628e-05, "loss": 8.7632, "step": 4843000 }, { "epoch": 38.75, "learning_rate": 3.0626e-05, "loss": 8.7347, "step": 4843500 }, { "epoch": 38.75, "learning_rate": 3.0624000000000004e-05, "loss": 8.7699, "step": 4844000 }, { "epoch": 38.76, "learning_rate": 3.0622e-05, "loss": 8.7348, "step": 4844500 }, { "epoch": 38.76, "learning_rate": 3.062e-05, "loss": 8.7488, "step": 4845000 }, { "epoch": 38.76, "learning_rate": 3.0618000000000004e-05, "loss": 8.7367, "step": 4845500 }, { "epoch": 38.77, "learning_rate": 3.0616e-05, "loss": 8.7471, "step": 4846000 }, { "epoch": 38.77, "learning_rate": 3.0614e-05, "loss": 8.7561, "step": 4846500 }, { "epoch": 38.78, "learning_rate": 3.0612000000000004e-05, "loss": 8.7667, "step": 4847000 }, { "epoch": 38.78, "learning_rate": 3.061e-05, "loss": 8.7574, "step": 4847500 }, { "epoch": 38.78, "learning_rate": 3.0608e-05, "loss": 8.7572, "step": 4848000 }, { "epoch": 38.79, "learning_rate": 3.0606e-05, "loss": 8.7658, "step": 4848500 }, { "epoch": 38.79, "learning_rate": 3.0604e-05, "loss": 8.7137, "step": 4849000 }, { "epoch": 38.8, "learning_rate": 3.0602e-05, "loss": 8.749, "step": 4849500 }, { "epoch": 38.8, "learning_rate": 3.06e-05, "loss": 8.7436, "step": 4850000 }, { "epoch": 38.8, "learning_rate": 3.0598e-05, "loss": 8.7529, "step": 4850500 }, { "epoch": 38.81, "learning_rate": 3.0596e-05, "loss": 8.7681, "step": 4851000 }, { "epoch": 38.81, "learning_rate": 3.0594e-05, "loss": 8.7708, "step": 4851500 }, { "epoch": 38.82, "learning_rate": 3.0592e-05, "loss": 8.7399, "step": 4852000 }, { "epoch": 38.82, "learning_rate": 3.0590000000000004e-05, "loss": 8.7411, "step": 4852500 }, { "epoch": 38.82, "learning_rate": 3.0588e-05, "loss": 8.7454, "step": 4853000 }, { "epoch": 38.83, "learning_rate": 3.0586e-05, "loss": 8.7387, "step": 4853500 }, { "epoch": 38.83, "learning_rate": 3.0584000000000004e-05, "loss": 8.7512, "step": 4854000 }, { "epoch": 38.84, "learning_rate": 3.0582e-05, "loss": 8.7856, "step": 4854500 }, { "epoch": 38.84, "learning_rate": 3.058e-05, "loss": 8.7284, "step": 4855000 }, { "epoch": 38.84, "learning_rate": 3.0578000000000004e-05, "loss": 8.7495, "step": 4855500 }, { "epoch": 38.85, "learning_rate": 3.0576e-05, "loss": 8.7533, "step": 4856000 }, { "epoch": 38.85, "learning_rate": 3.0574e-05, "loss": 8.7517, "step": 4856500 }, { "epoch": 38.86, "learning_rate": 3.0572e-05, "loss": 8.7406, "step": 4857000 }, { "epoch": 38.86, "learning_rate": 3.057000000000001e-05, "loss": 8.76, "step": 4857500 }, { "epoch": 38.86, "learning_rate": 3.0568e-05, "loss": 8.7586, "step": 4858000 }, { "epoch": 38.87, "learning_rate": 3.0566e-05, "loss": 8.7424, "step": 4858500 }, { "epoch": 38.87, "learning_rate": 3.0564e-05, "loss": 8.725, "step": 4859000 }, { "epoch": 38.88, "learning_rate": 3.0562e-05, "loss": 8.7601, "step": 4859500 }, { "epoch": 38.88, "learning_rate": 3.056e-05, "loss": 8.7526, "step": 4860000 }, { "epoch": 38.88, "learning_rate": 3.0558e-05, "loss": 8.7482, "step": 4860500 }, { "epoch": 38.89, "learning_rate": 3.0556000000000004e-05, "loss": 8.7366, "step": 4861000 }, { "epoch": 38.89, "learning_rate": 3.0554e-05, "loss": 8.7392, "step": 4861500 }, { "epoch": 38.9, "learning_rate": 3.0552e-05, "loss": 8.733, "step": 4862000 }, { "epoch": 38.9, "learning_rate": 3.0550000000000004e-05, "loss": 8.7649, "step": 4862500 }, { "epoch": 38.9, "learning_rate": 3.0548e-05, "loss": 8.7559, "step": 4863000 }, { "epoch": 38.91, "learning_rate": 3.0546e-05, "loss": 8.7606, "step": 4863500 }, { "epoch": 38.91, "learning_rate": 3.0544e-05, "loss": 8.731, "step": 4864000 }, { "epoch": 38.92, "learning_rate": 3.054200000000001e-05, "loss": 8.7612, "step": 4864500 }, { "epoch": 38.92, "learning_rate": 3.054e-05, "loss": 8.7658, "step": 4865000 }, { "epoch": 38.92, "learning_rate": 3.0538e-05, "loss": 8.7347, "step": 4865500 }, { "epoch": 38.93, "learning_rate": 3.0536e-05, "loss": 8.7509, "step": 4866000 }, { "epoch": 38.93, "learning_rate": 3.0534e-05, "loss": 8.7466, "step": 4866500 }, { "epoch": 38.94, "learning_rate": 3.0532e-05, "loss": 8.7795, "step": 4867000 }, { "epoch": 38.94, "learning_rate": 3.053e-05, "loss": 8.7582, "step": 4867500 }, { "epoch": 38.94, "learning_rate": 3.0528e-05, "loss": 8.7633, "step": 4868000 }, { "epoch": 38.95, "learning_rate": 3.0526e-05, "loss": 8.7595, "step": 4868500 }, { "epoch": 38.95, "learning_rate": 3.0524e-05, "loss": 8.7501, "step": 4869000 }, { "epoch": 38.96, "learning_rate": 3.0522000000000004e-05, "loss": 8.7479, "step": 4869500 }, { "epoch": 38.96, "learning_rate": 3.0520000000000006e-05, "loss": 8.7578, "step": 4870000 }, { "epoch": 38.96, "learning_rate": 3.0518e-05, "loss": 8.7359, "step": 4870500 }, { "epoch": 38.97, "learning_rate": 3.0516e-05, "loss": 8.7511, "step": 4871000 }, { "epoch": 38.97, "learning_rate": 3.0514000000000003e-05, "loss": 8.7453, "step": 4871500 }, { "epoch": 38.98, "learning_rate": 3.0512000000000002e-05, "loss": 8.7441, "step": 4872000 }, { "epoch": 38.98, "learning_rate": 3.051e-05, "loss": 8.7426, "step": 4872500 }, { "epoch": 38.98, "learning_rate": 3.0508000000000003e-05, "loss": 8.7451, "step": 4873000 }, { "epoch": 38.99, "learning_rate": 3.0506000000000002e-05, "loss": 8.7466, "step": 4873500 }, { "epoch": 38.99, "learning_rate": 3.0503999999999998e-05, "loss": 8.7387, "step": 4874000 }, { "epoch": 39.0, "learning_rate": 3.0502000000000004e-05, "loss": 8.7513, "step": 4874500 }, { "epoch": 39.0, "learning_rate": 3.05e-05, "loss": 8.7579, "step": 4875000 }, { "epoch": 39.0, "learning_rate": 3.0498e-05, "loss": 8.7644, "step": 4875500 }, { "epoch": 39.01, "learning_rate": 3.0496e-05, "loss": 8.7352, "step": 4876000 }, { "epoch": 39.01, "learning_rate": 3.0494e-05, "loss": 8.741, "step": 4876500 }, { "epoch": 39.02, "learning_rate": 3.0492000000000006e-05, "loss": 8.7317, "step": 4877000 }, { "epoch": 39.02, "learning_rate": 3.049e-05, "loss": 8.7444, "step": 4877500 }, { "epoch": 39.02, "learning_rate": 3.0488e-05, "loss": 8.7513, "step": 4878000 }, { "epoch": 39.03, "learning_rate": 3.0486000000000003e-05, "loss": 8.7601, "step": 4878500 }, { "epoch": 39.03, "learning_rate": 3.0484e-05, "loss": 8.7485, "step": 4879000 }, { "epoch": 39.04, "learning_rate": 3.0482e-05, "loss": 8.7555, "step": 4879500 }, { "epoch": 39.04, "learning_rate": 3.0480000000000003e-05, "loss": 8.7611, "step": 4880000 }, { "epoch": 39.04, "learning_rate": 3.0478000000000002e-05, "loss": 8.7474, "step": 4880500 }, { "epoch": 39.05, "learning_rate": 3.0475999999999998e-05, "loss": 8.7424, "step": 4881000 }, { "epoch": 39.05, "learning_rate": 3.0474000000000003e-05, "loss": 8.7512, "step": 4881500 }, { "epoch": 39.06, "learning_rate": 3.0472e-05, "loss": 8.7401, "step": 4882000 }, { "epoch": 39.06, "learning_rate": 3.0470000000000005e-05, "loss": 8.765, "step": 4882500 }, { "epoch": 39.06, "learning_rate": 3.0468000000000004e-05, "loss": 8.7441, "step": 4883000 }, { "epoch": 39.07, "learning_rate": 3.0466e-05, "loss": 8.7445, "step": 4883500 }, { "epoch": 39.07, "learning_rate": 3.0464000000000005e-05, "loss": 8.7579, "step": 4884000 }, { "epoch": 39.08, "learning_rate": 3.0462e-05, "loss": 8.7583, "step": 4884500 }, { "epoch": 39.08, "learning_rate": 3.046e-05, "loss": 8.7624, "step": 4885000 }, { "epoch": 39.08, "learning_rate": 3.0458000000000002e-05, "loss": 8.7391, "step": 4885500 }, { "epoch": 39.09, "learning_rate": 3.0456e-05, "loss": 8.7402, "step": 4886000 }, { "epoch": 39.09, "learning_rate": 3.0454e-05, "loss": 8.7515, "step": 4886500 }, { "epoch": 39.1, "learning_rate": 3.0452000000000003e-05, "loss": 8.7343, "step": 4887000 }, { "epoch": 39.1, "learning_rate": 3.045e-05, "loss": 8.7586, "step": 4887500 }, { "epoch": 39.1, "learning_rate": 3.0447999999999997e-05, "loss": 8.7591, "step": 4888000 }, { "epoch": 39.11, "learning_rate": 3.0446000000000003e-05, "loss": 8.752, "step": 4888500 }, { "epoch": 39.11, "learning_rate": 3.0444000000000002e-05, "loss": 8.732, "step": 4889000 }, { "epoch": 39.12, "learning_rate": 3.0442000000000004e-05, "loss": 8.7442, "step": 4889500 }, { "epoch": 39.12, "learning_rate": 3.0440000000000003e-05, "loss": 8.7475, "step": 4890000 }, { "epoch": 39.12, "learning_rate": 3.0438e-05, "loss": 8.7518, "step": 4890500 }, { "epoch": 39.13, "learning_rate": 3.0436000000000005e-05, "loss": 8.7363, "step": 4891000 }, { "epoch": 39.13, "learning_rate": 3.0434e-05, "loss": 8.7592, "step": 4891500 }, { "epoch": 39.14, "learning_rate": 3.0432e-05, "loss": 8.7624, "step": 4892000 }, { "epoch": 39.14, "learning_rate": 3.0430000000000002e-05, "loss": 8.7537, "step": 4892500 }, { "epoch": 39.14, "learning_rate": 3.0428e-05, "loss": 8.7509, "step": 4893000 }, { "epoch": 39.15, "learning_rate": 3.0426e-05, "loss": 8.75, "step": 4893500 }, { "epoch": 39.15, "learning_rate": 3.0424000000000002e-05, "loss": 8.742, "step": 4894000 }, { "epoch": 39.16, "learning_rate": 3.0422e-05, "loss": 8.7389, "step": 4894500 }, { "epoch": 39.16, "learning_rate": 3.0420000000000004e-05, "loss": 8.7427, "step": 4895000 }, { "epoch": 39.16, "learning_rate": 3.0418000000000003e-05, "loss": 8.7532, "step": 4895500 }, { "epoch": 39.17, "learning_rate": 3.0416e-05, "loss": 8.7555, "step": 4896000 }, { "epoch": 39.17, "learning_rate": 3.0414000000000004e-05, "loss": 8.7369, "step": 4896500 }, { "epoch": 39.18, "learning_rate": 3.0412000000000003e-05, "loss": 8.7419, "step": 4897000 }, { "epoch": 39.18, "learning_rate": 3.041e-05, "loss": 8.7646, "step": 4897500 }, { "epoch": 39.18, "learning_rate": 3.0408000000000004e-05, "loss": 8.7573, "step": 4898000 }, { "epoch": 39.19, "learning_rate": 3.0406e-05, "loss": 8.7505, "step": 4898500 }, { "epoch": 39.19, "learning_rate": 3.0404e-05, "loss": 8.7599, "step": 4899000 }, { "epoch": 39.2, "learning_rate": 3.0402e-05, "loss": 8.7563, "step": 4899500 }, { "epoch": 39.2, "learning_rate": 3.04e-05, "loss": 8.7611, "step": 4900000 }, { "epoch": 39.2, "learning_rate": 3.0398e-05, "loss": 8.7558, "step": 4900500 }, { "epoch": 39.21, "learning_rate": 3.0396000000000002e-05, "loss": 8.7493, "step": 4901000 }, { "epoch": 39.21, "learning_rate": 3.0394e-05, "loss": 8.7577, "step": 4901500 }, { "epoch": 39.22, "learning_rate": 3.0392000000000003e-05, "loss": 8.763, "step": 4902000 }, { "epoch": 39.22, "learning_rate": 3.0390000000000002e-05, "loss": 8.7602, "step": 4902500 }, { "epoch": 39.22, "learning_rate": 3.0388e-05, "loss": 8.7557, "step": 4903000 }, { "epoch": 39.23, "learning_rate": 3.0386000000000004e-05, "loss": 8.7493, "step": 4903500 }, { "epoch": 39.23, "learning_rate": 3.0384000000000003e-05, "loss": 8.7575, "step": 4904000 }, { "epoch": 39.24, "learning_rate": 3.0381999999999998e-05, "loss": 8.7494, "step": 4904500 }, { "epoch": 39.24, "learning_rate": 3.0380000000000004e-05, "loss": 8.7553, "step": 4905000 }, { "epoch": 39.24, "learning_rate": 3.0378e-05, "loss": 8.7622, "step": 4905500 }, { "epoch": 39.25, "learning_rate": 3.0376e-05, "loss": 8.7575, "step": 4906000 }, { "epoch": 39.25, "learning_rate": 3.0374e-05, "loss": 8.7431, "step": 4906500 }, { "epoch": 39.26, "learning_rate": 3.0372e-05, "loss": 8.7475, "step": 4907000 }, { "epoch": 39.26, "learning_rate": 3.0370000000000006e-05, "loss": 8.7476, "step": 4907500 }, { "epoch": 39.26, "learning_rate": 3.0368e-05, "loss": 8.7466, "step": 4908000 }, { "epoch": 39.27, "learning_rate": 3.0366e-05, "loss": 8.773, "step": 4908500 }, { "epoch": 39.27, "learning_rate": 3.0364000000000003e-05, "loss": 8.7454, "step": 4909000 }, { "epoch": 39.28, "learning_rate": 3.0362000000000002e-05, "loss": 8.7393, "step": 4909500 }, { "epoch": 39.28, "learning_rate": 3.036e-05, "loss": 8.733, "step": 4910000 }, { "epoch": 39.28, "learning_rate": 3.0358000000000003e-05, "loss": 8.7526, "step": 4910500 }, { "epoch": 39.29, "learning_rate": 3.0356000000000002e-05, "loss": 8.7401, "step": 4911000 }, { "epoch": 39.29, "learning_rate": 3.0353999999999998e-05, "loss": 8.7374, "step": 4911500 }, { "epoch": 39.3, "learning_rate": 3.0352000000000003e-05, "loss": 8.7379, "step": 4912000 }, { "epoch": 39.3, "learning_rate": 3.035e-05, "loss": 8.7475, "step": 4912500 }, { "epoch": 39.3, "learning_rate": 3.0348000000000005e-05, "loss": 8.7626, "step": 4913000 }, { "epoch": 39.31, "learning_rate": 3.0346000000000004e-05, "loss": 8.7494, "step": 4913500 }, { "epoch": 39.31, "learning_rate": 3.0344e-05, "loss": 8.7331, "step": 4914000 }, { "epoch": 39.32, "learning_rate": 3.0342000000000005e-05, "loss": 8.758, "step": 4914500 }, { "epoch": 39.32, "learning_rate": 3.034e-05, "loss": 8.7445, "step": 4915000 }, { "epoch": 39.32, "learning_rate": 3.0338e-05, "loss": 8.7557, "step": 4915500 }, { "epoch": 39.33, "learning_rate": 3.0336000000000002e-05, "loss": 8.7338, "step": 4916000 }, { "epoch": 39.33, "learning_rate": 3.0334e-05, "loss": 8.7548, "step": 4916500 }, { "epoch": 39.34, "learning_rate": 3.0332e-05, "loss": 8.7433, "step": 4917000 }, { "epoch": 39.34, "learning_rate": 3.0330000000000003e-05, "loss": 8.742, "step": 4917500 }, { "epoch": 39.34, "learning_rate": 3.0328e-05, "loss": 8.7466, "step": 4918000 }, { "epoch": 39.35, "learning_rate": 3.0325999999999997e-05, "loss": 8.7683, "step": 4918500 }, { "epoch": 39.35, "learning_rate": 3.0324000000000003e-05, "loss": 8.7379, "step": 4919000 }, { "epoch": 39.36, "learning_rate": 3.0322000000000002e-05, "loss": 8.74, "step": 4919500 }, { "epoch": 39.36, "learning_rate": 3.0320000000000004e-05, "loss": 8.7473, "step": 4920000 }, { "epoch": 39.36, "learning_rate": 3.0318000000000003e-05, "loss": 8.7532, "step": 4920500 }, { "epoch": 39.37, "learning_rate": 3.0316e-05, "loss": 8.7343, "step": 4921000 }, { "epoch": 39.37, "learning_rate": 3.0314000000000005e-05, "loss": 8.7315, "step": 4921500 }, { "epoch": 39.38, "learning_rate": 3.0312e-05, "loss": 8.7502, "step": 4922000 }, { "epoch": 39.38, "learning_rate": 3.031e-05, "loss": 8.7577, "step": 4922500 }, { "epoch": 39.38, "learning_rate": 3.0308000000000002e-05, "loss": 8.7506, "step": 4923000 }, { "epoch": 39.39, "learning_rate": 3.0306e-05, "loss": 8.7397, "step": 4923500 }, { "epoch": 39.39, "learning_rate": 3.0304e-05, "loss": 8.7579, "step": 4924000 }, { "epoch": 39.4, "learning_rate": 3.0302000000000002e-05, "loss": 8.7495, "step": 4924500 }, { "epoch": 39.4, "learning_rate": 3.03e-05, "loss": 8.7567, "step": 4925000 }, { "epoch": 39.4, "learning_rate": 3.0298000000000004e-05, "loss": 8.7674, "step": 4925500 }, { "epoch": 39.41, "learning_rate": 3.0296000000000003e-05, "loss": 8.7311, "step": 4926000 }, { "epoch": 39.41, "learning_rate": 3.0294e-05, "loss": 8.7519, "step": 4926500 }, { "epoch": 39.42, "learning_rate": 3.0292000000000004e-05, "loss": 8.7509, "step": 4927000 }, { "epoch": 39.42, "learning_rate": 3.0290000000000003e-05, "loss": 8.7524, "step": 4927500 }, { "epoch": 39.42, "learning_rate": 3.0288e-05, "loss": 8.7466, "step": 4928000 }, { "epoch": 39.43, "learning_rate": 3.0286000000000004e-05, "loss": 8.7362, "step": 4928500 }, { "epoch": 39.43, "learning_rate": 3.0284e-05, "loss": 8.7601, "step": 4929000 }, { "epoch": 39.44, "learning_rate": 3.0282e-05, "loss": 8.7734, "step": 4929500 }, { "epoch": 39.44, "learning_rate": 3.028e-05, "loss": 8.7366, "step": 4930000 }, { "epoch": 39.44, "learning_rate": 3.0278e-05, "loss": 8.746, "step": 4930500 }, { "epoch": 39.45, "learning_rate": 3.0276e-05, "loss": 8.7659, "step": 4931000 }, { "epoch": 39.45, "learning_rate": 3.0274000000000002e-05, "loss": 8.757, "step": 4931500 }, { "epoch": 39.46, "learning_rate": 3.0272e-05, "loss": 8.7537, "step": 4932000 }, { "epoch": 39.46, "learning_rate": 3.0270000000000003e-05, "loss": 8.7389, "step": 4932500 }, { "epoch": 39.46, "learning_rate": 3.0268000000000002e-05, "loss": 8.7401, "step": 4933000 }, { "epoch": 39.47, "learning_rate": 3.0266e-05, "loss": 8.7508, "step": 4933500 }, { "epoch": 39.47, "learning_rate": 3.0264000000000004e-05, "loss": 8.7493, "step": 4934000 }, { "epoch": 39.48, "learning_rate": 3.0262000000000003e-05, "loss": 8.7623, "step": 4934500 }, { "epoch": 39.48, "learning_rate": 3.0259999999999998e-05, "loss": 8.7561, "step": 4935000 }, { "epoch": 39.48, "learning_rate": 3.0258000000000004e-05, "loss": 8.7528, "step": 4935500 }, { "epoch": 39.49, "learning_rate": 3.0256e-05, "loss": 8.7543, "step": 4936000 }, { "epoch": 39.49, "learning_rate": 3.0254e-05, "loss": 8.7596, "step": 4936500 }, { "epoch": 39.5, "learning_rate": 3.0252e-05, "loss": 8.7601, "step": 4937000 }, { "epoch": 39.5, "learning_rate": 3.025e-05, "loss": 8.7418, "step": 4937500 }, { "epoch": 39.5, "learning_rate": 3.0248000000000002e-05, "loss": 8.7632, "step": 4938000 }, { "epoch": 39.51, "learning_rate": 3.0246e-05, "loss": 8.7607, "step": 4938500 }, { "epoch": 39.51, "learning_rate": 3.0244e-05, "loss": 8.7739, "step": 4939000 }, { "epoch": 39.52, "learning_rate": 3.0242000000000003e-05, "loss": 8.7714, "step": 4939500 }, { "epoch": 39.52, "learning_rate": 3.0240000000000002e-05, "loss": 8.7469, "step": 4940000 }, { "epoch": 39.52, "learning_rate": 3.0238e-05, "loss": 8.7383, "step": 4940500 }, { "epoch": 39.53, "learning_rate": 3.0236000000000003e-05, "loss": 8.7417, "step": 4941000 }, { "epoch": 39.53, "learning_rate": 3.0234000000000002e-05, "loss": 8.7597, "step": 4941500 }, { "epoch": 39.54, "learning_rate": 3.0231999999999998e-05, "loss": 8.759, "step": 4942000 }, { "epoch": 39.54, "learning_rate": 3.0230000000000004e-05, "loss": 8.75, "step": 4942500 }, { "epoch": 39.54, "learning_rate": 3.0228e-05, "loss": 8.7569, "step": 4943000 }, { "epoch": 39.55, "learning_rate": 3.0225999999999998e-05, "loss": 8.7443, "step": 4943500 }, { "epoch": 39.55, "learning_rate": 3.0224e-05, "loss": 8.7457, "step": 4944000 }, { "epoch": 39.56, "learning_rate": 3.0222e-05, "loss": 8.7452, "step": 4944500 }, { "epoch": 39.56, "learning_rate": 3.0220000000000005e-05, "loss": 8.7676, "step": 4945000 }, { "epoch": 39.56, "learning_rate": 3.0218e-05, "loss": 8.7664, "step": 4945500 }, { "epoch": 39.57, "learning_rate": 3.0216e-05, "loss": 8.7387, "step": 4946000 }, { "epoch": 39.57, "learning_rate": 3.0214000000000002e-05, "loss": 8.7356, "step": 4946500 }, { "epoch": 39.58, "learning_rate": 3.0212e-05, "loss": 8.7374, "step": 4947000 }, { "epoch": 39.58, "learning_rate": 3.021e-05, "loss": 8.7642, "step": 4947500 }, { "epoch": 39.58, "learning_rate": 3.0208000000000003e-05, "loss": 8.7243, "step": 4948000 }, { "epoch": 39.59, "learning_rate": 3.0206000000000002e-05, "loss": 8.7564, "step": 4948500 }, { "epoch": 39.59, "learning_rate": 3.0203999999999997e-05, "loss": 8.7518, "step": 4949000 }, { "epoch": 39.6, "learning_rate": 3.0202000000000003e-05, "loss": 8.7434, "step": 4949500 }, { "epoch": 39.6, "learning_rate": 3.02e-05, "loss": 8.7628, "step": 4950000 }, { "epoch": 39.6, "learning_rate": 3.0198000000000005e-05, "loss": 8.7545, "step": 4950500 }, { "epoch": 39.61, "learning_rate": 3.0196000000000004e-05, "loss": 8.7644, "step": 4951000 }, { "epoch": 39.61, "learning_rate": 3.0194e-05, "loss": 8.7391, "step": 4951500 }, { "epoch": 39.62, "learning_rate": 3.0192000000000005e-05, "loss": 8.7499, "step": 4952000 }, { "epoch": 39.62, "learning_rate": 3.019e-05, "loss": 8.7352, "step": 4952500 }, { "epoch": 39.62, "learning_rate": 3.0188e-05, "loss": 8.7636, "step": 4953000 }, { "epoch": 39.63, "learning_rate": 3.0186000000000002e-05, "loss": 8.7496, "step": 4953500 }, { "epoch": 39.63, "learning_rate": 3.0184e-05, "loss": 8.7358, "step": 4954000 }, { "epoch": 39.64, "learning_rate": 3.0182e-05, "loss": 8.7525, "step": 4954500 }, { "epoch": 39.64, "learning_rate": 3.0180000000000002e-05, "loss": 8.7574, "step": 4955000 }, { "epoch": 39.64, "learning_rate": 3.0178e-05, "loss": 8.7353, "step": 4955500 }, { "epoch": 39.65, "learning_rate": 3.0175999999999997e-05, "loss": 8.7494, "step": 4956000 }, { "epoch": 39.65, "learning_rate": 3.0174000000000003e-05, "loss": 8.7562, "step": 4956500 }, { "epoch": 39.66, "learning_rate": 3.0172000000000002e-05, "loss": 8.7657, "step": 4957000 }, { "epoch": 39.66, "learning_rate": 3.0170000000000004e-05, "loss": 8.7574, "step": 4957500 }, { "epoch": 39.66, "learning_rate": 3.0168000000000003e-05, "loss": 8.7346, "step": 4958000 }, { "epoch": 39.67, "learning_rate": 3.0166e-05, "loss": 8.7545, "step": 4958500 }, { "epoch": 39.67, "learning_rate": 3.0164000000000005e-05, "loss": 8.7355, "step": 4959000 }, { "epoch": 39.68, "learning_rate": 3.0162e-05, "loss": 8.7554, "step": 4959500 }, { "epoch": 39.68, "learning_rate": 3.016e-05, "loss": 8.7386, "step": 4960000 }, { "epoch": 39.68, "learning_rate": 3.0158e-05, "loss": 8.7733, "step": 4960500 }, { "epoch": 39.69, "learning_rate": 3.0156e-05, "loss": 8.7579, "step": 4961000 }, { "epoch": 39.69, "learning_rate": 3.0154e-05, "loss": 8.7395, "step": 4961500 }, { "epoch": 39.7, "learning_rate": 3.0152000000000002e-05, "loss": 8.7509, "step": 4962000 }, { "epoch": 39.7, "learning_rate": 3.015e-05, "loss": 8.7579, "step": 4962500 }, { "epoch": 39.7, "learning_rate": 3.0148000000000003e-05, "loss": 8.766, "step": 4963000 }, { "epoch": 39.71, "learning_rate": 3.0146000000000002e-05, "loss": 8.7558, "step": 4963500 }, { "epoch": 39.71, "learning_rate": 3.0144e-05, "loss": 8.7411, "step": 4964000 }, { "epoch": 39.72, "learning_rate": 3.0142000000000004e-05, "loss": 8.743, "step": 4964500 }, { "epoch": 39.72, "learning_rate": 3.0140000000000003e-05, "loss": 8.7613, "step": 4965000 }, { "epoch": 39.72, "learning_rate": 3.0138e-05, "loss": 8.7548, "step": 4965500 }, { "epoch": 39.73, "learning_rate": 3.0136000000000004e-05, "loss": 8.7564, "step": 4966000 }, { "epoch": 39.73, "learning_rate": 3.0134e-05, "loss": 8.7643, "step": 4966500 }, { "epoch": 39.74, "learning_rate": 3.0132e-05, "loss": 8.7443, "step": 4967000 }, { "epoch": 39.74, "learning_rate": 3.013e-05, "loss": 8.7429, "step": 4967500 }, { "epoch": 39.74, "learning_rate": 3.0128e-05, "loss": 8.7387, "step": 4968000 }, { "epoch": 39.75, "learning_rate": 3.0126e-05, "loss": 8.7493, "step": 4968500 }, { "epoch": 39.75, "learning_rate": 3.0124e-05, "loss": 8.7569, "step": 4969000 }, { "epoch": 39.76, "learning_rate": 3.0122e-05, "loss": 8.7584, "step": 4969500 }, { "epoch": 39.76, "learning_rate": 3.0120000000000003e-05, "loss": 8.7304, "step": 4970000 }, { "epoch": 39.76, "learning_rate": 3.0118000000000002e-05, "loss": 8.751, "step": 4970500 }, { "epoch": 39.77, "learning_rate": 3.0116e-05, "loss": 8.7464, "step": 4971000 }, { "epoch": 39.77, "learning_rate": 3.0114000000000003e-05, "loss": 8.7581, "step": 4971500 }, { "epoch": 39.78, "learning_rate": 3.0112000000000002e-05, "loss": 8.7285, "step": 4972000 }, { "epoch": 39.78, "learning_rate": 3.0109999999999998e-05, "loss": 8.7416, "step": 4972500 }, { "epoch": 39.78, "learning_rate": 3.0108000000000004e-05, "loss": 8.7679, "step": 4973000 }, { "epoch": 39.79, "learning_rate": 3.0106e-05, "loss": 8.7456, "step": 4973500 }, { "epoch": 39.79, "learning_rate": 3.0104e-05, "loss": 8.7449, "step": 4974000 }, { "epoch": 39.8, "learning_rate": 3.0102e-05, "loss": 8.7508, "step": 4974500 }, { "epoch": 39.8, "learning_rate": 3.01e-05, "loss": 8.7478, "step": 4975000 }, { "epoch": 39.8, "learning_rate": 3.0098000000000006e-05, "loss": 8.7635, "step": 4975500 }, { "epoch": 39.81, "learning_rate": 3.0096e-05, "loss": 8.7444, "step": 4976000 }, { "epoch": 39.81, "learning_rate": 3.0094e-05, "loss": 8.7665, "step": 4976500 }, { "epoch": 39.82, "learning_rate": 3.0092000000000003e-05, "loss": 8.7498, "step": 4977000 }, { "epoch": 39.82, "learning_rate": 3.009e-05, "loss": 8.7407, "step": 4977500 }, { "epoch": 39.82, "learning_rate": 3.0088e-05, "loss": 8.7639, "step": 4978000 }, { "epoch": 39.83, "learning_rate": 3.0086000000000003e-05, "loss": 8.743, "step": 4978500 }, { "epoch": 39.83, "learning_rate": 3.0084000000000002e-05, "loss": 8.7481, "step": 4979000 }, { "epoch": 39.84, "learning_rate": 3.0081999999999998e-05, "loss": 8.7633, "step": 4979500 }, { "epoch": 39.84, "learning_rate": 3.0080000000000003e-05, "loss": 8.7551, "step": 4980000 }, { "epoch": 39.84, "learning_rate": 3.0078e-05, "loss": 8.7231, "step": 4980500 }, { "epoch": 39.85, "learning_rate": 3.0076000000000005e-05, "loss": 8.736, "step": 4981000 }, { "epoch": 39.85, "learning_rate": 3.0074000000000004e-05, "loss": 8.7554, "step": 4981500 }, { "epoch": 39.86, "learning_rate": 3.0072e-05, "loss": 8.7634, "step": 4982000 }, { "epoch": 39.86, "learning_rate": 3.0070000000000005e-05, "loss": 8.7628, "step": 4982500 }, { "epoch": 39.86, "learning_rate": 3.0068e-05, "loss": 8.7533, "step": 4983000 }, { "epoch": 39.87, "learning_rate": 3.0066e-05, "loss": 8.729, "step": 4983500 }, { "epoch": 39.87, "learning_rate": 3.0064000000000002e-05, "loss": 8.7147, "step": 4984000 }, { "epoch": 39.88, "learning_rate": 3.0062e-05, "loss": 8.7533, "step": 4984500 }, { "epoch": 39.88, "learning_rate": 3.006e-05, "loss": 8.765, "step": 4985000 }, { "epoch": 39.88, "learning_rate": 3.0058000000000003e-05, "loss": 8.7484, "step": 4985500 }, { "epoch": 39.89, "learning_rate": 3.0056e-05, "loss": 8.7692, "step": 4986000 }, { "epoch": 39.89, "learning_rate": 3.0053999999999997e-05, "loss": 8.7441, "step": 4986500 }, { "epoch": 39.9, "learning_rate": 3.0052000000000003e-05, "loss": 8.7442, "step": 4987000 }, { "epoch": 39.9, "learning_rate": 3.0050000000000002e-05, "loss": 8.7522, "step": 4987500 }, { "epoch": 39.9, "learning_rate": 3.0048000000000004e-05, "loss": 8.7505, "step": 4988000 }, { "epoch": 39.91, "learning_rate": 3.0046000000000003e-05, "loss": 8.7491, "step": 4988500 }, { "epoch": 39.91, "learning_rate": 3.0044e-05, "loss": 8.7463, "step": 4989000 }, { "epoch": 39.92, "learning_rate": 3.0042000000000005e-05, "loss": 8.7675, "step": 4989500 }, { "epoch": 39.92, "learning_rate": 3.004e-05, "loss": 8.7524, "step": 4990000 }, { "epoch": 39.92, "learning_rate": 3.0038e-05, "loss": 8.7482, "step": 4990500 }, { "epoch": 39.93, "learning_rate": 3.0036000000000002e-05, "loss": 8.7445, "step": 4991000 }, { "epoch": 39.93, "learning_rate": 3.0034e-05, "loss": 8.754, "step": 4991500 }, { "epoch": 39.94, "learning_rate": 3.0032e-05, "loss": 8.7425, "step": 4992000 }, { "epoch": 39.94, "learning_rate": 3.0030000000000002e-05, "loss": 8.7654, "step": 4992500 }, { "epoch": 39.94, "learning_rate": 3.0028e-05, "loss": 8.7632, "step": 4993000 }, { "epoch": 39.95, "learning_rate": 3.0026000000000004e-05, "loss": 8.7484, "step": 4993500 }, { "epoch": 39.95, "learning_rate": 3.0024000000000003e-05, "loss": 8.7421, "step": 4994000 }, { "epoch": 39.96, "learning_rate": 3.0022e-05, "loss": 8.7496, "step": 4994500 }, { "epoch": 39.96, "learning_rate": 3.0020000000000004e-05, "loss": 8.7253, "step": 4995000 }, { "epoch": 39.96, "learning_rate": 3.0018000000000003e-05, "loss": 8.745, "step": 4995500 }, { "epoch": 39.97, "learning_rate": 3.0016e-05, "loss": 8.7375, "step": 4996000 }, { "epoch": 39.97, "learning_rate": 3.0014000000000004e-05, "loss": 8.759, "step": 4996500 }, { "epoch": 39.98, "learning_rate": 3.0012e-05, "loss": 8.7423, "step": 4997000 }, { "epoch": 39.98, "learning_rate": 3.001e-05, "loss": 8.7437, "step": 4997500 }, { "epoch": 39.98, "learning_rate": 3.0008e-05, "loss": 8.7338, "step": 4998000 }, { "epoch": 39.99, "learning_rate": 3.0006e-05, "loss": 8.7462, "step": 4998500 }, { "epoch": 39.99, "learning_rate": 3.0004e-05, "loss": 8.7478, "step": 4999000 }, { "epoch": 40.0, "learning_rate": 3.0002000000000002e-05, "loss": 8.7705, "step": 4999500 }, { "epoch": 40.0, "learning_rate": 3e-05, "loss": 8.7505, "step": 5000000 }, { "epoch": 40.0, "learning_rate": 2.9998000000000003e-05, "loss": 8.7456, "step": 5000500 }, { "epoch": 40.01, "learning_rate": 2.9996000000000002e-05, "loss": 8.7817, "step": 5001000 }, { "epoch": 40.01, "learning_rate": 2.9994e-05, "loss": 8.7449, "step": 5001500 }, { "epoch": 40.02, "learning_rate": 2.9992000000000004e-05, "loss": 8.7398, "step": 5002000 }, { "epoch": 40.02, "learning_rate": 2.9990000000000003e-05, "loss": 8.7355, "step": 5002500 }, { "epoch": 40.02, "learning_rate": 2.9987999999999998e-05, "loss": 8.75, "step": 5003000 }, { "epoch": 40.03, "learning_rate": 2.9986000000000004e-05, "loss": 8.7452, "step": 5003500 }, { "epoch": 40.03, "learning_rate": 2.9984e-05, "loss": 8.7408, "step": 5004000 }, { "epoch": 40.04, "learning_rate": 2.9982e-05, "loss": 8.7754, "step": 5004500 }, { "epoch": 40.04, "learning_rate": 2.998e-05, "loss": 8.7353, "step": 5005000 }, { "epoch": 40.04, "learning_rate": 2.9978e-05, "loss": 8.7549, "step": 5005500 }, { "epoch": 40.05, "learning_rate": 2.9976000000000006e-05, "loss": 8.7617, "step": 5006000 }, { "epoch": 40.05, "learning_rate": 2.9974e-05, "loss": 8.7543, "step": 5006500 }, { "epoch": 40.06, "learning_rate": 2.9972e-05, "loss": 8.7662, "step": 5007000 }, { "epoch": 40.06, "learning_rate": 2.9970000000000003e-05, "loss": 8.7774, "step": 5007500 }, { "epoch": 40.06, "learning_rate": 2.9968000000000002e-05, "loss": 8.7501, "step": 5008000 }, { "epoch": 40.07, "learning_rate": 2.9966e-05, "loss": 8.7439, "step": 5008500 }, { "epoch": 40.07, "learning_rate": 2.9964000000000003e-05, "loss": 8.7361, "step": 5009000 }, { "epoch": 40.08, "learning_rate": 2.9962000000000002e-05, "loss": 8.7512, "step": 5009500 }, { "epoch": 40.08, "learning_rate": 2.9959999999999998e-05, "loss": 8.7437, "step": 5010000 }, { "epoch": 40.08, "learning_rate": 2.9958000000000004e-05, "loss": 8.7573, "step": 5010500 }, { "epoch": 40.09, "learning_rate": 2.9956e-05, "loss": 8.7495, "step": 5011000 }, { "epoch": 40.09, "learning_rate": 2.9953999999999998e-05, "loss": 8.7367, "step": 5011500 }, { "epoch": 40.1, "learning_rate": 2.9952000000000004e-05, "loss": 8.7606, "step": 5012000 }, { "epoch": 40.1, "learning_rate": 2.995e-05, "loss": 8.7388, "step": 5012500 }, { "epoch": 40.1, "learning_rate": 2.9948000000000005e-05, "loss": 8.7555, "step": 5013000 }, { "epoch": 40.11, "learning_rate": 2.9946e-05, "loss": 8.7442, "step": 5013500 }, { "epoch": 40.11, "learning_rate": 2.9944e-05, "loss": 8.7458, "step": 5014000 }, { "epoch": 40.12, "learning_rate": 2.9942000000000002e-05, "loss": 8.7391, "step": 5014500 }, { "epoch": 40.12, "learning_rate": 2.994e-05, "loss": 8.7603, "step": 5015000 }, { "epoch": 40.12, "learning_rate": 2.9938e-05, "loss": 8.7334, "step": 5015500 }, { "epoch": 40.13, "learning_rate": 2.9936000000000003e-05, "loss": 8.7649, "step": 5016000 }, { "epoch": 40.13, "learning_rate": 2.9934000000000002e-05, "loss": 8.7419, "step": 5016500 }, { "epoch": 40.14, "learning_rate": 2.9931999999999997e-05, "loss": 8.7629, "step": 5017000 }, { "epoch": 40.14, "learning_rate": 2.9930000000000003e-05, "loss": 8.749, "step": 5017500 }, { "epoch": 40.14, "learning_rate": 2.9928000000000002e-05, "loss": 8.7565, "step": 5018000 }, { "epoch": 40.15, "learning_rate": 2.9926000000000005e-05, "loss": 8.765, "step": 5018500 }, { "epoch": 40.15, "learning_rate": 2.9924000000000004e-05, "loss": 8.7511, "step": 5019000 }, { "epoch": 40.16, "learning_rate": 2.9922e-05, "loss": 8.7529, "step": 5019500 }, { "epoch": 40.16, "learning_rate": 2.9920000000000005e-05, "loss": 8.7403, "step": 5020000 }, { "epoch": 40.16, "learning_rate": 2.9918e-05, "loss": 8.746, "step": 5020500 }, { "epoch": 40.17, "learning_rate": 2.9916e-05, "loss": 8.7579, "step": 5021000 }, { "epoch": 40.17, "learning_rate": 2.9914000000000002e-05, "loss": 8.762, "step": 5021500 }, { "epoch": 40.18, "learning_rate": 2.9912e-05, "loss": 8.7533, "step": 5022000 }, { "epoch": 40.18, "learning_rate": 2.991e-05, "loss": 8.7498, "step": 5022500 }, { "epoch": 40.18, "learning_rate": 2.9908000000000002e-05, "loss": 8.7665, "step": 5023000 }, { "epoch": 40.19, "learning_rate": 2.9906e-05, "loss": 8.7413, "step": 5023500 }, { "epoch": 40.19, "learning_rate": 2.9904e-05, "loss": 8.7545, "step": 5024000 }, { "epoch": 40.2, "learning_rate": 2.9902000000000003e-05, "loss": 8.744, "step": 5024500 }, { "epoch": 40.2, "learning_rate": 2.9900000000000002e-05, "loss": 8.748, "step": 5025000 }, { "epoch": 40.2, "learning_rate": 2.9898000000000004e-05, "loss": 8.7343, "step": 5025500 }, { "epoch": 40.21, "learning_rate": 2.9896000000000003e-05, "loss": 8.7666, "step": 5026000 }, { "epoch": 40.21, "learning_rate": 2.9894e-05, "loss": 8.7313, "step": 5026500 }, { "epoch": 40.22, "learning_rate": 2.9892000000000005e-05, "loss": 8.7534, "step": 5027000 }, { "epoch": 40.22, "learning_rate": 2.989e-05, "loss": 8.7221, "step": 5027500 }, { "epoch": 40.22, "learning_rate": 2.9888e-05, "loss": 8.7512, "step": 5028000 }, { "epoch": 40.23, "learning_rate": 2.9886e-05, "loss": 8.74, "step": 5028500 }, { "epoch": 40.23, "learning_rate": 2.9884e-05, "loss": 8.725, "step": 5029000 }, { "epoch": 40.24, "learning_rate": 2.9882e-05, "loss": 8.7458, "step": 5029500 }, { "epoch": 40.24, "learning_rate": 2.9880000000000002e-05, "loss": 8.7317, "step": 5030000 }, { "epoch": 40.24, "learning_rate": 2.9878e-05, "loss": 8.7577, "step": 5030500 }, { "epoch": 40.25, "learning_rate": 2.9876000000000003e-05, "loss": 8.7406, "step": 5031000 }, { "epoch": 40.25, "learning_rate": 2.9874000000000002e-05, "loss": 8.7204, "step": 5031500 }, { "epoch": 40.26, "learning_rate": 2.9872e-05, "loss": 8.7494, "step": 5032000 }, { "epoch": 40.26, "learning_rate": 2.9870000000000004e-05, "loss": 8.7425, "step": 5032500 }, { "epoch": 40.26, "learning_rate": 2.9868000000000003e-05, "loss": 8.76, "step": 5033000 }, { "epoch": 40.27, "learning_rate": 2.9866e-05, "loss": 8.7468, "step": 5033500 }, { "epoch": 40.27, "learning_rate": 2.9864000000000004e-05, "loss": 8.7666, "step": 5034000 }, { "epoch": 40.28, "learning_rate": 2.9862e-05, "loss": 8.7479, "step": 5034500 }, { "epoch": 40.28, "learning_rate": 2.986e-05, "loss": 8.7574, "step": 5035000 }, { "epoch": 40.28, "learning_rate": 2.9858e-05, "loss": 8.7458, "step": 5035500 }, { "epoch": 40.29, "learning_rate": 2.9856e-05, "loss": 8.7565, "step": 5036000 }, { "epoch": 40.29, "learning_rate": 2.9854000000000002e-05, "loss": 8.7368, "step": 5036500 }, { "epoch": 40.3, "learning_rate": 2.9852e-05, "loss": 8.7349, "step": 5037000 }, { "epoch": 40.3, "learning_rate": 2.985e-05, "loss": 8.7624, "step": 5037500 }, { "epoch": 40.3, "learning_rate": 2.9848000000000003e-05, "loss": 8.7489, "step": 5038000 }, { "epoch": 40.31, "learning_rate": 2.9846000000000002e-05, "loss": 8.7783, "step": 5038500 }, { "epoch": 40.31, "learning_rate": 2.9844e-05, "loss": 8.7472, "step": 5039000 }, { "epoch": 40.32, "learning_rate": 2.9842000000000003e-05, "loss": 8.7732, "step": 5039500 }, { "epoch": 40.32, "learning_rate": 2.9840000000000002e-05, "loss": 8.7493, "step": 5040000 }, { "epoch": 40.32, "learning_rate": 2.9837999999999998e-05, "loss": 8.7685, "step": 5040500 }, { "epoch": 40.33, "learning_rate": 2.9836000000000004e-05, "loss": 8.747, "step": 5041000 }, { "epoch": 40.33, "learning_rate": 2.9834e-05, "loss": 8.7701, "step": 5041500 }, { "epoch": 40.34, "learning_rate": 2.9831999999999998e-05, "loss": 8.7499, "step": 5042000 }, { "epoch": 40.34, "learning_rate": 2.9830000000000004e-05, "loss": 8.7407, "step": 5042500 }, { "epoch": 40.34, "learning_rate": 2.9828e-05, "loss": 8.7373, "step": 5043000 }, { "epoch": 40.35, "learning_rate": 2.9826000000000005e-05, "loss": 8.7498, "step": 5043500 }, { "epoch": 40.35, "learning_rate": 2.9824e-05, "loss": 8.7506, "step": 5044000 }, { "epoch": 40.36, "learning_rate": 2.9822e-05, "loss": 8.7419, "step": 5044500 }, { "epoch": 40.36, "learning_rate": 2.9820000000000002e-05, "loss": 8.724, "step": 5045000 }, { "epoch": 40.36, "learning_rate": 2.9818e-05, "loss": 8.7658, "step": 5045500 }, { "epoch": 40.37, "learning_rate": 2.9816e-05, "loss": 8.7359, "step": 5046000 }, { "epoch": 40.37, "learning_rate": 2.9814000000000003e-05, "loss": 8.7612, "step": 5046500 }, { "epoch": 40.38, "learning_rate": 2.9812000000000002e-05, "loss": 8.7472, "step": 5047000 }, { "epoch": 40.38, "learning_rate": 2.9809999999999997e-05, "loss": 8.7529, "step": 5047500 }, { "epoch": 40.38, "learning_rate": 2.9808000000000003e-05, "loss": 8.7495, "step": 5048000 }, { "epoch": 40.39, "learning_rate": 2.9806000000000002e-05, "loss": 8.7434, "step": 5048500 }, { "epoch": 40.39, "learning_rate": 2.9804000000000005e-05, "loss": 8.7405, "step": 5049000 }, { "epoch": 40.4, "learning_rate": 2.9802000000000004e-05, "loss": 8.7591, "step": 5049500 }, { "epoch": 40.4, "learning_rate": 2.98e-05, "loss": 8.7706, "step": 5050000 }, { "epoch": 40.4, "learning_rate": 2.9798000000000005e-05, "loss": 8.7428, "step": 5050500 }, { "epoch": 40.41, "learning_rate": 2.9796e-05, "loss": 8.7384, "step": 5051000 }, { "epoch": 40.41, "learning_rate": 2.9794e-05, "loss": 8.7302, "step": 5051500 }, { "epoch": 40.42, "learning_rate": 2.9792000000000002e-05, "loss": 8.7482, "step": 5052000 }, { "epoch": 40.42, "learning_rate": 2.979e-05, "loss": 8.7592, "step": 5052500 }, { "epoch": 40.42, "learning_rate": 2.9788e-05, "loss": 8.7616, "step": 5053000 }, { "epoch": 40.43, "learning_rate": 2.9786000000000002e-05, "loss": 8.7478, "step": 5053500 }, { "epoch": 40.43, "learning_rate": 2.9784e-05, "loss": 8.7495, "step": 5054000 }, { "epoch": 40.44, "learning_rate": 2.9782e-05, "loss": 8.7515, "step": 5054500 }, { "epoch": 40.44, "learning_rate": 2.9780000000000003e-05, "loss": 8.7418, "step": 5055000 }, { "epoch": 40.44, "learning_rate": 2.9778000000000002e-05, "loss": 8.7366, "step": 5055500 }, { "epoch": 40.45, "learning_rate": 2.9776000000000004e-05, "loss": 8.746, "step": 5056000 }, { "epoch": 40.45, "learning_rate": 2.9774000000000003e-05, "loss": 8.7376, "step": 5056500 }, { "epoch": 40.46, "learning_rate": 2.9772e-05, "loss": 8.7249, "step": 5057000 }, { "epoch": 40.46, "learning_rate": 2.9770000000000005e-05, "loss": 8.759, "step": 5057500 }, { "epoch": 40.46, "learning_rate": 2.9768e-05, "loss": 8.7461, "step": 5058000 }, { "epoch": 40.47, "learning_rate": 2.9766e-05, "loss": 8.7535, "step": 5058500 }, { "epoch": 40.47, "learning_rate": 2.9764e-05, "loss": 8.7555, "step": 5059000 }, { "epoch": 40.48, "learning_rate": 2.9762e-05, "loss": 8.7462, "step": 5059500 }, { "epoch": 40.48, "learning_rate": 2.976e-05, "loss": 8.7432, "step": 5060000 }, { "epoch": 40.48, "learning_rate": 2.9758000000000002e-05, "loss": 8.75, "step": 5060500 }, { "epoch": 40.49, "learning_rate": 2.9756e-05, "loss": 8.7482, "step": 5061000 }, { "epoch": 40.49, "learning_rate": 2.9754000000000003e-05, "loss": 8.7396, "step": 5061500 }, { "epoch": 40.5, "learning_rate": 2.9752000000000002e-05, "loss": 8.7361, "step": 5062000 }, { "epoch": 40.5, "learning_rate": 2.975e-05, "loss": 8.7424, "step": 5062500 }, { "epoch": 40.5, "learning_rate": 2.9748000000000004e-05, "loss": 8.7549, "step": 5063000 }, { "epoch": 40.51, "learning_rate": 2.9746000000000003e-05, "loss": 8.748, "step": 5063500 }, { "epoch": 40.51, "learning_rate": 2.9744e-05, "loss": 8.7511, "step": 5064000 }, { "epoch": 40.52, "learning_rate": 2.9742000000000004e-05, "loss": 8.773, "step": 5064500 }, { "epoch": 40.52, "learning_rate": 2.974e-05, "loss": 8.7532, "step": 5065000 }, { "epoch": 40.52, "learning_rate": 2.9738e-05, "loss": 8.7364, "step": 5065500 }, { "epoch": 40.53, "learning_rate": 2.9736e-05, "loss": 8.741, "step": 5066000 }, { "epoch": 40.53, "learning_rate": 2.9734e-05, "loss": 8.7395, "step": 5066500 }, { "epoch": 40.54, "learning_rate": 2.9732e-05, "loss": 8.757, "step": 5067000 }, { "epoch": 40.54, "learning_rate": 2.973e-05, "loss": 8.7749, "step": 5067500 }, { "epoch": 40.54, "learning_rate": 2.9728e-05, "loss": 8.7675, "step": 5068000 }, { "epoch": 40.55, "learning_rate": 2.9726000000000003e-05, "loss": 8.7429, "step": 5068500 }, { "epoch": 40.55, "learning_rate": 2.9724000000000002e-05, "loss": 8.7662, "step": 5069000 }, { "epoch": 40.56, "learning_rate": 2.9722e-05, "loss": 8.7393, "step": 5069500 }, { "epoch": 40.56, "learning_rate": 2.9720000000000003e-05, "loss": 8.761, "step": 5070000 }, { "epoch": 40.56, "learning_rate": 2.9718000000000002e-05, "loss": 8.7613, "step": 5070500 }, { "epoch": 40.57, "learning_rate": 2.9715999999999998e-05, "loss": 8.7464, "step": 5071000 }, { "epoch": 40.57, "learning_rate": 2.9714000000000004e-05, "loss": 8.7545, "step": 5071500 }, { "epoch": 40.58, "learning_rate": 2.9712e-05, "loss": 8.7531, "step": 5072000 }, { "epoch": 40.58, "learning_rate": 2.971e-05, "loss": 8.7482, "step": 5072500 }, { "epoch": 40.58, "learning_rate": 2.9708e-05, "loss": 8.7297, "step": 5073000 }, { "epoch": 40.59, "learning_rate": 2.9706e-05, "loss": 8.7503, "step": 5073500 }, { "epoch": 40.59, "learning_rate": 2.9704000000000006e-05, "loss": 8.7432, "step": 5074000 }, { "epoch": 40.6, "learning_rate": 2.9702e-05, "loss": 8.7496, "step": 5074500 }, { "epoch": 40.6, "learning_rate": 2.97e-05, "loss": 8.7403, "step": 5075000 }, { "epoch": 40.6, "learning_rate": 2.9698000000000003e-05, "loss": 8.754, "step": 5075500 }, { "epoch": 40.61, "learning_rate": 2.9696e-05, "loss": 8.7684, "step": 5076000 }, { "epoch": 40.61, "learning_rate": 2.9694e-05, "loss": 8.7367, "step": 5076500 }, { "epoch": 40.62, "learning_rate": 2.9692000000000003e-05, "loss": 8.7342, "step": 5077000 }, { "epoch": 40.62, "learning_rate": 2.9690000000000002e-05, "loss": 8.7518, "step": 5077500 }, { "epoch": 40.62, "learning_rate": 2.9687999999999998e-05, "loss": 8.7569, "step": 5078000 }, { "epoch": 40.63, "learning_rate": 2.9686000000000003e-05, "loss": 8.7475, "step": 5078500 }, { "epoch": 40.63, "learning_rate": 2.9684e-05, "loss": 8.726, "step": 5079000 }, { "epoch": 40.64, "learning_rate": 2.9681999999999998e-05, "loss": 8.7539, "step": 5079500 }, { "epoch": 40.64, "learning_rate": 2.9680000000000004e-05, "loss": 8.7532, "step": 5080000 }, { "epoch": 40.64, "learning_rate": 2.9678e-05, "loss": 8.7188, "step": 5080500 }, { "epoch": 40.65, "learning_rate": 2.9676000000000005e-05, "loss": 8.7454, "step": 5081000 }, { "epoch": 40.65, "learning_rate": 2.9674e-05, "loss": 8.7378, "step": 5081500 }, { "epoch": 40.66, "learning_rate": 2.9672e-05, "loss": 8.7523, "step": 5082000 }, { "epoch": 40.66, "learning_rate": 2.9670000000000002e-05, "loss": 8.7701, "step": 5082500 }, { "epoch": 40.66, "learning_rate": 2.9668e-05, "loss": 8.7638, "step": 5083000 }, { "epoch": 40.67, "learning_rate": 2.9666e-05, "loss": 8.7533, "step": 5083500 }, { "epoch": 40.67, "learning_rate": 2.9664000000000003e-05, "loss": 8.7525, "step": 5084000 }, { "epoch": 40.68, "learning_rate": 2.9662e-05, "loss": 8.7471, "step": 5084500 }, { "epoch": 40.68, "learning_rate": 2.9659999999999997e-05, "loss": 8.7427, "step": 5085000 }, { "epoch": 40.68, "learning_rate": 2.9658000000000003e-05, "loss": 8.7179, "step": 5085500 }, { "epoch": 40.69, "learning_rate": 2.9656000000000002e-05, "loss": 8.7513, "step": 5086000 }, { "epoch": 40.69, "learning_rate": 2.9654000000000004e-05, "loss": 8.7701, "step": 5086500 }, { "epoch": 40.7, "learning_rate": 2.9652000000000003e-05, "loss": 8.7359, "step": 5087000 }, { "epoch": 40.7, "learning_rate": 2.965e-05, "loss": 8.7513, "step": 5087500 }, { "epoch": 40.7, "learning_rate": 2.9648000000000005e-05, "loss": 8.7651, "step": 5088000 }, { "epoch": 40.71, "learning_rate": 2.9646e-05, "loss": 8.7589, "step": 5088500 }, { "epoch": 40.71, "learning_rate": 2.9644e-05, "loss": 8.7349, "step": 5089000 }, { "epoch": 40.72, "learning_rate": 2.9642000000000002e-05, "loss": 8.7447, "step": 5089500 }, { "epoch": 40.72, "learning_rate": 2.964e-05, "loss": 8.7589, "step": 5090000 }, { "epoch": 40.72, "learning_rate": 2.9638e-05, "loss": 8.7545, "step": 5090500 }, { "epoch": 40.73, "learning_rate": 2.9636000000000002e-05, "loss": 8.7664, "step": 5091000 }, { "epoch": 40.73, "learning_rate": 2.9634e-05, "loss": 8.7568, "step": 5091500 }, { "epoch": 40.74, "learning_rate": 2.9632e-05, "loss": 8.7171, "step": 5092000 }, { "epoch": 40.74, "learning_rate": 2.9630000000000003e-05, "loss": 8.7398, "step": 5092500 }, { "epoch": 40.74, "learning_rate": 2.9628e-05, "loss": 8.7552, "step": 5093000 }, { "epoch": 40.75, "learning_rate": 2.9626000000000004e-05, "loss": 8.7454, "step": 5093500 }, { "epoch": 40.75, "learning_rate": 2.9624000000000003e-05, "loss": 8.7413, "step": 5094000 }, { "epoch": 40.76, "learning_rate": 2.9622e-05, "loss": 8.7628, "step": 5094500 }, { "epoch": 40.76, "learning_rate": 2.9620000000000004e-05, "loss": 8.7508, "step": 5095000 }, { "epoch": 40.76, "learning_rate": 2.9618e-05, "loss": 8.7611, "step": 5095500 }, { "epoch": 40.77, "learning_rate": 2.9616e-05, "loss": 8.7595, "step": 5096000 }, { "epoch": 40.77, "learning_rate": 2.9614e-05, "loss": 8.751, "step": 5096500 }, { "epoch": 40.78, "learning_rate": 2.9612e-05, "loss": 8.7578, "step": 5097000 }, { "epoch": 40.78, "learning_rate": 2.961e-05, "loss": 8.7635, "step": 5097500 }, { "epoch": 40.78, "learning_rate": 2.9608000000000002e-05, "loss": 8.7467, "step": 5098000 }, { "epoch": 40.79, "learning_rate": 2.9606e-05, "loss": 8.7488, "step": 5098500 }, { "epoch": 40.79, "learning_rate": 2.9604000000000003e-05, "loss": 8.7647, "step": 5099000 }, { "epoch": 40.8, "learning_rate": 2.9602000000000002e-05, "loss": 8.7278, "step": 5099500 }, { "epoch": 40.8, "learning_rate": 2.96e-05, "loss": 8.778, "step": 5100000 }, { "epoch": 40.8, "learning_rate": 2.9598000000000004e-05, "loss": 8.7508, "step": 5100500 }, { "epoch": 40.81, "learning_rate": 2.9596000000000003e-05, "loss": 8.7598, "step": 5101000 }, { "epoch": 40.81, "learning_rate": 2.9593999999999998e-05, "loss": 8.7358, "step": 5101500 }, { "epoch": 40.82, "learning_rate": 2.9592000000000004e-05, "loss": 8.7674, "step": 5102000 }, { "epoch": 40.82, "learning_rate": 2.959e-05, "loss": 8.7594, "step": 5102500 }, { "epoch": 40.82, "learning_rate": 2.9588e-05, "loss": 8.7385, "step": 5103000 }, { "epoch": 40.83, "learning_rate": 2.9586e-05, "loss": 8.7575, "step": 5103500 }, { "epoch": 40.83, "learning_rate": 2.9584e-05, "loss": 8.7455, "step": 5104000 }, { "epoch": 40.84, "learning_rate": 2.9582000000000006e-05, "loss": 8.7629, "step": 5104500 }, { "epoch": 40.84, "learning_rate": 2.958e-05, "loss": 8.7665, "step": 5105000 }, { "epoch": 40.84, "learning_rate": 2.9578e-05, "loss": 8.7685, "step": 5105500 }, { "epoch": 40.85, "learning_rate": 2.9576000000000003e-05, "loss": 8.7574, "step": 5106000 }, { "epoch": 40.85, "learning_rate": 2.9574000000000002e-05, "loss": 8.7605, "step": 5106500 }, { "epoch": 40.86, "learning_rate": 2.9572e-05, "loss": 8.7529, "step": 5107000 }, { "epoch": 40.86, "learning_rate": 2.9570000000000003e-05, "loss": 8.7579, "step": 5107500 }, { "epoch": 40.86, "learning_rate": 2.9568000000000002e-05, "loss": 8.7437, "step": 5108000 }, { "epoch": 40.87, "learning_rate": 2.9565999999999998e-05, "loss": 8.7513, "step": 5108500 }, { "epoch": 40.87, "learning_rate": 2.9564000000000004e-05, "loss": 8.7425, "step": 5109000 }, { "epoch": 40.88, "learning_rate": 2.9562e-05, "loss": 8.7462, "step": 5109500 }, { "epoch": 40.88, "learning_rate": 2.9559999999999998e-05, "loss": 8.747, "step": 5110000 }, { "epoch": 40.88, "learning_rate": 2.9558000000000004e-05, "loss": 8.742, "step": 5110500 }, { "epoch": 40.89, "learning_rate": 2.9556e-05, "loss": 8.7585, "step": 5111000 }, { "epoch": 40.89, "learning_rate": 2.9554000000000005e-05, "loss": 8.7286, "step": 5111500 }, { "epoch": 40.9, "learning_rate": 2.9552e-05, "loss": 8.7305, "step": 5112000 }, { "epoch": 40.9, "learning_rate": 2.955e-05, "loss": 8.7394, "step": 5112500 }, { "epoch": 40.9, "learning_rate": 2.9548000000000002e-05, "loss": 8.7289, "step": 5113000 }, { "epoch": 40.91, "learning_rate": 2.9546e-05, "loss": 8.7567, "step": 5113500 }, { "epoch": 40.91, "learning_rate": 2.9544e-05, "loss": 8.7588, "step": 5114000 }, { "epoch": 40.92, "learning_rate": 2.9542000000000003e-05, "loss": 8.7701, "step": 5114500 }, { "epoch": 40.92, "learning_rate": 2.9540000000000002e-05, "loss": 8.7753, "step": 5115000 }, { "epoch": 40.92, "learning_rate": 2.9537999999999997e-05, "loss": 8.7567, "step": 5115500 }, { "epoch": 40.93, "learning_rate": 2.9536000000000003e-05, "loss": 8.7463, "step": 5116000 }, { "epoch": 40.93, "learning_rate": 2.9534000000000002e-05, "loss": 8.742, "step": 5116500 }, { "epoch": 40.94, "learning_rate": 2.9532000000000005e-05, "loss": 8.7644, "step": 5117000 }, { "epoch": 40.94, "learning_rate": 2.9530000000000004e-05, "loss": 8.7481, "step": 5117500 }, { "epoch": 40.94, "learning_rate": 2.9528e-05, "loss": 8.7491, "step": 5118000 }, { "epoch": 40.95, "learning_rate": 2.9526000000000005e-05, "loss": 8.768, "step": 5118500 }, { "epoch": 40.95, "learning_rate": 2.9524e-05, "loss": 8.7647, "step": 5119000 }, { "epoch": 40.96, "learning_rate": 2.9522e-05, "loss": 8.7708, "step": 5119500 }, { "epoch": 40.96, "learning_rate": 2.9520000000000002e-05, "loss": 8.7756, "step": 5120000 }, { "epoch": 40.96, "learning_rate": 2.9518e-05, "loss": 8.7478, "step": 5120500 }, { "epoch": 40.97, "learning_rate": 2.9516e-05, "loss": 8.7569, "step": 5121000 }, { "epoch": 40.97, "learning_rate": 2.9514000000000002e-05, "loss": 8.7477, "step": 5121500 }, { "epoch": 40.98, "learning_rate": 2.9512e-05, "loss": 8.7475, "step": 5122000 }, { "epoch": 40.98, "learning_rate": 2.951e-05, "loss": 8.7506, "step": 5122500 }, { "epoch": 40.98, "learning_rate": 2.9508000000000003e-05, "loss": 8.7522, "step": 5123000 }, { "epoch": 40.99, "learning_rate": 2.9506000000000002e-05, "loss": 8.7523, "step": 5123500 }, { "epoch": 40.99, "learning_rate": 2.9504000000000004e-05, "loss": 8.7166, "step": 5124000 }, { "epoch": 41.0, "learning_rate": 2.9502000000000003e-05, "loss": 8.7369, "step": 5124500 }, { "epoch": 41.0, "learning_rate": 2.95e-05, "loss": 8.7477, "step": 5125000 }, { "epoch": 41.0, "learning_rate": 2.9498000000000005e-05, "loss": 8.7372, "step": 5125500 }, { "epoch": 41.01, "learning_rate": 2.9496e-05, "loss": 8.7399, "step": 5126000 }, { "epoch": 41.01, "learning_rate": 2.9494e-05, "loss": 8.7453, "step": 5126500 }, { "epoch": 41.02, "learning_rate": 2.9492e-05, "loss": 8.7489, "step": 5127000 }, { "epoch": 41.02, "learning_rate": 2.949e-05, "loss": 8.7403, "step": 5127500 }, { "epoch": 41.02, "learning_rate": 2.9488e-05, "loss": 8.7424, "step": 5128000 }, { "epoch": 41.03, "learning_rate": 2.9486000000000002e-05, "loss": 8.7606, "step": 5128500 }, { "epoch": 41.03, "learning_rate": 2.9484e-05, "loss": 8.7361, "step": 5129000 }, { "epoch": 41.04, "learning_rate": 2.9482000000000003e-05, "loss": 8.7368, "step": 5129500 }, { "epoch": 41.04, "learning_rate": 2.9480000000000002e-05, "loss": 8.7362, "step": 5130000 }, { "epoch": 41.04, "learning_rate": 2.9478e-05, "loss": 8.7485, "step": 5130500 }, { "epoch": 41.05, "learning_rate": 2.9476000000000004e-05, "loss": 8.7467, "step": 5131000 }, { "epoch": 41.05, "learning_rate": 2.9474000000000003e-05, "loss": 8.7366, "step": 5131500 }, { "epoch": 41.06, "learning_rate": 2.9472e-05, "loss": 8.7634, "step": 5132000 }, { "epoch": 41.06, "learning_rate": 2.9470000000000004e-05, "loss": 8.7401, "step": 5132500 }, { "epoch": 41.06, "learning_rate": 2.9468e-05, "loss": 8.7475, "step": 5133000 }, { "epoch": 41.07, "learning_rate": 2.9466e-05, "loss": 8.7284, "step": 5133500 }, { "epoch": 41.07, "learning_rate": 2.9464e-05, "loss": 8.7558, "step": 5134000 }, { "epoch": 41.08, "learning_rate": 2.9462e-05, "loss": 8.7659, "step": 5134500 }, { "epoch": 41.08, "learning_rate": 2.946e-05, "loss": 8.7443, "step": 5135000 }, { "epoch": 41.08, "learning_rate": 2.9458e-05, "loss": 8.7659, "step": 5135500 }, { "epoch": 41.09, "learning_rate": 2.9456e-05, "loss": 8.75, "step": 5136000 }, { "epoch": 41.09, "learning_rate": 2.9454000000000003e-05, "loss": 8.7427, "step": 5136500 }, { "epoch": 41.1, "learning_rate": 2.9452000000000002e-05, "loss": 8.7646, "step": 5137000 }, { "epoch": 41.1, "learning_rate": 2.945e-05, "loss": 8.7516, "step": 5137500 }, { "epoch": 41.1, "learning_rate": 2.9448000000000003e-05, "loss": 8.7424, "step": 5138000 }, { "epoch": 41.11, "learning_rate": 2.9446000000000002e-05, "loss": 8.7429, "step": 5138500 }, { "epoch": 41.11, "learning_rate": 2.9443999999999998e-05, "loss": 8.7378, "step": 5139000 }, { "epoch": 41.12, "learning_rate": 2.9442000000000004e-05, "loss": 8.7476, "step": 5139500 }, { "epoch": 41.12, "learning_rate": 2.944e-05, "loss": 8.7383, "step": 5140000 }, { "epoch": 41.12, "learning_rate": 2.9438e-05, "loss": 8.727, "step": 5140500 }, { "epoch": 41.13, "learning_rate": 2.9436000000000004e-05, "loss": 8.7539, "step": 5141000 }, { "epoch": 41.13, "learning_rate": 2.9434e-05, "loss": 8.7327, "step": 5141500 }, { "epoch": 41.14, "learning_rate": 2.9432000000000006e-05, "loss": 8.7298, "step": 5142000 }, { "epoch": 41.14, "learning_rate": 2.943e-05, "loss": 8.7547, "step": 5142500 }, { "epoch": 41.14, "learning_rate": 2.9428e-05, "loss": 8.7754, "step": 5143000 }, { "epoch": 41.15, "learning_rate": 2.9426000000000003e-05, "loss": 8.7467, "step": 5143500 }, { "epoch": 41.15, "learning_rate": 2.9424e-05, "loss": 8.7479, "step": 5144000 }, { "epoch": 41.16, "learning_rate": 2.9422e-05, "loss": 8.7595, "step": 5144500 }, { "epoch": 41.16, "learning_rate": 2.9420000000000003e-05, "loss": 8.7508, "step": 5145000 }, { "epoch": 41.16, "learning_rate": 2.9418000000000002e-05, "loss": 8.751, "step": 5145500 }, { "epoch": 41.17, "learning_rate": 2.9415999999999998e-05, "loss": 8.7441, "step": 5146000 }, { "epoch": 41.17, "learning_rate": 2.9414000000000003e-05, "loss": 8.7359, "step": 5146500 }, { "epoch": 41.18, "learning_rate": 2.9412000000000002e-05, "loss": 8.7659, "step": 5147000 }, { "epoch": 41.18, "learning_rate": 2.9409999999999998e-05, "loss": 8.7331, "step": 5147500 }, { "epoch": 41.18, "learning_rate": 2.9408000000000004e-05, "loss": 8.7317, "step": 5148000 }, { "epoch": 41.19, "learning_rate": 2.9406e-05, "loss": 8.7618, "step": 5148500 }, { "epoch": 41.19, "learning_rate": 2.9404000000000005e-05, "loss": 8.7403, "step": 5149000 }, { "epoch": 41.2, "learning_rate": 2.9402e-05, "loss": 8.7397, "step": 5149500 }, { "epoch": 41.2, "learning_rate": 2.94e-05, "loss": 8.7402, "step": 5150000 }, { "epoch": 41.2, "learning_rate": 2.9398000000000002e-05, "loss": 8.7297, "step": 5150500 }, { "epoch": 41.21, "learning_rate": 2.9396e-05, "loss": 8.7221, "step": 5151000 }, { "epoch": 41.21, "learning_rate": 2.9394e-05, "loss": 8.7213, "step": 5151500 }, { "epoch": 41.22, "learning_rate": 2.9392000000000003e-05, "loss": 8.7582, "step": 5152000 }, { "epoch": 41.22, "learning_rate": 2.939e-05, "loss": 8.7276, "step": 5152500 }, { "epoch": 41.22, "learning_rate": 2.9388e-05, "loss": 8.746, "step": 5153000 }, { "epoch": 41.23, "learning_rate": 2.9386000000000003e-05, "loss": 8.7626, "step": 5153500 }, { "epoch": 41.23, "learning_rate": 2.9384000000000002e-05, "loss": 8.7627, "step": 5154000 }, { "epoch": 41.24, "learning_rate": 2.9382000000000004e-05, "loss": 8.7424, "step": 5154500 }, { "epoch": 41.24, "learning_rate": 2.9380000000000003e-05, "loss": 8.7539, "step": 5155000 }, { "epoch": 41.24, "learning_rate": 2.9378e-05, "loss": 8.7658, "step": 5155500 }, { "epoch": 41.25, "learning_rate": 2.9376000000000005e-05, "loss": 8.7556, "step": 5156000 }, { "epoch": 41.25, "learning_rate": 2.9374e-05, "loss": 8.7403, "step": 5156500 }, { "epoch": 41.26, "learning_rate": 2.9372e-05, "loss": 8.7448, "step": 5157000 }, { "epoch": 41.26, "learning_rate": 2.9370000000000002e-05, "loss": 8.754, "step": 5157500 }, { "epoch": 41.26, "learning_rate": 2.9368e-05, "loss": 8.734, "step": 5158000 }, { "epoch": 41.27, "learning_rate": 2.9366e-05, "loss": 8.7455, "step": 5158500 }, { "epoch": 41.27, "learning_rate": 2.9364000000000002e-05, "loss": 8.7515, "step": 5159000 }, { "epoch": 41.28, "learning_rate": 2.9362e-05, "loss": 8.7513, "step": 5159500 }, { "epoch": 41.28, "learning_rate": 2.9360000000000003e-05, "loss": 8.758, "step": 5160000 }, { "epoch": 41.28, "learning_rate": 2.9358000000000003e-05, "loss": 8.7455, "step": 5160500 }, { "epoch": 41.29, "learning_rate": 2.9356e-05, "loss": 8.7554, "step": 5161000 }, { "epoch": 41.29, "learning_rate": 2.9354000000000004e-05, "loss": 8.7549, "step": 5161500 }, { "epoch": 41.3, "learning_rate": 2.9352000000000003e-05, "loss": 8.7527, "step": 5162000 }, { "epoch": 41.3, "learning_rate": 2.935e-05, "loss": 8.7614, "step": 5162500 }, { "epoch": 41.3, "learning_rate": 2.9348000000000004e-05, "loss": 8.765, "step": 5163000 }, { "epoch": 41.31, "learning_rate": 2.9346e-05, "loss": 8.7392, "step": 5163500 }, { "epoch": 41.31, "learning_rate": 2.9344e-05, "loss": 8.753, "step": 5164000 }, { "epoch": 41.32, "learning_rate": 2.9342e-05, "loss": 8.7588, "step": 5164500 }, { "epoch": 41.32, "learning_rate": 2.934e-05, "loss": 8.7458, "step": 5165000 }, { "epoch": 41.32, "learning_rate": 2.9338e-05, "loss": 8.7649, "step": 5165500 }, { "epoch": 41.33, "learning_rate": 2.9336000000000002e-05, "loss": 8.7574, "step": 5166000 }, { "epoch": 41.33, "learning_rate": 2.9334e-05, "loss": 8.7475, "step": 5166500 }, { "epoch": 41.34, "learning_rate": 2.9332000000000003e-05, "loss": 8.7439, "step": 5167000 }, { "epoch": 41.34, "learning_rate": 2.9330000000000002e-05, "loss": 8.7311, "step": 5167500 }, { "epoch": 41.34, "learning_rate": 2.9328e-05, "loss": 8.7669, "step": 5168000 }, { "epoch": 41.35, "learning_rate": 2.9326000000000003e-05, "loss": 8.7418, "step": 5168500 }, { "epoch": 41.35, "learning_rate": 2.9324000000000002e-05, "loss": 8.7509, "step": 5169000 }, { "epoch": 41.36, "learning_rate": 2.9321999999999998e-05, "loss": 8.7581, "step": 5169500 }, { "epoch": 41.36, "learning_rate": 2.9320000000000004e-05, "loss": 8.7518, "step": 5170000 }, { "epoch": 41.36, "learning_rate": 2.9318e-05, "loss": 8.7492, "step": 5170500 }, { "epoch": 41.37, "learning_rate": 2.9316e-05, "loss": 8.7481, "step": 5171000 }, { "epoch": 41.37, "learning_rate": 2.9314e-05, "loss": 8.7605, "step": 5171500 }, { "epoch": 41.38, "learning_rate": 2.9312e-05, "loss": 8.7657, "step": 5172000 }, { "epoch": 41.38, "learning_rate": 2.9310000000000006e-05, "loss": 8.7683, "step": 5172500 }, { "epoch": 41.38, "learning_rate": 2.9308e-05, "loss": 8.7439, "step": 5173000 }, { "epoch": 41.39, "learning_rate": 2.9306e-05, "loss": 8.7549, "step": 5173500 }, { "epoch": 41.39, "learning_rate": 2.9304000000000003e-05, "loss": 8.7327, "step": 5174000 }, { "epoch": 41.4, "learning_rate": 2.9302e-05, "loss": 8.7345, "step": 5174500 }, { "epoch": 41.4, "learning_rate": 2.93e-05, "loss": 8.76, "step": 5175000 }, { "epoch": 41.4, "learning_rate": 2.9298000000000003e-05, "loss": 8.75, "step": 5175500 }, { "epoch": 41.41, "learning_rate": 2.9296000000000002e-05, "loss": 8.7548, "step": 5176000 }, { "epoch": 41.41, "learning_rate": 2.9293999999999998e-05, "loss": 8.7563, "step": 5176500 }, { "epoch": 41.42, "learning_rate": 2.9292000000000003e-05, "loss": 8.7294, "step": 5177000 }, { "epoch": 41.42, "learning_rate": 2.929e-05, "loss": 8.7636, "step": 5177500 }, { "epoch": 41.42, "learning_rate": 2.9287999999999998e-05, "loss": 8.7386, "step": 5178000 }, { "epoch": 41.43, "learning_rate": 2.9286000000000004e-05, "loss": 8.7577, "step": 5178500 }, { "epoch": 41.43, "learning_rate": 2.9284e-05, "loss": 8.7786, "step": 5179000 }, { "epoch": 41.44, "learning_rate": 2.9282000000000005e-05, "loss": 8.7607, "step": 5179500 }, { "epoch": 41.44, "learning_rate": 2.928e-05, "loss": 8.7617, "step": 5180000 }, { "epoch": 41.44, "learning_rate": 2.9278e-05, "loss": 8.7357, "step": 5180500 }, { "epoch": 41.45, "learning_rate": 2.9276000000000002e-05, "loss": 8.7464, "step": 5181000 }, { "epoch": 41.45, "learning_rate": 2.9274e-05, "loss": 8.7453, "step": 5181500 }, { "epoch": 41.46, "learning_rate": 2.9272e-05, "loss": 8.7429, "step": 5182000 }, { "epoch": 41.46, "learning_rate": 2.9270000000000003e-05, "loss": 8.7689, "step": 5182500 }, { "epoch": 41.46, "learning_rate": 2.9268e-05, "loss": 8.7433, "step": 5183000 }, { "epoch": 41.47, "learning_rate": 2.9265999999999997e-05, "loss": 8.7615, "step": 5183500 }, { "epoch": 41.47, "learning_rate": 2.9264000000000003e-05, "loss": 8.7569, "step": 5184000 }, { "epoch": 41.48, "learning_rate": 2.9262000000000002e-05, "loss": 8.7515, "step": 5184500 }, { "epoch": 41.48, "learning_rate": 2.9260000000000004e-05, "loss": 8.7598, "step": 5185000 }, { "epoch": 41.48, "learning_rate": 2.9258000000000003e-05, "loss": 8.7549, "step": 5185500 }, { "epoch": 41.49, "learning_rate": 2.9256e-05, "loss": 8.7392, "step": 5186000 }, { "epoch": 41.49, "learning_rate": 2.9254000000000005e-05, "loss": 8.7468, "step": 5186500 }, { "epoch": 41.5, "learning_rate": 2.9252e-05, "loss": 8.7358, "step": 5187000 }, { "epoch": 41.5, "learning_rate": 2.925e-05, "loss": 8.7365, "step": 5187500 }, { "epoch": 41.5, "learning_rate": 2.9248000000000002e-05, "loss": 8.7439, "step": 5188000 }, { "epoch": 41.51, "learning_rate": 2.9246e-05, "loss": 8.7545, "step": 5188500 }, { "epoch": 41.51, "learning_rate": 2.9244e-05, "loss": 8.7371, "step": 5189000 }, { "epoch": 41.52, "learning_rate": 2.9242000000000002e-05, "loss": 8.754, "step": 5189500 }, { "epoch": 41.52, "learning_rate": 2.924e-05, "loss": 8.7447, "step": 5190000 }, { "epoch": 41.52, "learning_rate": 2.9238e-05, "loss": 8.7566, "step": 5190500 }, { "epoch": 41.53, "learning_rate": 2.9236000000000003e-05, "loss": 8.7427, "step": 5191000 }, { "epoch": 41.53, "learning_rate": 2.9234e-05, "loss": 8.7729, "step": 5191500 }, { "epoch": 41.54, "learning_rate": 2.9232000000000004e-05, "loss": 8.7625, "step": 5192000 }, { "epoch": 41.54, "learning_rate": 2.9230000000000003e-05, "loss": 8.7593, "step": 5192500 }, { "epoch": 41.54, "learning_rate": 2.9228e-05, "loss": 8.7465, "step": 5193000 }, { "epoch": 41.55, "learning_rate": 2.9226000000000004e-05, "loss": 8.7756, "step": 5193500 }, { "epoch": 41.55, "learning_rate": 2.9224e-05, "loss": 8.7533, "step": 5194000 }, { "epoch": 41.56, "learning_rate": 2.9222e-05, "loss": 8.7418, "step": 5194500 }, { "epoch": 41.56, "learning_rate": 2.922e-05, "loss": 8.7605, "step": 5195000 }, { "epoch": 41.56, "learning_rate": 2.9218e-05, "loss": 8.7566, "step": 5195500 }, { "epoch": 41.57, "learning_rate": 2.9216e-05, "loss": 8.7664, "step": 5196000 }, { "epoch": 41.57, "learning_rate": 2.9214000000000002e-05, "loss": 8.756, "step": 5196500 }, { "epoch": 41.58, "learning_rate": 2.9212e-05, "loss": 8.7434, "step": 5197000 }, { "epoch": 41.58, "learning_rate": 2.9210000000000003e-05, "loss": 8.7461, "step": 5197500 }, { "epoch": 41.58, "learning_rate": 2.9208000000000002e-05, "loss": 8.747, "step": 5198000 }, { "epoch": 41.59, "learning_rate": 2.9206e-05, "loss": 8.7316, "step": 5198500 }, { "epoch": 41.59, "learning_rate": 2.9204000000000004e-05, "loss": 8.7449, "step": 5199000 }, { "epoch": 41.6, "learning_rate": 2.9202000000000003e-05, "loss": 8.7568, "step": 5199500 }, { "epoch": 41.6, "learning_rate": 2.9199999999999998e-05, "loss": 8.7466, "step": 5200000 }, { "epoch": 41.6, "learning_rate": 2.9198000000000004e-05, "loss": 8.7487, "step": 5200500 }, { "epoch": 41.61, "learning_rate": 2.9196e-05, "loss": 8.7604, "step": 5201000 }, { "epoch": 41.61, "learning_rate": 2.9194e-05, "loss": 8.7658, "step": 5201500 }, { "epoch": 41.62, "learning_rate": 2.9192e-05, "loss": 8.7508, "step": 5202000 }, { "epoch": 41.62, "learning_rate": 2.919e-05, "loss": 8.7424, "step": 5202500 }, { "epoch": 41.62, "learning_rate": 2.9188e-05, "loss": 8.7345, "step": 5203000 }, { "epoch": 41.63, "learning_rate": 2.9186e-05, "loss": 8.7549, "step": 5203500 }, { "epoch": 41.63, "learning_rate": 2.9184e-05, "loss": 8.7611, "step": 5204000 }, { "epoch": 41.64, "learning_rate": 2.9182000000000003e-05, "loss": 8.741, "step": 5204500 }, { "epoch": 41.64, "learning_rate": 2.9180000000000002e-05, "loss": 8.7553, "step": 5205000 }, { "epoch": 41.64, "learning_rate": 2.9178e-05, "loss": 8.7322, "step": 5205500 }, { "epoch": 41.65, "learning_rate": 2.9176000000000003e-05, "loss": 8.7469, "step": 5206000 }, { "epoch": 41.65, "learning_rate": 2.9174000000000002e-05, "loss": 8.7455, "step": 5206500 }, { "epoch": 41.66, "learning_rate": 2.9171999999999998e-05, "loss": 8.7611, "step": 5207000 }, { "epoch": 41.66, "learning_rate": 2.9170000000000004e-05, "loss": 8.7628, "step": 5207500 }, { "epoch": 41.66, "learning_rate": 2.9168e-05, "loss": 8.76, "step": 5208000 }, { "epoch": 41.67, "learning_rate": 2.9165999999999998e-05, "loss": 8.7311, "step": 5208500 }, { "epoch": 41.67, "learning_rate": 2.9164000000000004e-05, "loss": 8.7501, "step": 5209000 }, { "epoch": 41.68, "learning_rate": 2.9162e-05, "loss": 8.7412, "step": 5209500 }, { "epoch": 41.68, "learning_rate": 2.9160000000000005e-05, "loss": 8.7357, "step": 5210000 }, { "epoch": 41.68, "learning_rate": 2.9158e-05, "loss": 8.7377, "step": 5210500 }, { "epoch": 41.69, "learning_rate": 2.9156e-05, "loss": 8.755, "step": 5211000 }, { "epoch": 41.69, "learning_rate": 2.9154000000000002e-05, "loss": 8.7651, "step": 5211500 }, { "epoch": 41.7, "learning_rate": 2.9152e-05, "loss": 8.777, "step": 5212000 }, { "epoch": 41.7, "learning_rate": 2.915e-05, "loss": 8.7655, "step": 5212500 }, { "epoch": 41.7, "learning_rate": 2.9148000000000003e-05, "loss": 8.7643, "step": 5213000 }, { "epoch": 41.71, "learning_rate": 2.9146000000000002e-05, "loss": 8.7651, "step": 5213500 }, { "epoch": 41.71, "learning_rate": 2.9143999999999997e-05, "loss": 8.7456, "step": 5214000 }, { "epoch": 41.72, "learning_rate": 2.9142000000000003e-05, "loss": 8.7401, "step": 5214500 }, { "epoch": 41.72, "learning_rate": 2.9140000000000002e-05, "loss": 8.7455, "step": 5215000 }, { "epoch": 41.72, "learning_rate": 2.9137999999999998e-05, "loss": 8.7453, "step": 5215500 }, { "epoch": 41.73, "learning_rate": 2.9136000000000004e-05, "loss": 8.7612, "step": 5216000 }, { "epoch": 41.73, "learning_rate": 2.9134e-05, "loss": 8.7525, "step": 5216500 }, { "epoch": 41.74, "learning_rate": 2.9132000000000005e-05, "loss": 8.7619, "step": 5217000 }, { "epoch": 41.74, "learning_rate": 2.913e-05, "loss": 8.7522, "step": 5217500 }, { "epoch": 41.74, "learning_rate": 2.9128e-05, "loss": 8.7513, "step": 5218000 }, { "epoch": 41.75, "learning_rate": 2.9126000000000002e-05, "loss": 8.735, "step": 5218500 }, { "epoch": 41.75, "learning_rate": 2.9124e-05, "loss": 8.7575, "step": 5219000 }, { "epoch": 41.76, "learning_rate": 2.9122e-05, "loss": 8.7254, "step": 5219500 }, { "epoch": 41.76, "learning_rate": 2.9120000000000002e-05, "loss": 8.7473, "step": 5220000 }, { "epoch": 41.76, "learning_rate": 2.9118e-05, "loss": 8.739, "step": 5220500 }, { "epoch": 41.77, "learning_rate": 2.9116e-05, "loss": 8.7301, "step": 5221000 }, { "epoch": 41.77, "learning_rate": 2.9114000000000003e-05, "loss": 8.7388, "step": 5221500 }, { "epoch": 41.78, "learning_rate": 2.9112000000000002e-05, "loss": 8.7345, "step": 5222000 }, { "epoch": 41.78, "learning_rate": 2.9110000000000004e-05, "loss": 8.7821, "step": 5222500 }, { "epoch": 41.78, "learning_rate": 2.9108000000000003e-05, "loss": 8.7426, "step": 5223000 }, { "epoch": 41.79, "learning_rate": 2.9106e-05, "loss": 8.7483, "step": 5223500 }, { "epoch": 41.79, "learning_rate": 2.9104000000000005e-05, "loss": 8.7397, "step": 5224000 }, { "epoch": 41.8, "learning_rate": 2.9102e-05, "loss": 8.7404, "step": 5224500 }, { "epoch": 41.8, "learning_rate": 2.91e-05, "loss": 8.7392, "step": 5225000 }, { "epoch": 41.8, "learning_rate": 2.9098e-05, "loss": 8.7562, "step": 5225500 }, { "epoch": 41.81, "learning_rate": 2.9096e-05, "loss": 8.7394, "step": 5226000 }, { "epoch": 41.81, "learning_rate": 2.9094e-05, "loss": 8.7531, "step": 5226500 }, { "epoch": 41.82, "learning_rate": 2.9092000000000002e-05, "loss": 8.7547, "step": 5227000 }, { "epoch": 41.82, "learning_rate": 2.909e-05, "loss": 8.7371, "step": 5227500 }, { "epoch": 41.82, "learning_rate": 2.9088000000000003e-05, "loss": 8.7319, "step": 5228000 }, { "epoch": 41.83, "learning_rate": 2.9086000000000002e-05, "loss": 8.7354, "step": 5228500 }, { "epoch": 41.83, "learning_rate": 2.9084e-05, "loss": 8.764, "step": 5229000 }, { "epoch": 41.84, "learning_rate": 2.9082000000000004e-05, "loss": 8.7545, "step": 5229500 }, { "epoch": 41.84, "learning_rate": 2.9080000000000003e-05, "loss": 8.7354, "step": 5230000 }, { "epoch": 41.84, "learning_rate": 2.9078e-05, "loss": 8.7684, "step": 5230500 }, { "epoch": 41.85, "learning_rate": 2.9076000000000004e-05, "loss": 8.7591, "step": 5231000 }, { "epoch": 41.85, "learning_rate": 2.9074e-05, "loss": 8.7317, "step": 5231500 }, { "epoch": 41.86, "learning_rate": 2.9072e-05, "loss": 8.768, "step": 5232000 }, { "epoch": 41.86, "learning_rate": 2.907e-05, "loss": 8.7335, "step": 5232500 }, { "epoch": 41.86, "learning_rate": 2.9068e-05, "loss": 8.75, "step": 5233000 }, { "epoch": 41.87, "learning_rate": 2.9066e-05, "loss": 8.7467, "step": 5233500 }, { "epoch": 41.87, "learning_rate": 2.9064e-05, "loss": 8.7684, "step": 5234000 }, { "epoch": 41.88, "learning_rate": 2.9062e-05, "loss": 8.7475, "step": 5234500 }, { "epoch": 41.88, "learning_rate": 2.9060000000000003e-05, "loss": 8.7555, "step": 5235000 }, { "epoch": 41.88, "learning_rate": 2.9058000000000002e-05, "loss": 8.7614, "step": 5235500 }, { "epoch": 41.89, "learning_rate": 2.9056e-05, "loss": 8.7459, "step": 5236000 }, { "epoch": 41.89, "learning_rate": 2.9054000000000003e-05, "loss": 8.7522, "step": 5236500 }, { "epoch": 41.9, "learning_rate": 2.9052000000000002e-05, "loss": 8.7685, "step": 5237000 }, { "epoch": 41.9, "learning_rate": 2.9049999999999998e-05, "loss": 8.7407, "step": 5237500 }, { "epoch": 41.9, "learning_rate": 2.9048000000000004e-05, "loss": 8.7357, "step": 5238000 }, { "epoch": 41.91, "learning_rate": 2.9046e-05, "loss": 8.7572, "step": 5238500 }, { "epoch": 41.91, "learning_rate": 2.9044e-05, "loss": 8.7281, "step": 5239000 }, { "epoch": 41.92, "learning_rate": 2.9042000000000004e-05, "loss": 8.7542, "step": 5239500 }, { "epoch": 41.92, "learning_rate": 2.904e-05, "loss": 8.7479, "step": 5240000 }, { "epoch": 41.92, "learning_rate": 2.9038000000000006e-05, "loss": 8.7503, "step": 5240500 }, { "epoch": 41.93, "learning_rate": 2.9036e-05, "loss": 8.7634, "step": 5241000 }, { "epoch": 41.93, "learning_rate": 2.9034e-05, "loss": 8.7446, "step": 5241500 }, { "epoch": 41.94, "learning_rate": 2.9032000000000003e-05, "loss": 8.7615, "step": 5242000 }, { "epoch": 41.94, "learning_rate": 2.903e-05, "loss": 8.7514, "step": 5242500 }, { "epoch": 41.94, "learning_rate": 2.9028e-05, "loss": 8.7586, "step": 5243000 }, { "epoch": 41.95, "learning_rate": 2.9026000000000003e-05, "loss": 8.7486, "step": 5243500 }, { "epoch": 41.95, "learning_rate": 2.9024000000000002e-05, "loss": 8.7408, "step": 5244000 }, { "epoch": 41.96, "learning_rate": 2.9021999999999998e-05, "loss": 8.7438, "step": 5244500 }, { "epoch": 41.96, "learning_rate": 2.9020000000000003e-05, "loss": 8.7433, "step": 5245000 }, { "epoch": 41.96, "learning_rate": 2.9018000000000002e-05, "loss": 8.7446, "step": 5245500 }, { "epoch": 41.97, "learning_rate": 2.9015999999999998e-05, "loss": 8.7422, "step": 5246000 }, { "epoch": 41.97, "learning_rate": 2.9014000000000004e-05, "loss": 8.7594, "step": 5246500 }, { "epoch": 41.98, "learning_rate": 2.9012e-05, "loss": 8.7776, "step": 5247000 }, { "epoch": 41.98, "learning_rate": 2.9010000000000005e-05, "loss": 8.7384, "step": 5247500 }, { "epoch": 41.98, "learning_rate": 2.9008e-05, "loss": 8.7528, "step": 5248000 }, { "epoch": 41.99, "learning_rate": 2.9006e-05, "loss": 8.7713, "step": 5248500 }, { "epoch": 41.99, "learning_rate": 2.9004000000000002e-05, "loss": 8.7549, "step": 5249000 }, { "epoch": 42.0, "learning_rate": 2.9002e-05, "loss": 8.7718, "step": 5249500 }, { "epoch": 42.0, "learning_rate": 2.9e-05, "loss": 8.7475, "step": 5250000 }, { "epoch": 42.0, "learning_rate": 2.8998000000000003e-05, "loss": 8.7482, "step": 5250500 }, { "epoch": 42.01, "learning_rate": 2.8996e-05, "loss": 8.7563, "step": 5251000 }, { "epoch": 42.01, "learning_rate": 2.8994e-05, "loss": 8.7415, "step": 5251500 }, { "epoch": 42.02, "learning_rate": 2.8992000000000003e-05, "loss": 8.7348, "step": 5252000 }, { "epoch": 42.02, "learning_rate": 2.8990000000000002e-05, "loss": 8.7418, "step": 5252500 }, { "epoch": 42.02, "learning_rate": 2.8988000000000004e-05, "loss": 8.7594, "step": 5253000 }, { "epoch": 42.03, "learning_rate": 2.8986000000000003e-05, "loss": 8.738, "step": 5253500 }, { "epoch": 42.03, "learning_rate": 2.8984e-05, "loss": 8.764, "step": 5254000 }, { "epoch": 42.04, "learning_rate": 2.8982000000000005e-05, "loss": 8.7543, "step": 5254500 }, { "epoch": 42.04, "learning_rate": 2.898e-05, "loss": 8.7594, "step": 5255000 }, { "epoch": 42.04, "learning_rate": 2.8978e-05, "loss": 8.7562, "step": 5255500 }, { "epoch": 42.05, "learning_rate": 2.8976000000000002e-05, "loss": 8.7392, "step": 5256000 }, { "epoch": 42.05, "learning_rate": 2.8974e-05, "loss": 8.7419, "step": 5256500 }, { "epoch": 42.06, "learning_rate": 2.8972e-05, "loss": 8.7453, "step": 5257000 }, { "epoch": 42.06, "learning_rate": 2.8970000000000002e-05, "loss": 8.7567, "step": 5257500 }, { "epoch": 42.06, "learning_rate": 2.8968e-05, "loss": 8.7413, "step": 5258000 }, { "epoch": 42.07, "learning_rate": 2.8966e-05, "loss": 8.7743, "step": 5258500 }, { "epoch": 42.07, "learning_rate": 2.8964000000000003e-05, "loss": 8.7528, "step": 5259000 }, { "epoch": 42.08, "learning_rate": 2.8962e-05, "loss": 8.7313, "step": 5259500 }, { "epoch": 42.08, "learning_rate": 2.8960000000000004e-05, "loss": 8.7458, "step": 5260000 }, { "epoch": 42.08, "learning_rate": 2.8958000000000003e-05, "loss": 8.7473, "step": 5260500 }, { "epoch": 42.09, "learning_rate": 2.8956e-05, "loss": 8.7443, "step": 5261000 }, { "epoch": 42.09, "learning_rate": 2.8954000000000004e-05, "loss": 8.7618, "step": 5261500 }, { "epoch": 42.1, "learning_rate": 2.8952e-05, "loss": 8.74, "step": 5262000 }, { "epoch": 42.1, "learning_rate": 2.895e-05, "loss": 8.7473, "step": 5262500 }, { "epoch": 42.1, "learning_rate": 2.8948e-05, "loss": 8.7649, "step": 5263000 }, { "epoch": 42.11, "learning_rate": 2.8946e-05, "loss": 8.7388, "step": 5263500 }, { "epoch": 42.11, "learning_rate": 2.8944e-05, "loss": 8.7401, "step": 5264000 }, { "epoch": 42.12, "learning_rate": 2.8942000000000002e-05, "loss": 8.7441, "step": 5264500 }, { "epoch": 42.12, "learning_rate": 2.894e-05, "loss": 8.7554, "step": 5265000 }, { "epoch": 42.12, "learning_rate": 2.8938000000000003e-05, "loss": 8.7577, "step": 5265500 }, { "epoch": 42.13, "learning_rate": 2.8936000000000002e-05, "loss": 8.7511, "step": 5266000 }, { "epoch": 42.13, "learning_rate": 2.8934e-05, "loss": 8.7364, "step": 5266500 }, { "epoch": 42.14, "learning_rate": 2.8932000000000004e-05, "loss": 8.7579, "step": 5267000 }, { "epoch": 42.14, "learning_rate": 2.8930000000000003e-05, "loss": 8.7487, "step": 5267500 }, { "epoch": 42.14, "learning_rate": 2.8927999999999998e-05, "loss": 8.7518, "step": 5268000 }, { "epoch": 42.15, "learning_rate": 2.8926000000000004e-05, "loss": 8.7439, "step": 5268500 }, { "epoch": 42.15, "learning_rate": 2.8924e-05, "loss": 8.7323, "step": 5269000 }, { "epoch": 42.16, "learning_rate": 2.8922e-05, "loss": 8.756, "step": 5269500 }, { "epoch": 42.16, "learning_rate": 2.8920000000000004e-05, "loss": 8.7538, "step": 5270000 }, { "epoch": 42.16, "learning_rate": 2.8918e-05, "loss": 8.7593, "step": 5270500 }, { "epoch": 42.17, "learning_rate": 2.8916e-05, "loss": 8.741, "step": 5271000 }, { "epoch": 42.17, "learning_rate": 2.8914e-05, "loss": 8.7458, "step": 5271500 }, { "epoch": 42.18, "learning_rate": 2.8912e-05, "loss": 8.7416, "step": 5272000 }, { "epoch": 42.18, "learning_rate": 2.8910000000000003e-05, "loss": 8.7345, "step": 5272500 }, { "epoch": 42.18, "learning_rate": 2.8908000000000002e-05, "loss": 8.7677, "step": 5273000 }, { "epoch": 42.19, "learning_rate": 2.8906e-05, "loss": 8.728, "step": 5273500 }, { "epoch": 42.19, "learning_rate": 2.8904000000000003e-05, "loss": 8.7219, "step": 5274000 }, { "epoch": 42.2, "learning_rate": 2.8902000000000002e-05, "loss": 8.7384, "step": 5274500 }, { "epoch": 42.2, "learning_rate": 2.8899999999999998e-05, "loss": 8.7399, "step": 5275000 }, { "epoch": 42.2, "learning_rate": 2.8898000000000004e-05, "loss": 8.7402, "step": 5275500 }, { "epoch": 42.21, "learning_rate": 2.8896000000000003e-05, "loss": 8.7622, "step": 5276000 }, { "epoch": 42.21, "learning_rate": 2.8893999999999998e-05, "loss": 8.7576, "step": 5276500 }, { "epoch": 42.22, "learning_rate": 2.8892000000000004e-05, "loss": 8.7458, "step": 5277000 }, { "epoch": 42.22, "learning_rate": 2.889e-05, "loss": 8.7549, "step": 5277500 }, { "epoch": 42.22, "learning_rate": 2.8888000000000005e-05, "loss": 8.7424, "step": 5278000 }, { "epoch": 42.23, "learning_rate": 2.8886e-05, "loss": 8.7541, "step": 5278500 }, { "epoch": 42.23, "learning_rate": 2.8884e-05, "loss": 8.7314, "step": 5279000 }, { "epoch": 42.24, "learning_rate": 2.8882000000000002e-05, "loss": 8.7536, "step": 5279500 }, { "epoch": 42.24, "learning_rate": 2.888e-05, "loss": 8.7545, "step": 5280000 }, { "epoch": 42.24, "learning_rate": 2.8878e-05, "loss": 8.7425, "step": 5280500 }, { "epoch": 42.25, "learning_rate": 2.8876000000000003e-05, "loss": 8.7414, "step": 5281000 }, { "epoch": 42.25, "learning_rate": 2.8874000000000002e-05, "loss": 8.7579, "step": 5281500 }, { "epoch": 42.26, "learning_rate": 2.8872e-05, "loss": 8.7539, "step": 5282000 }, { "epoch": 42.26, "learning_rate": 2.8870000000000003e-05, "loss": 8.7471, "step": 5282500 }, { "epoch": 42.26, "learning_rate": 2.8868000000000002e-05, "loss": 8.7663, "step": 5283000 }, { "epoch": 42.27, "learning_rate": 2.8866000000000005e-05, "loss": 8.7502, "step": 5283500 }, { "epoch": 42.27, "learning_rate": 2.8864000000000004e-05, "loss": 8.7577, "step": 5284000 }, { "epoch": 42.28, "learning_rate": 2.8862e-05, "loss": 8.7446, "step": 5284500 }, { "epoch": 42.28, "learning_rate": 2.8860000000000005e-05, "loss": 8.7369, "step": 5285000 }, { "epoch": 42.28, "learning_rate": 2.8858e-05, "loss": 8.7502, "step": 5285500 }, { "epoch": 42.29, "learning_rate": 2.8856e-05, "loss": 8.7388, "step": 5286000 }, { "epoch": 42.29, "learning_rate": 2.8854000000000002e-05, "loss": 8.7629, "step": 5286500 }, { "epoch": 42.3, "learning_rate": 2.8852e-05, "loss": 8.7465, "step": 5287000 }, { "epoch": 42.3, "learning_rate": 2.885e-05, "loss": 8.7568, "step": 5287500 }, { "epoch": 42.3, "learning_rate": 2.8848000000000002e-05, "loss": 8.7466, "step": 5288000 }, { "epoch": 42.31, "learning_rate": 2.8846e-05, "loss": 8.7394, "step": 5288500 }, { "epoch": 42.31, "learning_rate": 2.8844e-05, "loss": 8.772, "step": 5289000 }, { "epoch": 42.32, "learning_rate": 2.8842000000000003e-05, "loss": 8.7608, "step": 5289500 }, { "epoch": 42.32, "learning_rate": 2.8840000000000002e-05, "loss": 8.7411, "step": 5290000 }, { "epoch": 42.32, "learning_rate": 2.8838000000000004e-05, "loss": 8.759, "step": 5290500 }, { "epoch": 42.33, "learning_rate": 2.8836000000000003e-05, "loss": 8.7662, "step": 5291000 }, { "epoch": 42.33, "learning_rate": 2.8834e-05, "loss": 8.749, "step": 5291500 }, { "epoch": 42.34, "learning_rate": 2.8832000000000004e-05, "loss": 8.733, "step": 5292000 }, { "epoch": 42.34, "learning_rate": 2.883e-05, "loss": 8.752, "step": 5292500 }, { "epoch": 42.34, "learning_rate": 2.8828e-05, "loss": 8.7675, "step": 5293000 }, { "epoch": 42.35, "learning_rate": 2.8826e-05, "loss": 8.7431, "step": 5293500 }, { "epoch": 42.35, "learning_rate": 2.8824e-05, "loss": 8.7304, "step": 5294000 }, { "epoch": 42.36, "learning_rate": 2.8822e-05, "loss": 8.7311, "step": 5294500 }, { "epoch": 42.36, "learning_rate": 2.8820000000000002e-05, "loss": 8.7507, "step": 5295000 }, { "epoch": 42.36, "learning_rate": 2.8818e-05, "loss": 8.7534, "step": 5295500 }, { "epoch": 42.37, "learning_rate": 2.8816000000000003e-05, "loss": 8.7589, "step": 5296000 }, { "epoch": 42.37, "learning_rate": 2.8814000000000002e-05, "loss": 8.7571, "step": 5296500 }, { "epoch": 42.38, "learning_rate": 2.8812e-05, "loss": 8.7541, "step": 5297000 }, { "epoch": 42.38, "learning_rate": 2.8810000000000004e-05, "loss": 8.747, "step": 5297500 }, { "epoch": 42.38, "learning_rate": 2.8808000000000003e-05, "loss": 8.7649, "step": 5298000 }, { "epoch": 42.39, "learning_rate": 2.8805999999999998e-05, "loss": 8.7625, "step": 5298500 }, { "epoch": 42.39, "learning_rate": 2.8804000000000004e-05, "loss": 8.7581, "step": 5299000 }, { "epoch": 42.4, "learning_rate": 2.8802e-05, "loss": 8.7341, "step": 5299500 }, { "epoch": 42.4, "learning_rate": 2.88e-05, "loss": 8.7548, "step": 5300000 }, { "epoch": 42.4, "learning_rate": 2.8798e-05, "loss": 8.7445, "step": 5300500 }, { "epoch": 42.41, "learning_rate": 2.8796e-05, "loss": 8.7347, "step": 5301000 }, { "epoch": 42.41, "learning_rate": 2.8794e-05, "loss": 8.7726, "step": 5301500 }, { "epoch": 42.42, "learning_rate": 2.8792e-05, "loss": 8.754, "step": 5302000 }, { "epoch": 42.42, "learning_rate": 2.879e-05, "loss": 8.7642, "step": 5302500 }, { "epoch": 42.42, "learning_rate": 2.8788000000000003e-05, "loss": 8.7617, "step": 5303000 }, { "epoch": 42.43, "learning_rate": 2.8786000000000002e-05, "loss": 8.7558, "step": 5303500 }, { "epoch": 42.43, "learning_rate": 2.8784e-05, "loss": 8.7578, "step": 5304000 }, { "epoch": 42.44, "learning_rate": 2.8782000000000003e-05, "loss": 8.7592, "step": 5304500 }, { "epoch": 42.44, "learning_rate": 2.8780000000000002e-05, "loss": 8.7525, "step": 5305000 }, { "epoch": 42.44, "learning_rate": 2.8777999999999998e-05, "loss": 8.7456, "step": 5305500 }, { "epoch": 42.45, "learning_rate": 2.8776000000000004e-05, "loss": 8.7528, "step": 5306000 }, { "epoch": 42.45, "learning_rate": 2.8774e-05, "loss": 8.7218, "step": 5306500 }, { "epoch": 42.46, "learning_rate": 2.8771999999999998e-05, "loss": 8.7622, "step": 5307000 }, { "epoch": 42.46, "learning_rate": 2.8770000000000004e-05, "loss": 8.7578, "step": 5307500 }, { "epoch": 42.46, "learning_rate": 2.8768e-05, "loss": 8.7551, "step": 5308000 }, { "epoch": 42.47, "learning_rate": 2.8766000000000005e-05, "loss": 8.7387, "step": 5308500 }, { "epoch": 42.47, "learning_rate": 2.8764e-05, "loss": 8.731, "step": 5309000 }, { "epoch": 42.48, "learning_rate": 2.8762e-05, "loss": 8.7423, "step": 5309500 }, { "epoch": 42.48, "learning_rate": 2.8760000000000002e-05, "loss": 8.743, "step": 5310000 }, { "epoch": 42.48, "learning_rate": 2.8758e-05, "loss": 8.7546, "step": 5310500 }, { "epoch": 42.49, "learning_rate": 2.8756e-05, "loss": 8.7475, "step": 5311000 }, { "epoch": 42.49, "learning_rate": 2.8754000000000003e-05, "loss": 8.7512, "step": 5311500 }, { "epoch": 42.5, "learning_rate": 2.8752000000000002e-05, "loss": 8.7326, "step": 5312000 }, { "epoch": 42.5, "learning_rate": 2.8749999999999997e-05, "loss": 8.7603, "step": 5312500 }, { "epoch": 42.5, "learning_rate": 2.8748000000000003e-05, "loss": 8.7381, "step": 5313000 }, { "epoch": 42.51, "learning_rate": 2.8746000000000002e-05, "loss": 8.7539, "step": 5313500 }, { "epoch": 42.51, "learning_rate": 2.8743999999999998e-05, "loss": 8.7535, "step": 5314000 }, { "epoch": 42.52, "learning_rate": 2.8742000000000004e-05, "loss": 8.75, "step": 5314500 }, { "epoch": 42.52, "learning_rate": 2.874e-05, "loss": 8.7428, "step": 5315000 }, { "epoch": 42.52, "learning_rate": 2.8738000000000005e-05, "loss": 8.7477, "step": 5315500 }, { "epoch": 42.53, "learning_rate": 2.8736e-05, "loss": 8.7395, "step": 5316000 }, { "epoch": 42.53, "learning_rate": 2.8734e-05, "loss": 8.7462, "step": 5316500 }, { "epoch": 42.54, "learning_rate": 2.8732000000000002e-05, "loss": 8.7541, "step": 5317000 }, { "epoch": 42.54, "learning_rate": 2.873e-05, "loss": 8.7485, "step": 5317500 }, { "epoch": 42.54, "learning_rate": 2.8728e-05, "loss": 8.7501, "step": 5318000 }, { "epoch": 42.55, "learning_rate": 2.8726000000000002e-05, "loss": 8.7601, "step": 5318500 }, { "epoch": 42.55, "learning_rate": 2.8724e-05, "loss": 8.7559, "step": 5319000 }, { "epoch": 42.56, "learning_rate": 2.8722e-05, "loss": 8.7538, "step": 5319500 }, { "epoch": 42.56, "learning_rate": 2.8720000000000003e-05, "loss": 8.7439, "step": 5320000 }, { "epoch": 42.56, "learning_rate": 2.8718000000000002e-05, "loss": 8.746, "step": 5320500 }, { "epoch": 42.57, "learning_rate": 2.8716000000000004e-05, "loss": 8.7534, "step": 5321000 }, { "epoch": 42.57, "learning_rate": 2.8714000000000003e-05, "loss": 8.74, "step": 5321500 }, { "epoch": 42.58, "learning_rate": 2.8712e-05, "loss": 8.7273, "step": 5322000 }, { "epoch": 42.58, "learning_rate": 2.8710000000000005e-05, "loss": 8.75, "step": 5322500 }, { "epoch": 42.58, "learning_rate": 2.8708e-05, "loss": 8.7557, "step": 5323000 }, { "epoch": 42.59, "learning_rate": 2.8706e-05, "loss": 8.7428, "step": 5323500 }, { "epoch": 42.59, "learning_rate": 2.8704e-05, "loss": 8.7684, "step": 5324000 }, { "epoch": 42.6, "learning_rate": 2.8702e-05, "loss": 8.7631, "step": 5324500 }, { "epoch": 42.6, "learning_rate": 2.87e-05, "loss": 8.7374, "step": 5325000 }, { "epoch": 42.6, "learning_rate": 2.8698000000000002e-05, "loss": 8.7479, "step": 5325500 }, { "epoch": 42.61, "learning_rate": 2.8696e-05, "loss": 8.7467, "step": 5326000 }, { "epoch": 42.61, "learning_rate": 2.8694e-05, "loss": 8.746, "step": 5326500 }, { "epoch": 42.62, "learning_rate": 2.8692000000000002e-05, "loss": 8.7597, "step": 5327000 }, { "epoch": 42.62, "learning_rate": 2.869e-05, "loss": 8.7664, "step": 5327500 }, { "epoch": 42.62, "learning_rate": 2.8688000000000004e-05, "loss": 8.7333, "step": 5328000 }, { "epoch": 42.63, "learning_rate": 2.8686000000000003e-05, "loss": 8.7437, "step": 5328500 }, { "epoch": 42.63, "learning_rate": 2.8684e-05, "loss": 8.7552, "step": 5329000 }, { "epoch": 42.64, "learning_rate": 2.8682000000000004e-05, "loss": 8.771, "step": 5329500 }, { "epoch": 42.64, "learning_rate": 2.868e-05, "loss": 8.7714, "step": 5330000 }, { "epoch": 42.64, "learning_rate": 2.8678e-05, "loss": 8.7393, "step": 5330500 }, { "epoch": 42.65, "learning_rate": 2.8676e-05, "loss": 8.7482, "step": 5331000 }, { "epoch": 42.65, "learning_rate": 2.8674e-05, "loss": 8.7499, "step": 5331500 }, { "epoch": 42.66, "learning_rate": 2.8672e-05, "loss": 8.7393, "step": 5332000 }, { "epoch": 42.66, "learning_rate": 2.867e-05, "loss": 8.7396, "step": 5332500 }, { "epoch": 42.66, "learning_rate": 2.8668e-05, "loss": 8.7517, "step": 5333000 }, { "epoch": 42.67, "learning_rate": 2.8666000000000003e-05, "loss": 8.7523, "step": 5333500 }, { "epoch": 42.67, "learning_rate": 2.8664000000000002e-05, "loss": 8.7469, "step": 5334000 }, { "epoch": 42.68, "learning_rate": 2.8662e-05, "loss": 8.732, "step": 5334500 }, { "epoch": 42.68, "learning_rate": 2.8660000000000003e-05, "loss": 8.7367, "step": 5335000 }, { "epoch": 42.68, "learning_rate": 2.8658000000000002e-05, "loss": 8.7589, "step": 5335500 }, { "epoch": 42.69, "learning_rate": 2.8655999999999998e-05, "loss": 8.7536, "step": 5336000 }, { "epoch": 42.69, "learning_rate": 2.8654000000000004e-05, "loss": 8.7442, "step": 5336500 }, { "epoch": 42.7, "learning_rate": 2.8652e-05, "loss": 8.7337, "step": 5337000 }, { "epoch": 42.7, "learning_rate": 2.865e-05, "loss": 8.7527, "step": 5337500 }, { "epoch": 42.7, "learning_rate": 2.8648000000000004e-05, "loss": 8.7348, "step": 5338000 }, { "epoch": 42.71, "learning_rate": 2.8646e-05, "loss": 8.7424, "step": 5338500 }, { "epoch": 42.71, "learning_rate": 2.8644e-05, "loss": 8.7526, "step": 5339000 }, { "epoch": 42.72, "learning_rate": 2.8642e-05, "loss": 8.7739, "step": 5339500 }, { "epoch": 42.72, "learning_rate": 2.864e-05, "loss": 8.7705, "step": 5340000 }, { "epoch": 42.72, "learning_rate": 2.8638000000000003e-05, "loss": 8.7545, "step": 5340500 }, { "epoch": 42.73, "learning_rate": 2.8636e-05, "loss": 8.7562, "step": 5341000 }, { "epoch": 42.73, "learning_rate": 2.8634e-05, "loss": 8.7587, "step": 5341500 }, { "epoch": 42.74, "learning_rate": 2.8632000000000003e-05, "loss": 8.7391, "step": 5342000 }, { "epoch": 42.74, "learning_rate": 2.8630000000000002e-05, "loss": 8.7548, "step": 5342500 }, { "epoch": 42.74, "learning_rate": 2.8627999999999998e-05, "loss": 8.7524, "step": 5343000 }, { "epoch": 42.75, "learning_rate": 2.8626000000000003e-05, "loss": 8.7447, "step": 5343500 }, { "epoch": 42.75, "learning_rate": 2.8624000000000002e-05, "loss": 8.7477, "step": 5344000 }, { "epoch": 42.76, "learning_rate": 2.8621999999999998e-05, "loss": 8.7559, "step": 5344500 }, { "epoch": 42.76, "learning_rate": 2.8620000000000004e-05, "loss": 8.7561, "step": 5345000 }, { "epoch": 42.76, "learning_rate": 2.8618e-05, "loss": 8.7571, "step": 5345500 }, { "epoch": 42.77, "learning_rate": 2.8616000000000005e-05, "loss": 8.747, "step": 5346000 }, { "epoch": 42.77, "learning_rate": 2.8614e-05, "loss": 8.7505, "step": 5346500 }, { "epoch": 42.78, "learning_rate": 2.8612e-05, "loss": 8.7585, "step": 5347000 }, { "epoch": 42.78, "learning_rate": 2.8610000000000002e-05, "loss": 8.747, "step": 5347500 }, { "epoch": 42.78, "learning_rate": 2.8608e-05, "loss": 8.7525, "step": 5348000 }, { "epoch": 42.79, "learning_rate": 2.8606e-05, "loss": 8.7857, "step": 5348500 }, { "epoch": 42.79, "learning_rate": 2.8604000000000003e-05, "loss": 8.7587, "step": 5349000 }, { "epoch": 42.8, "learning_rate": 2.8602e-05, "loss": 8.7344, "step": 5349500 }, { "epoch": 42.8, "learning_rate": 2.86e-05, "loss": 8.7549, "step": 5350000 }, { "epoch": 42.8, "learning_rate": 2.8598000000000003e-05, "loss": 8.7447, "step": 5350500 }, { "epoch": 42.81, "learning_rate": 2.8596000000000002e-05, "loss": 8.7389, "step": 5351000 }, { "epoch": 42.81, "learning_rate": 2.8594000000000004e-05, "loss": 8.7406, "step": 5351500 }, { "epoch": 42.82, "learning_rate": 2.8592000000000003e-05, "loss": 8.7411, "step": 5352000 }, { "epoch": 42.82, "learning_rate": 2.859e-05, "loss": 8.7527, "step": 5352500 }, { "epoch": 42.82, "learning_rate": 2.8588000000000005e-05, "loss": 8.7535, "step": 5353000 }, { "epoch": 42.83, "learning_rate": 2.8586e-05, "loss": 8.7648, "step": 5353500 }, { "epoch": 42.83, "learning_rate": 2.8584e-05, "loss": 8.7378, "step": 5354000 }, { "epoch": 42.84, "learning_rate": 2.8582000000000002e-05, "loss": 8.7386, "step": 5354500 }, { "epoch": 42.84, "learning_rate": 2.858e-05, "loss": 8.7867, "step": 5355000 }, { "epoch": 42.84, "learning_rate": 2.8578e-05, "loss": 8.7503, "step": 5355500 }, { "epoch": 42.85, "learning_rate": 2.8576000000000002e-05, "loss": 8.7374, "step": 5356000 }, { "epoch": 42.85, "learning_rate": 2.8574e-05, "loss": 8.7374, "step": 5356500 }, { "epoch": 42.86, "learning_rate": 2.8572e-05, "loss": 8.7514, "step": 5357000 }, { "epoch": 42.86, "learning_rate": 2.8570000000000003e-05, "loss": 8.7448, "step": 5357500 }, { "epoch": 42.86, "learning_rate": 2.8568e-05, "loss": 8.7558, "step": 5358000 }, { "epoch": 42.87, "learning_rate": 2.8566000000000004e-05, "loss": 8.7401, "step": 5358500 }, { "epoch": 42.87, "learning_rate": 2.8564000000000003e-05, "loss": 8.7517, "step": 5359000 }, { "epoch": 42.88, "learning_rate": 2.8562e-05, "loss": 8.7475, "step": 5359500 }, { "epoch": 42.88, "learning_rate": 2.8560000000000004e-05, "loss": 8.7384, "step": 5360000 }, { "epoch": 42.88, "learning_rate": 2.8558e-05, "loss": 8.7356, "step": 5360500 }, { "epoch": 42.89, "learning_rate": 2.8556e-05, "loss": 8.7426, "step": 5361000 }, { "epoch": 42.89, "learning_rate": 2.8554e-05, "loss": 8.7313, "step": 5361500 }, { "epoch": 42.9, "learning_rate": 2.8552e-05, "loss": 8.7349, "step": 5362000 }, { "epoch": 42.9, "learning_rate": 2.855e-05, "loss": 8.7505, "step": 5362500 }, { "epoch": 42.9, "learning_rate": 2.8548000000000002e-05, "loss": 8.7413, "step": 5363000 }, { "epoch": 42.91, "learning_rate": 2.8546e-05, "loss": 8.749, "step": 5363500 }, { "epoch": 42.91, "learning_rate": 2.8544000000000003e-05, "loss": 8.7499, "step": 5364000 }, { "epoch": 42.92, "learning_rate": 2.8542000000000002e-05, "loss": 8.746, "step": 5364500 }, { "epoch": 42.92, "learning_rate": 2.854e-05, "loss": 8.7449, "step": 5365000 }, { "epoch": 42.92, "learning_rate": 2.8538000000000004e-05, "loss": 8.7559, "step": 5365500 }, { "epoch": 42.93, "learning_rate": 2.8536000000000003e-05, "loss": 8.7492, "step": 5366000 }, { "epoch": 42.93, "learning_rate": 2.8533999999999998e-05, "loss": 8.752, "step": 5366500 }, { "epoch": 42.94, "learning_rate": 2.8532000000000004e-05, "loss": 8.7331, "step": 5367000 }, { "epoch": 42.94, "learning_rate": 2.853e-05, "loss": 8.7477, "step": 5367500 }, { "epoch": 42.94, "learning_rate": 2.8528e-05, "loss": 8.7407, "step": 5368000 }, { "epoch": 42.95, "learning_rate": 2.8526000000000004e-05, "loss": 8.7527, "step": 5368500 }, { "epoch": 42.95, "learning_rate": 2.8524e-05, "loss": 8.7426, "step": 5369000 }, { "epoch": 42.96, "learning_rate": 2.8522e-05, "loss": 8.7498, "step": 5369500 }, { "epoch": 42.96, "learning_rate": 2.852e-05, "loss": 8.7594, "step": 5370000 }, { "epoch": 42.96, "learning_rate": 2.8518e-05, "loss": 8.7346, "step": 5370500 }, { "epoch": 42.97, "learning_rate": 2.8516000000000003e-05, "loss": 8.7614, "step": 5371000 }, { "epoch": 42.97, "learning_rate": 2.8514000000000002e-05, "loss": 8.7406, "step": 5371500 }, { "epoch": 42.98, "learning_rate": 2.8512e-05, "loss": 8.7681, "step": 5372000 }, { "epoch": 42.98, "learning_rate": 2.8510000000000003e-05, "loss": 8.7402, "step": 5372500 }, { "epoch": 42.98, "learning_rate": 2.8508000000000002e-05, "loss": 8.7643, "step": 5373000 }, { "epoch": 42.99, "learning_rate": 2.8505999999999998e-05, "loss": 8.7345, "step": 5373500 }, { "epoch": 42.99, "learning_rate": 2.8504000000000004e-05, "loss": 8.7577, "step": 5374000 }, { "epoch": 43.0, "learning_rate": 2.8502000000000003e-05, "loss": 8.764, "step": 5374500 }, { "epoch": 43.0, "learning_rate": 2.8499999999999998e-05, "loss": 8.7486, "step": 5375000 }, { "epoch": 43.0, "learning_rate": 2.8498000000000004e-05, "loss": 8.7336, "step": 5375500 }, { "epoch": 43.01, "learning_rate": 2.8496e-05, "loss": 8.7403, "step": 5376000 }, { "epoch": 43.01, "learning_rate": 2.8494000000000005e-05, "loss": 8.7306, "step": 5376500 }, { "epoch": 43.02, "learning_rate": 2.8492e-05, "loss": 8.7597, "step": 5377000 }, { "epoch": 43.02, "learning_rate": 2.849e-05, "loss": 8.7522, "step": 5377500 }, { "epoch": 43.02, "learning_rate": 2.8488000000000002e-05, "loss": 8.7249, "step": 5378000 }, { "epoch": 43.03, "learning_rate": 2.8486e-05, "loss": 8.7439, "step": 5378500 }, { "epoch": 43.03, "learning_rate": 2.8484e-05, "loss": 8.7518, "step": 5379000 }, { "epoch": 43.04, "learning_rate": 2.8482000000000003e-05, "loss": 8.7537, "step": 5379500 }, { "epoch": 43.04, "learning_rate": 2.8480000000000002e-05, "loss": 8.7357, "step": 5380000 }, { "epoch": 43.04, "learning_rate": 2.8478e-05, "loss": 8.7472, "step": 5380500 }, { "epoch": 43.05, "learning_rate": 2.8476000000000003e-05, "loss": 8.7569, "step": 5381000 }, { "epoch": 43.05, "learning_rate": 2.8474000000000002e-05, "loss": 8.7491, "step": 5381500 }, { "epoch": 43.06, "learning_rate": 2.8471999999999998e-05, "loss": 8.7555, "step": 5382000 }, { "epoch": 43.06, "learning_rate": 2.8470000000000004e-05, "loss": 8.7402, "step": 5382500 }, { "epoch": 43.06, "learning_rate": 2.8468e-05, "loss": 8.7279, "step": 5383000 }, { "epoch": 43.07, "learning_rate": 2.8466000000000005e-05, "loss": 8.7577, "step": 5383500 }, { "epoch": 43.07, "learning_rate": 2.8464e-05, "loss": 8.7557, "step": 5384000 }, { "epoch": 43.08, "learning_rate": 2.8462e-05, "loss": 8.7334, "step": 5384500 }, { "epoch": 43.08, "learning_rate": 2.8460000000000002e-05, "loss": 8.762, "step": 5385000 }, { "epoch": 43.08, "learning_rate": 2.8458e-05, "loss": 8.7402, "step": 5385500 }, { "epoch": 43.09, "learning_rate": 2.8456e-05, "loss": 8.746, "step": 5386000 }, { "epoch": 43.09, "learning_rate": 2.8454000000000002e-05, "loss": 8.7625, "step": 5386500 }, { "epoch": 43.1, "learning_rate": 2.8452e-05, "loss": 8.7502, "step": 5387000 }, { "epoch": 43.1, "learning_rate": 2.845e-05, "loss": 8.7673, "step": 5387500 }, { "epoch": 43.1, "learning_rate": 2.8448000000000003e-05, "loss": 8.7293, "step": 5388000 }, { "epoch": 43.11, "learning_rate": 2.8446000000000002e-05, "loss": 8.7603, "step": 5388500 }, { "epoch": 43.11, "learning_rate": 2.8444000000000004e-05, "loss": 8.7621, "step": 5389000 }, { "epoch": 43.12, "learning_rate": 2.8442000000000003e-05, "loss": 8.7334, "step": 5389500 }, { "epoch": 43.12, "learning_rate": 2.844e-05, "loss": 8.7457, "step": 5390000 }, { "epoch": 43.12, "learning_rate": 2.8438000000000005e-05, "loss": 8.7668, "step": 5390500 }, { "epoch": 43.13, "learning_rate": 2.8436e-05, "loss": 8.7648, "step": 5391000 }, { "epoch": 43.13, "learning_rate": 2.8434e-05, "loss": 8.7555, "step": 5391500 }, { "epoch": 43.14, "learning_rate": 2.8432e-05, "loss": 8.7406, "step": 5392000 }, { "epoch": 43.14, "learning_rate": 2.843e-05, "loss": 8.7495, "step": 5392500 }, { "epoch": 43.14, "learning_rate": 2.8428e-05, "loss": 8.7545, "step": 5393000 }, { "epoch": 43.15, "learning_rate": 2.8426000000000002e-05, "loss": 8.7465, "step": 5393500 }, { "epoch": 43.15, "learning_rate": 2.8424e-05, "loss": 8.7539, "step": 5394000 }, { "epoch": 43.16, "learning_rate": 2.8422e-05, "loss": 8.7613, "step": 5394500 }, { "epoch": 43.16, "learning_rate": 2.8420000000000002e-05, "loss": 8.7443, "step": 5395000 }, { "epoch": 43.16, "learning_rate": 2.8418e-05, "loss": 8.7556, "step": 5395500 }, { "epoch": 43.17, "learning_rate": 2.8416000000000004e-05, "loss": 8.7497, "step": 5396000 }, { "epoch": 43.17, "learning_rate": 2.8414000000000003e-05, "loss": 8.7365, "step": 5396500 }, { "epoch": 43.18, "learning_rate": 2.8412e-05, "loss": 8.7451, "step": 5397000 }, { "epoch": 43.18, "learning_rate": 2.8410000000000004e-05, "loss": 8.754, "step": 5397500 }, { "epoch": 43.18, "learning_rate": 2.8408e-05, "loss": 8.75, "step": 5398000 }, { "epoch": 43.19, "learning_rate": 2.8406e-05, "loss": 8.7316, "step": 5398500 }, { "epoch": 43.19, "learning_rate": 2.8404000000000005e-05, "loss": 8.7479, "step": 5399000 }, { "epoch": 43.2, "learning_rate": 2.8402e-05, "loss": 8.7558, "step": 5399500 }, { "epoch": 43.2, "learning_rate": 2.84e-05, "loss": 8.7489, "step": 5400000 }, { "epoch": 43.2, "learning_rate": 2.8398e-05, "loss": 8.7589, "step": 5400500 }, { "epoch": 43.21, "learning_rate": 2.8396e-05, "loss": 8.7668, "step": 5401000 }, { "epoch": 43.21, "learning_rate": 2.8394000000000003e-05, "loss": 8.7401, "step": 5401500 }, { "epoch": 43.22, "learning_rate": 2.8392000000000002e-05, "loss": 8.7255, "step": 5402000 }, { "epoch": 43.22, "learning_rate": 2.839e-05, "loss": 8.7485, "step": 5402500 }, { "epoch": 43.22, "learning_rate": 2.8388000000000003e-05, "loss": 8.7645, "step": 5403000 }, { "epoch": 43.23, "learning_rate": 2.8386000000000002e-05, "loss": 8.7538, "step": 5403500 }, { "epoch": 43.23, "learning_rate": 2.8383999999999998e-05, "loss": 8.7392, "step": 5404000 }, { "epoch": 43.24, "learning_rate": 2.8382000000000004e-05, "loss": 8.7647, "step": 5404500 }, { "epoch": 43.24, "learning_rate": 2.8380000000000003e-05, "loss": 8.7501, "step": 5405000 }, { "epoch": 43.24, "learning_rate": 2.8378e-05, "loss": 8.7456, "step": 5405500 }, { "epoch": 43.25, "learning_rate": 2.8376000000000004e-05, "loss": 8.7379, "step": 5406000 }, { "epoch": 43.25, "learning_rate": 2.8374e-05, "loss": 8.7268, "step": 5406500 }, { "epoch": 43.26, "learning_rate": 2.8372000000000006e-05, "loss": 8.7481, "step": 5407000 }, { "epoch": 43.26, "learning_rate": 2.837e-05, "loss": 8.7357, "step": 5407500 }, { "epoch": 43.26, "learning_rate": 2.8368e-05, "loss": 8.7381, "step": 5408000 }, { "epoch": 43.27, "learning_rate": 2.8366000000000003e-05, "loss": 8.7575, "step": 5408500 }, { "epoch": 43.27, "learning_rate": 2.8364e-05, "loss": 8.7509, "step": 5409000 }, { "epoch": 43.28, "learning_rate": 2.8362e-05, "loss": 8.7717, "step": 5409500 }, { "epoch": 43.28, "learning_rate": 2.8360000000000003e-05, "loss": 8.7564, "step": 5410000 }, { "epoch": 43.28, "learning_rate": 2.8358000000000002e-05, "loss": 8.7603, "step": 5410500 }, { "epoch": 43.29, "learning_rate": 2.8356e-05, "loss": 8.7593, "step": 5411000 }, { "epoch": 43.29, "learning_rate": 2.8354000000000003e-05, "loss": 8.7328, "step": 5411500 }, { "epoch": 43.3, "learning_rate": 2.8352000000000002e-05, "loss": 8.7608, "step": 5412000 }, { "epoch": 43.3, "learning_rate": 2.8349999999999998e-05, "loss": 8.7745, "step": 5412500 }, { "epoch": 43.3, "learning_rate": 2.8348000000000004e-05, "loss": 8.7421, "step": 5413000 }, { "epoch": 43.31, "learning_rate": 2.8346e-05, "loss": 8.7473, "step": 5413500 }, { "epoch": 43.31, "learning_rate": 2.8344000000000005e-05, "loss": 8.7318, "step": 5414000 }, { "epoch": 43.32, "learning_rate": 2.8342e-05, "loss": 8.7436, "step": 5414500 }, { "epoch": 43.32, "learning_rate": 2.834e-05, "loss": 8.7547, "step": 5415000 }, { "epoch": 43.32, "learning_rate": 2.8338000000000002e-05, "loss": 8.7304, "step": 5415500 }, { "epoch": 43.33, "learning_rate": 2.8336e-05, "loss": 8.7454, "step": 5416000 }, { "epoch": 43.33, "learning_rate": 2.8334e-05, "loss": 8.7509, "step": 5416500 }, { "epoch": 43.34, "learning_rate": 2.8332000000000002e-05, "loss": 8.7514, "step": 5417000 }, { "epoch": 43.34, "learning_rate": 2.833e-05, "loss": 8.7453, "step": 5417500 }, { "epoch": 43.34, "learning_rate": 2.8328e-05, "loss": 8.7523, "step": 5418000 }, { "epoch": 43.35, "learning_rate": 2.8326000000000003e-05, "loss": 8.7431, "step": 5418500 }, { "epoch": 43.35, "learning_rate": 2.8324000000000002e-05, "loss": 8.7323, "step": 5419000 }, { "epoch": 43.36, "learning_rate": 2.8322000000000004e-05, "loss": 8.7574, "step": 5419500 }, { "epoch": 43.36, "learning_rate": 2.8320000000000003e-05, "loss": 8.7521, "step": 5420000 }, { "epoch": 43.36, "learning_rate": 2.8318e-05, "loss": 8.732, "step": 5420500 }, { "epoch": 43.37, "learning_rate": 2.8316000000000005e-05, "loss": 8.7456, "step": 5421000 }, { "epoch": 43.37, "learning_rate": 2.8314e-05, "loss": 8.7439, "step": 5421500 }, { "epoch": 43.38, "learning_rate": 2.8312e-05, "loss": 8.7591, "step": 5422000 }, { "epoch": 43.38, "learning_rate": 2.8310000000000002e-05, "loss": 8.7438, "step": 5422500 }, { "epoch": 43.38, "learning_rate": 2.8308e-05, "loss": 8.7174, "step": 5423000 }, { "epoch": 43.39, "learning_rate": 2.8306e-05, "loss": 8.7327, "step": 5423500 }, { "epoch": 43.39, "learning_rate": 2.8304000000000002e-05, "loss": 8.7465, "step": 5424000 }, { "epoch": 43.4, "learning_rate": 2.8302e-05, "loss": 8.7492, "step": 5424500 }, { "epoch": 43.4, "learning_rate": 2.83e-05, "loss": 8.7612, "step": 5425000 }, { "epoch": 43.4, "learning_rate": 2.8298000000000002e-05, "loss": 8.762, "step": 5425500 }, { "epoch": 43.41, "learning_rate": 2.8296e-05, "loss": 8.7435, "step": 5426000 }, { "epoch": 43.41, "learning_rate": 2.8294000000000004e-05, "loss": 8.7469, "step": 5426500 }, { "epoch": 43.42, "learning_rate": 2.8292000000000003e-05, "loss": 8.7388, "step": 5427000 }, { "epoch": 43.42, "learning_rate": 2.829e-05, "loss": 8.7388, "step": 5427500 }, { "epoch": 43.42, "learning_rate": 2.8288000000000004e-05, "loss": 8.7397, "step": 5428000 }, { "epoch": 43.43, "learning_rate": 2.8286e-05, "loss": 8.7518, "step": 5428500 }, { "epoch": 43.43, "learning_rate": 2.8284e-05, "loss": 8.7681, "step": 5429000 }, { "epoch": 43.44, "learning_rate": 2.8282e-05, "loss": 8.7556, "step": 5429500 }, { "epoch": 43.44, "learning_rate": 2.828e-05, "loss": 8.7471, "step": 5430000 }, { "epoch": 43.44, "learning_rate": 2.8278e-05, "loss": 8.7618, "step": 5430500 }, { "epoch": 43.45, "learning_rate": 2.8276e-05, "loss": 8.7462, "step": 5431000 }, { "epoch": 43.45, "learning_rate": 2.8274e-05, "loss": 8.757, "step": 5431500 }, { "epoch": 43.46, "learning_rate": 2.8272000000000003e-05, "loss": 8.7524, "step": 5432000 }, { "epoch": 43.46, "learning_rate": 2.8270000000000002e-05, "loss": 8.7559, "step": 5432500 }, { "epoch": 43.46, "learning_rate": 2.8268e-05, "loss": 8.7532, "step": 5433000 }, { "epoch": 43.47, "learning_rate": 2.8266000000000003e-05, "loss": 8.7512, "step": 5433500 }, { "epoch": 43.47, "learning_rate": 2.8264000000000002e-05, "loss": 8.7358, "step": 5434000 }, { "epoch": 43.48, "learning_rate": 2.8261999999999998e-05, "loss": 8.7496, "step": 5434500 }, { "epoch": 43.48, "learning_rate": 2.8260000000000004e-05, "loss": 8.76, "step": 5435000 }, { "epoch": 43.48, "learning_rate": 2.8258e-05, "loss": 8.728, "step": 5435500 }, { "epoch": 43.49, "learning_rate": 2.8256e-05, "loss": 8.7463, "step": 5436000 }, { "epoch": 43.49, "learning_rate": 2.8254000000000004e-05, "loss": 8.741, "step": 5436500 }, { "epoch": 43.5, "learning_rate": 2.8252e-05, "loss": 8.756, "step": 5437000 }, { "epoch": 43.5, "learning_rate": 2.825e-05, "loss": 8.7305, "step": 5437500 }, { "epoch": 43.5, "learning_rate": 2.8248e-05, "loss": 8.7525, "step": 5438000 }, { "epoch": 43.51, "learning_rate": 2.8246e-05, "loss": 8.7441, "step": 5438500 }, { "epoch": 43.51, "learning_rate": 2.8244000000000003e-05, "loss": 8.7339, "step": 5439000 }, { "epoch": 43.52, "learning_rate": 2.8242e-05, "loss": 8.7489, "step": 5439500 }, { "epoch": 43.52, "learning_rate": 2.824e-05, "loss": 8.7341, "step": 5440000 }, { "epoch": 43.52, "learning_rate": 2.8238000000000003e-05, "loss": 8.7409, "step": 5440500 }, { "epoch": 43.53, "learning_rate": 2.8236000000000002e-05, "loss": 8.7529, "step": 5441000 }, { "epoch": 43.53, "learning_rate": 2.8233999999999998e-05, "loss": 8.7492, "step": 5441500 }, { "epoch": 43.54, "learning_rate": 2.8232000000000003e-05, "loss": 8.7448, "step": 5442000 }, { "epoch": 43.54, "learning_rate": 2.8230000000000002e-05, "loss": 8.7688, "step": 5442500 }, { "epoch": 43.54, "learning_rate": 2.8227999999999998e-05, "loss": 8.7563, "step": 5443000 }, { "epoch": 43.55, "learning_rate": 2.8226000000000004e-05, "loss": 8.7509, "step": 5443500 }, { "epoch": 43.55, "learning_rate": 2.8224e-05, "loss": 8.7392, "step": 5444000 }, { "epoch": 43.56, "learning_rate": 2.8222000000000005e-05, "loss": 8.7552, "step": 5444500 }, { "epoch": 43.56, "learning_rate": 2.822e-05, "loss": 8.7489, "step": 5445000 }, { "epoch": 43.56, "learning_rate": 2.8218e-05, "loss": 8.7531, "step": 5445500 }, { "epoch": 43.57, "learning_rate": 2.8216000000000002e-05, "loss": 8.743, "step": 5446000 }, { "epoch": 43.57, "learning_rate": 2.8214e-05, "loss": 8.7537, "step": 5446500 }, { "epoch": 43.58, "learning_rate": 2.8212e-05, "loss": 8.7614, "step": 5447000 }, { "epoch": 43.58, "learning_rate": 2.8210000000000003e-05, "loss": 8.7572, "step": 5447500 }, { "epoch": 43.58, "learning_rate": 2.8208e-05, "loss": 8.7589, "step": 5448000 }, { "epoch": 43.59, "learning_rate": 2.8206e-05, "loss": 8.7484, "step": 5448500 }, { "epoch": 43.59, "learning_rate": 2.8204000000000003e-05, "loss": 8.7523, "step": 5449000 }, { "epoch": 43.6, "learning_rate": 2.8202000000000002e-05, "loss": 8.7728, "step": 5449500 }, { "epoch": 43.6, "learning_rate": 2.8199999999999998e-05, "loss": 8.7685, "step": 5450000 }, { "epoch": 43.6, "learning_rate": 2.8198000000000003e-05, "loss": 8.7338, "step": 5450500 }, { "epoch": 43.61, "learning_rate": 2.8196e-05, "loss": 8.7438, "step": 5451000 }, { "epoch": 43.61, "learning_rate": 2.8194000000000005e-05, "loss": 8.7571, "step": 5451500 }, { "epoch": 43.62, "learning_rate": 2.8192e-05, "loss": 8.7517, "step": 5452000 }, { "epoch": 43.62, "learning_rate": 2.819e-05, "loss": 8.741, "step": 5452500 }, { "epoch": 43.62, "learning_rate": 2.8188000000000002e-05, "loss": 8.761, "step": 5453000 }, { "epoch": 43.63, "learning_rate": 2.8186e-05, "loss": 8.7624, "step": 5453500 }, { "epoch": 43.63, "learning_rate": 2.8184e-05, "loss": 8.7354, "step": 5454000 }, { "epoch": 43.64, "learning_rate": 2.8182000000000002e-05, "loss": 8.7612, "step": 5454500 }, { "epoch": 43.64, "learning_rate": 2.818e-05, "loss": 8.7455, "step": 5455000 }, { "epoch": 43.64, "learning_rate": 2.8178e-05, "loss": 8.7523, "step": 5455500 }, { "epoch": 43.65, "learning_rate": 2.8176000000000003e-05, "loss": 8.743, "step": 5456000 }, { "epoch": 43.65, "learning_rate": 2.8174e-05, "loss": 8.7434, "step": 5456500 }, { "epoch": 43.66, "learning_rate": 2.8172000000000004e-05, "loss": 8.7519, "step": 5457000 }, { "epoch": 43.66, "learning_rate": 2.8170000000000003e-05, "loss": 8.7701, "step": 5457500 }, { "epoch": 43.66, "learning_rate": 2.8168e-05, "loss": 8.7346, "step": 5458000 }, { "epoch": 43.67, "learning_rate": 2.8166000000000004e-05, "loss": 8.7489, "step": 5458500 }, { "epoch": 43.67, "learning_rate": 2.8164e-05, "loss": 8.7605, "step": 5459000 }, { "epoch": 43.68, "learning_rate": 2.8162e-05, "loss": 8.7552, "step": 5459500 }, { "epoch": 43.68, "learning_rate": 2.816e-05, "loss": 8.7655, "step": 5460000 }, { "epoch": 43.68, "learning_rate": 2.8158e-05, "loss": 8.7549, "step": 5460500 }, { "epoch": 43.69, "learning_rate": 2.8156e-05, "loss": 8.7495, "step": 5461000 }, { "epoch": 43.69, "learning_rate": 2.8154000000000002e-05, "loss": 8.7421, "step": 5461500 }, { "epoch": 43.7, "learning_rate": 2.8152e-05, "loss": 8.7524, "step": 5462000 }, { "epoch": 43.7, "learning_rate": 2.815e-05, "loss": 8.7292, "step": 5462500 }, { "epoch": 43.7, "learning_rate": 2.8148000000000002e-05, "loss": 8.7404, "step": 5463000 }, { "epoch": 43.71, "learning_rate": 2.8146e-05, "loss": 8.7361, "step": 5463500 }, { "epoch": 43.71, "learning_rate": 2.8144000000000004e-05, "loss": 8.7581, "step": 5464000 }, { "epoch": 43.72, "learning_rate": 2.8142000000000003e-05, "loss": 8.738, "step": 5464500 }, { "epoch": 43.72, "learning_rate": 2.8139999999999998e-05, "loss": 8.752, "step": 5465000 }, { "epoch": 43.72, "learning_rate": 2.8138000000000004e-05, "loss": 8.7579, "step": 5465500 }, { "epoch": 43.73, "learning_rate": 2.8136e-05, "loss": 8.7538, "step": 5466000 }, { "epoch": 43.73, "learning_rate": 2.8134e-05, "loss": 8.7673, "step": 5466500 }, { "epoch": 43.74, "learning_rate": 2.8132000000000004e-05, "loss": 8.757, "step": 5467000 }, { "epoch": 43.74, "learning_rate": 2.813e-05, "loss": 8.7556, "step": 5467500 }, { "epoch": 43.74, "learning_rate": 2.8128e-05, "loss": 8.7621, "step": 5468000 }, { "epoch": 43.75, "learning_rate": 2.8126e-05, "loss": 8.7426, "step": 5468500 }, { "epoch": 43.75, "learning_rate": 2.8124e-05, "loss": 8.7464, "step": 5469000 }, { "epoch": 43.76, "learning_rate": 2.8122000000000003e-05, "loss": 8.7606, "step": 5469500 }, { "epoch": 43.76, "learning_rate": 2.8120000000000002e-05, "loss": 8.7578, "step": 5470000 }, { "epoch": 43.76, "learning_rate": 2.8118e-05, "loss": 8.7595, "step": 5470500 }, { "epoch": 43.77, "learning_rate": 2.8116000000000003e-05, "loss": 8.7623, "step": 5471000 }, { "epoch": 43.77, "learning_rate": 2.8114000000000002e-05, "loss": 8.7659, "step": 5471500 }, { "epoch": 43.78, "learning_rate": 2.8111999999999998e-05, "loss": 8.7288, "step": 5472000 }, { "epoch": 43.78, "learning_rate": 2.8110000000000004e-05, "loss": 8.7571, "step": 5472500 }, { "epoch": 43.78, "learning_rate": 2.8108000000000003e-05, "loss": 8.7507, "step": 5473000 }, { "epoch": 43.79, "learning_rate": 2.8105999999999998e-05, "loss": 8.7552, "step": 5473500 }, { "epoch": 43.79, "learning_rate": 2.8104000000000004e-05, "loss": 8.7412, "step": 5474000 }, { "epoch": 43.8, "learning_rate": 2.8102e-05, "loss": 8.7593, "step": 5474500 }, { "epoch": 43.8, "learning_rate": 2.8100000000000005e-05, "loss": 8.7473, "step": 5475000 }, { "epoch": 43.8, "learning_rate": 2.8098e-05, "loss": 8.743, "step": 5475500 }, { "epoch": 43.81, "learning_rate": 2.8096e-05, "loss": 8.7398, "step": 5476000 }, { "epoch": 43.81, "learning_rate": 2.8094000000000002e-05, "loss": 8.7581, "step": 5476500 }, { "epoch": 43.82, "learning_rate": 2.8092e-05, "loss": 8.757, "step": 5477000 }, { "epoch": 43.82, "learning_rate": 2.809e-05, "loss": 8.7461, "step": 5477500 }, { "epoch": 43.82, "learning_rate": 2.8088000000000003e-05, "loss": 8.7471, "step": 5478000 }, { "epoch": 43.83, "learning_rate": 2.8086000000000002e-05, "loss": 8.7338, "step": 5478500 }, { "epoch": 43.83, "learning_rate": 2.8084e-05, "loss": 8.7577, "step": 5479000 }, { "epoch": 43.84, "learning_rate": 2.8082000000000003e-05, "loss": 8.7688, "step": 5479500 }, { "epoch": 43.84, "learning_rate": 2.8080000000000002e-05, "loss": 8.7421, "step": 5480000 }, { "epoch": 43.84, "learning_rate": 2.8077999999999998e-05, "loss": 8.7512, "step": 5480500 }, { "epoch": 43.85, "learning_rate": 2.8076000000000004e-05, "loss": 8.7438, "step": 5481000 }, { "epoch": 43.85, "learning_rate": 2.8074e-05, "loss": 8.7651, "step": 5481500 }, { "epoch": 43.86, "learning_rate": 2.8072000000000005e-05, "loss": 8.7421, "step": 5482000 }, { "epoch": 43.86, "learning_rate": 2.807e-05, "loss": 8.7196, "step": 5482500 }, { "epoch": 43.86, "learning_rate": 2.8068e-05, "loss": 8.765, "step": 5483000 }, { "epoch": 43.87, "learning_rate": 2.8066000000000002e-05, "loss": 8.7428, "step": 5483500 }, { "epoch": 43.87, "learning_rate": 2.8064e-05, "loss": 8.7699, "step": 5484000 }, { "epoch": 43.88, "learning_rate": 2.8062e-05, "loss": 8.7677, "step": 5484500 }, { "epoch": 43.88, "learning_rate": 2.8060000000000002e-05, "loss": 8.7349, "step": 5485000 }, { "epoch": 43.88, "learning_rate": 2.8058e-05, "loss": 8.7878, "step": 5485500 }, { "epoch": 43.89, "learning_rate": 2.8056e-05, "loss": 8.7675, "step": 5486000 }, { "epoch": 43.89, "learning_rate": 2.8054000000000003e-05, "loss": 8.7342, "step": 5486500 }, { "epoch": 43.9, "learning_rate": 2.8052000000000002e-05, "loss": 8.766, "step": 5487000 }, { "epoch": 43.9, "learning_rate": 2.8050000000000004e-05, "loss": 8.7431, "step": 5487500 }, { "epoch": 43.9, "learning_rate": 2.8048000000000003e-05, "loss": 8.7542, "step": 5488000 }, { "epoch": 43.91, "learning_rate": 2.8046e-05, "loss": 8.7575, "step": 5488500 }, { "epoch": 43.91, "learning_rate": 2.8044000000000005e-05, "loss": 8.7469, "step": 5489000 }, { "epoch": 43.92, "learning_rate": 2.8042e-05, "loss": 8.7625, "step": 5489500 }, { "epoch": 43.92, "learning_rate": 2.804e-05, "loss": 8.7561, "step": 5490000 }, { "epoch": 43.92, "learning_rate": 2.8038e-05, "loss": 8.7565, "step": 5490500 }, { "epoch": 43.93, "learning_rate": 2.8036e-05, "loss": 8.7574, "step": 5491000 }, { "epoch": 43.93, "learning_rate": 2.8034e-05, "loss": 8.759, "step": 5491500 }, { "epoch": 43.94, "learning_rate": 2.8032000000000002e-05, "loss": 8.7567, "step": 5492000 }, { "epoch": 43.94, "learning_rate": 2.803e-05, "loss": 8.7458, "step": 5492500 }, { "epoch": 43.94, "learning_rate": 2.8028e-05, "loss": 8.7656, "step": 5493000 }, { "epoch": 43.95, "learning_rate": 2.8026000000000002e-05, "loss": 8.7462, "step": 5493500 }, { "epoch": 43.95, "learning_rate": 2.8024e-05, "loss": 8.7433, "step": 5494000 }, { "epoch": 43.96, "learning_rate": 2.8022000000000004e-05, "loss": 8.7643, "step": 5494500 }, { "epoch": 43.96, "learning_rate": 2.8020000000000003e-05, "loss": 8.7524, "step": 5495000 }, { "epoch": 43.96, "learning_rate": 2.8018e-05, "loss": 8.7331, "step": 5495500 }, { "epoch": 43.97, "learning_rate": 2.8016000000000004e-05, "loss": 8.7588, "step": 5496000 }, { "epoch": 43.97, "learning_rate": 2.8014e-05, "loss": 8.7473, "step": 5496500 }, { "epoch": 43.98, "learning_rate": 2.8012e-05, "loss": 8.7686, "step": 5497000 }, { "epoch": 43.98, "learning_rate": 2.8010000000000005e-05, "loss": 8.7381, "step": 5497500 }, { "epoch": 43.98, "learning_rate": 2.8008e-05, "loss": 8.7763, "step": 5498000 }, { "epoch": 43.99, "learning_rate": 2.8006e-05, "loss": 8.7268, "step": 5498500 }, { "epoch": 43.99, "learning_rate": 2.8004e-05, "loss": 8.7596, "step": 5499000 }, { "epoch": 44.0, "learning_rate": 2.8002e-05, "loss": 8.7396, "step": 5499500 }, { "epoch": 44.0, "learning_rate": 2.8000000000000003e-05, "loss": 8.7703, "step": 5500000 }, { "epoch": 44.0, "learning_rate": 2.7998000000000002e-05, "loss": 8.7615, "step": 5500500 }, { "epoch": 44.01, "learning_rate": 2.7996e-05, "loss": 8.74, "step": 5501000 }, { "epoch": 44.01, "learning_rate": 2.7994000000000003e-05, "loss": 8.7499, "step": 5501500 }, { "epoch": 44.02, "learning_rate": 2.7992000000000002e-05, "loss": 8.7227, "step": 5502000 }, { "epoch": 44.02, "learning_rate": 2.7989999999999998e-05, "loss": 8.7661, "step": 5502500 }, { "epoch": 44.02, "learning_rate": 2.7988000000000004e-05, "loss": 8.7538, "step": 5503000 }, { "epoch": 44.03, "learning_rate": 2.7986000000000003e-05, "loss": 8.7712, "step": 5503500 }, { "epoch": 44.03, "learning_rate": 2.7984e-05, "loss": 8.7632, "step": 5504000 }, { "epoch": 44.04, "learning_rate": 2.7982000000000004e-05, "loss": 8.7578, "step": 5504500 }, { "epoch": 44.04, "learning_rate": 2.798e-05, "loss": 8.7695, "step": 5505000 }, { "epoch": 44.04, "learning_rate": 2.7978e-05, "loss": 8.7375, "step": 5505500 }, { "epoch": 44.05, "learning_rate": 2.7976e-05, "loss": 8.7324, "step": 5506000 }, { "epoch": 44.05, "learning_rate": 2.7974e-05, "loss": 8.7492, "step": 5506500 }, { "epoch": 44.06, "learning_rate": 2.7972000000000003e-05, "loss": 8.7528, "step": 5507000 }, { "epoch": 44.06, "learning_rate": 2.797e-05, "loss": 8.7321, "step": 5507500 }, { "epoch": 44.06, "learning_rate": 2.7968e-05, "loss": 8.744, "step": 5508000 }, { "epoch": 44.07, "learning_rate": 2.7966000000000003e-05, "loss": 8.7635, "step": 5508500 }, { "epoch": 44.07, "learning_rate": 2.7964000000000002e-05, "loss": 8.7465, "step": 5509000 }, { "epoch": 44.08, "learning_rate": 2.7962e-05, "loss": 8.7581, "step": 5509500 }, { "epoch": 44.08, "learning_rate": 2.7960000000000003e-05, "loss": 8.7334, "step": 5510000 }, { "epoch": 44.08, "learning_rate": 2.7958000000000002e-05, "loss": 8.7442, "step": 5510500 }, { "epoch": 44.09, "learning_rate": 2.7955999999999998e-05, "loss": 8.7711, "step": 5511000 }, { "epoch": 44.09, "learning_rate": 2.7954000000000004e-05, "loss": 8.7483, "step": 5511500 }, { "epoch": 44.1, "learning_rate": 2.7952e-05, "loss": 8.7413, "step": 5512000 }, { "epoch": 44.1, "learning_rate": 2.7950000000000005e-05, "loss": 8.737, "step": 5512500 }, { "epoch": 44.1, "learning_rate": 2.7948e-05, "loss": 8.7533, "step": 5513000 }, { "epoch": 44.11, "learning_rate": 2.7946e-05, "loss": 8.7272, "step": 5513500 }, { "epoch": 44.11, "learning_rate": 2.7944000000000002e-05, "loss": 8.7569, "step": 5514000 }, { "epoch": 44.12, "learning_rate": 2.7942e-05, "loss": 8.7553, "step": 5514500 }, { "epoch": 44.12, "learning_rate": 2.794e-05, "loss": 8.7429, "step": 5515000 }, { "epoch": 44.12, "learning_rate": 2.7938000000000003e-05, "loss": 8.76, "step": 5515500 }, { "epoch": 44.13, "learning_rate": 2.7936e-05, "loss": 8.7532, "step": 5516000 }, { "epoch": 44.13, "learning_rate": 2.7934e-05, "loss": 8.7496, "step": 5516500 }, { "epoch": 44.14, "learning_rate": 2.7932000000000003e-05, "loss": 8.7405, "step": 5517000 }, { "epoch": 44.14, "learning_rate": 2.7930000000000002e-05, "loss": 8.7437, "step": 5517500 }, { "epoch": 44.14, "learning_rate": 2.7927999999999998e-05, "loss": 8.7378, "step": 5518000 }, { "epoch": 44.15, "learning_rate": 2.7926000000000003e-05, "loss": 8.7658, "step": 5518500 }, { "epoch": 44.15, "learning_rate": 2.7924e-05, "loss": 8.7506, "step": 5519000 }, { "epoch": 44.16, "learning_rate": 2.7922000000000005e-05, "loss": 8.7414, "step": 5519500 }, { "epoch": 44.16, "learning_rate": 2.792e-05, "loss": 8.7302, "step": 5520000 }, { "epoch": 44.16, "learning_rate": 2.7918e-05, "loss": 8.7469, "step": 5520500 }, { "epoch": 44.17, "learning_rate": 2.7916000000000002e-05, "loss": 8.7614, "step": 5521000 }, { "epoch": 44.17, "learning_rate": 2.7914e-05, "loss": 8.7577, "step": 5521500 }, { "epoch": 44.18, "learning_rate": 2.7912e-05, "loss": 8.7654, "step": 5522000 }, { "epoch": 44.18, "learning_rate": 2.7910000000000002e-05, "loss": 8.7501, "step": 5522500 }, { "epoch": 44.18, "learning_rate": 2.7908e-05, "loss": 8.7457, "step": 5523000 }, { "epoch": 44.19, "learning_rate": 2.7906e-05, "loss": 8.7274, "step": 5523500 }, { "epoch": 44.19, "learning_rate": 2.7904000000000003e-05, "loss": 8.7331, "step": 5524000 }, { "epoch": 44.2, "learning_rate": 2.7902e-05, "loss": 8.7446, "step": 5524500 }, { "epoch": 44.2, "learning_rate": 2.7900000000000004e-05, "loss": 8.7538, "step": 5525000 }, { "epoch": 44.2, "learning_rate": 2.7898000000000003e-05, "loss": 8.7607, "step": 5525500 }, { "epoch": 44.21, "learning_rate": 2.7896e-05, "loss": 8.7387, "step": 5526000 }, { "epoch": 44.21, "learning_rate": 2.7894000000000004e-05, "loss": 8.7484, "step": 5526500 }, { "epoch": 44.22, "learning_rate": 2.7892e-05, "loss": 8.7603, "step": 5527000 }, { "epoch": 44.22, "learning_rate": 2.789e-05, "loss": 8.7366, "step": 5527500 }, { "epoch": 44.22, "learning_rate": 2.7888e-05, "loss": 8.7574, "step": 5528000 }, { "epoch": 44.23, "learning_rate": 2.7886e-05, "loss": 8.7521, "step": 5528500 }, { "epoch": 44.23, "learning_rate": 2.7884e-05, "loss": 8.7762, "step": 5529000 }, { "epoch": 44.24, "learning_rate": 2.7882000000000002e-05, "loss": 8.7488, "step": 5529500 }, { "epoch": 44.24, "learning_rate": 2.788e-05, "loss": 8.7673, "step": 5530000 }, { "epoch": 44.24, "learning_rate": 2.7878e-05, "loss": 8.7731, "step": 5530500 }, { "epoch": 44.25, "learning_rate": 2.7876000000000002e-05, "loss": 8.7537, "step": 5531000 }, { "epoch": 44.25, "learning_rate": 2.7874e-05, "loss": 8.7429, "step": 5531500 }, { "epoch": 44.26, "learning_rate": 2.7872000000000004e-05, "loss": 8.7654, "step": 5532000 }, { "epoch": 44.26, "learning_rate": 2.7870000000000003e-05, "loss": 8.7517, "step": 5532500 }, { "epoch": 44.26, "learning_rate": 2.7867999999999998e-05, "loss": 8.7448, "step": 5533000 }, { "epoch": 44.27, "learning_rate": 2.7866000000000004e-05, "loss": 8.7675, "step": 5533500 }, { "epoch": 44.27, "learning_rate": 2.7864e-05, "loss": 8.7507, "step": 5534000 }, { "epoch": 44.28, "learning_rate": 2.7862e-05, "loss": 8.7458, "step": 5534500 }, { "epoch": 44.28, "learning_rate": 2.7860000000000004e-05, "loss": 8.7422, "step": 5535000 }, { "epoch": 44.28, "learning_rate": 2.7858e-05, "loss": 8.7435, "step": 5535500 }, { "epoch": 44.29, "learning_rate": 2.7856e-05, "loss": 8.7444, "step": 5536000 }, { "epoch": 44.29, "learning_rate": 2.7854e-05, "loss": 8.7557, "step": 5536500 }, { "epoch": 44.3, "learning_rate": 2.7852e-05, "loss": 8.7469, "step": 5537000 }, { "epoch": 44.3, "learning_rate": 2.7850000000000003e-05, "loss": 8.7473, "step": 5537500 }, { "epoch": 44.3, "learning_rate": 2.7848000000000002e-05, "loss": 8.7757, "step": 5538000 }, { "epoch": 44.31, "learning_rate": 2.7846e-05, "loss": 8.7588, "step": 5538500 }, { "epoch": 44.31, "learning_rate": 2.7844000000000003e-05, "loss": 8.7625, "step": 5539000 }, { "epoch": 44.32, "learning_rate": 2.7842000000000002e-05, "loss": 8.7635, "step": 5539500 }, { "epoch": 44.32, "learning_rate": 2.7839999999999998e-05, "loss": 8.751, "step": 5540000 }, { "epoch": 44.32, "learning_rate": 2.7838000000000004e-05, "loss": 8.7675, "step": 5540500 }, { "epoch": 44.33, "learning_rate": 2.7836000000000003e-05, "loss": 8.7534, "step": 5541000 }, { "epoch": 44.33, "learning_rate": 2.7833999999999998e-05, "loss": 8.7579, "step": 5541500 }, { "epoch": 44.34, "learning_rate": 2.7832000000000004e-05, "loss": 8.7522, "step": 5542000 }, { "epoch": 44.34, "learning_rate": 2.783e-05, "loss": 8.763, "step": 5542500 }, { "epoch": 44.34, "learning_rate": 2.7828000000000005e-05, "loss": 8.7802, "step": 5543000 }, { "epoch": 44.35, "learning_rate": 2.7826e-05, "loss": 8.7413, "step": 5543500 }, { "epoch": 44.35, "learning_rate": 2.7824e-05, "loss": 8.7534, "step": 5544000 }, { "epoch": 44.36, "learning_rate": 2.7822000000000002e-05, "loss": 8.745, "step": 5544500 }, { "epoch": 44.36, "learning_rate": 2.782e-05, "loss": 8.7507, "step": 5545000 }, { "epoch": 44.36, "learning_rate": 2.7818e-05, "loss": 8.7679, "step": 5545500 }, { "epoch": 44.37, "learning_rate": 2.7816000000000003e-05, "loss": 8.7352, "step": 5546000 }, { "epoch": 44.37, "learning_rate": 2.7814000000000002e-05, "loss": 8.7381, "step": 5546500 }, { "epoch": 44.38, "learning_rate": 2.7812e-05, "loss": 8.7373, "step": 5547000 }, { "epoch": 44.38, "learning_rate": 2.7810000000000003e-05, "loss": 8.7263, "step": 5547500 }, { "epoch": 44.38, "learning_rate": 2.7808000000000002e-05, "loss": 8.7578, "step": 5548000 }, { "epoch": 44.39, "learning_rate": 2.7805999999999998e-05, "loss": 8.7369, "step": 5548500 }, { "epoch": 44.39, "learning_rate": 2.7804000000000003e-05, "loss": 8.7415, "step": 5549000 }, { "epoch": 44.4, "learning_rate": 2.7802e-05, "loss": 8.7438, "step": 5549500 }, { "epoch": 44.4, "learning_rate": 2.7800000000000005e-05, "loss": 8.7414, "step": 5550000 }, { "epoch": 44.4, "learning_rate": 2.7798e-05, "loss": 8.7455, "step": 5550500 }, { "epoch": 44.41, "learning_rate": 2.7796e-05, "loss": 8.7642, "step": 5551000 }, { "epoch": 44.41, "learning_rate": 2.7794000000000002e-05, "loss": 8.7234, "step": 5551500 }, { "epoch": 44.42, "learning_rate": 2.7792e-05, "loss": 8.7547, "step": 5552000 }, { "epoch": 44.42, "learning_rate": 2.779e-05, "loss": 8.7605, "step": 5552500 }, { "epoch": 44.42, "learning_rate": 2.7788000000000002e-05, "loss": 8.74, "step": 5553000 }, { "epoch": 44.43, "learning_rate": 2.7786e-05, "loss": 8.7727, "step": 5553500 }, { "epoch": 44.43, "learning_rate": 2.7784e-05, "loss": 8.7487, "step": 5554000 }, { "epoch": 44.44, "learning_rate": 2.7782000000000003e-05, "loss": 8.7595, "step": 5554500 }, { "epoch": 44.44, "learning_rate": 2.778e-05, "loss": 8.745, "step": 5555000 }, { "epoch": 44.44, "learning_rate": 2.7778000000000004e-05, "loss": 8.7575, "step": 5555500 }, { "epoch": 44.45, "learning_rate": 2.7776000000000003e-05, "loss": 8.7418, "step": 5556000 }, { "epoch": 44.45, "learning_rate": 2.7774e-05, "loss": 8.7582, "step": 5556500 }, { "epoch": 44.46, "learning_rate": 2.7772000000000004e-05, "loss": 8.7672, "step": 5557000 }, { "epoch": 44.46, "learning_rate": 2.777e-05, "loss": 8.7512, "step": 5557500 }, { "epoch": 44.46, "learning_rate": 2.7768e-05, "loss": 8.7349, "step": 5558000 }, { "epoch": 44.47, "learning_rate": 2.7766e-05, "loss": 8.7534, "step": 5558500 }, { "epoch": 44.47, "learning_rate": 2.7764e-05, "loss": 8.7519, "step": 5559000 }, { "epoch": 44.48, "learning_rate": 2.7762e-05, "loss": 8.7428, "step": 5559500 }, { "epoch": 44.48, "learning_rate": 2.7760000000000002e-05, "loss": 8.7444, "step": 5560000 }, { "epoch": 44.48, "learning_rate": 2.7758e-05, "loss": 8.7495, "step": 5560500 }, { "epoch": 44.49, "learning_rate": 2.7756e-05, "loss": 8.7435, "step": 5561000 }, { "epoch": 44.49, "learning_rate": 2.7754000000000002e-05, "loss": 8.7532, "step": 5561500 }, { "epoch": 44.5, "learning_rate": 2.7752e-05, "loss": 8.7437, "step": 5562000 }, { "epoch": 44.5, "learning_rate": 2.7750000000000004e-05, "loss": 8.7521, "step": 5562500 }, { "epoch": 44.5, "learning_rate": 2.7748000000000003e-05, "loss": 8.7301, "step": 5563000 }, { "epoch": 44.51, "learning_rate": 2.7745999999999998e-05, "loss": 8.7443, "step": 5563500 }, { "epoch": 44.51, "learning_rate": 2.7744000000000004e-05, "loss": 8.7602, "step": 5564000 }, { "epoch": 44.52, "learning_rate": 2.7742e-05, "loss": 8.7397, "step": 5564500 }, { "epoch": 44.52, "learning_rate": 2.774e-05, "loss": 8.7655, "step": 5565000 }, { "epoch": 44.52, "learning_rate": 2.7738000000000004e-05, "loss": 8.7431, "step": 5565500 }, { "epoch": 44.53, "learning_rate": 2.7736e-05, "loss": 8.7602, "step": 5566000 }, { "epoch": 44.53, "learning_rate": 2.7734e-05, "loss": 8.7373, "step": 5566500 }, { "epoch": 44.54, "learning_rate": 2.7732e-05, "loss": 8.7546, "step": 5567000 }, { "epoch": 44.54, "learning_rate": 2.773e-05, "loss": 8.75, "step": 5567500 }, { "epoch": 44.54, "learning_rate": 2.7728000000000003e-05, "loss": 8.754, "step": 5568000 }, { "epoch": 44.55, "learning_rate": 2.7726000000000002e-05, "loss": 8.7607, "step": 5568500 }, { "epoch": 44.55, "learning_rate": 2.7724e-05, "loss": 8.7451, "step": 5569000 }, { "epoch": 44.56, "learning_rate": 2.7722000000000003e-05, "loss": 8.7528, "step": 5569500 }, { "epoch": 44.56, "learning_rate": 2.7720000000000002e-05, "loss": 8.7414, "step": 5570000 }, { "epoch": 44.56, "learning_rate": 2.7717999999999998e-05, "loss": 8.7569, "step": 5570500 }, { "epoch": 44.57, "learning_rate": 2.7716000000000004e-05, "loss": 8.7637, "step": 5571000 }, { "epoch": 44.57, "learning_rate": 2.7714000000000003e-05, "loss": 8.7232, "step": 5571500 }, { "epoch": 44.58, "learning_rate": 2.7711999999999998e-05, "loss": 8.7376, "step": 5572000 }, { "epoch": 44.58, "learning_rate": 2.7710000000000004e-05, "loss": 8.797, "step": 5572500 }, { "epoch": 44.58, "learning_rate": 2.7708e-05, "loss": 8.7584, "step": 5573000 }, { "epoch": 44.59, "learning_rate": 2.7706e-05, "loss": 8.7503, "step": 5573500 }, { "epoch": 44.59, "learning_rate": 2.7704e-05, "loss": 8.7637, "step": 5574000 }, { "epoch": 44.6, "learning_rate": 2.7702e-05, "loss": 8.7446, "step": 5574500 }, { "epoch": 44.6, "learning_rate": 2.7700000000000002e-05, "loss": 8.7689, "step": 5575000 }, { "epoch": 44.6, "learning_rate": 2.7698e-05, "loss": 8.7234, "step": 5575500 }, { "epoch": 44.61, "learning_rate": 2.7696e-05, "loss": 8.7474, "step": 5576000 }, { "epoch": 44.61, "learning_rate": 2.7694000000000003e-05, "loss": 8.7503, "step": 5576500 }, { "epoch": 44.62, "learning_rate": 2.7692000000000002e-05, "loss": 8.7702, "step": 5577000 }, { "epoch": 44.62, "learning_rate": 2.769e-05, "loss": 8.7458, "step": 5577500 }, { "epoch": 44.62, "learning_rate": 2.7688000000000003e-05, "loss": 8.7436, "step": 5578000 }, { "epoch": 44.63, "learning_rate": 2.7686000000000002e-05, "loss": 8.759, "step": 5578500 }, { "epoch": 44.63, "learning_rate": 2.7683999999999998e-05, "loss": 8.7764, "step": 5579000 }, { "epoch": 44.64, "learning_rate": 2.7682000000000004e-05, "loss": 8.726, "step": 5579500 }, { "epoch": 44.64, "learning_rate": 2.768e-05, "loss": 8.7482, "step": 5580000 }, { "epoch": 44.64, "learning_rate": 2.7678000000000005e-05, "loss": 8.7502, "step": 5580500 }, { "epoch": 44.65, "learning_rate": 2.7676e-05, "loss": 8.7432, "step": 5581000 }, { "epoch": 44.65, "learning_rate": 2.7674e-05, "loss": 8.7393, "step": 5581500 }, { "epoch": 44.66, "learning_rate": 2.7672000000000002e-05, "loss": 8.7517, "step": 5582000 }, { "epoch": 44.66, "learning_rate": 2.767e-05, "loss": 8.7501, "step": 5582500 }, { "epoch": 44.66, "learning_rate": 2.7668e-05, "loss": 8.7501, "step": 5583000 }, { "epoch": 44.67, "learning_rate": 2.7666000000000002e-05, "loss": 8.7381, "step": 5583500 }, { "epoch": 44.67, "learning_rate": 2.7664e-05, "loss": 8.7523, "step": 5584000 }, { "epoch": 44.68, "learning_rate": 2.7662e-05, "loss": 8.7607, "step": 5584500 }, { "epoch": 44.68, "learning_rate": 2.7660000000000003e-05, "loss": 8.7514, "step": 5585000 }, { "epoch": 44.68, "learning_rate": 2.7658000000000002e-05, "loss": 8.754, "step": 5585500 }, { "epoch": 44.69, "learning_rate": 2.7655999999999997e-05, "loss": 8.7565, "step": 5586000 }, { "epoch": 44.69, "learning_rate": 2.7654000000000003e-05, "loss": 8.7537, "step": 5586500 }, { "epoch": 44.7, "learning_rate": 2.7652e-05, "loss": 8.7543, "step": 5587000 }, { "epoch": 44.7, "learning_rate": 2.7650000000000005e-05, "loss": 8.7594, "step": 5587500 }, { "epoch": 44.7, "learning_rate": 2.7648e-05, "loss": 8.7255, "step": 5588000 }, { "epoch": 44.71, "learning_rate": 2.7646e-05, "loss": 8.7426, "step": 5588500 }, { "epoch": 44.71, "learning_rate": 2.7644e-05, "loss": 8.7606, "step": 5589000 }, { "epoch": 44.72, "learning_rate": 2.7642e-05, "loss": 8.7508, "step": 5589500 }, { "epoch": 44.72, "learning_rate": 2.764e-05, "loss": 8.7789, "step": 5590000 }, { "epoch": 44.72, "learning_rate": 2.7638000000000002e-05, "loss": 8.761, "step": 5590500 }, { "epoch": 44.73, "learning_rate": 2.7636e-05, "loss": 8.752, "step": 5591000 }, { "epoch": 44.73, "learning_rate": 2.7634e-05, "loss": 8.7507, "step": 5591500 }, { "epoch": 44.74, "learning_rate": 2.7632000000000002e-05, "loss": 8.7448, "step": 5592000 }, { "epoch": 44.74, "learning_rate": 2.763e-05, "loss": 8.7579, "step": 5592500 }, { "epoch": 44.74, "learning_rate": 2.7628000000000004e-05, "loss": 8.7489, "step": 5593000 }, { "epoch": 44.75, "learning_rate": 2.7626000000000003e-05, "loss": 8.7393, "step": 5593500 }, { "epoch": 44.75, "learning_rate": 2.7624e-05, "loss": 8.7354, "step": 5594000 }, { "epoch": 44.76, "learning_rate": 2.7622000000000004e-05, "loss": 8.7544, "step": 5594500 }, { "epoch": 44.76, "learning_rate": 2.762e-05, "loss": 8.7476, "step": 5595000 }, { "epoch": 44.76, "learning_rate": 2.7618e-05, "loss": 8.7522, "step": 5595500 }, { "epoch": 44.77, "learning_rate": 2.7616000000000005e-05, "loss": 8.7524, "step": 5596000 }, { "epoch": 44.77, "learning_rate": 2.7614e-05, "loss": 8.7509, "step": 5596500 }, { "epoch": 44.78, "learning_rate": 2.7612e-05, "loss": 8.7456, "step": 5597000 }, { "epoch": 44.78, "learning_rate": 2.761e-05, "loss": 8.7546, "step": 5597500 }, { "epoch": 44.78, "learning_rate": 2.7608e-05, "loss": 8.7436, "step": 5598000 }, { "epoch": 44.79, "learning_rate": 2.7606000000000003e-05, "loss": 8.7489, "step": 5598500 }, { "epoch": 44.79, "learning_rate": 2.7604000000000002e-05, "loss": 8.7478, "step": 5599000 }, { "epoch": 44.8, "learning_rate": 2.7602e-05, "loss": 8.7671, "step": 5599500 }, { "epoch": 44.8, "learning_rate": 2.7600000000000003e-05, "loss": 8.7259, "step": 5600000 }, { "epoch": 44.8, "learning_rate": 2.7598000000000002e-05, "loss": 8.7464, "step": 5600500 }, { "epoch": 44.81, "learning_rate": 2.7595999999999998e-05, "loss": 8.7576, "step": 5601000 }, { "epoch": 44.81, "learning_rate": 2.7594000000000004e-05, "loss": 8.76, "step": 5601500 }, { "epoch": 44.82, "learning_rate": 2.7592000000000003e-05, "loss": 8.749, "step": 5602000 }, { "epoch": 44.82, "learning_rate": 2.759e-05, "loss": 8.7396, "step": 5602500 }, { "epoch": 44.82, "learning_rate": 2.7588000000000004e-05, "loss": 8.7548, "step": 5603000 }, { "epoch": 44.83, "learning_rate": 2.7586e-05, "loss": 8.749, "step": 5603500 }, { "epoch": 44.83, "learning_rate": 2.7584e-05, "loss": 8.7405, "step": 5604000 }, { "epoch": 44.84, "learning_rate": 2.7582e-05, "loss": 8.7313, "step": 5604500 }, { "epoch": 44.84, "learning_rate": 2.758e-05, "loss": 8.7484, "step": 5605000 }, { "epoch": 44.84, "learning_rate": 2.7578000000000003e-05, "loss": 8.7538, "step": 5605500 }, { "epoch": 44.85, "learning_rate": 2.7576e-05, "loss": 8.7327, "step": 5606000 }, { "epoch": 44.85, "learning_rate": 2.7574e-05, "loss": 8.7579, "step": 5606500 }, { "epoch": 44.86, "learning_rate": 2.7572000000000003e-05, "loss": 8.7534, "step": 5607000 }, { "epoch": 44.86, "learning_rate": 2.7570000000000002e-05, "loss": 8.7592, "step": 5607500 }, { "epoch": 44.86, "learning_rate": 2.7568e-05, "loss": 8.7463, "step": 5608000 }, { "epoch": 44.87, "learning_rate": 2.7566000000000003e-05, "loss": 8.7585, "step": 5608500 }, { "epoch": 44.87, "learning_rate": 2.7564000000000002e-05, "loss": 8.7565, "step": 5609000 }, { "epoch": 44.88, "learning_rate": 2.7561999999999998e-05, "loss": 8.7447, "step": 5609500 }, { "epoch": 44.88, "learning_rate": 2.7560000000000004e-05, "loss": 8.7463, "step": 5610000 }, { "epoch": 44.88, "learning_rate": 2.7558e-05, "loss": 8.7629, "step": 5610500 }, { "epoch": 44.89, "learning_rate": 2.7556000000000005e-05, "loss": 8.745, "step": 5611000 }, { "epoch": 44.89, "learning_rate": 2.7554e-05, "loss": 8.7568, "step": 5611500 }, { "epoch": 44.9, "learning_rate": 2.7552e-05, "loss": 8.736, "step": 5612000 }, { "epoch": 44.9, "learning_rate": 2.7550000000000002e-05, "loss": 8.7506, "step": 5612500 }, { "epoch": 44.9, "learning_rate": 2.7548e-05, "loss": 8.7462, "step": 5613000 }, { "epoch": 44.91, "learning_rate": 2.7546e-05, "loss": 8.7456, "step": 5613500 }, { "epoch": 44.91, "learning_rate": 2.7544000000000003e-05, "loss": 8.7411, "step": 5614000 }, { "epoch": 44.92, "learning_rate": 2.7542e-05, "loss": 8.7492, "step": 5614500 }, { "epoch": 44.92, "learning_rate": 2.754e-05, "loss": 8.7323, "step": 5615000 }, { "epoch": 44.92, "learning_rate": 2.7538000000000003e-05, "loss": 8.7379, "step": 5615500 }, { "epoch": 44.93, "learning_rate": 2.7536000000000002e-05, "loss": 8.75, "step": 5616000 }, { "epoch": 44.93, "learning_rate": 2.7533999999999998e-05, "loss": 8.7576, "step": 5616500 }, { "epoch": 44.94, "learning_rate": 2.7532000000000003e-05, "loss": 8.7381, "step": 5617000 }, { "epoch": 44.94, "learning_rate": 2.753e-05, "loss": 8.7578, "step": 5617500 }, { "epoch": 44.94, "learning_rate": 2.7528000000000005e-05, "loss": 8.7511, "step": 5618000 }, { "epoch": 44.95, "learning_rate": 2.7526e-05, "loss": 8.732, "step": 5618500 }, { "epoch": 44.95, "learning_rate": 2.7524e-05, "loss": 8.7555, "step": 5619000 }, { "epoch": 44.96, "learning_rate": 2.7522000000000002e-05, "loss": 8.757, "step": 5619500 }, { "epoch": 44.96, "learning_rate": 2.752e-05, "loss": 8.7301, "step": 5620000 }, { "epoch": 44.96, "learning_rate": 2.7518e-05, "loss": 8.7624, "step": 5620500 }, { "epoch": 44.97, "learning_rate": 2.7516000000000002e-05, "loss": 8.7701, "step": 5621000 }, { "epoch": 44.97, "learning_rate": 2.7514e-05, "loss": 8.7324, "step": 5621500 }, { "epoch": 44.98, "learning_rate": 2.7512e-05, "loss": 8.7347, "step": 5622000 }, { "epoch": 44.98, "learning_rate": 2.7510000000000003e-05, "loss": 8.7492, "step": 5622500 }, { "epoch": 44.98, "learning_rate": 2.7508e-05, "loss": 8.7635, "step": 5623000 }, { "epoch": 44.99, "learning_rate": 2.7506000000000004e-05, "loss": 8.7536, "step": 5623500 }, { "epoch": 44.99, "learning_rate": 2.7504000000000003e-05, "loss": 8.7551, "step": 5624000 }, { "epoch": 45.0, "learning_rate": 2.7502e-05, "loss": 8.7298, "step": 5624500 }, { "epoch": 45.0, "learning_rate": 2.7500000000000004e-05, "loss": 8.7506, "step": 5625000 }, { "epoch": 45.0, "learning_rate": 2.7498e-05, "loss": 8.7648, "step": 5625500 }, { "epoch": 45.01, "learning_rate": 2.7496e-05, "loss": 8.7288, "step": 5626000 }, { "epoch": 45.01, "learning_rate": 2.7494000000000005e-05, "loss": 8.7384, "step": 5626500 }, { "epoch": 45.02, "learning_rate": 2.7492e-05, "loss": 8.7468, "step": 5627000 }, { "epoch": 45.02, "learning_rate": 2.749e-05, "loss": 8.7561, "step": 5627500 }, { "epoch": 45.02, "learning_rate": 2.7488000000000002e-05, "loss": 8.7536, "step": 5628000 }, { "epoch": 45.03, "learning_rate": 2.7486e-05, "loss": 8.7585, "step": 5628500 }, { "epoch": 45.03, "learning_rate": 2.7484e-05, "loss": 8.7572, "step": 5629000 }, { "epoch": 45.04, "learning_rate": 2.7482000000000002e-05, "loss": 8.7597, "step": 5629500 }, { "epoch": 45.04, "learning_rate": 2.748e-05, "loss": 8.7362, "step": 5630000 }, { "epoch": 45.04, "learning_rate": 2.7478000000000004e-05, "loss": 8.7469, "step": 5630500 }, { "epoch": 45.05, "learning_rate": 2.7476000000000003e-05, "loss": 8.7504, "step": 5631000 }, { "epoch": 45.05, "learning_rate": 2.7473999999999998e-05, "loss": 8.7599, "step": 5631500 }, { "epoch": 45.06, "learning_rate": 2.7472000000000004e-05, "loss": 8.7375, "step": 5632000 }, { "epoch": 45.06, "learning_rate": 2.7470000000000003e-05, "loss": 8.7556, "step": 5632500 }, { "epoch": 45.06, "learning_rate": 2.7468e-05, "loss": 8.7591, "step": 5633000 }, { "epoch": 45.07, "learning_rate": 2.7466000000000004e-05, "loss": 8.7547, "step": 5633500 }, { "epoch": 45.07, "learning_rate": 2.7464e-05, "loss": 8.7417, "step": 5634000 }, { "epoch": 45.08, "learning_rate": 2.7462e-05, "loss": 8.7517, "step": 5634500 }, { "epoch": 45.08, "learning_rate": 2.746e-05, "loss": 8.7555, "step": 5635000 }, { "epoch": 45.08, "learning_rate": 2.7458e-05, "loss": 8.7499, "step": 5635500 }, { "epoch": 45.09, "learning_rate": 2.7456000000000003e-05, "loss": 8.7495, "step": 5636000 }, { "epoch": 45.09, "learning_rate": 2.7454000000000002e-05, "loss": 8.7463, "step": 5636500 }, { "epoch": 45.1, "learning_rate": 2.7452e-05, "loss": 8.7562, "step": 5637000 }, { "epoch": 45.1, "learning_rate": 2.7450000000000003e-05, "loss": 8.7396, "step": 5637500 }, { "epoch": 45.1, "learning_rate": 2.7448000000000002e-05, "loss": 8.7567, "step": 5638000 }, { "epoch": 45.11, "learning_rate": 2.7446e-05, "loss": 8.7583, "step": 5638500 }, { "epoch": 45.11, "learning_rate": 2.7444000000000004e-05, "loss": 8.7568, "step": 5639000 }, { "epoch": 45.12, "learning_rate": 2.7442000000000003e-05, "loss": 8.7365, "step": 5639500 }, { "epoch": 45.12, "learning_rate": 2.7439999999999998e-05, "loss": 8.7339, "step": 5640000 }, { "epoch": 45.12, "learning_rate": 2.7438000000000004e-05, "loss": 8.7336, "step": 5640500 }, { "epoch": 45.13, "learning_rate": 2.7436e-05, "loss": 8.7464, "step": 5641000 }, { "epoch": 45.13, "learning_rate": 2.7434e-05, "loss": 8.7542, "step": 5641500 }, { "epoch": 45.14, "learning_rate": 2.7432e-05, "loss": 8.7501, "step": 5642000 }, { "epoch": 45.14, "learning_rate": 2.743e-05, "loss": 8.7461, "step": 5642500 }, { "epoch": 45.14, "learning_rate": 2.7428000000000002e-05, "loss": 8.7403, "step": 5643000 }, { "epoch": 45.15, "learning_rate": 2.7426e-05, "loss": 8.7499, "step": 5643500 }, { "epoch": 45.15, "learning_rate": 2.7424e-05, "loss": 8.729, "step": 5644000 }, { "epoch": 45.16, "learning_rate": 2.7422000000000003e-05, "loss": 8.7306, "step": 5644500 }, { "epoch": 45.16, "learning_rate": 2.7420000000000002e-05, "loss": 8.7549, "step": 5645000 }, { "epoch": 45.16, "learning_rate": 2.7418e-05, "loss": 8.7512, "step": 5645500 }, { "epoch": 45.17, "learning_rate": 2.7416000000000003e-05, "loss": 8.7465, "step": 5646000 }, { "epoch": 45.17, "learning_rate": 2.7414000000000002e-05, "loss": 8.7547, "step": 5646500 }, { "epoch": 45.18, "learning_rate": 2.7411999999999998e-05, "loss": 8.7574, "step": 5647000 }, { "epoch": 45.18, "learning_rate": 2.7410000000000004e-05, "loss": 8.7554, "step": 5647500 }, { "epoch": 45.18, "learning_rate": 2.7408e-05, "loss": 8.7544, "step": 5648000 }, { "epoch": 45.19, "learning_rate": 2.7406000000000005e-05, "loss": 8.7452, "step": 5648500 }, { "epoch": 45.19, "learning_rate": 2.7404e-05, "loss": 8.7617, "step": 5649000 }, { "epoch": 45.2, "learning_rate": 2.7402e-05, "loss": 8.7578, "step": 5649500 }, { "epoch": 45.2, "learning_rate": 2.7400000000000002e-05, "loss": 8.7341, "step": 5650000 }, { "epoch": 45.2, "learning_rate": 2.7398e-05, "loss": 8.751, "step": 5650500 }, { "epoch": 45.21, "learning_rate": 2.7396e-05, "loss": 8.7531, "step": 5651000 }, { "epoch": 45.21, "learning_rate": 2.7394000000000002e-05, "loss": 8.7521, "step": 5651500 }, { "epoch": 45.22, "learning_rate": 2.7392e-05, "loss": 8.7563, "step": 5652000 }, { "epoch": 45.22, "learning_rate": 2.739e-05, "loss": 8.7296, "step": 5652500 }, { "epoch": 45.22, "learning_rate": 2.7388000000000003e-05, "loss": 8.7342, "step": 5653000 }, { "epoch": 45.23, "learning_rate": 2.7386000000000002e-05, "loss": 8.7397, "step": 5653500 }, { "epoch": 45.23, "learning_rate": 2.7383999999999997e-05, "loss": 8.7421, "step": 5654000 }, { "epoch": 45.24, "learning_rate": 2.7382000000000003e-05, "loss": 8.7634, "step": 5654500 }, { "epoch": 45.24, "learning_rate": 2.738e-05, "loss": 8.7624, "step": 5655000 }, { "epoch": 45.24, "learning_rate": 2.7378000000000005e-05, "loss": 8.7374, "step": 5655500 }, { "epoch": 45.25, "learning_rate": 2.7376e-05, "loss": 8.7532, "step": 5656000 }, { "epoch": 45.25, "learning_rate": 2.7374e-05, "loss": 8.7641, "step": 5656500 }, { "epoch": 45.26, "learning_rate": 2.7372e-05, "loss": 8.7552, "step": 5657000 }, { "epoch": 45.26, "learning_rate": 2.737e-05, "loss": 8.7434, "step": 5657500 }, { "epoch": 45.26, "learning_rate": 2.7368e-05, "loss": 8.7399, "step": 5658000 }, { "epoch": 45.27, "learning_rate": 2.7366000000000002e-05, "loss": 8.7304, "step": 5658500 }, { "epoch": 45.27, "learning_rate": 2.7364e-05, "loss": 8.7571, "step": 5659000 }, { "epoch": 45.28, "learning_rate": 2.7362e-05, "loss": 8.7559, "step": 5659500 }, { "epoch": 45.28, "learning_rate": 2.7360000000000002e-05, "loss": 8.7389, "step": 5660000 }, { "epoch": 45.28, "learning_rate": 2.7358e-05, "loss": 8.7452, "step": 5660500 }, { "epoch": 45.29, "learning_rate": 2.7356000000000004e-05, "loss": 8.7393, "step": 5661000 }, { "epoch": 45.29, "learning_rate": 2.7354000000000003e-05, "loss": 8.7449, "step": 5661500 }, { "epoch": 45.3, "learning_rate": 2.7352e-05, "loss": 8.7396, "step": 5662000 }, { "epoch": 45.3, "learning_rate": 2.7350000000000004e-05, "loss": 8.7701, "step": 5662500 }, { "epoch": 45.3, "learning_rate": 2.7348e-05, "loss": 8.7215, "step": 5663000 }, { "epoch": 45.31, "learning_rate": 2.7346e-05, "loss": 8.7503, "step": 5663500 }, { "epoch": 45.31, "learning_rate": 2.7344000000000005e-05, "loss": 8.7458, "step": 5664000 }, { "epoch": 45.32, "learning_rate": 2.7342e-05, "loss": 8.7598, "step": 5664500 }, { "epoch": 45.32, "learning_rate": 2.734e-05, "loss": 8.7571, "step": 5665000 }, { "epoch": 45.32, "learning_rate": 2.7338e-05, "loss": 8.7597, "step": 5665500 }, { "epoch": 45.33, "learning_rate": 2.7336e-05, "loss": 8.7333, "step": 5666000 }, { "epoch": 45.33, "learning_rate": 2.7334000000000003e-05, "loss": 8.7392, "step": 5666500 }, { "epoch": 45.34, "learning_rate": 2.7332000000000002e-05, "loss": 8.7465, "step": 5667000 }, { "epoch": 45.34, "learning_rate": 2.733e-05, "loss": 8.7716, "step": 5667500 }, { "epoch": 45.34, "learning_rate": 2.7328000000000003e-05, "loss": 8.753, "step": 5668000 }, { "epoch": 45.35, "learning_rate": 2.7326000000000002e-05, "loss": 8.7654, "step": 5668500 }, { "epoch": 45.35, "learning_rate": 2.7323999999999998e-05, "loss": 8.7528, "step": 5669000 }, { "epoch": 45.36, "learning_rate": 2.7322000000000004e-05, "loss": 8.7393, "step": 5669500 }, { "epoch": 45.36, "learning_rate": 2.7320000000000003e-05, "loss": 8.7328, "step": 5670000 }, { "epoch": 45.36, "learning_rate": 2.7318e-05, "loss": 8.7414, "step": 5670500 }, { "epoch": 45.37, "learning_rate": 2.7316000000000004e-05, "loss": 8.7385, "step": 5671000 }, { "epoch": 45.37, "learning_rate": 2.7314e-05, "loss": 8.7302, "step": 5671500 }, { "epoch": 45.38, "learning_rate": 2.7312e-05, "loss": 8.7391, "step": 5672000 }, { "epoch": 45.38, "learning_rate": 2.731e-05, "loss": 8.7278, "step": 5672500 }, { "epoch": 45.38, "learning_rate": 2.7308e-05, "loss": 8.7331, "step": 5673000 }, { "epoch": 45.39, "learning_rate": 2.7306000000000002e-05, "loss": 8.769, "step": 5673500 }, { "epoch": 45.39, "learning_rate": 2.7304e-05, "loss": 8.7511, "step": 5674000 }, { "epoch": 45.4, "learning_rate": 2.7302e-05, "loss": 8.7534, "step": 5674500 }, { "epoch": 45.4, "learning_rate": 2.7300000000000003e-05, "loss": 8.7478, "step": 5675000 }, { "epoch": 45.4, "learning_rate": 2.7298000000000002e-05, "loss": 8.7523, "step": 5675500 }, { "epoch": 45.41, "learning_rate": 2.7296e-05, "loss": 8.7617, "step": 5676000 }, { "epoch": 45.41, "learning_rate": 2.7294000000000003e-05, "loss": 8.7659, "step": 5676500 }, { "epoch": 45.42, "learning_rate": 2.7292000000000002e-05, "loss": 8.7483, "step": 5677000 }, { "epoch": 45.42, "learning_rate": 2.7289999999999998e-05, "loss": 8.7447, "step": 5677500 }, { "epoch": 45.42, "learning_rate": 2.7288000000000004e-05, "loss": 8.7578, "step": 5678000 }, { "epoch": 45.43, "learning_rate": 2.7286e-05, "loss": 8.7448, "step": 5678500 }, { "epoch": 45.43, "learning_rate": 2.7284000000000005e-05, "loss": 8.7563, "step": 5679000 }, { "epoch": 45.44, "learning_rate": 2.7282e-05, "loss": 8.7658, "step": 5679500 }, { "epoch": 45.44, "learning_rate": 2.728e-05, "loss": 8.7427, "step": 5680000 }, { "epoch": 45.44, "learning_rate": 2.7278000000000002e-05, "loss": 8.7484, "step": 5680500 }, { "epoch": 45.45, "learning_rate": 2.7276e-05, "loss": 8.75, "step": 5681000 }, { "epoch": 45.45, "learning_rate": 2.7274e-05, "loss": 8.766, "step": 5681500 }, { "epoch": 45.46, "learning_rate": 2.7272000000000002e-05, "loss": 8.7624, "step": 5682000 }, { "epoch": 45.46, "learning_rate": 2.727e-05, "loss": 8.7578, "step": 5682500 }, { "epoch": 45.46, "learning_rate": 2.7268e-05, "loss": 8.7428, "step": 5683000 }, { "epoch": 45.47, "learning_rate": 2.7266000000000003e-05, "loss": 8.7607, "step": 5683500 }, { "epoch": 45.47, "learning_rate": 2.7264000000000002e-05, "loss": 8.7651, "step": 5684000 }, { "epoch": 45.48, "learning_rate": 2.7261999999999997e-05, "loss": 8.7308, "step": 5684500 }, { "epoch": 45.48, "learning_rate": 2.7260000000000003e-05, "loss": 8.7589, "step": 5685000 }, { "epoch": 45.48, "learning_rate": 2.7258e-05, "loss": 8.7456, "step": 5685500 }, { "epoch": 45.49, "learning_rate": 2.7256000000000005e-05, "loss": 8.7471, "step": 5686000 }, { "epoch": 45.49, "learning_rate": 2.7254e-05, "loss": 8.7569, "step": 5686500 }, { "epoch": 45.5, "learning_rate": 2.7252e-05, "loss": 8.7514, "step": 5687000 }, { "epoch": 45.5, "learning_rate": 2.725e-05, "loss": 8.7715, "step": 5687500 }, { "epoch": 45.5, "learning_rate": 2.7248e-05, "loss": 8.7424, "step": 5688000 }, { "epoch": 45.51, "learning_rate": 2.7246e-05, "loss": 8.7587, "step": 5688500 }, { "epoch": 45.51, "learning_rate": 2.7244000000000002e-05, "loss": 8.737, "step": 5689000 }, { "epoch": 45.52, "learning_rate": 2.7242e-05, "loss": 8.7527, "step": 5689500 }, { "epoch": 45.52, "learning_rate": 2.724e-05, "loss": 8.7653, "step": 5690000 }, { "epoch": 45.52, "learning_rate": 2.7238000000000002e-05, "loss": 8.7794, "step": 5690500 }, { "epoch": 45.53, "learning_rate": 2.7236e-05, "loss": 8.762, "step": 5691000 }, { "epoch": 45.53, "learning_rate": 2.7234000000000004e-05, "loss": 8.7284, "step": 5691500 }, { "epoch": 45.54, "learning_rate": 2.7232000000000003e-05, "loss": 8.7737, "step": 5692000 }, { "epoch": 45.54, "learning_rate": 2.723e-05, "loss": 8.7497, "step": 5692500 }, { "epoch": 45.54, "learning_rate": 2.7228000000000004e-05, "loss": 8.7498, "step": 5693000 }, { "epoch": 45.55, "learning_rate": 2.7226e-05, "loss": 8.7342, "step": 5693500 }, { "epoch": 45.55, "learning_rate": 2.7224e-05, "loss": 8.7403, "step": 5694000 }, { "epoch": 45.56, "learning_rate": 2.7222000000000005e-05, "loss": 8.728, "step": 5694500 }, { "epoch": 45.56, "learning_rate": 2.722e-05, "loss": 8.7517, "step": 5695000 }, { "epoch": 45.56, "learning_rate": 2.7218e-05, "loss": 8.7671, "step": 5695500 }, { "epoch": 45.57, "learning_rate": 2.7216e-05, "loss": 8.7559, "step": 5696000 }, { "epoch": 45.57, "learning_rate": 2.7214e-05, "loss": 8.7531, "step": 5696500 }, { "epoch": 45.58, "learning_rate": 2.7212e-05, "loss": 8.7542, "step": 5697000 }, { "epoch": 45.58, "learning_rate": 2.7210000000000002e-05, "loss": 8.7501, "step": 5697500 }, { "epoch": 45.58, "learning_rate": 2.7208e-05, "loss": 8.7545, "step": 5698000 }, { "epoch": 45.59, "learning_rate": 2.7206000000000003e-05, "loss": 8.7579, "step": 5698500 }, { "epoch": 45.59, "learning_rate": 2.7204000000000002e-05, "loss": 8.7569, "step": 5699000 }, { "epoch": 45.6, "learning_rate": 2.7201999999999998e-05, "loss": 8.7476, "step": 5699500 }, { "epoch": 45.6, "learning_rate": 2.7200000000000004e-05, "loss": 8.756, "step": 5700000 }, { "epoch": 45.6, "learning_rate": 2.7198000000000003e-05, "loss": 8.7581, "step": 5700500 }, { "epoch": 45.61, "learning_rate": 2.7196e-05, "loss": 8.7526, "step": 5701000 }, { "epoch": 45.61, "learning_rate": 2.7194000000000004e-05, "loss": 8.7295, "step": 5701500 }, { "epoch": 45.62, "learning_rate": 2.7192e-05, "loss": 8.7598, "step": 5702000 }, { "epoch": 45.62, "learning_rate": 2.719e-05, "loss": 8.773, "step": 5702500 }, { "epoch": 45.62, "learning_rate": 2.7188e-05, "loss": 8.7634, "step": 5703000 }, { "epoch": 45.63, "learning_rate": 2.7186e-05, "loss": 8.753, "step": 5703500 }, { "epoch": 45.63, "learning_rate": 2.7184000000000003e-05, "loss": 8.7565, "step": 5704000 }, { "epoch": 45.64, "learning_rate": 2.7182e-05, "loss": 8.7503, "step": 5704500 }, { "epoch": 45.64, "learning_rate": 2.718e-05, "loss": 8.7341, "step": 5705000 }, { "epoch": 45.64, "learning_rate": 2.7178000000000003e-05, "loss": 8.7609, "step": 5705500 }, { "epoch": 45.65, "learning_rate": 2.7176000000000002e-05, "loss": 8.75, "step": 5706000 }, { "epoch": 45.65, "learning_rate": 2.7174e-05, "loss": 8.7364, "step": 5706500 }, { "epoch": 45.66, "learning_rate": 2.7172000000000003e-05, "loss": 8.7303, "step": 5707000 }, { "epoch": 45.66, "learning_rate": 2.7170000000000002e-05, "loss": 8.7308, "step": 5707500 }, { "epoch": 45.66, "learning_rate": 2.7167999999999998e-05, "loss": 8.7416, "step": 5708000 }, { "epoch": 45.67, "learning_rate": 2.7166000000000004e-05, "loss": 8.7528, "step": 5708500 }, { "epoch": 45.67, "learning_rate": 2.7164e-05, "loss": 8.7401, "step": 5709000 }, { "epoch": 45.68, "learning_rate": 2.7162e-05, "loss": 8.7599, "step": 5709500 }, { "epoch": 45.68, "learning_rate": 2.716e-05, "loss": 8.7515, "step": 5710000 }, { "epoch": 45.68, "learning_rate": 2.7158e-05, "loss": 8.7505, "step": 5710500 }, { "epoch": 45.69, "learning_rate": 2.7156000000000002e-05, "loss": 8.7393, "step": 5711000 }, { "epoch": 45.69, "learning_rate": 2.7154e-05, "loss": 8.7583, "step": 5711500 }, { "epoch": 45.7, "learning_rate": 2.7152e-05, "loss": 8.7512, "step": 5712000 }, { "epoch": 45.7, "learning_rate": 2.7150000000000003e-05, "loss": 8.7752, "step": 5712500 }, { "epoch": 45.7, "learning_rate": 2.7148e-05, "loss": 8.7246, "step": 5713000 }, { "epoch": 45.71, "learning_rate": 2.7146e-05, "loss": 8.7286, "step": 5713500 }, { "epoch": 45.71, "learning_rate": 2.7144000000000003e-05, "loss": 8.762, "step": 5714000 }, { "epoch": 45.72, "learning_rate": 2.7142000000000002e-05, "loss": 8.7629, "step": 5714500 }, { "epoch": 45.72, "learning_rate": 2.7139999999999998e-05, "loss": 8.7537, "step": 5715000 }, { "epoch": 45.72, "learning_rate": 2.7138000000000003e-05, "loss": 8.7325, "step": 5715500 }, { "epoch": 45.73, "learning_rate": 2.7136e-05, "loss": 8.7564, "step": 5716000 }, { "epoch": 45.73, "learning_rate": 2.7134000000000005e-05, "loss": 8.7518, "step": 5716500 }, { "epoch": 45.74, "learning_rate": 2.7132e-05, "loss": 8.7531, "step": 5717000 }, { "epoch": 45.74, "learning_rate": 2.713e-05, "loss": 8.7565, "step": 5717500 }, { "epoch": 45.74, "learning_rate": 2.7128000000000002e-05, "loss": 8.7471, "step": 5718000 }, { "epoch": 45.75, "learning_rate": 2.7126e-05, "loss": 8.7442, "step": 5718500 }, { "epoch": 45.75, "learning_rate": 2.7124e-05, "loss": 8.7349, "step": 5719000 }, { "epoch": 45.76, "learning_rate": 2.7122000000000002e-05, "loss": 8.7696, "step": 5719500 }, { "epoch": 45.76, "learning_rate": 2.712e-05, "loss": 8.7241, "step": 5720000 }, { "epoch": 45.76, "learning_rate": 2.7118e-05, "loss": 8.7427, "step": 5720500 }, { "epoch": 45.77, "learning_rate": 2.7116000000000003e-05, "loss": 8.742, "step": 5721000 }, { "epoch": 45.77, "learning_rate": 2.7114e-05, "loss": 8.7448, "step": 5721500 }, { "epoch": 45.78, "learning_rate": 2.7112000000000004e-05, "loss": 8.7447, "step": 5722000 }, { "epoch": 45.78, "learning_rate": 2.7110000000000003e-05, "loss": 8.7504, "step": 5722500 }, { "epoch": 45.78, "learning_rate": 2.7108e-05, "loss": 8.7576, "step": 5723000 }, { "epoch": 45.79, "learning_rate": 2.7106000000000004e-05, "loss": 8.7549, "step": 5723500 }, { "epoch": 45.79, "learning_rate": 2.7104e-05, "loss": 8.7399, "step": 5724000 }, { "epoch": 45.8, "learning_rate": 2.7102e-05, "loss": 8.7536, "step": 5724500 }, { "epoch": 45.8, "learning_rate": 2.7100000000000005e-05, "loss": 8.7464, "step": 5725000 }, { "epoch": 45.8, "learning_rate": 2.7098e-05, "loss": 8.7495, "step": 5725500 }, { "epoch": 45.81, "learning_rate": 2.7096e-05, "loss": 8.7249, "step": 5726000 }, { "epoch": 45.81, "learning_rate": 2.7094000000000002e-05, "loss": 8.7563, "step": 5726500 }, { "epoch": 45.82, "learning_rate": 2.7092e-05, "loss": 8.7426, "step": 5727000 }, { "epoch": 45.82, "learning_rate": 2.709e-05, "loss": 8.7689, "step": 5727500 }, { "epoch": 45.82, "learning_rate": 2.7088000000000002e-05, "loss": 8.7647, "step": 5728000 }, { "epoch": 45.83, "learning_rate": 2.7086e-05, "loss": 8.7298, "step": 5728500 }, { "epoch": 45.83, "learning_rate": 2.7084000000000004e-05, "loss": 8.7655, "step": 5729000 }, { "epoch": 45.84, "learning_rate": 2.7082000000000003e-05, "loss": 8.7471, "step": 5729500 }, { "epoch": 45.84, "learning_rate": 2.7079999999999998e-05, "loss": 8.7496, "step": 5730000 }, { "epoch": 45.84, "learning_rate": 2.7078000000000004e-05, "loss": 8.7589, "step": 5730500 }, { "epoch": 45.85, "learning_rate": 2.7076000000000003e-05, "loss": 8.7401, "step": 5731000 }, { "epoch": 45.85, "learning_rate": 2.7074e-05, "loss": 8.7549, "step": 5731500 }, { "epoch": 45.86, "learning_rate": 2.7072000000000004e-05, "loss": 8.7424, "step": 5732000 }, { "epoch": 45.86, "learning_rate": 2.707e-05, "loss": 8.7539, "step": 5732500 }, { "epoch": 45.86, "learning_rate": 2.7068e-05, "loss": 8.7414, "step": 5733000 }, { "epoch": 45.87, "learning_rate": 2.7066e-05, "loss": 8.7383, "step": 5733500 }, { "epoch": 45.87, "learning_rate": 2.7064e-05, "loss": 8.7264, "step": 5734000 }, { "epoch": 45.88, "learning_rate": 2.7062000000000003e-05, "loss": 8.7442, "step": 5734500 }, { "epoch": 45.88, "learning_rate": 2.7060000000000002e-05, "loss": 8.7359, "step": 5735000 }, { "epoch": 45.88, "learning_rate": 2.7058e-05, "loss": 8.7462, "step": 5735500 }, { "epoch": 45.89, "learning_rate": 2.7056000000000003e-05, "loss": 8.7562, "step": 5736000 }, { "epoch": 45.89, "learning_rate": 2.7054000000000002e-05, "loss": 8.7568, "step": 5736500 }, { "epoch": 45.9, "learning_rate": 2.7052e-05, "loss": 8.7426, "step": 5737000 }, { "epoch": 45.9, "learning_rate": 2.7050000000000004e-05, "loss": 8.7565, "step": 5737500 }, { "epoch": 45.9, "learning_rate": 2.7048000000000003e-05, "loss": 8.75, "step": 5738000 }, { "epoch": 45.91, "learning_rate": 2.7045999999999998e-05, "loss": 8.7641, "step": 5738500 }, { "epoch": 45.91, "learning_rate": 2.7044000000000004e-05, "loss": 8.7573, "step": 5739000 }, { "epoch": 45.92, "learning_rate": 2.7042e-05, "loss": 8.7687, "step": 5739500 }, { "epoch": 45.92, "learning_rate": 2.704e-05, "loss": 8.7415, "step": 5740000 }, { "epoch": 45.92, "learning_rate": 2.7038e-05, "loss": 8.7469, "step": 5740500 }, { "epoch": 45.93, "learning_rate": 2.7036e-05, "loss": 8.7578, "step": 5741000 }, { "epoch": 45.93, "learning_rate": 2.7034000000000002e-05, "loss": 8.745, "step": 5741500 }, { "epoch": 45.94, "learning_rate": 2.7032e-05, "loss": 8.7687, "step": 5742000 }, { "epoch": 45.94, "learning_rate": 2.703e-05, "loss": 8.7399, "step": 5742500 }, { "epoch": 45.94, "learning_rate": 2.7028000000000003e-05, "loss": 8.7286, "step": 5743000 }, { "epoch": 45.95, "learning_rate": 2.7026000000000002e-05, "loss": 8.7645, "step": 5743500 }, { "epoch": 45.95, "learning_rate": 2.7024e-05, "loss": 8.7479, "step": 5744000 }, { "epoch": 45.96, "learning_rate": 2.7022000000000003e-05, "loss": 8.7498, "step": 5744500 }, { "epoch": 45.96, "learning_rate": 2.7020000000000002e-05, "loss": 8.7465, "step": 5745000 }, { "epoch": 45.96, "learning_rate": 2.7017999999999998e-05, "loss": 8.7339, "step": 5745500 }, { "epoch": 45.97, "learning_rate": 2.7016000000000004e-05, "loss": 8.7568, "step": 5746000 }, { "epoch": 45.97, "learning_rate": 2.7014e-05, "loss": 8.7516, "step": 5746500 }, { "epoch": 45.98, "learning_rate": 2.7012000000000005e-05, "loss": 8.7502, "step": 5747000 }, { "epoch": 45.98, "learning_rate": 2.701e-05, "loss": 8.7439, "step": 5747500 }, { "epoch": 45.98, "learning_rate": 2.7008e-05, "loss": 8.7227, "step": 5748000 }, { "epoch": 45.99, "learning_rate": 2.7006000000000002e-05, "loss": 8.7394, "step": 5748500 }, { "epoch": 45.99, "learning_rate": 2.7004e-05, "loss": 8.7544, "step": 5749000 }, { "epoch": 46.0, "learning_rate": 2.7002e-05, "loss": 8.7551, "step": 5749500 }, { "epoch": 46.0, "learning_rate": 2.7000000000000002e-05, "loss": 8.7557, "step": 5750000 }, { "epoch": 46.0, "learning_rate": 2.6998e-05, "loss": 8.7367, "step": 5750500 }, { "epoch": 46.01, "learning_rate": 2.6996e-05, "loss": 8.7424, "step": 5751000 }, { "epoch": 46.01, "learning_rate": 2.6994000000000003e-05, "loss": 8.7407, "step": 5751500 }, { "epoch": 46.02, "learning_rate": 2.6992000000000002e-05, "loss": 8.7518, "step": 5752000 }, { "epoch": 46.02, "learning_rate": 2.6989999999999997e-05, "loss": 8.7337, "step": 5752500 }, { "epoch": 46.02, "learning_rate": 2.6988000000000003e-05, "loss": 8.7544, "step": 5753000 }, { "epoch": 46.03, "learning_rate": 2.6986e-05, "loss": 8.7565, "step": 5753500 }, { "epoch": 46.03, "learning_rate": 2.6984000000000005e-05, "loss": 8.7517, "step": 5754000 }, { "epoch": 46.04, "learning_rate": 2.6982e-05, "loss": 8.7513, "step": 5754500 }, { "epoch": 46.04, "learning_rate": 2.698e-05, "loss": 8.7448, "step": 5755000 }, { "epoch": 46.04, "learning_rate": 2.6978000000000005e-05, "loss": 8.7548, "step": 5755500 }, { "epoch": 46.05, "learning_rate": 2.6976e-05, "loss": 8.7665, "step": 5756000 }, { "epoch": 46.05, "learning_rate": 2.6974e-05, "loss": 8.7515, "step": 5756500 }, { "epoch": 46.06, "learning_rate": 2.6972000000000002e-05, "loss": 8.7517, "step": 5757000 }, { "epoch": 46.06, "learning_rate": 2.697e-05, "loss": 8.7448, "step": 5757500 }, { "epoch": 46.06, "learning_rate": 2.6968e-05, "loss": 8.7294, "step": 5758000 }, { "epoch": 46.07, "learning_rate": 2.6966000000000002e-05, "loss": 8.7442, "step": 5758500 }, { "epoch": 46.07, "learning_rate": 2.6964e-05, "loss": 8.7542, "step": 5759000 }, { "epoch": 46.08, "learning_rate": 2.6962000000000004e-05, "loss": 8.7379, "step": 5759500 }, { "epoch": 46.08, "learning_rate": 2.6960000000000003e-05, "loss": 8.7337, "step": 5760000 }, { "epoch": 46.08, "learning_rate": 2.6958e-05, "loss": 8.7505, "step": 5760500 }, { "epoch": 46.09, "learning_rate": 2.6956000000000004e-05, "loss": 8.7486, "step": 5761000 }, { "epoch": 46.09, "learning_rate": 2.6954000000000003e-05, "loss": 8.7579, "step": 5761500 }, { "epoch": 46.1, "learning_rate": 2.6952e-05, "loss": 8.7604, "step": 5762000 }, { "epoch": 46.1, "learning_rate": 2.6950000000000005e-05, "loss": 8.7413, "step": 5762500 }, { "epoch": 46.1, "learning_rate": 2.6948e-05, "loss": 8.7397, "step": 5763000 }, { "epoch": 46.11, "learning_rate": 2.6946e-05, "loss": 8.7546, "step": 5763500 }, { "epoch": 46.11, "learning_rate": 2.6944e-05, "loss": 8.7336, "step": 5764000 }, { "epoch": 46.12, "learning_rate": 2.6942e-05, "loss": 8.7443, "step": 5764500 }, { "epoch": 46.12, "learning_rate": 2.694e-05, "loss": 8.7536, "step": 5765000 }, { "epoch": 46.12, "learning_rate": 2.6938000000000002e-05, "loss": 8.7478, "step": 5765500 }, { "epoch": 46.13, "learning_rate": 2.6936e-05, "loss": 8.7496, "step": 5766000 }, { "epoch": 46.13, "learning_rate": 2.6934000000000003e-05, "loss": 8.7481, "step": 5766500 }, { "epoch": 46.14, "learning_rate": 2.6932000000000002e-05, "loss": 8.757, "step": 5767000 }, { "epoch": 46.14, "learning_rate": 2.693e-05, "loss": 8.7575, "step": 5767500 }, { "epoch": 46.14, "learning_rate": 2.6928000000000004e-05, "loss": 8.7331, "step": 5768000 }, { "epoch": 46.15, "learning_rate": 2.6926000000000003e-05, "loss": 8.7469, "step": 5768500 }, { "epoch": 46.15, "learning_rate": 2.6924e-05, "loss": 8.7278, "step": 5769000 }, { "epoch": 46.16, "learning_rate": 2.6922000000000004e-05, "loss": 8.7525, "step": 5769500 }, { "epoch": 46.16, "learning_rate": 2.692e-05, "loss": 8.7467, "step": 5770000 }, { "epoch": 46.16, "learning_rate": 2.6918e-05, "loss": 8.7415, "step": 5770500 }, { "epoch": 46.17, "learning_rate": 2.6916e-05, "loss": 8.7323, "step": 5771000 }, { "epoch": 46.17, "learning_rate": 2.6914e-05, "loss": 8.7679, "step": 5771500 }, { "epoch": 46.18, "learning_rate": 2.6912000000000003e-05, "loss": 8.7431, "step": 5772000 }, { "epoch": 46.18, "learning_rate": 2.691e-05, "loss": 8.7584, "step": 5772500 }, { "epoch": 46.18, "learning_rate": 2.6908e-05, "loss": 8.7535, "step": 5773000 }, { "epoch": 46.19, "learning_rate": 2.6906000000000003e-05, "loss": 8.7462, "step": 5773500 }, { "epoch": 46.19, "learning_rate": 2.6904000000000002e-05, "loss": 8.7516, "step": 5774000 }, { "epoch": 46.2, "learning_rate": 2.6902e-05, "loss": 8.7497, "step": 5774500 }, { "epoch": 46.2, "learning_rate": 2.6900000000000003e-05, "loss": 8.7255, "step": 5775000 }, { "epoch": 46.2, "learning_rate": 2.6898000000000002e-05, "loss": 8.7567, "step": 5775500 }, { "epoch": 46.21, "learning_rate": 2.6895999999999998e-05, "loss": 8.7388, "step": 5776000 }, { "epoch": 46.21, "learning_rate": 2.6894000000000004e-05, "loss": 8.7545, "step": 5776500 }, { "epoch": 46.22, "learning_rate": 2.6892e-05, "loss": 8.7426, "step": 5777000 }, { "epoch": 46.22, "learning_rate": 2.689e-05, "loss": 8.7602, "step": 5777500 }, { "epoch": 46.22, "learning_rate": 2.6888e-05, "loss": 8.7569, "step": 5778000 }, { "epoch": 46.23, "learning_rate": 2.6886e-05, "loss": 8.7494, "step": 5778500 }, { "epoch": 46.23, "learning_rate": 2.6884000000000002e-05, "loss": 8.755, "step": 5779000 }, { "epoch": 46.24, "learning_rate": 2.6882e-05, "loss": 8.7509, "step": 5779500 }, { "epoch": 46.24, "learning_rate": 2.688e-05, "loss": 8.7505, "step": 5780000 }, { "epoch": 46.24, "learning_rate": 2.6878000000000003e-05, "loss": 8.7278, "step": 5780500 }, { "epoch": 46.25, "learning_rate": 2.6876e-05, "loss": 8.7462, "step": 5781000 }, { "epoch": 46.25, "learning_rate": 2.6874e-05, "loss": 8.7565, "step": 5781500 }, { "epoch": 46.26, "learning_rate": 2.6872000000000003e-05, "loss": 8.7647, "step": 5782000 }, { "epoch": 46.26, "learning_rate": 2.6870000000000002e-05, "loss": 8.7543, "step": 5782500 }, { "epoch": 46.26, "learning_rate": 2.6867999999999998e-05, "loss": 8.7625, "step": 5783000 }, { "epoch": 46.27, "learning_rate": 2.6866000000000003e-05, "loss": 8.7526, "step": 5783500 }, { "epoch": 46.27, "learning_rate": 2.6864e-05, "loss": 8.7373, "step": 5784000 }, { "epoch": 46.28, "learning_rate": 2.6862000000000005e-05, "loss": 8.773, "step": 5784500 }, { "epoch": 46.28, "learning_rate": 2.686e-05, "loss": 8.7549, "step": 5785000 }, { "epoch": 46.28, "learning_rate": 2.6858e-05, "loss": 8.7392, "step": 5785500 }, { "epoch": 46.29, "learning_rate": 2.6856000000000002e-05, "loss": 8.7625, "step": 5786000 }, { "epoch": 46.29, "learning_rate": 2.6854e-05, "loss": 8.7509, "step": 5786500 }, { "epoch": 46.3, "learning_rate": 2.6852e-05, "loss": 8.752, "step": 5787000 }, { "epoch": 46.3, "learning_rate": 2.6850000000000002e-05, "loss": 8.7579, "step": 5787500 }, { "epoch": 46.3, "learning_rate": 2.6848e-05, "loss": 8.7452, "step": 5788000 }, { "epoch": 46.31, "learning_rate": 2.6846e-05, "loss": 8.7376, "step": 5788500 }, { "epoch": 46.31, "learning_rate": 2.6844000000000003e-05, "loss": 8.7538, "step": 5789000 }, { "epoch": 46.32, "learning_rate": 2.6842e-05, "loss": 8.7397, "step": 5789500 }, { "epoch": 46.32, "learning_rate": 2.6840000000000004e-05, "loss": 8.7583, "step": 5790000 } ], "max_steps": 12500000, "num_train_epochs": 100, "total_flos": 5.855308347472392e+18, "trial_name": null, "trial_params": null }