{ "best_metric": null, "best_model_checkpoint": null, "epoch": 12.48, "global_step": 1560000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.9998e-05, "loss": 167.4813, "step": 500 }, { "epoch": 0.01, "learning_rate": 4.999600000000001e-05, "loss": 339.0435, "step": 1000 }, { "epoch": 0.01, "learning_rate": 4.9994e-05, "loss": 460.3145, "step": 1500 }, { "epoch": 0.02, "learning_rate": 4.9992e-05, "loss": 391.4744, "step": 2000 }, { "epoch": 0.02, "learning_rate": 4.999e-05, "loss": 429.6304, "step": 2500 }, { "epoch": 0.02, "learning_rate": 4.9988e-05, "loss": 402.7589, "step": 3000 }, { "epoch": 0.03, "learning_rate": 4.9986000000000006e-05, "loss": 346.8141, "step": 3500 }, { "epoch": 0.03, "learning_rate": 4.9984e-05, "loss": 269.2469, "step": 4000 }, { "epoch": 0.04, "learning_rate": 4.9982000000000004e-05, "loss": 249.9555, "step": 4500 }, { "epoch": 0.04, "learning_rate": 4.9980000000000006e-05, "loss": 282.5226, "step": 5000 }, { "epoch": 0.04, "learning_rate": 4.9978e-05, "loss": 276.7962, "step": 5500 }, { "epoch": 0.05, "learning_rate": 4.9976000000000004e-05, "loss": 239.1784, "step": 6000 }, { "epoch": 0.05, "learning_rate": 4.9974000000000006e-05, "loss": 214.4642, "step": 6500 }, { "epoch": 0.06, "learning_rate": 4.9972e-05, "loss": 206.2337, "step": 7000 }, { "epoch": 0.06, "learning_rate": 4.997e-05, "loss": 202.7909, "step": 7500 }, { "epoch": 0.06, "learning_rate": 4.996800000000001e-05, "loss": 207.2598, "step": 8000 }, { "epoch": 0.07, "learning_rate": 4.9966e-05, "loss": 198.4287, "step": 8500 }, { "epoch": 0.07, "learning_rate": 4.9964e-05, "loss": 202.6692, "step": 9000 }, { "epoch": 0.08, "learning_rate": 4.9962e-05, "loss": 172.0607, "step": 9500 }, { "epoch": 0.08, "learning_rate": 4.996e-05, "loss": 152.3532, "step": 10000 }, { "epoch": 0.08, "learning_rate": 4.9958000000000005e-05, "loss": 141.7099, "step": 10500 }, { "epoch": 0.09, "learning_rate": 4.9956e-05, "loss": 131.85, "step": 11000 }, { "epoch": 0.09, "learning_rate": 4.9954e-05, "loss": 131.9644, "step": 11500 }, { "epoch": 0.1, "learning_rate": 4.9952000000000006e-05, "loss": 120.3319, "step": 12000 }, { "epoch": 0.1, "learning_rate": 4.995e-05, "loss": 125.1385, "step": 12500 }, { "epoch": 0.1, "learning_rate": 4.9948000000000004e-05, "loss": 102.6195, "step": 13000 }, { "epoch": 0.11, "learning_rate": 4.9946000000000006e-05, "loss": 97.9456, "step": 13500 }, { "epoch": 0.11, "learning_rate": 4.9944e-05, "loss": 90.2301, "step": 14000 }, { "epoch": 0.12, "learning_rate": 4.9942e-05, "loss": 85.6502, "step": 14500 }, { "epoch": 0.12, "learning_rate": 4.9940000000000006e-05, "loss": 71.9912, "step": 15000 }, { "epoch": 0.12, "learning_rate": 4.9938e-05, "loss": 66.2195, "step": 15500 }, { "epoch": 0.13, "learning_rate": 4.9936000000000004e-05, "loss": 59.0722, "step": 16000 }, { "epoch": 0.13, "learning_rate": 4.9934e-05, "loss": 52.456, "step": 16500 }, { "epoch": 0.14, "learning_rate": 4.9932e-05, "loss": 44.7411, "step": 17000 }, { "epoch": 0.14, "learning_rate": 4.9930000000000005e-05, "loss": 37.4041, "step": 17500 }, { "epoch": 0.14, "learning_rate": 4.9928e-05, "loss": 33.0052, "step": 18000 }, { "epoch": 0.15, "learning_rate": 4.9926e-05, "loss": 27.9028, "step": 18500 }, { "epoch": 0.15, "learning_rate": 4.9924000000000005e-05, "loss": 23.4986, "step": 19000 }, { "epoch": 0.16, "learning_rate": 4.9922e-05, "loss": 22.3736, "step": 19500 }, { "epoch": 0.16, "learning_rate": 4.992e-05, "loss": 19.143, "step": 20000 }, { "epoch": 0.16, "learning_rate": 4.9918000000000006e-05, "loss": 16.728, "step": 20500 }, { "epoch": 0.17, "learning_rate": 4.9916e-05, "loss": 14.6861, "step": 21000 }, { "epoch": 0.17, "learning_rate": 4.9914e-05, "loss": 13.893, "step": 21500 }, { "epoch": 0.18, "learning_rate": 4.9912000000000006e-05, "loss": 13.4733, "step": 22000 }, { "epoch": 0.18, "learning_rate": 4.991e-05, "loss": 12.3987, "step": 22500 }, { "epoch": 0.18, "learning_rate": 4.9908000000000004e-05, "loss": 11.5524, "step": 23000 }, { "epoch": 0.19, "learning_rate": 4.9906e-05, "loss": 11.2799, "step": 23500 }, { "epoch": 0.19, "learning_rate": 4.9904e-05, "loss": 10.9152, "step": 24000 }, { "epoch": 0.2, "learning_rate": 4.9902000000000004e-05, "loss": 10.5949, "step": 24500 }, { "epoch": 0.2, "learning_rate": 4.99e-05, "loss": 10.8094, "step": 25000 }, { "epoch": 0.2, "learning_rate": 4.9898e-05, "loss": 10.2322, "step": 25500 }, { "epoch": 0.21, "learning_rate": 4.9896000000000005e-05, "loss": 10.0505, "step": 26000 }, { "epoch": 0.21, "learning_rate": 4.9894e-05, "loss": 10.1343, "step": 26500 }, { "epoch": 0.22, "learning_rate": 4.9892e-05, "loss": 9.9442, "step": 27000 }, { "epoch": 0.22, "learning_rate": 4.9890000000000005e-05, "loss": 9.7331, "step": 27500 }, { "epoch": 0.22, "learning_rate": 4.9888e-05, "loss": 9.7229, "step": 28000 }, { "epoch": 0.23, "learning_rate": 4.9886e-05, "loss": 9.7828, "step": 28500 }, { "epoch": 0.23, "learning_rate": 4.9884000000000006e-05, "loss": 9.7042, "step": 29000 }, { "epoch": 0.24, "learning_rate": 4.9882e-05, "loss": 9.6642, "step": 29500 }, { "epoch": 0.24, "learning_rate": 4.9880000000000004e-05, "loss": 9.8011, "step": 30000 }, { "epoch": 0.24, "learning_rate": 4.9878e-05, "loss": 9.7558, "step": 30500 }, { "epoch": 0.25, "learning_rate": 4.9876e-05, "loss": 9.8933, "step": 31000 }, { "epoch": 0.25, "learning_rate": 4.9874000000000004e-05, "loss": 9.6691, "step": 31500 }, { "epoch": 0.26, "learning_rate": 4.9872e-05, "loss": 9.5504, "step": 32000 }, { "epoch": 0.26, "learning_rate": 4.987e-05, "loss": 9.4497, "step": 32500 }, { "epoch": 0.26, "learning_rate": 4.9868000000000004e-05, "loss": 9.4204, "step": 33000 }, { "epoch": 0.27, "learning_rate": 4.9866e-05, "loss": 9.3652, "step": 33500 }, { "epoch": 0.27, "learning_rate": 4.9864e-05, "loss": 9.7497, "step": 34000 }, { "epoch": 0.28, "learning_rate": 4.9862000000000005e-05, "loss": 9.5389, "step": 34500 }, { "epoch": 0.28, "learning_rate": 4.986e-05, "loss": 9.6565, "step": 35000 }, { "epoch": 0.28, "learning_rate": 4.9858e-05, "loss": 9.3094, "step": 35500 }, { "epoch": 0.29, "learning_rate": 4.9856000000000005e-05, "loss": 9.3693, "step": 36000 }, { "epoch": 0.29, "learning_rate": 4.9854e-05, "loss": 9.3462, "step": 36500 }, { "epoch": 0.3, "learning_rate": 4.9852e-05, "loss": 9.4076, "step": 37000 }, { "epoch": 0.3, "learning_rate": 4.9850000000000006e-05, "loss": 9.2531, "step": 37500 }, { "epoch": 0.3, "learning_rate": 4.9848e-05, "loss": 9.227, "step": 38000 }, { "epoch": 0.31, "learning_rate": 4.9846000000000004e-05, "loss": 9.3106, "step": 38500 }, { "epoch": 0.31, "learning_rate": 4.9844e-05, "loss": 9.2394, "step": 39000 }, { "epoch": 0.32, "learning_rate": 4.9842e-05, "loss": 9.2606, "step": 39500 }, { "epoch": 0.32, "learning_rate": 4.9840000000000004e-05, "loss": 9.2406, "step": 40000 }, { "epoch": 0.32, "learning_rate": 4.9838e-05, "loss": 9.2116, "step": 40500 }, { "epoch": 0.33, "learning_rate": 4.9836e-05, "loss": 9.2192, "step": 41000 }, { "epoch": 0.33, "learning_rate": 4.9834000000000004e-05, "loss": 9.2842, "step": 41500 }, { "epoch": 0.34, "learning_rate": 4.9832e-05, "loss": 9.2024, "step": 42000 }, { "epoch": 0.34, "learning_rate": 4.983e-05, "loss": 9.3669, "step": 42500 }, { "epoch": 0.34, "learning_rate": 4.9828000000000005e-05, "loss": 9.3505, "step": 43000 }, { "epoch": 0.35, "learning_rate": 4.9826e-05, "loss": 9.2114, "step": 43500 }, { "epoch": 0.35, "learning_rate": 4.9824e-05, "loss": 9.1553, "step": 44000 }, { "epoch": 0.36, "learning_rate": 4.9822000000000005e-05, "loss": 9.1217, "step": 44500 }, { "epoch": 0.36, "learning_rate": 4.982e-05, "loss": 9.2315, "step": 45000 }, { "epoch": 0.36, "learning_rate": 4.9818e-05, "loss": 9.3219, "step": 45500 }, { "epoch": 0.37, "learning_rate": 4.9816e-05, "loss": 9.0737, "step": 46000 }, { "epoch": 0.37, "learning_rate": 4.981400000000001e-05, "loss": 9.1281, "step": 46500 }, { "epoch": 0.38, "learning_rate": 4.9812000000000004e-05, "loss": 9.1292, "step": 47000 }, { "epoch": 0.38, "learning_rate": 4.981e-05, "loss": 9.2538, "step": 47500 }, { "epoch": 0.38, "learning_rate": 4.9808e-05, "loss": 9.1272, "step": 48000 }, { "epoch": 0.39, "learning_rate": 4.9806000000000004e-05, "loss": 9.225, "step": 48500 }, { "epoch": 0.39, "learning_rate": 4.9804e-05, "loss": 9.0919, "step": 49000 }, { "epoch": 0.4, "learning_rate": 4.9802e-05, "loss": 9.122, "step": 49500 }, { "epoch": 0.4, "learning_rate": 4.9800000000000004e-05, "loss": 9.1869, "step": 50000 }, { "epoch": 0.4, "learning_rate": 4.9798e-05, "loss": 9.0186, "step": 50500 }, { "epoch": 0.41, "learning_rate": 4.9796e-05, "loss": 9.1333, "step": 51000 }, { "epoch": 0.41, "learning_rate": 4.9794000000000005e-05, "loss": 9.1123, "step": 51500 }, { "epoch": 0.42, "learning_rate": 4.9792e-05, "loss": 9.1951, "step": 52000 }, { "epoch": 0.42, "learning_rate": 4.979e-05, "loss": 9.1012, "step": 52500 }, { "epoch": 0.42, "learning_rate": 4.9788e-05, "loss": 8.9948, "step": 53000 }, { "epoch": 0.43, "learning_rate": 4.978600000000001e-05, "loss": 9.2808, "step": 53500 }, { "epoch": 0.43, "learning_rate": 4.9784e-05, "loss": 9.0797, "step": 54000 }, { "epoch": 0.44, "learning_rate": 4.9782e-05, "loss": 9.0534, "step": 54500 }, { "epoch": 0.44, "learning_rate": 4.978e-05, "loss": 8.995, "step": 55000 }, { "epoch": 0.44, "learning_rate": 4.9778000000000004e-05, "loss": 9.0096, "step": 55500 }, { "epoch": 0.45, "learning_rate": 4.9776e-05, "loss": 8.9898, "step": 56000 }, { "epoch": 0.45, "learning_rate": 4.9774e-05, "loss": 8.9447, "step": 56500 }, { "epoch": 0.46, "learning_rate": 4.9772000000000004e-05, "loss": 9.2217, "step": 57000 }, { "epoch": 0.46, "learning_rate": 4.977e-05, "loss": 9.0623, "step": 57500 }, { "epoch": 0.46, "learning_rate": 4.9768e-05, "loss": 9.0159, "step": 58000 }, { "epoch": 0.47, "learning_rate": 4.9766000000000004e-05, "loss": 9.0159, "step": 58500 }, { "epoch": 0.47, "learning_rate": 4.976400000000001e-05, "loss": 8.938, "step": 59000 }, { "epoch": 0.48, "learning_rate": 4.9762e-05, "loss": 8.9805, "step": 59500 }, { "epoch": 0.48, "learning_rate": 4.976e-05, "loss": 8.9252, "step": 60000 }, { "epoch": 0.48, "learning_rate": 4.975800000000001e-05, "loss": 9.0771, "step": 60500 }, { "epoch": 0.49, "learning_rate": 4.9756e-05, "loss": 9.0443, "step": 61000 }, { "epoch": 0.49, "learning_rate": 4.9754e-05, "loss": 9.1127, "step": 61500 }, { "epoch": 0.5, "learning_rate": 4.975200000000001e-05, "loss": 8.9619, "step": 62000 }, { "epoch": 0.5, "learning_rate": 4.975e-05, "loss": 8.9725, "step": 62500 }, { "epoch": 0.5, "learning_rate": 4.9748e-05, "loss": 8.9863, "step": 63000 }, { "epoch": 0.51, "learning_rate": 4.9746e-05, "loss": 8.9595, "step": 63500 }, { "epoch": 0.51, "learning_rate": 4.9744000000000003e-05, "loss": 8.8928, "step": 64000 }, { "epoch": 0.52, "learning_rate": 4.9742e-05, "loss": 9.0314, "step": 64500 }, { "epoch": 0.52, "learning_rate": 4.974e-05, "loss": 8.9132, "step": 65000 }, { "epoch": 0.52, "learning_rate": 4.9738000000000004e-05, "loss": 8.9137, "step": 65500 }, { "epoch": 0.53, "learning_rate": 4.9736000000000006e-05, "loss": 8.9657, "step": 66000 }, { "epoch": 0.53, "learning_rate": 4.9734e-05, "loss": 8.9497, "step": 66500 }, { "epoch": 0.54, "learning_rate": 4.9732e-05, "loss": 8.8762, "step": 67000 }, { "epoch": 0.54, "learning_rate": 4.973000000000001e-05, "loss": 8.8903, "step": 67500 }, { "epoch": 0.54, "learning_rate": 4.9728e-05, "loss": 8.8748, "step": 68000 }, { "epoch": 0.55, "learning_rate": 4.9726e-05, "loss": 8.8706, "step": 68500 }, { "epoch": 0.55, "learning_rate": 4.972400000000001e-05, "loss": 8.8593, "step": 69000 }, { "epoch": 0.56, "learning_rate": 4.9722e-05, "loss": 8.9197, "step": 69500 }, { "epoch": 0.56, "learning_rate": 4.972e-05, "loss": 8.8718, "step": 70000 }, { "epoch": 0.56, "learning_rate": 4.9718e-05, "loss": 8.8917, "step": 70500 }, { "epoch": 0.57, "learning_rate": 4.9716e-05, "loss": 8.859, "step": 71000 }, { "epoch": 0.57, "learning_rate": 4.9714000000000005e-05, "loss": 9.1695, "step": 71500 }, { "epoch": 0.58, "learning_rate": 4.9712e-05, "loss": 8.8216, "step": 72000 }, { "epoch": 0.58, "learning_rate": 4.9710000000000003e-05, "loss": 8.8896, "step": 72500 }, { "epoch": 0.58, "learning_rate": 4.9708000000000006e-05, "loss": 8.857, "step": 73000 }, { "epoch": 0.59, "learning_rate": 4.9706e-05, "loss": 8.9091, "step": 73500 }, { "epoch": 0.59, "learning_rate": 4.9704000000000004e-05, "loss": 8.9185, "step": 74000 }, { "epoch": 0.6, "learning_rate": 4.9702000000000006e-05, "loss": 8.8406, "step": 74500 }, { "epoch": 0.6, "learning_rate": 4.97e-05, "loss": 8.8969, "step": 75000 }, { "epoch": 0.6, "learning_rate": 4.9698e-05, "loss": 8.8779, "step": 75500 }, { "epoch": 0.61, "learning_rate": 4.969600000000001e-05, "loss": 8.8665, "step": 76000 }, { "epoch": 0.61, "learning_rate": 4.9694e-05, "loss": 8.8258, "step": 76500 }, { "epoch": 0.62, "learning_rate": 4.9692e-05, "loss": 8.9677, "step": 77000 }, { "epoch": 0.62, "learning_rate": 4.969e-05, "loss": 8.8426, "step": 77500 }, { "epoch": 0.62, "learning_rate": 4.9688e-05, "loss": 8.8894, "step": 78000 }, { "epoch": 0.63, "learning_rate": 4.9686000000000005e-05, "loss": 8.81, "step": 78500 }, { "epoch": 0.63, "learning_rate": 4.9684e-05, "loss": 8.8793, "step": 79000 }, { "epoch": 0.64, "learning_rate": 4.9682e-05, "loss": 8.8209, "step": 79500 }, { "epoch": 0.64, "learning_rate": 4.9680000000000005e-05, "loss": 8.8762, "step": 80000 }, { "epoch": 0.64, "learning_rate": 4.9678e-05, "loss": 8.8526, "step": 80500 }, { "epoch": 0.65, "learning_rate": 4.9676000000000003e-05, "loss": 8.8207, "step": 81000 }, { "epoch": 0.65, "learning_rate": 4.9674000000000006e-05, "loss": 8.8084, "step": 81500 }, { "epoch": 0.66, "learning_rate": 4.9672e-05, "loss": 8.8289, "step": 82000 }, { "epoch": 0.66, "learning_rate": 4.967e-05, "loss": 8.8631, "step": 82500 }, { "epoch": 0.66, "learning_rate": 4.9668000000000006e-05, "loss": 8.7972, "step": 83000 }, { "epoch": 0.67, "learning_rate": 4.9666e-05, "loss": 8.8412, "step": 83500 }, { "epoch": 0.67, "learning_rate": 4.9664000000000004e-05, "loss": 8.8133, "step": 84000 }, { "epoch": 0.68, "learning_rate": 4.9662e-05, "loss": 8.8066, "step": 84500 }, { "epoch": 0.68, "learning_rate": 4.966e-05, "loss": 8.8591, "step": 85000 }, { "epoch": 0.68, "learning_rate": 4.9658000000000005e-05, "loss": 8.8006, "step": 85500 }, { "epoch": 0.69, "learning_rate": 4.9656e-05, "loss": 8.8801, "step": 86000 }, { "epoch": 0.69, "learning_rate": 4.9654e-05, "loss": 8.9259, "step": 86500 }, { "epoch": 0.7, "learning_rate": 4.9652000000000005e-05, "loss": 8.8784, "step": 87000 }, { "epoch": 0.7, "learning_rate": 4.965e-05, "loss": 8.8027, "step": 87500 }, { "epoch": 0.7, "learning_rate": 4.9648e-05, "loss": 8.7856, "step": 88000 }, { "epoch": 0.71, "learning_rate": 4.9646000000000005e-05, "loss": 8.8136, "step": 88500 }, { "epoch": 0.71, "learning_rate": 4.9644e-05, "loss": 8.7518, "step": 89000 }, { "epoch": 0.72, "learning_rate": 4.9642e-05, "loss": 9.0135, "step": 89500 }, { "epoch": 0.72, "learning_rate": 4.9640000000000006e-05, "loss": 8.8534, "step": 90000 }, { "epoch": 0.72, "learning_rate": 4.9638e-05, "loss": 8.7789, "step": 90500 }, { "epoch": 0.73, "learning_rate": 4.9636000000000004e-05, "loss": 8.8706, "step": 91000 }, { "epoch": 0.73, "learning_rate": 4.9634e-05, "loss": 8.7578, "step": 91500 }, { "epoch": 0.74, "learning_rate": 4.9632e-05, "loss": 8.8256, "step": 92000 }, { "epoch": 0.74, "learning_rate": 4.9630000000000004e-05, "loss": 8.8399, "step": 92500 }, { "epoch": 0.74, "learning_rate": 4.9628e-05, "loss": 8.9445, "step": 93000 }, { "epoch": 0.75, "learning_rate": 4.9626e-05, "loss": 8.8409, "step": 93500 }, { "epoch": 0.75, "learning_rate": 4.9624000000000005e-05, "loss": 8.8985, "step": 94000 }, { "epoch": 0.76, "learning_rate": 4.9622e-05, "loss": 8.8067, "step": 94500 }, { "epoch": 0.76, "learning_rate": 4.962e-05, "loss": 8.7973, "step": 95000 }, { "epoch": 0.76, "learning_rate": 4.9618000000000005e-05, "loss": 8.8158, "step": 95500 }, { "epoch": 0.77, "learning_rate": 4.9616e-05, "loss": 8.7935, "step": 96000 }, { "epoch": 0.77, "learning_rate": 4.9614e-05, "loss": 8.8262, "step": 96500 }, { "epoch": 0.78, "learning_rate": 4.9612000000000005e-05, "loss": 8.7424, "step": 97000 }, { "epoch": 0.78, "learning_rate": 4.961e-05, "loss": 8.8062, "step": 97500 }, { "epoch": 0.78, "learning_rate": 4.9608000000000003e-05, "loss": 8.821, "step": 98000 }, { "epoch": 0.79, "learning_rate": 4.9606000000000006e-05, "loss": 8.7761, "step": 98500 }, { "epoch": 0.79, "learning_rate": 4.9604e-05, "loss": 8.7752, "step": 99000 }, { "epoch": 0.8, "learning_rate": 4.9602000000000004e-05, "loss": 8.7686, "step": 99500 }, { "epoch": 0.8, "learning_rate": 4.96e-05, "loss": 8.8078, "step": 100000 }, { "epoch": 0.8, "learning_rate": 4.9598e-05, "loss": 8.755, "step": 100500 }, { "epoch": 0.81, "learning_rate": 4.9596000000000004e-05, "loss": 8.8267, "step": 101000 }, { "epoch": 0.81, "learning_rate": 4.9594e-05, "loss": 8.7747, "step": 101500 }, { "epoch": 0.82, "learning_rate": 4.9592e-05, "loss": 8.8316, "step": 102000 }, { "epoch": 0.82, "learning_rate": 4.9590000000000005e-05, "loss": 8.7557, "step": 102500 }, { "epoch": 0.82, "learning_rate": 4.9588e-05, "loss": 8.7278, "step": 103000 }, { "epoch": 0.83, "learning_rate": 4.9586e-05, "loss": 8.7551, "step": 103500 }, { "epoch": 0.83, "learning_rate": 4.9584000000000005e-05, "loss": 8.7773, "step": 104000 }, { "epoch": 0.84, "learning_rate": 4.9582e-05, "loss": 8.7889, "step": 104500 }, { "epoch": 0.84, "learning_rate": 4.958e-05, "loss": 8.769, "step": 105000 }, { "epoch": 0.84, "learning_rate": 4.9578000000000005e-05, "loss": 8.7733, "step": 105500 }, { "epoch": 0.85, "learning_rate": 4.9576e-05, "loss": 8.772, "step": 106000 }, { "epoch": 0.85, "learning_rate": 4.9574000000000003e-05, "loss": 8.7841, "step": 106500 }, { "epoch": 0.86, "learning_rate": 4.9572e-05, "loss": 8.763, "step": 107000 }, { "epoch": 0.86, "learning_rate": 4.957e-05, "loss": 8.7616, "step": 107500 }, { "epoch": 0.86, "learning_rate": 4.9568000000000004e-05, "loss": 8.8227, "step": 108000 }, { "epoch": 0.87, "learning_rate": 4.9566e-05, "loss": 8.7499, "step": 108500 }, { "epoch": 0.87, "learning_rate": 4.9564e-05, "loss": 8.7877, "step": 109000 }, { "epoch": 0.88, "learning_rate": 4.9562000000000004e-05, "loss": 8.7573, "step": 109500 }, { "epoch": 0.88, "learning_rate": 4.956e-05, "loss": 8.7705, "step": 110000 }, { "epoch": 0.88, "learning_rate": 4.9558e-05, "loss": 8.7416, "step": 110500 }, { "epoch": 0.89, "learning_rate": 4.9556000000000005e-05, "loss": 8.7569, "step": 111000 }, { "epoch": 0.89, "learning_rate": 4.9554e-05, "loss": 8.786, "step": 111500 }, { "epoch": 0.9, "learning_rate": 4.9552e-05, "loss": 8.7313, "step": 112000 }, { "epoch": 0.9, "learning_rate": 4.9550000000000005e-05, "loss": 8.7426, "step": 112500 }, { "epoch": 0.9, "learning_rate": 4.9548e-05, "loss": 8.7644, "step": 113000 }, { "epoch": 0.91, "learning_rate": 4.9546e-05, "loss": 8.777, "step": 113500 }, { "epoch": 0.91, "learning_rate": 4.9544e-05, "loss": 8.7138, "step": 114000 }, { "epoch": 0.92, "learning_rate": 4.954200000000001e-05, "loss": 8.745, "step": 114500 }, { "epoch": 0.92, "learning_rate": 4.9540000000000003e-05, "loss": 8.7519, "step": 115000 }, { "epoch": 0.92, "learning_rate": 4.9538e-05, "loss": 8.7538, "step": 115500 }, { "epoch": 0.93, "learning_rate": 4.9536e-05, "loss": 8.7966, "step": 116000 }, { "epoch": 0.93, "learning_rate": 4.9534000000000004e-05, "loss": 8.7846, "step": 116500 }, { "epoch": 0.94, "learning_rate": 4.9532e-05, "loss": 8.739, "step": 117000 }, { "epoch": 0.94, "learning_rate": 4.953e-05, "loss": 8.7326, "step": 117500 }, { "epoch": 0.94, "learning_rate": 4.9528000000000004e-05, "loss": 8.7237, "step": 118000 }, { "epoch": 0.95, "learning_rate": 4.9526e-05, "loss": 8.7634, "step": 118500 }, { "epoch": 0.95, "learning_rate": 4.9524e-05, "loss": 8.7499, "step": 119000 }, { "epoch": 0.96, "learning_rate": 4.9522000000000005e-05, "loss": 8.7802, "step": 119500 }, { "epoch": 0.96, "learning_rate": 4.952e-05, "loss": 8.7407, "step": 120000 }, { "epoch": 0.96, "learning_rate": 4.9518e-05, "loss": 8.7336, "step": 120500 }, { "epoch": 0.97, "learning_rate": 4.9516e-05, "loss": 8.7446, "step": 121000 }, { "epoch": 0.97, "learning_rate": 4.951400000000001e-05, "loss": 8.7434, "step": 121500 }, { "epoch": 0.98, "learning_rate": 4.9512e-05, "loss": 8.7396, "step": 122000 }, { "epoch": 0.98, "learning_rate": 4.951e-05, "loss": 8.7272, "step": 122500 }, { "epoch": 0.98, "learning_rate": 4.9508e-05, "loss": 8.751, "step": 123000 }, { "epoch": 0.99, "learning_rate": 4.9506000000000003e-05, "loss": 8.7366, "step": 123500 }, { "epoch": 0.99, "learning_rate": 4.9504e-05, "loss": 8.761, "step": 124000 }, { "epoch": 1.0, "learning_rate": 4.9502e-05, "loss": 8.7528, "step": 124500 }, { "epoch": 1.0, "learning_rate": 4.9500000000000004e-05, "loss": 8.77, "step": 125000 }, { "epoch": 1.0, "learning_rate": 4.9498e-05, "loss": 8.7479, "step": 125500 }, { "epoch": 1.01, "learning_rate": 4.9496e-05, "loss": 8.7402, "step": 126000 }, { "epoch": 1.01, "learning_rate": 4.9494000000000004e-05, "loss": 8.7317, "step": 126500 }, { "epoch": 1.02, "learning_rate": 4.9492000000000007e-05, "loss": 8.7582, "step": 127000 }, { "epoch": 1.02, "learning_rate": 4.949e-05, "loss": 8.785, "step": 127500 }, { "epoch": 1.02, "learning_rate": 4.9488e-05, "loss": 8.7487, "step": 128000 }, { "epoch": 1.03, "learning_rate": 4.948600000000001e-05, "loss": 8.7275, "step": 128500 }, { "epoch": 1.03, "learning_rate": 4.9484e-05, "loss": 8.7491, "step": 129000 }, { "epoch": 1.04, "learning_rate": 4.9482e-05, "loss": 8.7415, "step": 129500 }, { "epoch": 1.04, "learning_rate": 4.948000000000001e-05, "loss": 8.7348, "step": 130000 }, { "epoch": 1.04, "learning_rate": 4.9478e-05, "loss": 8.7247, "step": 130500 }, { "epoch": 1.05, "learning_rate": 4.9476e-05, "loss": 8.7298, "step": 131000 }, { "epoch": 1.05, "learning_rate": 4.9474e-05, "loss": 8.7433, "step": 131500 }, { "epoch": 1.06, "learning_rate": 4.9472e-05, "loss": 8.7733, "step": 132000 }, { "epoch": 1.06, "learning_rate": 4.947e-05, "loss": 8.7228, "step": 132500 }, { "epoch": 1.06, "learning_rate": 4.9468e-05, "loss": 8.7441, "step": 133000 }, { "epoch": 1.07, "learning_rate": 4.9466000000000004e-05, "loss": 8.7274, "step": 133500 }, { "epoch": 1.07, "learning_rate": 4.9464000000000006e-05, "loss": 8.7556, "step": 134000 }, { "epoch": 1.08, "learning_rate": 4.9462e-05, "loss": 8.7592, "step": 134500 }, { "epoch": 1.08, "learning_rate": 4.946e-05, "loss": 8.7349, "step": 135000 }, { "epoch": 1.08, "learning_rate": 4.9458000000000007e-05, "loss": 8.7432, "step": 135500 }, { "epoch": 1.09, "learning_rate": 4.9456e-05, "loss": 8.7744, "step": 136000 }, { "epoch": 1.09, "learning_rate": 4.9454e-05, "loss": 8.7506, "step": 136500 }, { "epoch": 1.1, "learning_rate": 4.945200000000001e-05, "loss": 8.7566, "step": 137000 }, { "epoch": 1.1, "learning_rate": 4.945e-05, "loss": 8.7312, "step": 137500 }, { "epoch": 1.1, "learning_rate": 4.9448e-05, "loss": 8.7084, "step": 138000 }, { "epoch": 1.11, "learning_rate": 4.9446e-05, "loss": 8.7298, "step": 138500 }, { "epoch": 1.11, "learning_rate": 4.9444e-05, "loss": 8.7195, "step": 139000 }, { "epoch": 1.12, "learning_rate": 4.9442000000000005e-05, "loss": 8.7922, "step": 139500 }, { "epoch": 1.12, "learning_rate": 4.944e-05, "loss": 8.7165, "step": 140000 }, { "epoch": 1.12, "learning_rate": 4.9438e-05, "loss": 8.7332, "step": 140500 }, { "epoch": 1.13, "learning_rate": 4.9436000000000006e-05, "loss": 8.7391, "step": 141000 }, { "epoch": 1.13, "learning_rate": 4.9434e-05, "loss": 8.7532, "step": 141500 }, { "epoch": 1.14, "learning_rate": 4.9432000000000004e-05, "loss": 8.7782, "step": 142000 }, { "epoch": 1.14, "learning_rate": 4.9430000000000006e-05, "loss": 8.7489, "step": 142500 }, { "epoch": 1.14, "learning_rate": 4.9428e-05, "loss": 8.7545, "step": 143000 }, { "epoch": 1.15, "learning_rate": 4.9426e-05, "loss": 8.7656, "step": 143500 }, { "epoch": 1.15, "learning_rate": 4.9424000000000007e-05, "loss": 8.7302, "step": 144000 }, { "epoch": 1.16, "learning_rate": 4.9422e-05, "loss": 8.7296, "step": 144500 }, { "epoch": 1.16, "learning_rate": 4.942e-05, "loss": 8.7566, "step": 145000 }, { "epoch": 1.16, "learning_rate": 4.9418e-05, "loss": 8.7108, "step": 145500 }, { "epoch": 1.17, "learning_rate": 4.9416e-05, "loss": 8.7323, "step": 146000 }, { "epoch": 1.17, "learning_rate": 4.9414000000000005e-05, "loss": 8.7436, "step": 146500 }, { "epoch": 1.18, "learning_rate": 4.9412e-05, "loss": 8.6866, "step": 147000 }, { "epoch": 1.18, "learning_rate": 4.941e-05, "loss": 8.7285, "step": 147500 }, { "epoch": 1.18, "learning_rate": 4.9408000000000005e-05, "loss": 8.7386, "step": 148000 }, { "epoch": 1.19, "learning_rate": 4.9406e-05, "loss": 8.7394, "step": 148500 }, { "epoch": 1.19, "learning_rate": 4.9404e-05, "loss": 8.7729, "step": 149000 }, { "epoch": 1.2, "learning_rate": 4.9402000000000006e-05, "loss": 8.7637, "step": 149500 }, { "epoch": 1.2, "learning_rate": 4.94e-05, "loss": 8.7381, "step": 150000 }, { "epoch": 1.2, "learning_rate": 4.9398e-05, "loss": 8.7235, "step": 150500 }, { "epoch": 1.21, "learning_rate": 4.9396000000000006e-05, "loss": 8.7198, "step": 151000 }, { "epoch": 1.21, "learning_rate": 4.9394e-05, "loss": 8.7512, "step": 151500 }, { "epoch": 1.22, "learning_rate": 4.9392000000000004e-05, "loss": 8.7068, "step": 152000 }, { "epoch": 1.22, "learning_rate": 4.939e-05, "loss": 8.7282, "step": 152500 }, { "epoch": 1.22, "learning_rate": 4.9388e-05, "loss": 8.732, "step": 153000 }, { "epoch": 1.23, "learning_rate": 4.9386000000000005e-05, "loss": 8.7478, "step": 153500 }, { "epoch": 1.23, "learning_rate": 4.9384e-05, "loss": 8.7374, "step": 154000 }, { "epoch": 1.24, "learning_rate": 4.9382e-05, "loss": 8.7333, "step": 154500 }, { "epoch": 1.24, "learning_rate": 4.9380000000000005e-05, "loss": 8.7092, "step": 155000 }, { "epoch": 1.24, "learning_rate": 4.9378e-05, "loss": 8.7646, "step": 155500 }, { "epoch": 1.25, "learning_rate": 4.9376e-05, "loss": 8.7004, "step": 156000 }, { "epoch": 1.25, "learning_rate": 4.9374000000000005e-05, "loss": 8.7022, "step": 156500 }, { "epoch": 1.26, "learning_rate": 4.9372e-05, "loss": 8.7469, "step": 157000 }, { "epoch": 1.26, "learning_rate": 4.937e-05, "loss": 8.7383, "step": 157500 }, { "epoch": 1.26, "learning_rate": 4.9368000000000006e-05, "loss": 8.7374, "step": 158000 }, { "epoch": 1.27, "learning_rate": 4.9366e-05, "loss": 8.7537, "step": 158500 }, { "epoch": 1.27, "learning_rate": 4.9364000000000004e-05, "loss": 8.7513, "step": 159000 }, { "epoch": 1.28, "learning_rate": 4.9362e-05, "loss": 8.7303, "step": 159500 }, { "epoch": 1.28, "learning_rate": 4.936e-05, "loss": 8.734, "step": 160000 }, { "epoch": 1.28, "learning_rate": 4.9358000000000004e-05, "loss": 8.7497, "step": 160500 }, { "epoch": 1.29, "learning_rate": 4.9356e-05, "loss": 8.7377, "step": 161000 }, { "epoch": 1.29, "learning_rate": 4.9354e-05, "loss": 8.7308, "step": 161500 }, { "epoch": 1.3, "learning_rate": 4.9352000000000005e-05, "loss": 8.7586, "step": 162000 }, { "epoch": 1.3, "learning_rate": 4.935e-05, "loss": 8.744, "step": 162500 }, { "epoch": 1.3, "learning_rate": 4.9348e-05, "loss": 8.7429, "step": 163000 }, { "epoch": 1.31, "learning_rate": 4.9346000000000005e-05, "loss": 8.7083, "step": 163500 }, { "epoch": 1.31, "learning_rate": 4.9344e-05, "loss": 8.7558, "step": 164000 }, { "epoch": 1.32, "learning_rate": 4.9342e-05, "loss": 8.7276, "step": 164500 }, { "epoch": 1.32, "learning_rate": 4.9340000000000005e-05, "loss": 8.7388, "step": 165000 }, { "epoch": 1.32, "learning_rate": 4.9338e-05, "loss": 8.7236, "step": 165500 }, { "epoch": 1.33, "learning_rate": 4.9336e-05, "loss": 8.6968, "step": 166000 }, { "epoch": 1.33, "learning_rate": 4.9334000000000006e-05, "loss": 8.738, "step": 166500 }, { "epoch": 1.34, "learning_rate": 4.9332e-05, "loss": 8.6997, "step": 167000 }, { "epoch": 1.34, "learning_rate": 4.9330000000000004e-05, "loss": 8.7297, "step": 167500 }, { "epoch": 1.34, "learning_rate": 4.9328e-05, "loss": 8.7298, "step": 168000 }, { "epoch": 1.35, "learning_rate": 4.9326e-05, "loss": 8.7035, "step": 168500 }, { "epoch": 1.35, "learning_rate": 4.9324000000000004e-05, "loss": 8.7204, "step": 169000 }, { "epoch": 1.36, "learning_rate": 4.9322e-05, "loss": 8.7452, "step": 169500 }, { "epoch": 1.36, "learning_rate": 4.932e-05, "loss": 8.6998, "step": 170000 }, { "epoch": 1.36, "learning_rate": 4.9318000000000005e-05, "loss": 8.7377, "step": 170500 }, { "epoch": 1.37, "learning_rate": 4.9316e-05, "loss": 8.7331, "step": 171000 }, { "epoch": 1.37, "learning_rate": 4.9314e-05, "loss": 8.7163, "step": 171500 }, { "epoch": 1.38, "learning_rate": 4.9312000000000005e-05, "loss": 8.7268, "step": 172000 }, { "epoch": 1.38, "learning_rate": 4.931e-05, "loss": 8.6967, "step": 172500 }, { "epoch": 1.38, "learning_rate": 4.9308e-05, "loss": 8.7252, "step": 173000 }, { "epoch": 1.39, "learning_rate": 4.9306000000000005e-05, "loss": 8.7349, "step": 173500 }, { "epoch": 1.39, "learning_rate": 4.9304e-05, "loss": 8.7176, "step": 174000 }, { "epoch": 1.4, "learning_rate": 4.9302e-05, "loss": 8.7122, "step": 174500 }, { "epoch": 1.4, "learning_rate": 4.93e-05, "loss": 8.7241, "step": 175000 }, { "epoch": 1.4, "learning_rate": 4.9298e-05, "loss": 8.7116, "step": 175500 }, { "epoch": 1.41, "learning_rate": 4.9296000000000004e-05, "loss": 8.7077, "step": 176000 }, { "epoch": 1.41, "learning_rate": 4.9294e-05, "loss": 8.7184, "step": 176500 }, { "epoch": 1.42, "learning_rate": 4.9292e-05, "loss": 8.7507, "step": 177000 }, { "epoch": 1.42, "learning_rate": 4.9290000000000004e-05, "loss": 8.7461, "step": 177500 }, { "epoch": 1.42, "learning_rate": 4.9288e-05, "loss": 8.7107, "step": 178000 }, { "epoch": 1.43, "learning_rate": 4.9286e-05, "loss": 8.7484, "step": 178500 }, { "epoch": 1.43, "learning_rate": 4.9284000000000005e-05, "loss": 8.7338, "step": 179000 }, { "epoch": 1.44, "learning_rate": 4.9282e-05, "loss": 8.7505, "step": 179500 }, { "epoch": 1.44, "learning_rate": 4.928e-05, "loss": 8.6856, "step": 180000 }, { "epoch": 1.44, "learning_rate": 4.9278000000000005e-05, "loss": 8.7148, "step": 180500 }, { "epoch": 1.45, "learning_rate": 4.9276e-05, "loss": 8.727, "step": 181000 }, { "epoch": 1.45, "learning_rate": 4.9274e-05, "loss": 8.7365, "step": 181500 }, { "epoch": 1.46, "learning_rate": 4.9272e-05, "loss": 8.7123, "step": 182000 }, { "epoch": 1.46, "learning_rate": 4.927000000000001e-05, "loss": 8.7501, "step": 182500 }, { "epoch": 1.46, "learning_rate": 4.9268e-05, "loss": 8.7205, "step": 183000 }, { "epoch": 1.47, "learning_rate": 4.9266e-05, "loss": 8.7338, "step": 183500 }, { "epoch": 1.47, "learning_rate": 4.9264e-05, "loss": 8.725, "step": 184000 }, { "epoch": 1.48, "learning_rate": 4.9262000000000004e-05, "loss": 8.7439, "step": 184500 }, { "epoch": 1.48, "learning_rate": 4.926e-05, "loss": 8.7272, "step": 185000 }, { "epoch": 1.48, "learning_rate": 4.9258e-05, "loss": 8.711, "step": 185500 }, { "epoch": 1.49, "learning_rate": 4.9256000000000004e-05, "loss": 8.7415, "step": 186000 }, { "epoch": 1.49, "learning_rate": 4.9254e-05, "loss": 8.7222, "step": 186500 }, { "epoch": 1.5, "learning_rate": 4.9252e-05, "loss": 8.6877, "step": 187000 }, { "epoch": 1.5, "learning_rate": 4.9250000000000004e-05, "loss": 8.718, "step": 187500 }, { "epoch": 1.5, "learning_rate": 4.9248e-05, "loss": 8.7498, "step": 188000 }, { "epoch": 1.51, "learning_rate": 4.9246e-05, "loss": 8.7576, "step": 188500 }, { "epoch": 1.51, "learning_rate": 4.9244e-05, "loss": 8.7261, "step": 189000 }, { "epoch": 1.52, "learning_rate": 4.924200000000001e-05, "loss": 8.6984, "step": 189500 }, { "epoch": 1.52, "learning_rate": 4.924e-05, "loss": 8.7172, "step": 190000 }, { "epoch": 1.52, "learning_rate": 4.9238e-05, "loss": 8.7044, "step": 190500 }, { "epoch": 1.53, "learning_rate": 4.923600000000001e-05, "loss": 8.7316, "step": 191000 }, { "epoch": 1.53, "learning_rate": 4.9234e-05, "loss": 8.7253, "step": 191500 }, { "epoch": 1.54, "learning_rate": 4.9232e-05, "loss": 8.7278, "step": 192000 }, { "epoch": 1.54, "learning_rate": 4.923e-05, "loss": 8.7171, "step": 192500 }, { "epoch": 1.54, "learning_rate": 4.9228000000000004e-05, "loss": 8.7442, "step": 193000 }, { "epoch": 1.55, "learning_rate": 4.9226e-05, "loss": 8.7353, "step": 193500 }, { "epoch": 1.55, "learning_rate": 4.9224e-05, "loss": 8.7023, "step": 194000 }, { "epoch": 1.56, "learning_rate": 4.9222000000000004e-05, "loss": 8.6957, "step": 194500 }, { "epoch": 1.56, "learning_rate": 4.9220000000000006e-05, "loss": 8.7123, "step": 195000 }, { "epoch": 1.56, "learning_rate": 4.9218e-05, "loss": 8.7047, "step": 195500 }, { "epoch": 1.57, "learning_rate": 4.9216e-05, "loss": 8.7271, "step": 196000 }, { "epoch": 1.57, "learning_rate": 4.921400000000001e-05, "loss": 8.7182, "step": 196500 }, { "epoch": 1.58, "learning_rate": 4.9212e-05, "loss": 8.6991, "step": 197000 }, { "epoch": 1.58, "learning_rate": 4.921e-05, "loss": 8.695, "step": 197500 }, { "epoch": 1.58, "learning_rate": 4.920800000000001e-05, "loss": 8.7277, "step": 198000 }, { "epoch": 1.59, "learning_rate": 4.9206e-05, "loss": 8.7258, "step": 198500 }, { "epoch": 1.59, "learning_rate": 4.9204e-05, "loss": 8.7348, "step": 199000 }, { "epoch": 1.6, "learning_rate": 4.9202e-05, "loss": 8.7168, "step": 199500 }, { "epoch": 1.6, "learning_rate": 4.92e-05, "loss": 8.7074, "step": 200000 }, { "epoch": 1.6, "learning_rate": 4.9198e-05, "loss": 8.7199, "step": 200500 }, { "epoch": 1.61, "learning_rate": 4.9196e-05, "loss": 8.7036, "step": 201000 }, { "epoch": 1.61, "learning_rate": 4.9194000000000004e-05, "loss": 8.6906, "step": 201500 }, { "epoch": 1.62, "learning_rate": 4.9192000000000006e-05, "loss": 8.7086, "step": 202000 }, { "epoch": 1.62, "learning_rate": 4.919e-05, "loss": 8.708, "step": 202500 }, { "epoch": 1.62, "learning_rate": 4.9188000000000004e-05, "loss": 8.7123, "step": 203000 }, { "epoch": 1.63, "learning_rate": 4.9186000000000006e-05, "loss": 8.7107, "step": 203500 }, { "epoch": 1.63, "learning_rate": 4.9184e-05, "loss": 8.7279, "step": 204000 }, { "epoch": 1.64, "learning_rate": 4.9182e-05, "loss": 8.7128, "step": 204500 }, { "epoch": 1.64, "learning_rate": 4.918000000000001e-05, "loss": 8.7156, "step": 205000 }, { "epoch": 1.64, "learning_rate": 4.9178e-05, "loss": 8.7054, "step": 205500 }, { "epoch": 1.65, "learning_rate": 4.9176e-05, "loss": 8.6972, "step": 206000 }, { "epoch": 1.65, "learning_rate": 4.9174e-05, "loss": 8.7417, "step": 206500 }, { "epoch": 1.66, "learning_rate": 4.9172e-05, "loss": 8.7225, "step": 207000 }, { "epoch": 1.66, "learning_rate": 4.9170000000000005e-05, "loss": 8.6858, "step": 207500 }, { "epoch": 1.66, "learning_rate": 4.9168e-05, "loss": 8.7073, "step": 208000 }, { "epoch": 1.67, "learning_rate": 4.9166e-05, "loss": 8.7324, "step": 208500 }, { "epoch": 1.67, "learning_rate": 4.9164000000000006e-05, "loss": 8.7009, "step": 209000 }, { "epoch": 1.68, "learning_rate": 4.9162e-05, "loss": 8.7222, "step": 209500 }, { "epoch": 1.68, "learning_rate": 4.9160000000000004e-05, "loss": 8.7189, "step": 210000 }, { "epoch": 1.68, "learning_rate": 4.9158000000000006e-05, "loss": 8.6845, "step": 210500 }, { "epoch": 1.69, "learning_rate": 4.9156e-05, "loss": 8.7141, "step": 211000 }, { "epoch": 1.69, "learning_rate": 4.9154e-05, "loss": 8.6976, "step": 211500 }, { "epoch": 1.7, "learning_rate": 4.9152000000000006e-05, "loss": 8.7004, "step": 212000 }, { "epoch": 1.7, "learning_rate": 4.915e-05, "loss": 8.7042, "step": 212500 }, { "epoch": 1.7, "learning_rate": 4.9148e-05, "loss": 8.7278, "step": 213000 }, { "epoch": 1.71, "learning_rate": 4.9146e-05, "loss": 8.7039, "step": 213500 }, { "epoch": 1.71, "learning_rate": 4.9144e-05, "loss": 8.7104, "step": 214000 }, { "epoch": 1.72, "learning_rate": 4.9142000000000005e-05, "loss": 8.6993, "step": 214500 }, { "epoch": 1.72, "learning_rate": 4.914e-05, "loss": 8.7215, "step": 215000 }, { "epoch": 1.72, "learning_rate": 4.9138e-05, "loss": 8.7066, "step": 215500 }, { "epoch": 1.73, "learning_rate": 4.9136000000000005e-05, "loss": 8.6924, "step": 216000 }, { "epoch": 1.73, "learning_rate": 4.9134e-05, "loss": 8.6996, "step": 216500 }, { "epoch": 1.74, "learning_rate": 4.9132e-05, "loss": 8.7252, "step": 217000 }, { "epoch": 1.74, "learning_rate": 4.9130000000000006e-05, "loss": 8.698, "step": 217500 }, { "epoch": 1.74, "learning_rate": 4.9128e-05, "loss": 8.7188, "step": 218000 }, { "epoch": 1.75, "learning_rate": 4.9126e-05, "loss": 8.7053, "step": 218500 }, { "epoch": 1.75, "learning_rate": 4.9124000000000006e-05, "loss": 8.7161, "step": 219000 }, { "epoch": 1.76, "learning_rate": 4.9122e-05, "loss": 8.711, "step": 219500 }, { "epoch": 1.76, "learning_rate": 4.9120000000000004e-05, "loss": 8.6989, "step": 220000 }, { "epoch": 1.76, "learning_rate": 4.9118e-05, "loss": 8.6944, "step": 220500 }, { "epoch": 1.77, "learning_rate": 4.9116e-05, "loss": 8.7141, "step": 221000 }, { "epoch": 1.77, "learning_rate": 4.9114000000000004e-05, "loss": 8.7241, "step": 221500 }, { "epoch": 1.78, "learning_rate": 4.9112e-05, "loss": 8.736, "step": 222000 }, { "epoch": 1.78, "learning_rate": 4.911e-05, "loss": 8.6733, "step": 222500 }, { "epoch": 1.78, "learning_rate": 4.9108000000000005e-05, "loss": 8.7222, "step": 223000 }, { "epoch": 1.79, "learning_rate": 4.9106e-05, "loss": 8.6865, "step": 223500 }, { "epoch": 1.79, "learning_rate": 4.9104e-05, "loss": 8.7143, "step": 224000 }, { "epoch": 1.8, "learning_rate": 4.9102000000000005e-05, "loss": 8.7387, "step": 224500 }, { "epoch": 1.8, "learning_rate": 4.91e-05, "loss": 8.6927, "step": 225000 }, { "epoch": 1.8, "learning_rate": 4.9098e-05, "loss": 8.7262, "step": 225500 }, { "epoch": 1.81, "learning_rate": 4.9096000000000006e-05, "loss": 8.7573, "step": 226000 }, { "epoch": 1.81, "learning_rate": 4.9094e-05, "loss": 8.7058, "step": 226500 }, { "epoch": 1.82, "learning_rate": 4.9092000000000004e-05, "loss": 8.7144, "step": 227000 }, { "epoch": 1.82, "learning_rate": 4.9090000000000006e-05, "loss": 8.6902, "step": 227500 }, { "epoch": 1.82, "learning_rate": 4.9088e-05, "loss": 8.7049, "step": 228000 }, { "epoch": 1.83, "learning_rate": 4.9086000000000004e-05, "loss": 8.7139, "step": 228500 }, { "epoch": 1.83, "learning_rate": 4.9084e-05, "loss": 8.7206, "step": 229000 }, { "epoch": 1.84, "learning_rate": 4.9082e-05, "loss": 8.7224, "step": 229500 }, { "epoch": 1.84, "learning_rate": 4.9080000000000004e-05, "loss": 8.7396, "step": 230000 }, { "epoch": 1.84, "learning_rate": 4.9078e-05, "loss": 8.7352, "step": 230500 }, { "epoch": 1.85, "learning_rate": 4.9076e-05, "loss": 8.7382, "step": 231000 }, { "epoch": 1.85, "learning_rate": 4.9074000000000005e-05, "loss": 8.7159, "step": 231500 }, { "epoch": 1.86, "learning_rate": 4.9072e-05, "loss": 8.6957, "step": 232000 }, { "epoch": 1.86, "learning_rate": 4.907e-05, "loss": 8.7114, "step": 232500 }, { "epoch": 1.86, "learning_rate": 4.9068000000000005e-05, "loss": 8.7356, "step": 233000 }, { "epoch": 1.87, "learning_rate": 4.9066e-05, "loss": 8.692, "step": 233500 }, { "epoch": 1.87, "learning_rate": 4.9064e-05, "loss": 8.7049, "step": 234000 }, { "epoch": 1.88, "learning_rate": 4.9062000000000006e-05, "loss": 8.7047, "step": 234500 }, { "epoch": 1.88, "learning_rate": 4.906e-05, "loss": 8.7047, "step": 235000 }, { "epoch": 1.88, "learning_rate": 4.9058000000000004e-05, "loss": 8.6655, "step": 235500 }, { "epoch": 1.89, "learning_rate": 4.9056e-05, "loss": 8.7262, "step": 236000 }, { "epoch": 1.89, "learning_rate": 4.9054e-05, "loss": 8.7033, "step": 236500 }, { "epoch": 1.9, "learning_rate": 4.9052000000000004e-05, "loss": 8.7357, "step": 237000 }, { "epoch": 1.9, "learning_rate": 4.905e-05, "loss": 8.719, "step": 237500 }, { "epoch": 1.9, "learning_rate": 4.9048e-05, "loss": 8.6847, "step": 238000 }, { "epoch": 1.91, "learning_rate": 4.9046000000000004e-05, "loss": 8.7089, "step": 238500 }, { "epoch": 1.91, "learning_rate": 4.9044e-05, "loss": 8.7355, "step": 239000 }, { "epoch": 1.92, "learning_rate": 4.9042e-05, "loss": 8.7264, "step": 239500 }, { "epoch": 1.92, "learning_rate": 4.9040000000000005e-05, "loss": 8.7056, "step": 240000 }, { "epoch": 1.92, "learning_rate": 4.9038e-05, "loss": 8.7343, "step": 240500 }, { "epoch": 1.93, "learning_rate": 4.9036e-05, "loss": 8.682, "step": 241000 }, { "epoch": 1.93, "learning_rate": 4.9034000000000005e-05, "loss": 8.717, "step": 241500 }, { "epoch": 1.94, "learning_rate": 4.9032e-05, "loss": 8.7235, "step": 242000 }, { "epoch": 1.94, "learning_rate": 4.903e-05, "loss": 8.7069, "step": 242500 }, { "epoch": 1.94, "learning_rate": 4.9028e-05, "loss": 8.7021, "step": 243000 }, { "epoch": 1.95, "learning_rate": 4.9026e-05, "loss": 8.6757, "step": 243500 }, { "epoch": 1.95, "learning_rate": 4.9024000000000004e-05, "loss": 8.7079, "step": 244000 }, { "epoch": 1.96, "learning_rate": 4.9022e-05, "loss": 8.6958, "step": 244500 }, { "epoch": 1.96, "learning_rate": 4.902e-05, "loss": 8.6831, "step": 245000 }, { "epoch": 1.96, "learning_rate": 4.9018000000000004e-05, "loss": 8.7316, "step": 245500 }, { "epoch": 1.97, "learning_rate": 4.9016e-05, "loss": 8.691, "step": 246000 }, { "epoch": 1.97, "learning_rate": 4.9014e-05, "loss": 8.7349, "step": 246500 }, { "epoch": 1.98, "learning_rate": 4.9012000000000004e-05, "loss": 8.7, "step": 247000 }, { "epoch": 1.98, "learning_rate": 4.901e-05, "loss": 8.7117, "step": 247500 }, { "epoch": 1.98, "learning_rate": 4.9008e-05, "loss": 8.7021, "step": 248000 }, { "epoch": 1.99, "learning_rate": 4.9006000000000005e-05, "loss": 8.7323, "step": 248500 }, { "epoch": 1.99, "learning_rate": 4.9004e-05, "loss": 8.7232, "step": 249000 }, { "epoch": 2.0, "learning_rate": 4.9002e-05, "loss": 8.6874, "step": 249500 }, { "epoch": 2.0, "learning_rate": 4.9e-05, "loss": 8.7074, "step": 250000 }, { "epoch": 2.0, "learning_rate": 4.899800000000001e-05, "loss": 8.7001, "step": 250500 }, { "epoch": 2.01, "learning_rate": 4.8996e-05, "loss": 8.7197, "step": 251000 }, { "epoch": 2.01, "learning_rate": 4.8994e-05, "loss": 8.6888, "step": 251500 }, { "epoch": 2.02, "learning_rate": 4.8992e-05, "loss": 8.6867, "step": 252000 }, { "epoch": 2.02, "learning_rate": 4.8990000000000004e-05, "loss": 8.7181, "step": 252500 }, { "epoch": 2.02, "learning_rate": 4.8988e-05, "loss": 8.7285, "step": 253000 }, { "epoch": 2.03, "learning_rate": 4.8986e-05, "loss": 8.6933, "step": 253500 }, { "epoch": 2.03, "learning_rate": 4.8984000000000004e-05, "loss": 8.7063, "step": 254000 }, { "epoch": 2.04, "learning_rate": 4.8982e-05, "loss": 8.6891, "step": 254500 }, { "epoch": 2.04, "learning_rate": 4.898e-05, "loss": 8.7265, "step": 255000 }, { "epoch": 2.04, "learning_rate": 4.8978000000000004e-05, "loss": 8.7078, "step": 255500 }, { "epoch": 2.05, "learning_rate": 4.8976e-05, "loss": 8.7243, "step": 256000 }, { "epoch": 2.05, "learning_rate": 4.8974e-05, "loss": 8.7045, "step": 256500 }, { "epoch": 2.06, "learning_rate": 4.8972e-05, "loss": 8.681, "step": 257000 }, { "epoch": 2.06, "learning_rate": 4.897000000000001e-05, "loss": 8.7018, "step": 257500 }, { "epoch": 2.06, "learning_rate": 4.8968e-05, "loss": 8.6847, "step": 258000 }, { "epoch": 2.07, "learning_rate": 4.8966e-05, "loss": 8.7208, "step": 258500 }, { "epoch": 2.07, "learning_rate": 4.896400000000001e-05, "loss": 8.7038, "step": 259000 }, { "epoch": 2.08, "learning_rate": 4.8962e-05, "loss": 8.7077, "step": 259500 }, { "epoch": 2.08, "learning_rate": 4.896e-05, "loss": 8.7176, "step": 260000 }, { "epoch": 2.08, "learning_rate": 4.8958e-05, "loss": 8.71, "step": 260500 }, { "epoch": 2.09, "learning_rate": 4.8956000000000004e-05, "loss": 8.6918, "step": 261000 }, { "epoch": 2.09, "learning_rate": 4.8954e-05, "loss": 8.6844, "step": 261500 }, { "epoch": 2.1, "learning_rate": 4.8952e-05, "loss": 8.7114, "step": 262000 }, { "epoch": 2.1, "learning_rate": 4.8950000000000004e-05, "loss": 8.6925, "step": 262500 }, { "epoch": 2.1, "learning_rate": 4.8948000000000006e-05, "loss": 8.7158, "step": 263000 }, { "epoch": 2.11, "learning_rate": 4.8946e-05, "loss": 8.7094, "step": 263500 }, { "epoch": 2.11, "learning_rate": 4.8944e-05, "loss": 8.6932, "step": 264000 }, { "epoch": 2.12, "learning_rate": 4.894200000000001e-05, "loss": 8.7112, "step": 264500 }, { "epoch": 2.12, "learning_rate": 4.894e-05, "loss": 8.7103, "step": 265000 }, { "epoch": 2.12, "learning_rate": 4.8938e-05, "loss": 8.7032, "step": 265500 }, { "epoch": 2.13, "learning_rate": 4.893600000000001e-05, "loss": 8.7108, "step": 266000 }, { "epoch": 2.13, "learning_rate": 4.8934e-05, "loss": 8.7083, "step": 266500 }, { "epoch": 2.14, "learning_rate": 4.8932e-05, "loss": 8.734, "step": 267000 }, { "epoch": 2.14, "learning_rate": 4.893e-05, "loss": 8.7262, "step": 267500 }, { "epoch": 2.14, "learning_rate": 4.8928e-05, "loss": 8.7284, "step": 268000 }, { "epoch": 2.15, "learning_rate": 4.8926e-05, "loss": 8.6903, "step": 268500 }, { "epoch": 2.15, "learning_rate": 4.8924e-05, "loss": 8.7144, "step": 269000 }, { "epoch": 2.16, "learning_rate": 4.8922000000000004e-05, "loss": 8.6904, "step": 269500 }, { "epoch": 2.16, "learning_rate": 4.8920000000000006e-05, "loss": 8.6849, "step": 270000 }, { "epoch": 2.16, "learning_rate": 4.8918e-05, "loss": 8.6964, "step": 270500 }, { "epoch": 2.17, "learning_rate": 4.8916000000000004e-05, "loss": 8.7546, "step": 271000 }, { "epoch": 2.17, "learning_rate": 4.8914000000000006e-05, "loss": 8.72, "step": 271500 }, { "epoch": 2.18, "learning_rate": 4.8912e-05, "loss": 8.7185, "step": 272000 }, { "epoch": 2.18, "learning_rate": 4.891e-05, "loss": 8.7199, "step": 272500 }, { "epoch": 2.18, "learning_rate": 4.890800000000001e-05, "loss": 8.7157, "step": 273000 }, { "epoch": 2.19, "learning_rate": 4.8906e-05, "loss": 8.7014, "step": 273500 }, { "epoch": 2.19, "learning_rate": 4.8904e-05, "loss": 8.7041, "step": 274000 }, { "epoch": 2.2, "learning_rate": 4.8902e-05, "loss": 8.695, "step": 274500 }, { "epoch": 2.2, "learning_rate": 4.89e-05, "loss": 8.7169, "step": 275000 }, { "epoch": 2.2, "learning_rate": 4.8898000000000005e-05, "loss": 8.7167, "step": 275500 }, { "epoch": 2.21, "learning_rate": 4.8896e-05, "loss": 8.6953, "step": 276000 }, { "epoch": 2.21, "learning_rate": 4.8894e-05, "loss": 8.7152, "step": 276500 }, { "epoch": 2.22, "learning_rate": 4.8892000000000006e-05, "loss": 8.71, "step": 277000 }, { "epoch": 2.22, "learning_rate": 4.889e-05, "loss": 8.7259, "step": 277500 }, { "epoch": 2.22, "learning_rate": 4.8888000000000004e-05, "loss": 8.698, "step": 278000 }, { "epoch": 2.23, "learning_rate": 4.8886000000000006e-05, "loss": 8.6828, "step": 278500 }, { "epoch": 2.23, "learning_rate": 4.8884e-05, "loss": 8.6984, "step": 279000 }, { "epoch": 2.24, "learning_rate": 4.8882e-05, "loss": 8.7121, "step": 279500 }, { "epoch": 2.24, "learning_rate": 4.8880000000000006e-05, "loss": 8.6799, "step": 280000 }, { "epoch": 2.24, "learning_rate": 4.8878e-05, "loss": 8.7074, "step": 280500 }, { "epoch": 2.25, "learning_rate": 4.8876e-05, "loss": 8.6898, "step": 281000 }, { "epoch": 2.25, "learning_rate": 4.8874e-05, "loss": 8.7204, "step": 281500 }, { "epoch": 2.26, "learning_rate": 4.8872e-05, "loss": 8.6998, "step": 282000 }, { "epoch": 2.26, "learning_rate": 4.8870000000000005e-05, "loss": 8.7008, "step": 282500 }, { "epoch": 2.26, "learning_rate": 4.8868e-05, "loss": 8.6955, "step": 283000 }, { "epoch": 2.27, "learning_rate": 4.8866e-05, "loss": 8.6972, "step": 283500 }, { "epoch": 2.27, "learning_rate": 4.8864000000000005e-05, "loss": 8.7064, "step": 284000 }, { "epoch": 2.28, "learning_rate": 4.8862e-05, "loss": 8.6976, "step": 284500 }, { "epoch": 2.28, "learning_rate": 4.886e-05, "loss": 8.6878, "step": 285000 }, { "epoch": 2.28, "learning_rate": 4.8858000000000006e-05, "loss": 8.7264, "step": 285500 }, { "epoch": 2.29, "learning_rate": 4.8856e-05, "loss": 8.6855, "step": 286000 }, { "epoch": 2.29, "learning_rate": 4.8854e-05, "loss": 8.7167, "step": 286500 }, { "epoch": 2.3, "learning_rate": 4.8852000000000006e-05, "loss": 8.7011, "step": 287000 }, { "epoch": 2.3, "learning_rate": 4.885e-05, "loss": 8.6987, "step": 287500 }, { "epoch": 2.3, "learning_rate": 4.8848000000000004e-05, "loss": 8.678, "step": 288000 }, { "epoch": 2.31, "learning_rate": 4.8846e-05, "loss": 8.7051, "step": 288500 }, { "epoch": 2.31, "learning_rate": 4.8844e-05, "loss": 8.7251, "step": 289000 }, { "epoch": 2.32, "learning_rate": 4.8842000000000004e-05, "loss": 8.6853, "step": 289500 }, { "epoch": 2.32, "learning_rate": 4.884e-05, "loss": 8.6958, "step": 290000 }, { "epoch": 2.32, "learning_rate": 4.8838e-05, "loss": 8.7028, "step": 290500 }, { "epoch": 2.33, "learning_rate": 4.8836000000000005e-05, "loss": 8.698, "step": 291000 }, { "epoch": 2.33, "learning_rate": 4.8834e-05, "loss": 8.7032, "step": 291500 }, { "epoch": 2.34, "learning_rate": 4.8832e-05, "loss": 8.7055, "step": 292000 }, { "epoch": 2.34, "learning_rate": 4.8830000000000005e-05, "loss": 8.7102, "step": 292500 }, { "epoch": 2.34, "learning_rate": 4.8828e-05, "loss": 8.6988, "step": 293000 }, { "epoch": 2.35, "learning_rate": 4.8826e-05, "loss": 8.7191, "step": 293500 }, { "epoch": 2.35, "learning_rate": 4.8824000000000006e-05, "loss": 8.7392, "step": 294000 }, { "epoch": 2.36, "learning_rate": 4.8822e-05, "loss": 8.7151, "step": 294500 }, { "epoch": 2.36, "learning_rate": 4.8820000000000004e-05, "loss": 8.6793, "step": 295000 }, { "epoch": 2.36, "learning_rate": 4.8818000000000006e-05, "loss": 8.677, "step": 295500 }, { "epoch": 2.37, "learning_rate": 4.8816e-05, "loss": 8.7137, "step": 296000 }, { "epoch": 2.37, "learning_rate": 4.8814000000000004e-05, "loss": 8.6906, "step": 296500 }, { "epoch": 2.38, "learning_rate": 4.8812e-05, "loss": 8.6967, "step": 297000 }, { "epoch": 2.38, "learning_rate": 4.881e-05, "loss": 8.6804, "step": 297500 }, { "epoch": 2.38, "learning_rate": 4.8808000000000004e-05, "loss": 8.7055, "step": 298000 }, { "epoch": 2.39, "learning_rate": 4.8806e-05, "loss": 8.7177, "step": 298500 }, { "epoch": 2.39, "learning_rate": 4.8804e-05, "loss": 8.7065, "step": 299000 }, { "epoch": 2.4, "learning_rate": 4.8802000000000005e-05, "loss": 8.7076, "step": 299500 }, { "epoch": 2.4, "learning_rate": 4.88e-05, "loss": 8.7121, "step": 300000 }, { "epoch": 2.4, "learning_rate": 4.8798e-05, "loss": 8.7141, "step": 300500 }, { "epoch": 2.41, "learning_rate": 4.8796000000000005e-05, "loss": 8.6999, "step": 301000 }, { "epoch": 2.41, "learning_rate": 4.8794e-05, "loss": 8.6949, "step": 301500 }, { "epoch": 2.42, "learning_rate": 4.8792e-05, "loss": 8.6949, "step": 302000 }, { "epoch": 2.42, "learning_rate": 4.8790000000000006e-05, "loss": 8.7159, "step": 302500 }, { "epoch": 2.42, "learning_rate": 4.8788e-05, "loss": 8.7052, "step": 303000 }, { "epoch": 2.43, "learning_rate": 4.8786000000000004e-05, "loss": 8.6986, "step": 303500 }, { "epoch": 2.43, "learning_rate": 4.8784e-05, "loss": 8.7041, "step": 304000 }, { "epoch": 2.44, "learning_rate": 4.8782e-05, "loss": 8.7078, "step": 304500 }, { "epoch": 2.44, "learning_rate": 4.8780000000000004e-05, "loss": 8.6994, "step": 305000 }, { "epoch": 2.44, "learning_rate": 4.8778e-05, "loss": 8.6767, "step": 305500 }, { "epoch": 2.45, "learning_rate": 4.8776e-05, "loss": 8.6984, "step": 306000 }, { "epoch": 2.45, "learning_rate": 4.8774000000000004e-05, "loss": 8.7268, "step": 306500 }, { "epoch": 2.46, "learning_rate": 4.8772e-05, "loss": 8.7017, "step": 307000 }, { "epoch": 2.46, "learning_rate": 4.877e-05, "loss": 8.7056, "step": 307500 }, { "epoch": 2.46, "learning_rate": 4.8768000000000005e-05, "loss": 8.7253, "step": 308000 }, { "epoch": 2.47, "learning_rate": 4.8766e-05, "loss": 8.7167, "step": 308500 }, { "epoch": 2.47, "learning_rate": 4.8764e-05, "loss": 8.6715, "step": 309000 }, { "epoch": 2.48, "learning_rate": 4.8762000000000005e-05, "loss": 8.7112, "step": 309500 }, { "epoch": 2.48, "learning_rate": 4.876e-05, "loss": 8.7181, "step": 310000 }, { "epoch": 2.48, "learning_rate": 4.8758e-05, "loss": 8.6818, "step": 310500 }, { "epoch": 2.49, "learning_rate": 4.8756e-05, "loss": 8.6989, "step": 311000 }, { "epoch": 2.49, "learning_rate": 4.8754e-05, "loss": 8.7037, "step": 311500 }, { "epoch": 2.5, "learning_rate": 4.8752000000000004e-05, "loss": 8.6931, "step": 312000 }, { "epoch": 2.5, "learning_rate": 4.875e-05, "loss": 8.7028, "step": 312500 }, { "epoch": 2.5, "learning_rate": 4.8748e-05, "loss": 8.6934, "step": 313000 }, { "epoch": 2.51, "learning_rate": 4.8746000000000004e-05, "loss": 8.7059, "step": 313500 }, { "epoch": 2.51, "learning_rate": 4.8744e-05, "loss": 8.6945, "step": 314000 }, { "epoch": 2.52, "learning_rate": 4.8742e-05, "loss": 8.67, "step": 314500 }, { "epoch": 2.52, "learning_rate": 4.8740000000000004e-05, "loss": 8.6978, "step": 315000 }, { "epoch": 2.52, "learning_rate": 4.8738e-05, "loss": 8.7081, "step": 315500 }, { "epoch": 2.53, "learning_rate": 4.8736e-05, "loss": 8.6743, "step": 316000 }, { "epoch": 2.53, "learning_rate": 4.8734000000000005e-05, "loss": 8.7259, "step": 316500 }, { "epoch": 2.54, "learning_rate": 4.8732e-05, "loss": 8.7153, "step": 317000 }, { "epoch": 2.54, "learning_rate": 4.873e-05, "loss": 8.7164, "step": 317500 }, { "epoch": 2.54, "learning_rate": 4.8728e-05, "loss": 8.6748, "step": 318000 }, { "epoch": 2.55, "learning_rate": 4.872600000000001e-05, "loss": 8.6927, "step": 318500 }, { "epoch": 2.55, "learning_rate": 4.8724e-05, "loss": 8.7285, "step": 319000 }, { "epoch": 2.56, "learning_rate": 4.8722e-05, "loss": 8.6986, "step": 319500 }, { "epoch": 2.56, "learning_rate": 4.872000000000001e-05, "loss": 8.7152, "step": 320000 }, { "epoch": 2.56, "learning_rate": 4.8718000000000003e-05, "loss": 8.7072, "step": 320500 }, { "epoch": 2.57, "learning_rate": 4.8716e-05, "loss": 8.6971, "step": 321000 }, { "epoch": 2.57, "learning_rate": 4.8714e-05, "loss": 8.714, "step": 321500 }, { "epoch": 2.58, "learning_rate": 4.8712000000000004e-05, "loss": 8.703, "step": 322000 }, { "epoch": 2.58, "learning_rate": 4.871e-05, "loss": 8.7072, "step": 322500 }, { "epoch": 2.58, "learning_rate": 4.8708e-05, "loss": 8.6667, "step": 323000 }, { "epoch": 2.59, "learning_rate": 4.8706000000000004e-05, "loss": 8.7107, "step": 323500 }, { "epoch": 2.59, "learning_rate": 4.8704e-05, "loss": 8.701, "step": 324000 }, { "epoch": 2.6, "learning_rate": 4.8702e-05, "loss": 8.7082, "step": 324500 }, { "epoch": 2.6, "learning_rate": 4.87e-05, "loss": 8.701, "step": 325000 }, { "epoch": 2.6, "learning_rate": 4.869800000000001e-05, "loss": 8.6732, "step": 325500 }, { "epoch": 2.61, "learning_rate": 4.8696e-05, "loss": 8.7238, "step": 326000 }, { "epoch": 2.61, "learning_rate": 4.8694e-05, "loss": 8.7107, "step": 326500 }, { "epoch": 2.62, "learning_rate": 4.869200000000001e-05, "loss": 8.7347, "step": 327000 }, { "epoch": 2.62, "learning_rate": 4.869e-05, "loss": 8.6986, "step": 327500 }, { "epoch": 2.62, "learning_rate": 4.8688e-05, "loss": 8.6949, "step": 328000 }, { "epoch": 2.63, "learning_rate": 4.8686e-05, "loss": 8.6919, "step": 328500 }, { "epoch": 2.63, "learning_rate": 4.8684000000000003e-05, "loss": 8.7026, "step": 329000 }, { "epoch": 2.64, "learning_rate": 4.8682e-05, "loss": 8.6933, "step": 329500 }, { "epoch": 2.64, "learning_rate": 4.868e-05, "loss": 8.7021, "step": 330000 }, { "epoch": 2.64, "learning_rate": 4.8678000000000004e-05, "loss": 8.7339, "step": 330500 }, { "epoch": 2.65, "learning_rate": 4.8676000000000006e-05, "loss": 8.7103, "step": 331000 }, { "epoch": 2.65, "learning_rate": 4.8674e-05, "loss": 8.683, "step": 331500 }, { "epoch": 2.66, "learning_rate": 4.8672000000000004e-05, "loss": 8.707, "step": 332000 }, { "epoch": 2.66, "learning_rate": 4.867000000000001e-05, "loss": 8.713, "step": 332500 }, { "epoch": 2.66, "learning_rate": 4.8668e-05, "loss": 8.7049, "step": 333000 }, { "epoch": 2.67, "learning_rate": 4.8666e-05, "loss": 8.6956, "step": 333500 }, { "epoch": 2.67, "learning_rate": 4.866400000000001e-05, "loss": 8.6998, "step": 334000 }, { "epoch": 2.68, "learning_rate": 4.8662e-05, "loss": 8.7074, "step": 334500 }, { "epoch": 2.68, "learning_rate": 4.866e-05, "loss": 8.7095, "step": 335000 }, { "epoch": 2.68, "learning_rate": 4.8658e-05, "loss": 8.7078, "step": 335500 }, { "epoch": 2.69, "learning_rate": 4.8656e-05, "loss": 8.6895, "step": 336000 }, { "epoch": 2.69, "learning_rate": 4.8654e-05, "loss": 8.6883, "step": 336500 }, { "epoch": 2.7, "learning_rate": 4.8652e-05, "loss": 8.7053, "step": 337000 }, { "epoch": 2.7, "learning_rate": 4.8650000000000003e-05, "loss": 8.6985, "step": 337500 }, { "epoch": 2.7, "learning_rate": 4.8648000000000006e-05, "loss": 8.7011, "step": 338000 }, { "epoch": 2.71, "learning_rate": 4.8646e-05, "loss": 8.6913, "step": 338500 }, { "epoch": 2.71, "learning_rate": 4.8644000000000004e-05, "loss": 8.699, "step": 339000 }, { "epoch": 2.72, "learning_rate": 4.8642000000000006e-05, "loss": 8.7051, "step": 339500 }, { "epoch": 2.72, "learning_rate": 4.864e-05, "loss": 8.6863, "step": 340000 }, { "epoch": 2.72, "learning_rate": 4.8638e-05, "loss": 8.69, "step": 340500 }, { "epoch": 2.73, "learning_rate": 4.863600000000001e-05, "loss": 8.6958, "step": 341000 }, { "epoch": 2.73, "learning_rate": 4.8634e-05, "loss": 8.6927, "step": 341500 }, { "epoch": 2.74, "learning_rate": 4.8632e-05, "loss": 8.6934, "step": 342000 }, { "epoch": 2.74, "learning_rate": 4.863e-05, "loss": 8.7063, "step": 342500 }, { "epoch": 2.74, "learning_rate": 4.8628e-05, "loss": 8.7023, "step": 343000 }, { "epoch": 2.75, "learning_rate": 4.8626000000000005e-05, "loss": 8.7075, "step": 343500 }, { "epoch": 2.75, "learning_rate": 4.8624e-05, "loss": 8.6927, "step": 344000 }, { "epoch": 2.76, "learning_rate": 4.8622e-05, "loss": 8.7182, "step": 344500 }, { "epoch": 2.76, "learning_rate": 4.8620000000000005e-05, "loss": 8.6924, "step": 345000 }, { "epoch": 2.76, "learning_rate": 4.8618e-05, "loss": 8.6963, "step": 345500 }, { "epoch": 2.77, "learning_rate": 4.8616000000000003e-05, "loss": 8.6809, "step": 346000 }, { "epoch": 2.77, "learning_rate": 4.8614000000000006e-05, "loss": 8.6995, "step": 346500 }, { "epoch": 2.78, "learning_rate": 4.8612e-05, "loss": 8.7175, "step": 347000 }, { "epoch": 2.78, "learning_rate": 4.861e-05, "loss": 8.7025, "step": 347500 }, { "epoch": 2.78, "learning_rate": 4.8608000000000006e-05, "loss": 8.7097, "step": 348000 }, { "epoch": 2.79, "learning_rate": 4.8606e-05, "loss": 8.699, "step": 348500 }, { "epoch": 2.79, "learning_rate": 4.8604000000000004e-05, "loss": 8.7232, "step": 349000 }, { "epoch": 2.8, "learning_rate": 4.8602e-05, "loss": 8.713, "step": 349500 }, { "epoch": 2.8, "learning_rate": 4.86e-05, "loss": 8.7031, "step": 350000 }, { "epoch": 2.8, "learning_rate": 4.8598000000000005e-05, "loss": 8.6899, "step": 350500 }, { "epoch": 2.81, "learning_rate": 4.8596e-05, "loss": 8.6683, "step": 351000 }, { "epoch": 2.81, "learning_rate": 4.8594e-05, "loss": 8.6702, "step": 351500 }, { "epoch": 2.82, "learning_rate": 4.8592000000000005e-05, "loss": 8.6919, "step": 352000 }, { "epoch": 2.82, "learning_rate": 4.859e-05, "loss": 8.6994, "step": 352500 }, { "epoch": 2.82, "learning_rate": 4.8588e-05, "loss": 8.7184, "step": 353000 }, { "epoch": 2.83, "learning_rate": 4.8586000000000005e-05, "loss": 8.7062, "step": 353500 }, { "epoch": 2.83, "learning_rate": 4.8584e-05, "loss": 8.7013, "step": 354000 }, { "epoch": 2.84, "learning_rate": 4.8582e-05, "loss": 8.7126, "step": 354500 }, { "epoch": 2.84, "learning_rate": 4.8580000000000006e-05, "loss": 8.6769, "step": 355000 }, { "epoch": 2.84, "learning_rate": 4.8578e-05, "loss": 8.6851, "step": 355500 }, { "epoch": 2.85, "learning_rate": 4.8576000000000004e-05, "loss": 8.6912, "step": 356000 }, { "epoch": 2.85, "learning_rate": 4.8574000000000006e-05, "loss": 8.711, "step": 356500 }, { "epoch": 2.86, "learning_rate": 4.8572e-05, "loss": 8.7133, "step": 357000 }, { "epoch": 2.86, "learning_rate": 4.8570000000000004e-05, "loss": 8.7251, "step": 357500 }, { "epoch": 2.86, "learning_rate": 4.8568e-05, "loss": 8.6906, "step": 358000 }, { "epoch": 2.87, "learning_rate": 4.8566e-05, "loss": 8.688, "step": 358500 }, { "epoch": 2.87, "learning_rate": 4.8564000000000005e-05, "loss": 8.7029, "step": 359000 }, { "epoch": 2.88, "learning_rate": 4.8562e-05, "loss": 8.7185, "step": 359500 }, { "epoch": 2.88, "learning_rate": 4.856e-05, "loss": 8.7108, "step": 360000 }, { "epoch": 2.88, "learning_rate": 4.8558000000000005e-05, "loss": 8.7167, "step": 360500 }, { "epoch": 2.89, "learning_rate": 4.8556e-05, "loss": 8.6894, "step": 361000 }, { "epoch": 2.89, "learning_rate": 4.8554e-05, "loss": 8.6856, "step": 361500 }, { "epoch": 2.9, "learning_rate": 4.8552000000000005e-05, "loss": 8.675, "step": 362000 }, { "epoch": 2.9, "learning_rate": 4.855e-05, "loss": 8.6954, "step": 362500 }, { "epoch": 2.9, "learning_rate": 4.8548000000000003e-05, "loss": 8.6984, "step": 363000 }, { "epoch": 2.91, "learning_rate": 4.8546000000000006e-05, "loss": 8.7377, "step": 363500 }, { "epoch": 2.91, "learning_rate": 4.8544e-05, "loss": 8.6971, "step": 364000 }, { "epoch": 2.92, "learning_rate": 4.8542000000000004e-05, "loss": 8.6765, "step": 364500 }, { "epoch": 2.92, "learning_rate": 4.854e-05, "loss": 8.7073, "step": 365000 }, { "epoch": 2.92, "learning_rate": 4.8538e-05, "loss": 8.6889, "step": 365500 }, { "epoch": 2.93, "learning_rate": 4.8536000000000004e-05, "loss": 8.7079, "step": 366000 }, { "epoch": 2.93, "learning_rate": 4.8534e-05, "loss": 8.6962, "step": 366500 }, { "epoch": 2.94, "learning_rate": 4.8532e-05, "loss": 8.6914, "step": 367000 }, { "epoch": 2.94, "learning_rate": 4.8530000000000005e-05, "loss": 8.7295, "step": 367500 }, { "epoch": 2.94, "learning_rate": 4.8528e-05, "loss": 8.7068, "step": 368000 }, { "epoch": 2.95, "learning_rate": 4.8526e-05, "loss": 8.6933, "step": 368500 }, { "epoch": 2.95, "learning_rate": 4.8524000000000005e-05, "loss": 8.6943, "step": 369000 }, { "epoch": 2.96, "learning_rate": 4.8522e-05, "loss": 8.6945, "step": 369500 }, { "epoch": 2.96, "learning_rate": 4.852e-05, "loss": 8.7121, "step": 370000 }, { "epoch": 2.96, "learning_rate": 4.8518000000000005e-05, "loss": 8.686, "step": 370500 }, { "epoch": 2.97, "learning_rate": 4.8516e-05, "loss": 8.7053, "step": 371000 }, { "epoch": 2.97, "learning_rate": 4.8514000000000003e-05, "loss": 8.6727, "step": 371500 }, { "epoch": 2.98, "learning_rate": 4.8512e-05, "loss": 8.6801, "step": 372000 }, { "epoch": 2.98, "learning_rate": 4.851e-05, "loss": 8.6943, "step": 372500 }, { "epoch": 2.98, "learning_rate": 4.8508000000000004e-05, "loss": 8.687, "step": 373000 }, { "epoch": 2.99, "learning_rate": 4.8506e-05, "loss": 8.7085, "step": 373500 }, { "epoch": 2.99, "learning_rate": 4.8504e-05, "loss": 8.7113, "step": 374000 }, { "epoch": 3.0, "learning_rate": 4.8502000000000004e-05, "loss": 8.7011, "step": 374500 }, { "epoch": 3.0, "learning_rate": 4.85e-05, "loss": 8.699, "step": 375000 }, { "epoch": 3.0, "learning_rate": 4.8498e-05, "loss": 8.6982, "step": 375500 }, { "epoch": 3.01, "learning_rate": 4.8496000000000005e-05, "loss": 8.7074, "step": 376000 }, { "epoch": 3.01, "learning_rate": 4.8494e-05, "loss": 8.6757, "step": 376500 }, { "epoch": 3.02, "learning_rate": 4.8492e-05, "loss": 8.7023, "step": 377000 }, { "epoch": 3.02, "learning_rate": 4.8490000000000005e-05, "loss": 8.6954, "step": 377500 }, { "epoch": 3.02, "learning_rate": 4.8488e-05, "loss": 8.6936, "step": 378000 }, { "epoch": 3.03, "learning_rate": 4.8486e-05, "loss": 8.7095, "step": 378500 }, { "epoch": 3.03, "learning_rate": 4.8484e-05, "loss": 8.7091, "step": 379000 }, { "epoch": 3.04, "learning_rate": 4.8482e-05, "loss": 8.7081, "step": 379500 }, { "epoch": 3.04, "learning_rate": 4.8480000000000003e-05, "loss": 8.6958, "step": 380000 }, { "epoch": 3.04, "learning_rate": 4.8478e-05, "loss": 8.6931, "step": 380500 }, { "epoch": 3.05, "learning_rate": 4.8476e-05, "loss": 8.7034, "step": 381000 }, { "epoch": 3.05, "learning_rate": 4.8474000000000004e-05, "loss": 8.6621, "step": 381500 }, { "epoch": 3.06, "learning_rate": 4.8472e-05, "loss": 8.6779, "step": 382000 }, { "epoch": 3.06, "learning_rate": 4.847e-05, "loss": 8.7087, "step": 382500 }, { "epoch": 3.06, "learning_rate": 4.8468000000000004e-05, "loss": 8.7243, "step": 383000 }, { "epoch": 3.07, "learning_rate": 4.8466e-05, "loss": 8.6768, "step": 383500 }, { "epoch": 3.07, "learning_rate": 4.8464e-05, "loss": 8.6821, "step": 384000 }, { "epoch": 3.08, "learning_rate": 4.8462000000000005e-05, "loss": 8.7114, "step": 384500 }, { "epoch": 3.08, "learning_rate": 4.846e-05, "loss": 8.6725, "step": 385000 }, { "epoch": 3.08, "learning_rate": 4.8458e-05, "loss": 8.7012, "step": 385500 }, { "epoch": 3.09, "learning_rate": 4.8456e-05, "loss": 8.6869, "step": 386000 }, { "epoch": 3.09, "learning_rate": 4.845400000000001e-05, "loss": 8.7117, "step": 386500 }, { "epoch": 3.1, "learning_rate": 4.8452e-05, "loss": 8.715, "step": 387000 }, { "epoch": 3.1, "learning_rate": 4.845e-05, "loss": 8.6688, "step": 387500 }, { "epoch": 3.1, "learning_rate": 4.844800000000001e-05, "loss": 8.6967, "step": 388000 }, { "epoch": 3.11, "learning_rate": 4.8446e-05, "loss": 8.6825, "step": 388500 }, { "epoch": 3.11, "learning_rate": 4.8444e-05, "loss": 8.6951, "step": 389000 }, { "epoch": 3.12, "learning_rate": 4.8442e-05, "loss": 8.6929, "step": 389500 }, { "epoch": 3.12, "learning_rate": 4.8440000000000004e-05, "loss": 8.7185, "step": 390000 }, { "epoch": 3.12, "learning_rate": 4.8438e-05, "loss": 8.6915, "step": 390500 }, { "epoch": 3.13, "learning_rate": 4.8436e-05, "loss": 8.7071, "step": 391000 }, { "epoch": 3.13, "learning_rate": 4.8434000000000004e-05, "loss": 8.6841, "step": 391500 }, { "epoch": 3.14, "learning_rate": 4.8432e-05, "loss": 8.6782, "step": 392000 }, { "epoch": 3.14, "learning_rate": 4.843e-05, "loss": 8.6894, "step": 392500 }, { "epoch": 3.14, "learning_rate": 4.8428e-05, "loss": 8.6945, "step": 393000 }, { "epoch": 3.15, "learning_rate": 4.842600000000001e-05, "loss": 8.684, "step": 393500 }, { "epoch": 3.15, "learning_rate": 4.8424e-05, "loss": 8.654, "step": 394000 }, { "epoch": 3.16, "learning_rate": 4.8422e-05, "loss": 8.6775, "step": 394500 }, { "epoch": 3.16, "learning_rate": 4.842000000000001e-05, "loss": 8.6904, "step": 395000 }, { "epoch": 3.16, "learning_rate": 4.8418e-05, "loss": 8.7069, "step": 395500 }, { "epoch": 3.17, "learning_rate": 4.8416e-05, "loss": 8.691, "step": 396000 }, { "epoch": 3.17, "learning_rate": 4.8414e-05, "loss": 8.6932, "step": 396500 }, { "epoch": 3.18, "learning_rate": 4.8412e-05, "loss": 8.6928, "step": 397000 }, { "epoch": 3.18, "learning_rate": 4.841e-05, "loss": 8.7049, "step": 397500 }, { "epoch": 3.18, "learning_rate": 4.8408e-05, "loss": 8.7108, "step": 398000 }, { "epoch": 3.19, "learning_rate": 4.8406000000000004e-05, "loss": 8.7185, "step": 398500 }, { "epoch": 3.19, "learning_rate": 4.8404000000000006e-05, "loss": 8.718, "step": 399000 }, { "epoch": 3.2, "learning_rate": 4.8402e-05, "loss": 8.678, "step": 399500 }, { "epoch": 3.2, "learning_rate": 4.8400000000000004e-05, "loss": 8.6971, "step": 400000 }, { "epoch": 3.2, "learning_rate": 4.8398000000000007e-05, "loss": 8.6769, "step": 400500 }, { "epoch": 3.21, "learning_rate": 4.8396e-05, "loss": 8.7164, "step": 401000 }, { "epoch": 3.21, "learning_rate": 4.8394e-05, "loss": 8.6986, "step": 401500 }, { "epoch": 3.22, "learning_rate": 4.839200000000001e-05, "loss": 8.6726, "step": 402000 }, { "epoch": 3.22, "learning_rate": 4.839e-05, "loss": 8.6929, "step": 402500 }, { "epoch": 3.22, "learning_rate": 4.8388e-05, "loss": 8.7256, "step": 403000 }, { "epoch": 3.23, "learning_rate": 4.8386e-05, "loss": 8.6922, "step": 403500 }, { "epoch": 3.23, "learning_rate": 4.8384e-05, "loss": 8.6959, "step": 404000 }, { "epoch": 3.24, "learning_rate": 4.8382e-05, "loss": 8.6909, "step": 404500 }, { "epoch": 3.24, "learning_rate": 4.838e-05, "loss": 8.6923, "step": 405000 }, { "epoch": 3.24, "learning_rate": 4.8378e-05, "loss": 8.6927, "step": 405500 }, { "epoch": 3.25, "learning_rate": 4.8376000000000006e-05, "loss": 8.7172, "step": 406000 }, { "epoch": 3.25, "learning_rate": 4.8374e-05, "loss": 8.6758, "step": 406500 }, { "epoch": 3.26, "learning_rate": 4.8372000000000004e-05, "loss": 8.7046, "step": 407000 }, { "epoch": 3.26, "learning_rate": 4.8370000000000006e-05, "loss": 8.7108, "step": 407500 }, { "epoch": 3.26, "learning_rate": 4.8368e-05, "loss": 8.692, "step": 408000 }, { "epoch": 3.27, "learning_rate": 4.8366e-05, "loss": 8.689, "step": 408500 }, { "epoch": 3.27, "learning_rate": 4.8364000000000007e-05, "loss": 8.6759, "step": 409000 }, { "epoch": 3.28, "learning_rate": 4.8362e-05, "loss": 8.6801, "step": 409500 }, { "epoch": 3.28, "learning_rate": 4.836e-05, "loss": 8.6988, "step": 410000 }, { "epoch": 3.28, "learning_rate": 4.8358e-05, "loss": 8.6677, "step": 410500 }, { "epoch": 3.29, "learning_rate": 4.8356e-05, "loss": 8.6791, "step": 411000 }, { "epoch": 3.29, "learning_rate": 4.8354000000000005e-05, "loss": 8.6931, "step": 411500 }, { "epoch": 3.3, "learning_rate": 4.8352e-05, "loss": 8.668, "step": 412000 }, { "epoch": 3.3, "learning_rate": 4.835e-05, "loss": 8.694, "step": 412500 }, { "epoch": 3.3, "learning_rate": 4.8348000000000005e-05, "loss": 8.6952, "step": 413000 }, { "epoch": 3.31, "learning_rate": 4.8346e-05, "loss": 8.6957, "step": 413500 }, { "epoch": 3.31, "learning_rate": 4.8344e-05, "loss": 8.6833, "step": 414000 }, { "epoch": 3.32, "learning_rate": 4.8342000000000006e-05, "loss": 8.7016, "step": 414500 }, { "epoch": 3.32, "learning_rate": 4.834e-05, "loss": 8.7104, "step": 415000 }, { "epoch": 3.32, "learning_rate": 4.8338e-05, "loss": 8.6991, "step": 415500 }, { "epoch": 3.33, "learning_rate": 4.8336000000000006e-05, "loss": 8.6973, "step": 416000 }, { "epoch": 3.33, "learning_rate": 4.8334e-05, "loss": 8.7134, "step": 416500 }, { "epoch": 3.34, "learning_rate": 4.8332000000000004e-05, "loss": 8.6838, "step": 417000 }, { "epoch": 3.34, "learning_rate": 4.833e-05, "loss": 8.7218, "step": 417500 }, { "epoch": 3.34, "learning_rate": 4.8328e-05, "loss": 8.7094, "step": 418000 }, { "epoch": 3.35, "learning_rate": 4.8326000000000005e-05, "loss": 8.684, "step": 418500 }, { "epoch": 3.35, "learning_rate": 4.8324e-05, "loss": 8.7029, "step": 419000 }, { "epoch": 3.36, "learning_rate": 4.8322e-05, "loss": 8.6928, "step": 419500 }, { "epoch": 3.36, "learning_rate": 4.8320000000000005e-05, "loss": 8.6705, "step": 420000 }, { "epoch": 3.36, "learning_rate": 4.8318e-05, "loss": 8.6614, "step": 420500 }, { "epoch": 3.37, "learning_rate": 4.8316e-05, "loss": 8.6882, "step": 421000 }, { "epoch": 3.37, "learning_rate": 4.8314000000000005e-05, "loss": 8.6703, "step": 421500 }, { "epoch": 3.38, "learning_rate": 4.8312e-05, "loss": 8.6985, "step": 422000 }, { "epoch": 3.38, "learning_rate": 4.8309999999999997e-05, "loss": 8.686, "step": 422500 }, { "epoch": 3.38, "learning_rate": 4.8308000000000006e-05, "loss": 8.6813, "step": 423000 }, { "epoch": 3.39, "learning_rate": 4.8306e-05, "loss": 8.7127, "step": 423500 }, { "epoch": 3.39, "learning_rate": 4.8304000000000004e-05, "loss": 8.6972, "step": 424000 }, { "epoch": 3.4, "learning_rate": 4.8302000000000006e-05, "loss": 8.7183, "step": 424500 }, { "epoch": 3.4, "learning_rate": 4.83e-05, "loss": 8.7121, "step": 425000 }, { "epoch": 3.4, "learning_rate": 4.8298000000000004e-05, "loss": 8.7204, "step": 425500 }, { "epoch": 3.41, "learning_rate": 4.8296e-05, "loss": 8.7153, "step": 426000 }, { "epoch": 3.41, "learning_rate": 4.8294e-05, "loss": 8.6959, "step": 426500 }, { "epoch": 3.42, "learning_rate": 4.8292000000000005e-05, "loss": 8.7169, "step": 427000 }, { "epoch": 3.42, "learning_rate": 4.829e-05, "loss": 8.6833, "step": 427500 }, { "epoch": 3.42, "learning_rate": 4.8288e-05, "loss": 8.707, "step": 428000 }, { "epoch": 3.43, "learning_rate": 4.8286000000000005e-05, "loss": 8.6847, "step": 428500 }, { "epoch": 3.43, "learning_rate": 4.8284e-05, "loss": 8.6911, "step": 429000 }, { "epoch": 3.44, "learning_rate": 4.8282e-05, "loss": 8.7043, "step": 429500 }, { "epoch": 3.44, "learning_rate": 4.8280000000000005e-05, "loss": 8.7095, "step": 430000 }, { "epoch": 3.44, "learning_rate": 4.8278e-05, "loss": 8.6738, "step": 430500 }, { "epoch": 3.45, "learning_rate": 4.8276e-05, "loss": 8.7012, "step": 431000 }, { "epoch": 3.45, "learning_rate": 4.8274000000000006e-05, "loss": 8.6963, "step": 431500 }, { "epoch": 3.46, "learning_rate": 4.8272e-05, "loss": 8.7061, "step": 432000 }, { "epoch": 3.46, "learning_rate": 4.8270000000000004e-05, "loss": 8.6929, "step": 432500 }, { "epoch": 3.46, "learning_rate": 4.8268e-05, "loss": 8.6864, "step": 433000 }, { "epoch": 3.47, "learning_rate": 4.8266e-05, "loss": 8.701, "step": 433500 }, { "epoch": 3.47, "learning_rate": 4.8264000000000004e-05, "loss": 8.7049, "step": 434000 }, { "epoch": 3.48, "learning_rate": 4.8262e-05, "loss": 8.6915, "step": 434500 }, { "epoch": 3.48, "learning_rate": 4.826e-05, "loss": 8.7128, "step": 435000 }, { "epoch": 3.48, "learning_rate": 4.8258000000000005e-05, "loss": 8.7074, "step": 435500 }, { "epoch": 3.49, "learning_rate": 4.8256e-05, "loss": 8.6953, "step": 436000 }, { "epoch": 3.49, "learning_rate": 4.8254e-05, "loss": 8.7117, "step": 436500 }, { "epoch": 3.5, "learning_rate": 4.8252000000000005e-05, "loss": 8.6716, "step": 437000 }, { "epoch": 3.5, "learning_rate": 4.825e-05, "loss": 8.7095, "step": 437500 }, { "epoch": 3.5, "learning_rate": 4.8248e-05, "loss": 8.7017, "step": 438000 }, { "epoch": 3.51, "learning_rate": 4.8246000000000005e-05, "loss": 8.6966, "step": 438500 }, { "epoch": 3.51, "learning_rate": 4.8244e-05, "loss": 8.674, "step": 439000 }, { "epoch": 3.52, "learning_rate": 4.8242e-05, "loss": 8.6855, "step": 439500 }, { "epoch": 3.52, "learning_rate": 4.824e-05, "loss": 8.6747, "step": 440000 }, { "epoch": 3.52, "learning_rate": 4.8238e-05, "loss": 8.701, "step": 440500 }, { "epoch": 3.53, "learning_rate": 4.8236000000000004e-05, "loss": 8.6833, "step": 441000 }, { "epoch": 3.53, "learning_rate": 4.8234e-05, "loss": 8.6943, "step": 441500 }, { "epoch": 3.54, "learning_rate": 4.8232e-05, "loss": 8.689, "step": 442000 }, { "epoch": 3.54, "learning_rate": 4.8230000000000004e-05, "loss": 8.6998, "step": 442500 }, { "epoch": 3.54, "learning_rate": 4.8228e-05, "loss": 8.7034, "step": 443000 }, { "epoch": 3.55, "learning_rate": 4.8226e-05, "loss": 8.7114, "step": 443500 }, { "epoch": 3.55, "learning_rate": 4.8224000000000004e-05, "loss": 8.6846, "step": 444000 }, { "epoch": 3.56, "learning_rate": 4.8222e-05, "loss": 8.6767, "step": 444500 }, { "epoch": 3.56, "learning_rate": 4.822e-05, "loss": 8.726, "step": 445000 }, { "epoch": 3.56, "learning_rate": 4.8218000000000005e-05, "loss": 8.6712, "step": 445500 }, { "epoch": 3.57, "learning_rate": 4.8216e-05, "loss": 8.6972, "step": 446000 }, { "epoch": 3.57, "learning_rate": 4.8214e-05, "loss": 8.7112, "step": 446500 }, { "epoch": 3.58, "learning_rate": 4.8212e-05, "loss": 8.6705, "step": 447000 }, { "epoch": 3.58, "learning_rate": 4.821e-05, "loss": 8.7263, "step": 447500 }, { "epoch": 3.58, "learning_rate": 4.8208e-05, "loss": 8.6963, "step": 448000 }, { "epoch": 3.59, "learning_rate": 4.8206e-05, "loss": 8.7303, "step": 448500 }, { "epoch": 3.59, "learning_rate": 4.820400000000001e-05, "loss": 8.707, "step": 449000 }, { "epoch": 3.6, "learning_rate": 4.8202000000000004e-05, "loss": 8.6949, "step": 449500 }, { "epoch": 3.6, "learning_rate": 4.82e-05, "loss": 8.7131, "step": 450000 }, { "epoch": 3.6, "learning_rate": 4.8198e-05, "loss": 8.683, "step": 450500 }, { "epoch": 3.61, "learning_rate": 4.8196000000000004e-05, "loss": 8.6739, "step": 451000 }, { "epoch": 3.61, "learning_rate": 4.8194e-05, "loss": 8.693, "step": 451500 }, { "epoch": 3.62, "learning_rate": 4.8192e-05, "loss": 8.7077, "step": 452000 }, { "epoch": 3.62, "learning_rate": 4.8190000000000004e-05, "loss": 8.6969, "step": 452500 }, { "epoch": 3.62, "learning_rate": 4.8188e-05, "loss": 8.6799, "step": 453000 }, { "epoch": 3.63, "learning_rate": 4.8186e-05, "loss": 8.7057, "step": 453500 }, { "epoch": 3.63, "learning_rate": 4.8184e-05, "loss": 8.7014, "step": 454000 }, { "epoch": 3.64, "learning_rate": 4.818200000000001e-05, "loss": 8.7065, "step": 454500 }, { "epoch": 3.64, "learning_rate": 4.818e-05, "loss": 8.7073, "step": 455000 }, { "epoch": 3.64, "learning_rate": 4.8178e-05, "loss": 8.7097, "step": 455500 }, { "epoch": 3.65, "learning_rate": 4.817600000000001e-05, "loss": 8.7036, "step": 456000 }, { "epoch": 3.65, "learning_rate": 4.8174e-05, "loss": 8.6856, "step": 456500 }, { "epoch": 3.66, "learning_rate": 4.8172e-05, "loss": 8.7143, "step": 457000 }, { "epoch": 3.66, "learning_rate": 4.817e-05, "loss": 8.6936, "step": 457500 }, { "epoch": 3.66, "learning_rate": 4.8168000000000004e-05, "loss": 8.7067, "step": 458000 }, { "epoch": 3.67, "learning_rate": 4.8166e-05, "loss": 8.6913, "step": 458500 }, { "epoch": 3.67, "learning_rate": 4.8164e-05, "loss": 8.6758, "step": 459000 }, { "epoch": 3.68, "learning_rate": 4.8162000000000004e-05, "loss": 8.6876, "step": 459500 }, { "epoch": 3.68, "learning_rate": 4.816e-05, "loss": 8.6895, "step": 460000 }, { "epoch": 3.68, "learning_rate": 4.8158e-05, "loss": 8.7215, "step": 460500 }, { "epoch": 3.69, "learning_rate": 4.8156000000000004e-05, "loss": 8.6853, "step": 461000 }, { "epoch": 3.69, "learning_rate": 4.815400000000001e-05, "loss": 8.6857, "step": 461500 }, { "epoch": 3.7, "learning_rate": 4.8152e-05, "loss": 8.7186, "step": 462000 }, { "epoch": 3.7, "learning_rate": 4.815e-05, "loss": 8.6972, "step": 462500 }, { "epoch": 3.7, "learning_rate": 4.814800000000001e-05, "loss": 8.705, "step": 463000 }, { "epoch": 3.71, "learning_rate": 4.8146e-05, "loss": 8.7002, "step": 463500 }, { "epoch": 3.71, "learning_rate": 4.8144e-05, "loss": 8.6838, "step": 464000 }, { "epoch": 3.72, "learning_rate": 4.8142e-05, "loss": 8.6851, "step": 464500 }, { "epoch": 3.72, "learning_rate": 4.814e-05, "loss": 8.7308, "step": 465000 }, { "epoch": 3.72, "learning_rate": 4.8138e-05, "loss": 8.6843, "step": 465500 }, { "epoch": 3.73, "learning_rate": 4.8136e-05, "loss": 8.678, "step": 466000 }, { "epoch": 3.73, "learning_rate": 4.8134000000000004e-05, "loss": 8.661, "step": 466500 }, { "epoch": 3.74, "learning_rate": 4.8132000000000006e-05, "loss": 8.6863, "step": 467000 }, { "epoch": 3.74, "learning_rate": 4.813e-05, "loss": 8.6986, "step": 467500 }, { "epoch": 3.74, "learning_rate": 4.8128000000000004e-05, "loss": 8.7165, "step": 468000 }, { "epoch": 3.75, "learning_rate": 4.8126000000000006e-05, "loss": 8.6645, "step": 468500 }, { "epoch": 3.75, "learning_rate": 4.8124e-05, "loss": 8.7061, "step": 469000 }, { "epoch": 3.76, "learning_rate": 4.8122e-05, "loss": 8.6844, "step": 469500 }, { "epoch": 3.76, "learning_rate": 4.812000000000001e-05, "loss": 8.6871, "step": 470000 }, { "epoch": 3.76, "learning_rate": 4.8118e-05, "loss": 8.711, "step": 470500 }, { "epoch": 3.77, "learning_rate": 4.8116e-05, "loss": 8.6687, "step": 471000 }, { "epoch": 3.77, "learning_rate": 4.8114e-05, "loss": 8.6982, "step": 471500 }, { "epoch": 3.78, "learning_rate": 4.8112e-05, "loss": 8.6865, "step": 472000 }, { "epoch": 3.78, "learning_rate": 4.8110000000000005e-05, "loss": 8.6927, "step": 472500 }, { "epoch": 3.78, "learning_rate": 4.8108e-05, "loss": 8.6943, "step": 473000 }, { "epoch": 3.79, "learning_rate": 4.8106e-05, "loss": 8.6945, "step": 473500 }, { "epoch": 3.79, "learning_rate": 4.8104000000000006e-05, "loss": 8.6925, "step": 474000 }, { "epoch": 3.8, "learning_rate": 4.8102e-05, "loss": 8.6788, "step": 474500 }, { "epoch": 3.8, "learning_rate": 4.8100000000000004e-05, "loss": 8.7, "step": 475000 }, { "epoch": 3.8, "learning_rate": 4.8098000000000006e-05, "loss": 8.6685, "step": 475500 }, { "epoch": 3.81, "learning_rate": 4.8096e-05, "loss": 8.6779, "step": 476000 }, { "epoch": 3.81, "learning_rate": 4.8094e-05, "loss": 8.6893, "step": 476500 }, { "epoch": 3.82, "learning_rate": 4.8092000000000006e-05, "loss": 8.6897, "step": 477000 }, { "epoch": 3.82, "learning_rate": 4.809e-05, "loss": 8.7191, "step": 477500 }, { "epoch": 3.82, "learning_rate": 4.8088e-05, "loss": 8.663, "step": 478000 }, { "epoch": 3.83, "learning_rate": 4.8086e-05, "loss": 8.6669, "step": 478500 }, { "epoch": 3.83, "learning_rate": 4.8084e-05, "loss": 8.6691, "step": 479000 }, { "epoch": 3.84, "learning_rate": 4.8082000000000005e-05, "loss": 8.6918, "step": 479500 }, { "epoch": 3.84, "learning_rate": 4.808e-05, "loss": 8.6969, "step": 480000 }, { "epoch": 3.84, "learning_rate": 4.8078e-05, "loss": 8.6683, "step": 480500 }, { "epoch": 3.85, "learning_rate": 4.8076000000000005e-05, "loss": 8.7101, "step": 481000 }, { "epoch": 3.85, "learning_rate": 4.8074e-05, "loss": 8.6696, "step": 481500 }, { "epoch": 3.86, "learning_rate": 4.8072e-05, "loss": 8.6914, "step": 482000 }, { "epoch": 3.86, "learning_rate": 4.8070000000000006e-05, "loss": 8.6875, "step": 482500 }, { "epoch": 3.86, "learning_rate": 4.8068e-05, "loss": 8.7111, "step": 483000 }, { "epoch": 3.87, "learning_rate": 4.8066e-05, "loss": 8.708, "step": 483500 }, { "epoch": 3.87, "learning_rate": 4.8064000000000006e-05, "loss": 8.6654, "step": 484000 }, { "epoch": 3.88, "learning_rate": 4.8062e-05, "loss": 8.6937, "step": 484500 }, { "epoch": 3.88, "learning_rate": 4.8060000000000004e-05, "loss": 8.6961, "step": 485000 }, { "epoch": 3.88, "learning_rate": 4.8058e-05, "loss": 8.7011, "step": 485500 }, { "epoch": 3.89, "learning_rate": 4.8056e-05, "loss": 8.7058, "step": 486000 }, { "epoch": 3.89, "learning_rate": 4.8054000000000004e-05, "loss": 8.6886, "step": 486500 }, { "epoch": 3.9, "learning_rate": 4.8052e-05, "loss": 8.7001, "step": 487000 }, { "epoch": 3.9, "learning_rate": 4.805e-05, "loss": 8.7077, "step": 487500 }, { "epoch": 3.9, "learning_rate": 4.8048000000000005e-05, "loss": 8.6888, "step": 488000 }, { "epoch": 3.91, "learning_rate": 4.8046e-05, "loss": 8.7076, "step": 488500 }, { "epoch": 3.91, "learning_rate": 4.8044e-05, "loss": 8.6638, "step": 489000 }, { "epoch": 3.92, "learning_rate": 4.8042000000000005e-05, "loss": 8.6892, "step": 489500 }, { "epoch": 3.92, "learning_rate": 4.804e-05, "loss": 8.676, "step": 490000 }, { "epoch": 3.92, "learning_rate": 4.8037999999999996e-05, "loss": 8.7038, "step": 490500 }, { "epoch": 3.93, "learning_rate": 4.8036000000000006e-05, "loss": 8.6922, "step": 491000 }, { "epoch": 3.93, "learning_rate": 4.8034e-05, "loss": 8.6947, "step": 491500 }, { "epoch": 3.94, "learning_rate": 4.8032000000000004e-05, "loss": 8.6718, "step": 492000 }, { "epoch": 3.94, "learning_rate": 4.8030000000000006e-05, "loss": 8.7013, "step": 492500 }, { "epoch": 3.94, "learning_rate": 4.8028e-05, "loss": 8.6745, "step": 493000 }, { "epoch": 3.95, "learning_rate": 4.8026000000000004e-05, "loss": 8.7123, "step": 493500 }, { "epoch": 3.95, "learning_rate": 4.8024e-05, "loss": 8.7301, "step": 494000 }, { "epoch": 3.96, "learning_rate": 4.8022e-05, "loss": 8.7264, "step": 494500 }, { "epoch": 3.96, "learning_rate": 4.8020000000000004e-05, "loss": 8.6981, "step": 495000 }, { "epoch": 3.96, "learning_rate": 4.8018e-05, "loss": 8.697, "step": 495500 }, { "epoch": 3.97, "learning_rate": 4.8016e-05, "loss": 8.6812, "step": 496000 }, { "epoch": 3.97, "learning_rate": 4.8014000000000005e-05, "loss": 8.7023, "step": 496500 }, { "epoch": 3.98, "learning_rate": 4.8012e-05, "loss": 8.6937, "step": 497000 }, { "epoch": 3.98, "learning_rate": 4.801e-05, "loss": 8.6924, "step": 497500 }, { "epoch": 3.98, "learning_rate": 4.8008000000000005e-05, "loss": 8.7007, "step": 498000 }, { "epoch": 3.99, "learning_rate": 4.8006e-05, "loss": 8.7113, "step": 498500 }, { "epoch": 3.99, "learning_rate": 4.8004e-05, "loss": 8.6855, "step": 499000 }, { "epoch": 4.0, "learning_rate": 4.8002000000000006e-05, "loss": 8.6865, "step": 499500 }, { "epoch": 4.0, "learning_rate": 4.8e-05, "loss": 8.7076, "step": 500000 }, { "epoch": 4.0, "learning_rate": 4.7998000000000004e-05, "loss": 8.6981, "step": 500500 }, { "epoch": 4.01, "learning_rate": 4.7996e-05, "loss": 8.6832, "step": 501000 }, { "epoch": 4.01, "learning_rate": 4.7994e-05, "loss": 8.6853, "step": 501500 }, { "epoch": 4.02, "learning_rate": 4.7992000000000004e-05, "loss": 8.6963, "step": 502000 }, { "epoch": 4.02, "learning_rate": 4.799e-05, "loss": 8.6834, "step": 502500 }, { "epoch": 4.02, "learning_rate": 4.7988e-05, "loss": 8.6712, "step": 503000 }, { "epoch": 4.03, "learning_rate": 4.7986000000000004e-05, "loss": 8.7123, "step": 503500 }, { "epoch": 4.03, "learning_rate": 4.7984e-05, "loss": 8.6588, "step": 504000 }, { "epoch": 4.04, "learning_rate": 4.7982e-05, "loss": 8.6922, "step": 504500 }, { "epoch": 4.04, "learning_rate": 4.7980000000000005e-05, "loss": 8.702, "step": 505000 }, { "epoch": 4.04, "learning_rate": 4.7978e-05, "loss": 8.7003, "step": 505500 }, { "epoch": 4.05, "learning_rate": 4.7976e-05, "loss": 8.6942, "step": 506000 }, { "epoch": 4.05, "learning_rate": 4.7974000000000005e-05, "loss": 8.7065, "step": 506500 }, { "epoch": 4.06, "learning_rate": 4.7972e-05, "loss": 8.6901, "step": 507000 }, { "epoch": 4.06, "learning_rate": 4.797e-05, "loss": 8.6761, "step": 507500 }, { "epoch": 4.06, "learning_rate": 4.7968e-05, "loss": 8.6732, "step": 508000 }, { "epoch": 4.07, "learning_rate": 4.7966e-05, "loss": 8.6666, "step": 508500 }, { "epoch": 4.07, "learning_rate": 4.7964000000000004e-05, "loss": 8.7143, "step": 509000 }, { "epoch": 4.08, "learning_rate": 4.7962e-05, "loss": 8.7032, "step": 509500 }, { "epoch": 4.08, "learning_rate": 4.796e-05, "loss": 8.6971, "step": 510000 }, { "epoch": 4.08, "learning_rate": 4.7958000000000004e-05, "loss": 8.7349, "step": 510500 }, { "epoch": 4.09, "learning_rate": 4.7956e-05, "loss": 8.7171, "step": 511000 }, { "epoch": 4.09, "learning_rate": 4.7954e-05, "loss": 8.6922, "step": 511500 }, { "epoch": 4.1, "learning_rate": 4.7952000000000004e-05, "loss": 8.6794, "step": 512000 }, { "epoch": 4.1, "learning_rate": 4.795e-05, "loss": 8.7029, "step": 512500 }, { "epoch": 4.1, "learning_rate": 4.7948e-05, "loss": 8.6932, "step": 513000 }, { "epoch": 4.11, "learning_rate": 4.7946000000000005e-05, "loss": 8.7035, "step": 513500 }, { "epoch": 4.11, "learning_rate": 4.7944e-05, "loss": 8.6815, "step": 514000 }, { "epoch": 4.12, "learning_rate": 4.7942e-05, "loss": 8.6972, "step": 514500 }, { "epoch": 4.12, "learning_rate": 4.794e-05, "loss": 8.6814, "step": 515000 }, { "epoch": 4.12, "learning_rate": 4.7938e-05, "loss": 8.6591, "step": 515500 }, { "epoch": 4.13, "learning_rate": 4.7936e-05, "loss": 8.6983, "step": 516000 }, { "epoch": 4.13, "learning_rate": 4.7934e-05, "loss": 8.7041, "step": 516500 }, { "epoch": 4.14, "learning_rate": 4.793200000000001e-05, "loss": 8.6969, "step": 517000 }, { "epoch": 4.14, "learning_rate": 4.7930000000000004e-05, "loss": 8.7102, "step": 517500 }, { "epoch": 4.14, "learning_rate": 4.7928e-05, "loss": 8.6954, "step": 518000 }, { "epoch": 4.15, "learning_rate": 4.7926e-05, "loss": 8.6718, "step": 518500 }, { "epoch": 4.15, "learning_rate": 4.7924000000000004e-05, "loss": 8.6878, "step": 519000 }, { "epoch": 4.16, "learning_rate": 4.7922e-05, "loss": 8.6994, "step": 519500 }, { "epoch": 4.16, "learning_rate": 4.792e-05, "loss": 8.6711, "step": 520000 }, { "epoch": 4.16, "learning_rate": 4.7918000000000004e-05, "loss": 8.7005, "step": 520500 }, { "epoch": 4.17, "learning_rate": 4.7916e-05, "loss": 8.6749, "step": 521000 }, { "epoch": 4.17, "learning_rate": 4.7914e-05, "loss": 8.6821, "step": 521500 }, { "epoch": 4.18, "learning_rate": 4.7912e-05, "loss": 8.6771, "step": 522000 }, { "epoch": 4.18, "learning_rate": 4.791000000000001e-05, "loss": 8.6896, "step": 522500 }, { "epoch": 4.18, "learning_rate": 4.7908e-05, "loss": 8.7081, "step": 523000 }, { "epoch": 4.19, "learning_rate": 4.7906e-05, "loss": 8.6892, "step": 523500 }, { "epoch": 4.19, "learning_rate": 4.790400000000001e-05, "loss": 8.6998, "step": 524000 }, { "epoch": 4.2, "learning_rate": 4.7902e-05, "loss": 8.6664, "step": 524500 }, { "epoch": 4.2, "learning_rate": 4.79e-05, "loss": 8.7092, "step": 525000 }, { "epoch": 4.2, "learning_rate": 4.7898e-05, "loss": 8.6845, "step": 525500 }, { "epoch": 4.21, "learning_rate": 4.7896000000000004e-05, "loss": 8.7126, "step": 526000 }, { "epoch": 4.21, "learning_rate": 4.7894e-05, "loss": 8.6845, "step": 526500 }, { "epoch": 4.22, "learning_rate": 4.7892e-05, "loss": 8.721, "step": 527000 }, { "epoch": 4.22, "learning_rate": 4.7890000000000004e-05, "loss": 8.7022, "step": 527500 }, { "epoch": 4.22, "learning_rate": 4.7888e-05, "loss": 8.6862, "step": 528000 }, { "epoch": 4.23, "learning_rate": 4.7886e-05, "loss": 8.6964, "step": 528500 }, { "epoch": 4.23, "learning_rate": 4.7884000000000004e-05, "loss": 8.6632, "step": 529000 }, { "epoch": 4.24, "learning_rate": 4.788200000000001e-05, "loss": 8.7079, "step": 529500 }, { "epoch": 4.24, "learning_rate": 4.788e-05, "loss": 8.7429, "step": 530000 }, { "epoch": 4.24, "learning_rate": 4.7878e-05, "loss": 8.7043, "step": 530500 }, { "epoch": 4.25, "learning_rate": 4.787600000000001e-05, "loss": 8.7092, "step": 531000 }, { "epoch": 4.25, "learning_rate": 4.7874e-05, "loss": 8.6881, "step": 531500 }, { "epoch": 4.26, "learning_rate": 4.7872e-05, "loss": 8.6922, "step": 532000 }, { "epoch": 4.26, "learning_rate": 4.787e-05, "loss": 8.6983, "step": 532500 }, { "epoch": 4.26, "learning_rate": 4.7868e-05, "loss": 8.7213, "step": 533000 }, { "epoch": 4.27, "learning_rate": 4.7866e-05, "loss": 8.6881, "step": 533500 }, { "epoch": 4.27, "learning_rate": 4.7864e-05, "loss": 8.6939, "step": 534000 }, { "epoch": 4.28, "learning_rate": 4.7862000000000004e-05, "loss": 8.7001, "step": 534500 }, { "epoch": 4.28, "learning_rate": 4.7860000000000006e-05, "loss": 8.6931, "step": 535000 }, { "epoch": 4.28, "learning_rate": 4.7858e-05, "loss": 8.7165, "step": 535500 }, { "epoch": 4.29, "learning_rate": 4.7856000000000004e-05, "loss": 8.6589, "step": 536000 }, { "epoch": 4.29, "learning_rate": 4.7854000000000006e-05, "loss": 8.6572, "step": 536500 }, { "epoch": 4.3, "learning_rate": 4.7852e-05, "loss": 8.6591, "step": 537000 }, { "epoch": 4.3, "learning_rate": 4.785e-05, "loss": 8.6989, "step": 537500 }, { "epoch": 4.3, "learning_rate": 4.784800000000001e-05, "loss": 8.6687, "step": 538000 }, { "epoch": 4.31, "learning_rate": 4.7846e-05, "loss": 8.696, "step": 538500 }, { "epoch": 4.31, "learning_rate": 4.7844e-05, "loss": 8.6808, "step": 539000 }, { "epoch": 4.32, "learning_rate": 4.7842e-05, "loss": 8.6872, "step": 539500 }, { "epoch": 4.32, "learning_rate": 4.784e-05, "loss": 8.6958, "step": 540000 }, { "epoch": 4.32, "learning_rate": 4.7838000000000005e-05, "loss": 8.6808, "step": 540500 }, { "epoch": 4.33, "learning_rate": 4.7836e-05, "loss": 8.7062, "step": 541000 }, { "epoch": 4.33, "learning_rate": 4.7834e-05, "loss": 8.7068, "step": 541500 }, { "epoch": 4.34, "learning_rate": 4.7832000000000006e-05, "loss": 8.6692, "step": 542000 }, { "epoch": 4.34, "learning_rate": 4.783e-05, "loss": 8.6858, "step": 542500 }, { "epoch": 4.34, "learning_rate": 4.7828000000000004e-05, "loss": 8.7149, "step": 543000 }, { "epoch": 4.35, "learning_rate": 4.7826000000000006e-05, "loss": 8.6997, "step": 543500 }, { "epoch": 4.35, "learning_rate": 4.7824e-05, "loss": 8.6814, "step": 544000 }, { "epoch": 4.36, "learning_rate": 4.7822e-05, "loss": 8.7158, "step": 544500 }, { "epoch": 4.36, "learning_rate": 4.7820000000000006e-05, "loss": 8.6842, "step": 545000 }, { "epoch": 4.36, "learning_rate": 4.7818e-05, "loss": 8.6719, "step": 545500 }, { "epoch": 4.37, "learning_rate": 4.7816e-05, "loss": 8.6848, "step": 546000 }, { "epoch": 4.37, "learning_rate": 4.7814e-05, "loss": 8.6839, "step": 546500 }, { "epoch": 4.38, "learning_rate": 4.7812e-05, "loss": 8.6997, "step": 547000 }, { "epoch": 4.38, "learning_rate": 4.7810000000000005e-05, "loss": 8.6884, "step": 547500 }, { "epoch": 4.38, "learning_rate": 4.7808e-05, "loss": 8.6709, "step": 548000 }, { "epoch": 4.39, "learning_rate": 4.7806e-05, "loss": 8.7005, "step": 548500 }, { "epoch": 4.39, "learning_rate": 4.7804000000000005e-05, "loss": 8.688, "step": 549000 }, { "epoch": 4.4, "learning_rate": 4.7802e-05, "loss": 8.6819, "step": 549500 }, { "epoch": 4.4, "learning_rate": 4.78e-05, "loss": 8.6875, "step": 550000 }, { "epoch": 4.4, "learning_rate": 4.7798000000000006e-05, "loss": 8.6997, "step": 550500 }, { "epoch": 4.41, "learning_rate": 4.7796e-05, "loss": 8.7242, "step": 551000 }, { "epoch": 4.41, "learning_rate": 4.7794e-05, "loss": 8.7325, "step": 551500 }, { "epoch": 4.42, "learning_rate": 4.7792000000000006e-05, "loss": 8.6911, "step": 552000 }, { "epoch": 4.42, "learning_rate": 4.779e-05, "loss": 8.6963, "step": 552500 }, { "epoch": 4.42, "learning_rate": 4.7788000000000004e-05, "loss": 8.685, "step": 553000 }, { "epoch": 4.43, "learning_rate": 4.7786000000000006e-05, "loss": 8.6815, "step": 553500 }, { "epoch": 4.43, "learning_rate": 4.7784e-05, "loss": 8.7159, "step": 554000 }, { "epoch": 4.44, "learning_rate": 4.7782000000000004e-05, "loss": 8.6886, "step": 554500 }, { "epoch": 4.44, "learning_rate": 4.778e-05, "loss": 8.6965, "step": 555000 }, { "epoch": 4.44, "learning_rate": 4.7778e-05, "loss": 8.6883, "step": 555500 }, { "epoch": 4.45, "learning_rate": 4.7776000000000005e-05, "loss": 8.7095, "step": 556000 }, { "epoch": 4.45, "learning_rate": 4.7774e-05, "loss": 8.686, "step": 556500 }, { "epoch": 4.46, "learning_rate": 4.7772e-05, "loss": 8.6664, "step": 557000 }, { "epoch": 4.46, "learning_rate": 4.7770000000000005e-05, "loss": 8.7078, "step": 557500 }, { "epoch": 4.46, "learning_rate": 4.7768e-05, "loss": 8.6967, "step": 558000 }, { "epoch": 4.47, "learning_rate": 4.7765999999999996e-05, "loss": 8.6748, "step": 558500 }, { "epoch": 4.47, "learning_rate": 4.7764000000000006e-05, "loss": 8.7147, "step": 559000 }, { "epoch": 4.48, "learning_rate": 4.7762e-05, "loss": 8.7062, "step": 559500 }, { "epoch": 4.48, "learning_rate": 4.7760000000000004e-05, "loss": 8.6903, "step": 560000 }, { "epoch": 4.48, "learning_rate": 4.7758000000000006e-05, "loss": 8.725, "step": 560500 }, { "epoch": 4.49, "learning_rate": 4.7756e-05, "loss": 8.695, "step": 561000 }, { "epoch": 4.49, "learning_rate": 4.7754000000000004e-05, "loss": 8.6866, "step": 561500 }, { "epoch": 4.5, "learning_rate": 4.7752e-05, "loss": 8.6951, "step": 562000 }, { "epoch": 4.5, "learning_rate": 4.775e-05, "loss": 8.6861, "step": 562500 }, { "epoch": 4.5, "learning_rate": 4.7748000000000004e-05, "loss": 8.6742, "step": 563000 }, { "epoch": 4.51, "learning_rate": 4.7746e-05, "loss": 8.7243, "step": 563500 }, { "epoch": 4.51, "learning_rate": 4.7744e-05, "loss": 8.6688, "step": 564000 }, { "epoch": 4.52, "learning_rate": 4.7742000000000005e-05, "loss": 8.6872, "step": 564500 }, { "epoch": 4.52, "learning_rate": 4.774e-05, "loss": 8.694, "step": 565000 }, { "epoch": 4.52, "learning_rate": 4.7738e-05, "loss": 8.6958, "step": 565500 }, { "epoch": 4.53, "learning_rate": 4.7736000000000005e-05, "loss": 8.6991, "step": 566000 }, { "epoch": 4.53, "learning_rate": 4.7734e-05, "loss": 8.6851, "step": 566500 }, { "epoch": 4.54, "learning_rate": 4.7732e-05, "loss": 8.6653, "step": 567000 }, { "epoch": 4.54, "learning_rate": 4.7730000000000005e-05, "loss": 8.7074, "step": 567500 }, { "epoch": 4.54, "learning_rate": 4.7728e-05, "loss": 8.7233, "step": 568000 }, { "epoch": 4.55, "learning_rate": 4.7726000000000004e-05, "loss": 8.6912, "step": 568500 }, { "epoch": 4.55, "learning_rate": 4.7724e-05, "loss": 8.6986, "step": 569000 }, { "epoch": 4.56, "learning_rate": 4.7722e-05, "loss": 8.6708, "step": 569500 }, { "epoch": 4.56, "learning_rate": 4.7720000000000004e-05, "loss": 8.6864, "step": 570000 }, { "epoch": 4.56, "learning_rate": 4.7718e-05, "loss": 8.6781, "step": 570500 }, { "epoch": 4.57, "learning_rate": 4.7716e-05, "loss": 8.6778, "step": 571000 }, { "epoch": 4.57, "learning_rate": 4.7714000000000004e-05, "loss": 8.6937, "step": 571500 }, { "epoch": 4.58, "learning_rate": 4.7712e-05, "loss": 8.722, "step": 572000 }, { "epoch": 4.58, "learning_rate": 4.771e-05, "loss": 8.7032, "step": 572500 }, { "epoch": 4.58, "learning_rate": 4.7708000000000005e-05, "loss": 8.6986, "step": 573000 }, { "epoch": 4.59, "learning_rate": 4.7706e-05, "loss": 8.713, "step": 573500 }, { "epoch": 4.59, "learning_rate": 4.7704e-05, "loss": 8.6979, "step": 574000 }, { "epoch": 4.6, "learning_rate": 4.7702000000000005e-05, "loss": 8.6977, "step": 574500 }, { "epoch": 4.6, "learning_rate": 4.77e-05, "loss": 8.7149, "step": 575000 }, { "epoch": 4.6, "learning_rate": 4.7698e-05, "loss": 8.7074, "step": 575500 }, { "epoch": 4.61, "learning_rate": 4.7696e-05, "loss": 8.6984, "step": 576000 }, { "epoch": 4.61, "learning_rate": 4.7694e-05, "loss": 8.7013, "step": 576500 }, { "epoch": 4.62, "learning_rate": 4.7692000000000003e-05, "loss": 8.7002, "step": 577000 }, { "epoch": 4.62, "learning_rate": 4.769e-05, "loss": 8.6604, "step": 577500 }, { "epoch": 4.62, "learning_rate": 4.768800000000001e-05, "loss": 8.6784, "step": 578000 }, { "epoch": 4.63, "learning_rate": 4.7686000000000004e-05, "loss": 8.6893, "step": 578500 }, { "epoch": 4.63, "learning_rate": 4.7684e-05, "loss": 8.7328, "step": 579000 }, { "epoch": 4.64, "learning_rate": 4.7682e-05, "loss": 8.6774, "step": 579500 }, { "epoch": 4.64, "learning_rate": 4.7680000000000004e-05, "loss": 8.7057, "step": 580000 }, { "epoch": 4.64, "learning_rate": 4.7678e-05, "loss": 8.6948, "step": 580500 }, { "epoch": 4.65, "learning_rate": 4.7676e-05, "loss": 8.7093, "step": 581000 }, { "epoch": 4.65, "learning_rate": 4.7674000000000005e-05, "loss": 8.6769, "step": 581500 }, { "epoch": 4.66, "learning_rate": 4.7672e-05, "loss": 8.6765, "step": 582000 }, { "epoch": 4.66, "learning_rate": 4.767e-05, "loss": 8.6831, "step": 582500 }, { "epoch": 4.66, "learning_rate": 4.7668e-05, "loss": 8.6693, "step": 583000 }, { "epoch": 4.67, "learning_rate": 4.7666e-05, "loss": 8.6931, "step": 583500 }, { "epoch": 4.67, "learning_rate": 4.7664e-05, "loss": 8.7038, "step": 584000 }, { "epoch": 4.68, "learning_rate": 4.7662e-05, "loss": 8.7228, "step": 584500 }, { "epoch": 4.68, "learning_rate": 4.766000000000001e-05, "loss": 8.7059, "step": 585000 }, { "epoch": 4.68, "learning_rate": 4.7658000000000003e-05, "loss": 8.7001, "step": 585500 }, { "epoch": 4.69, "learning_rate": 4.7656e-05, "loss": 8.6872, "step": 586000 }, { "epoch": 4.69, "learning_rate": 4.7654e-05, "loss": 8.6756, "step": 586500 }, { "epoch": 4.7, "learning_rate": 4.7652000000000004e-05, "loss": 8.6775, "step": 587000 }, { "epoch": 4.7, "learning_rate": 4.765e-05, "loss": 8.6848, "step": 587500 }, { "epoch": 4.7, "learning_rate": 4.7648e-05, "loss": 8.675, "step": 588000 }, { "epoch": 4.71, "learning_rate": 4.7646000000000004e-05, "loss": 8.6876, "step": 588500 }, { "epoch": 4.71, "learning_rate": 4.7644e-05, "loss": 8.6964, "step": 589000 }, { "epoch": 4.72, "learning_rate": 4.7642e-05, "loss": 8.704, "step": 589500 }, { "epoch": 4.72, "learning_rate": 4.7640000000000005e-05, "loss": 8.6812, "step": 590000 }, { "epoch": 4.72, "learning_rate": 4.763800000000001e-05, "loss": 8.6891, "step": 590500 }, { "epoch": 4.73, "learning_rate": 4.7636e-05, "loss": 8.6733, "step": 591000 }, { "epoch": 4.73, "learning_rate": 4.7634e-05, "loss": 8.6834, "step": 591500 }, { "epoch": 4.74, "learning_rate": 4.763200000000001e-05, "loss": 8.6956, "step": 592000 }, { "epoch": 4.74, "learning_rate": 4.763e-05, "loss": 8.6648, "step": 592500 }, { "epoch": 4.74, "learning_rate": 4.7628e-05, "loss": 8.6825, "step": 593000 }, { "epoch": 4.75, "learning_rate": 4.7626e-05, "loss": 8.6911, "step": 593500 }, { "epoch": 4.75, "learning_rate": 4.7624000000000003e-05, "loss": 8.6713, "step": 594000 }, { "epoch": 4.76, "learning_rate": 4.7622e-05, "loss": 8.6945, "step": 594500 }, { "epoch": 4.76, "learning_rate": 4.762e-05, "loss": 8.6941, "step": 595000 }, { "epoch": 4.76, "learning_rate": 4.7618000000000004e-05, "loss": 8.7096, "step": 595500 }, { "epoch": 4.77, "learning_rate": 4.7616000000000006e-05, "loss": 8.7098, "step": 596000 }, { "epoch": 4.77, "learning_rate": 4.7614e-05, "loss": 8.7069, "step": 596500 }, { "epoch": 4.78, "learning_rate": 4.7612000000000004e-05, "loss": 8.6807, "step": 597000 }, { "epoch": 4.78, "learning_rate": 4.761000000000001e-05, "loss": 8.7137, "step": 597500 }, { "epoch": 4.78, "learning_rate": 4.7608e-05, "loss": 8.6549, "step": 598000 }, { "epoch": 4.79, "learning_rate": 4.7606e-05, "loss": 8.6963, "step": 598500 }, { "epoch": 4.79, "learning_rate": 4.760400000000001e-05, "loss": 8.683, "step": 599000 }, { "epoch": 4.8, "learning_rate": 4.7602e-05, "loss": 8.672, "step": 599500 }, { "epoch": 4.8, "learning_rate": 4.76e-05, "loss": 8.6915, "step": 600000 }, { "epoch": 4.8, "learning_rate": 4.7598e-05, "loss": 8.6956, "step": 600500 }, { "epoch": 4.81, "learning_rate": 4.7596e-05, "loss": 8.7001, "step": 601000 }, { "epoch": 4.81, "learning_rate": 4.7594e-05, "loss": 8.7155, "step": 601500 }, { "epoch": 4.82, "learning_rate": 4.7592e-05, "loss": 8.6802, "step": 602000 }, { "epoch": 4.82, "learning_rate": 4.7590000000000003e-05, "loss": 8.7002, "step": 602500 }, { "epoch": 4.82, "learning_rate": 4.7588000000000006e-05, "loss": 8.6859, "step": 603000 }, { "epoch": 4.83, "learning_rate": 4.7586e-05, "loss": 8.692, "step": 603500 }, { "epoch": 4.83, "learning_rate": 4.7584000000000004e-05, "loss": 8.6832, "step": 604000 }, { "epoch": 4.84, "learning_rate": 4.7582000000000006e-05, "loss": 8.6765, "step": 604500 }, { "epoch": 4.84, "learning_rate": 4.758e-05, "loss": 8.679, "step": 605000 }, { "epoch": 4.84, "learning_rate": 4.7578e-05, "loss": 8.7049, "step": 605500 }, { "epoch": 4.85, "learning_rate": 4.757600000000001e-05, "loss": 8.7157, "step": 606000 }, { "epoch": 4.85, "learning_rate": 4.7574e-05, "loss": 8.7064, "step": 606500 }, { "epoch": 4.86, "learning_rate": 4.7572e-05, "loss": 8.6896, "step": 607000 }, { "epoch": 4.86, "learning_rate": 4.757e-05, "loss": 8.7057, "step": 607500 }, { "epoch": 4.86, "learning_rate": 4.7568e-05, "loss": 8.6853, "step": 608000 }, { "epoch": 4.87, "learning_rate": 4.7566000000000005e-05, "loss": 8.6964, "step": 608500 }, { "epoch": 4.87, "learning_rate": 4.7564e-05, "loss": 8.7125, "step": 609000 }, { "epoch": 4.88, "learning_rate": 4.7562e-05, "loss": 8.7113, "step": 609500 }, { "epoch": 4.88, "learning_rate": 4.7560000000000005e-05, "loss": 8.6883, "step": 610000 }, { "epoch": 4.88, "learning_rate": 4.7558e-05, "loss": 8.7056, "step": 610500 }, { "epoch": 4.89, "learning_rate": 4.7556000000000003e-05, "loss": 8.6759, "step": 611000 }, { "epoch": 4.89, "learning_rate": 4.7554000000000006e-05, "loss": 8.6787, "step": 611500 }, { "epoch": 4.9, "learning_rate": 4.7552e-05, "loss": 8.6963, "step": 612000 }, { "epoch": 4.9, "learning_rate": 4.755e-05, "loss": 8.7135, "step": 612500 }, { "epoch": 4.9, "learning_rate": 4.7548000000000006e-05, "loss": 8.7052, "step": 613000 }, { "epoch": 4.91, "learning_rate": 4.7546e-05, "loss": 8.6763, "step": 613500 }, { "epoch": 4.91, "learning_rate": 4.7544e-05, "loss": 8.6894, "step": 614000 }, { "epoch": 4.92, "learning_rate": 4.7542e-05, "loss": 8.684, "step": 614500 }, { "epoch": 4.92, "learning_rate": 4.754e-05, "loss": 8.7096, "step": 615000 }, { "epoch": 4.92, "learning_rate": 4.7538000000000005e-05, "loss": 8.7041, "step": 615500 }, { "epoch": 4.93, "learning_rate": 4.7536e-05, "loss": 8.7029, "step": 616000 }, { "epoch": 4.93, "learning_rate": 4.7534e-05, "loss": 8.6898, "step": 616500 }, { "epoch": 4.94, "learning_rate": 4.7532000000000005e-05, "loss": 8.701, "step": 617000 }, { "epoch": 4.94, "learning_rate": 4.753e-05, "loss": 8.7274, "step": 617500 }, { "epoch": 4.94, "learning_rate": 4.7528e-05, "loss": 8.7009, "step": 618000 }, { "epoch": 4.95, "learning_rate": 4.7526000000000005e-05, "loss": 8.6851, "step": 618500 }, { "epoch": 4.95, "learning_rate": 4.7524e-05, "loss": 8.6986, "step": 619000 }, { "epoch": 4.96, "learning_rate": 4.7522e-05, "loss": 8.6776, "step": 619500 }, { "epoch": 4.96, "learning_rate": 4.7520000000000006e-05, "loss": 8.6817, "step": 620000 }, { "epoch": 4.96, "learning_rate": 4.7518e-05, "loss": 8.6879, "step": 620500 }, { "epoch": 4.97, "learning_rate": 4.7516000000000004e-05, "loss": 8.6722, "step": 621000 }, { "epoch": 4.97, "learning_rate": 4.7514000000000006e-05, "loss": 8.695, "step": 621500 }, { "epoch": 4.98, "learning_rate": 4.7512e-05, "loss": 8.673, "step": 622000 }, { "epoch": 4.98, "learning_rate": 4.7510000000000004e-05, "loss": 8.699, "step": 622500 }, { "epoch": 4.98, "learning_rate": 4.7508e-05, "loss": 8.6841, "step": 623000 }, { "epoch": 4.99, "learning_rate": 4.7506e-05, "loss": 8.6652, "step": 623500 }, { "epoch": 4.99, "learning_rate": 4.7504000000000005e-05, "loss": 8.7034, "step": 624000 }, { "epoch": 5.0, "learning_rate": 4.7502e-05, "loss": 8.6708, "step": 624500 }, { "epoch": 5.0, "learning_rate": 4.75e-05, "loss": 8.6996, "step": 625000 }, { "epoch": 5.0, "learning_rate": 4.7498000000000005e-05, "loss": 8.6895, "step": 625500 }, { "epoch": 5.01, "learning_rate": 4.7496e-05, "loss": 8.6687, "step": 626000 }, { "epoch": 5.01, "learning_rate": 4.7493999999999996e-05, "loss": 8.6684, "step": 626500 }, { "epoch": 5.02, "learning_rate": 4.7492000000000005e-05, "loss": 8.6835, "step": 627000 }, { "epoch": 5.02, "learning_rate": 4.749e-05, "loss": 8.6745, "step": 627500 }, { "epoch": 5.02, "learning_rate": 4.7488000000000003e-05, "loss": 8.6795, "step": 628000 }, { "epoch": 5.03, "learning_rate": 4.7486000000000006e-05, "loss": 8.6914, "step": 628500 }, { "epoch": 5.03, "learning_rate": 4.7484e-05, "loss": 8.6906, "step": 629000 }, { "epoch": 5.04, "learning_rate": 4.7482000000000004e-05, "loss": 8.7079, "step": 629500 }, { "epoch": 5.04, "learning_rate": 4.748e-05, "loss": 8.6805, "step": 630000 }, { "epoch": 5.04, "learning_rate": 4.7478e-05, "loss": 8.7025, "step": 630500 }, { "epoch": 5.05, "learning_rate": 4.7476000000000004e-05, "loss": 8.6981, "step": 631000 }, { "epoch": 5.05, "learning_rate": 4.7474e-05, "loss": 8.6627, "step": 631500 }, { "epoch": 5.06, "learning_rate": 4.7472e-05, "loss": 8.6893, "step": 632000 }, { "epoch": 5.06, "learning_rate": 4.7470000000000005e-05, "loss": 8.6852, "step": 632500 }, { "epoch": 5.06, "learning_rate": 4.7468e-05, "loss": 8.663, "step": 633000 }, { "epoch": 5.07, "learning_rate": 4.7466e-05, "loss": 8.6674, "step": 633500 }, { "epoch": 5.07, "learning_rate": 4.7464000000000005e-05, "loss": 8.6806, "step": 634000 }, { "epoch": 5.08, "learning_rate": 4.7462e-05, "loss": 8.6666, "step": 634500 }, { "epoch": 5.08, "learning_rate": 4.746e-05, "loss": 8.6728, "step": 635000 }, { "epoch": 5.08, "learning_rate": 4.7458000000000005e-05, "loss": 8.7154, "step": 635500 }, { "epoch": 5.09, "learning_rate": 4.7456e-05, "loss": 8.7059, "step": 636000 }, { "epoch": 5.09, "learning_rate": 4.7454000000000003e-05, "loss": 8.6599, "step": 636500 }, { "epoch": 5.1, "learning_rate": 4.7452e-05, "loss": 8.671, "step": 637000 }, { "epoch": 5.1, "learning_rate": 4.745e-05, "loss": 8.6911, "step": 637500 }, { "epoch": 5.1, "learning_rate": 4.7448000000000004e-05, "loss": 8.7119, "step": 638000 }, { "epoch": 5.11, "learning_rate": 4.7446e-05, "loss": 8.7202, "step": 638500 }, { "epoch": 5.11, "learning_rate": 4.7444e-05, "loss": 8.6662, "step": 639000 }, { "epoch": 5.12, "learning_rate": 4.7442000000000004e-05, "loss": 8.7054, "step": 639500 }, { "epoch": 5.12, "learning_rate": 4.744e-05, "loss": 8.673, "step": 640000 }, { "epoch": 5.12, "learning_rate": 4.7438e-05, "loss": 8.6985, "step": 640500 }, { "epoch": 5.13, "learning_rate": 4.7436000000000005e-05, "loss": 8.6946, "step": 641000 }, { "epoch": 5.13, "learning_rate": 4.7434e-05, "loss": 8.6743, "step": 641500 }, { "epoch": 5.14, "learning_rate": 4.7432e-05, "loss": 8.6483, "step": 642000 }, { "epoch": 5.14, "learning_rate": 4.7430000000000005e-05, "loss": 8.6967, "step": 642500 }, { "epoch": 5.14, "learning_rate": 4.7428e-05, "loss": 8.6999, "step": 643000 }, { "epoch": 5.15, "learning_rate": 4.7426e-05, "loss": 8.6907, "step": 643500 }, { "epoch": 5.15, "learning_rate": 4.7424e-05, "loss": 8.6941, "step": 644000 }, { "epoch": 5.16, "learning_rate": 4.7422e-05, "loss": 8.6523, "step": 644500 }, { "epoch": 5.16, "learning_rate": 4.742e-05, "loss": 8.7051, "step": 645000 }, { "epoch": 5.16, "learning_rate": 4.7418e-05, "loss": 8.6798, "step": 645500 }, { "epoch": 5.17, "learning_rate": 4.741600000000001e-05, "loss": 8.65, "step": 646000 }, { "epoch": 5.17, "learning_rate": 4.7414000000000004e-05, "loss": 8.6642, "step": 646500 }, { "epoch": 5.18, "learning_rate": 4.7412e-05, "loss": 8.6939, "step": 647000 }, { "epoch": 5.18, "learning_rate": 4.741e-05, "loss": 8.706, "step": 647500 }, { "epoch": 5.18, "learning_rate": 4.7408000000000004e-05, "loss": 8.6656, "step": 648000 }, { "epoch": 5.19, "learning_rate": 4.7406e-05, "loss": 8.6735, "step": 648500 }, { "epoch": 5.19, "learning_rate": 4.7404e-05, "loss": 8.7023, "step": 649000 }, { "epoch": 5.2, "learning_rate": 4.7402000000000005e-05, "loss": 8.6907, "step": 649500 }, { "epoch": 5.2, "learning_rate": 4.74e-05, "loss": 8.6788, "step": 650000 }, { "epoch": 5.2, "learning_rate": 4.7398e-05, "loss": 8.6952, "step": 650500 }, { "epoch": 5.21, "learning_rate": 4.7396e-05, "loss": 8.6699, "step": 651000 }, { "epoch": 5.21, "learning_rate": 4.7394e-05, "loss": 8.6877, "step": 651500 }, { "epoch": 5.22, "learning_rate": 4.7392e-05, "loss": 8.6928, "step": 652000 }, { "epoch": 5.22, "learning_rate": 4.739e-05, "loss": 8.6826, "step": 652500 }, { "epoch": 5.22, "learning_rate": 4.738800000000001e-05, "loss": 8.6977, "step": 653000 }, { "epoch": 5.23, "learning_rate": 4.7386e-05, "loss": 8.6922, "step": 653500 }, { "epoch": 5.23, "learning_rate": 4.7384e-05, "loss": 8.6922, "step": 654000 }, { "epoch": 5.24, "learning_rate": 4.7382e-05, "loss": 8.6917, "step": 654500 }, { "epoch": 5.24, "learning_rate": 4.7380000000000004e-05, "loss": 8.7062, "step": 655000 }, { "epoch": 5.24, "learning_rate": 4.7378e-05, "loss": 8.6872, "step": 655500 }, { "epoch": 5.25, "learning_rate": 4.7376e-05, "loss": 8.6742, "step": 656000 }, { "epoch": 5.25, "learning_rate": 4.7374000000000004e-05, "loss": 8.7035, "step": 656500 }, { "epoch": 5.26, "learning_rate": 4.7372e-05, "loss": 8.6762, "step": 657000 }, { "epoch": 5.26, "learning_rate": 4.737e-05, "loss": 8.7042, "step": 657500 }, { "epoch": 5.26, "learning_rate": 4.7368000000000005e-05, "loss": 8.687, "step": 658000 }, { "epoch": 5.27, "learning_rate": 4.736600000000001e-05, "loss": 8.6841, "step": 658500 }, { "epoch": 5.27, "learning_rate": 4.7364e-05, "loss": 8.7086, "step": 659000 }, { "epoch": 5.28, "learning_rate": 4.7362e-05, "loss": 8.6768, "step": 659500 }, { "epoch": 5.28, "learning_rate": 4.736000000000001e-05, "loss": 8.683, "step": 660000 }, { "epoch": 5.28, "learning_rate": 4.7358e-05, "loss": 8.6867, "step": 660500 }, { "epoch": 5.29, "learning_rate": 4.7356e-05, "loss": 8.6706, "step": 661000 }, { "epoch": 5.29, "learning_rate": 4.7354e-05, "loss": 8.6344, "step": 661500 }, { "epoch": 5.3, "learning_rate": 4.7352e-05, "loss": 8.6803, "step": 662000 }, { "epoch": 5.3, "learning_rate": 4.735e-05, "loss": 8.691, "step": 662500 }, { "epoch": 5.3, "learning_rate": 4.7348e-05, "loss": 8.6917, "step": 663000 }, { "epoch": 5.31, "learning_rate": 4.7346000000000004e-05, "loss": 8.6831, "step": 663500 }, { "epoch": 5.31, "learning_rate": 4.7344000000000006e-05, "loss": 8.6985, "step": 664000 }, { "epoch": 5.32, "learning_rate": 4.7342e-05, "loss": 8.6908, "step": 664500 }, { "epoch": 5.32, "learning_rate": 4.7340000000000004e-05, "loss": 8.6704, "step": 665000 }, { "epoch": 5.32, "learning_rate": 4.7338000000000007e-05, "loss": 8.685, "step": 665500 }, { "epoch": 5.33, "learning_rate": 4.7336e-05, "loss": 8.7153, "step": 666000 }, { "epoch": 5.33, "learning_rate": 4.7334e-05, "loss": 8.7006, "step": 666500 }, { "epoch": 5.34, "learning_rate": 4.733200000000001e-05, "loss": 8.6747, "step": 667000 }, { "epoch": 5.34, "learning_rate": 4.733e-05, "loss": 8.6613, "step": 667500 }, { "epoch": 5.34, "learning_rate": 4.7328e-05, "loss": 8.7035, "step": 668000 }, { "epoch": 5.35, "learning_rate": 4.7326e-05, "loss": 8.6896, "step": 668500 }, { "epoch": 5.35, "learning_rate": 4.7324e-05, "loss": 8.6955, "step": 669000 }, { "epoch": 5.36, "learning_rate": 4.7322e-05, "loss": 8.7121, "step": 669500 }, { "epoch": 5.36, "learning_rate": 4.732e-05, "loss": 8.6922, "step": 670000 }, { "epoch": 5.36, "learning_rate": 4.7318e-05, "loss": 8.6892, "step": 670500 }, { "epoch": 5.37, "learning_rate": 4.7316000000000006e-05, "loss": 8.6795, "step": 671000 }, { "epoch": 5.37, "learning_rate": 4.7314e-05, "loss": 8.7076, "step": 671500 }, { "epoch": 5.38, "learning_rate": 4.7312000000000004e-05, "loss": 8.6899, "step": 672000 }, { "epoch": 5.38, "learning_rate": 4.7310000000000006e-05, "loss": 8.7144, "step": 672500 }, { "epoch": 5.38, "learning_rate": 4.7308e-05, "loss": 8.7106, "step": 673000 }, { "epoch": 5.39, "learning_rate": 4.7306e-05, "loss": 8.683, "step": 673500 }, { "epoch": 5.39, "learning_rate": 4.7304000000000007e-05, "loss": 8.7013, "step": 674000 }, { "epoch": 5.4, "learning_rate": 4.7302e-05, "loss": 8.6933, "step": 674500 }, { "epoch": 5.4, "learning_rate": 4.73e-05, "loss": 8.7011, "step": 675000 }, { "epoch": 5.4, "learning_rate": 4.7298e-05, "loss": 8.6761, "step": 675500 }, { "epoch": 5.41, "learning_rate": 4.7296e-05, "loss": 8.676, "step": 676000 }, { "epoch": 5.41, "learning_rate": 4.7294000000000005e-05, "loss": 8.7049, "step": 676500 }, { "epoch": 5.42, "learning_rate": 4.7292e-05, "loss": 8.6862, "step": 677000 }, { "epoch": 5.42, "learning_rate": 4.729e-05, "loss": 8.6861, "step": 677500 }, { "epoch": 5.42, "learning_rate": 4.7288000000000005e-05, "loss": 8.6773, "step": 678000 }, { "epoch": 5.43, "learning_rate": 4.7286e-05, "loss": 8.6915, "step": 678500 }, { "epoch": 5.43, "learning_rate": 4.7284e-05, "loss": 8.6942, "step": 679000 }, { "epoch": 5.44, "learning_rate": 4.7282000000000006e-05, "loss": 8.6837, "step": 679500 }, { "epoch": 5.44, "learning_rate": 4.728e-05, "loss": 8.6891, "step": 680000 }, { "epoch": 5.44, "learning_rate": 4.7278e-05, "loss": 8.6752, "step": 680500 }, { "epoch": 5.45, "learning_rate": 4.7276000000000006e-05, "loss": 8.6769, "step": 681000 }, { "epoch": 5.45, "learning_rate": 4.7274e-05, "loss": 8.6738, "step": 681500 }, { "epoch": 5.46, "learning_rate": 4.7272e-05, "loss": 8.6892, "step": 682000 }, { "epoch": 5.46, "learning_rate": 4.7270000000000007e-05, "loss": 8.7074, "step": 682500 }, { "epoch": 5.46, "learning_rate": 4.7268e-05, "loss": 8.6909, "step": 683000 }, { "epoch": 5.47, "learning_rate": 4.7266000000000005e-05, "loss": 8.6848, "step": 683500 }, { "epoch": 5.47, "learning_rate": 4.7264e-05, "loss": 8.6926, "step": 684000 }, { "epoch": 5.48, "learning_rate": 4.7262e-05, "loss": 8.6606, "step": 684500 }, { "epoch": 5.48, "learning_rate": 4.7260000000000005e-05, "loss": 8.696, "step": 685000 }, { "epoch": 5.48, "learning_rate": 4.7258e-05, "loss": 8.6629, "step": 685500 }, { "epoch": 5.49, "learning_rate": 4.7256e-05, "loss": 8.6887, "step": 686000 }, { "epoch": 5.49, "learning_rate": 4.7254000000000005e-05, "loss": 8.6852, "step": 686500 }, { "epoch": 5.5, "learning_rate": 4.7252e-05, "loss": 8.6677, "step": 687000 }, { "epoch": 5.5, "learning_rate": 4.7249999999999997e-05, "loss": 8.6951, "step": 687500 }, { "epoch": 5.5, "learning_rate": 4.7248000000000006e-05, "loss": 8.6928, "step": 688000 }, { "epoch": 5.51, "learning_rate": 4.7246e-05, "loss": 8.7003, "step": 688500 }, { "epoch": 5.51, "learning_rate": 4.7244000000000004e-05, "loss": 8.6892, "step": 689000 }, { "epoch": 5.52, "learning_rate": 4.7242000000000006e-05, "loss": 8.6872, "step": 689500 }, { "epoch": 5.52, "learning_rate": 4.724e-05, "loss": 8.711, "step": 690000 }, { "epoch": 5.52, "learning_rate": 4.7238000000000004e-05, "loss": 8.7015, "step": 690500 }, { "epoch": 5.53, "learning_rate": 4.7236e-05, "loss": 8.6695, "step": 691000 }, { "epoch": 5.53, "learning_rate": 4.7234e-05, "loss": 8.7176, "step": 691500 }, { "epoch": 5.54, "learning_rate": 4.7232000000000005e-05, "loss": 8.7359, "step": 692000 }, { "epoch": 5.54, "learning_rate": 4.723e-05, "loss": 8.6934, "step": 692500 }, { "epoch": 5.54, "learning_rate": 4.7228e-05, "loss": 8.6977, "step": 693000 }, { "epoch": 5.55, "learning_rate": 4.7226000000000005e-05, "loss": 8.6904, "step": 693500 }, { "epoch": 5.55, "learning_rate": 4.7224e-05, "loss": 8.6897, "step": 694000 }, { "epoch": 5.56, "learning_rate": 4.7222e-05, "loss": 8.6611, "step": 694500 }, { "epoch": 5.56, "learning_rate": 4.7220000000000005e-05, "loss": 8.6864, "step": 695000 }, { "epoch": 5.56, "learning_rate": 4.7218e-05, "loss": 8.6747, "step": 695500 }, { "epoch": 5.57, "learning_rate": 4.7216e-05, "loss": 8.6676, "step": 696000 }, { "epoch": 5.57, "learning_rate": 4.7214000000000006e-05, "loss": 8.7055, "step": 696500 }, { "epoch": 5.58, "learning_rate": 4.7212e-05, "loss": 8.6965, "step": 697000 }, { "epoch": 5.58, "learning_rate": 4.7210000000000004e-05, "loss": 8.6965, "step": 697500 }, { "epoch": 5.58, "learning_rate": 4.7208e-05, "loss": 8.6918, "step": 698000 }, { "epoch": 5.59, "learning_rate": 4.7206e-05, "loss": 8.6898, "step": 698500 }, { "epoch": 5.59, "learning_rate": 4.7204000000000004e-05, "loss": 8.7017, "step": 699000 }, { "epoch": 5.6, "learning_rate": 4.7202e-05, "loss": 8.6897, "step": 699500 }, { "epoch": 5.6, "learning_rate": 4.72e-05, "loss": 8.6989, "step": 700000 }, { "epoch": 5.6, "learning_rate": 4.7198000000000004e-05, "loss": 8.6906, "step": 700500 }, { "epoch": 5.61, "learning_rate": 4.7196e-05, "loss": 8.6889, "step": 701000 }, { "epoch": 5.61, "learning_rate": 4.7194e-05, "loss": 8.6767, "step": 701500 }, { "epoch": 5.62, "learning_rate": 4.7192000000000005e-05, "loss": 8.6921, "step": 702000 }, { "epoch": 5.62, "learning_rate": 4.719e-05, "loss": 8.6512, "step": 702500 }, { "epoch": 5.62, "learning_rate": 4.7188e-05, "loss": 8.6645, "step": 703000 }, { "epoch": 5.63, "learning_rate": 4.7186000000000005e-05, "loss": 8.6867, "step": 703500 }, { "epoch": 5.63, "learning_rate": 4.7184e-05, "loss": 8.6958, "step": 704000 }, { "epoch": 5.64, "learning_rate": 4.7182e-05, "loss": 8.6656, "step": 704500 }, { "epoch": 5.64, "learning_rate": 4.718e-05, "loss": 8.6697, "step": 705000 }, { "epoch": 5.64, "learning_rate": 4.7178e-05, "loss": 8.6867, "step": 705500 }, { "epoch": 5.65, "learning_rate": 4.7176000000000004e-05, "loss": 8.6707, "step": 706000 }, { "epoch": 5.65, "learning_rate": 4.7174e-05, "loss": 8.6832, "step": 706500 }, { "epoch": 5.66, "learning_rate": 4.7172e-05, "loss": 8.6698, "step": 707000 }, { "epoch": 5.66, "learning_rate": 4.7170000000000004e-05, "loss": 8.6951, "step": 707500 }, { "epoch": 5.66, "learning_rate": 4.7168e-05, "loss": 8.6829, "step": 708000 }, { "epoch": 5.67, "learning_rate": 4.7166e-05, "loss": 8.6971, "step": 708500 }, { "epoch": 5.67, "learning_rate": 4.7164000000000004e-05, "loss": 8.6797, "step": 709000 }, { "epoch": 5.68, "learning_rate": 4.7162e-05, "loss": 8.7025, "step": 709500 }, { "epoch": 5.68, "learning_rate": 4.716e-05, "loss": 8.6876, "step": 710000 }, { "epoch": 5.68, "learning_rate": 4.7158000000000005e-05, "loss": 8.6836, "step": 710500 }, { "epoch": 5.69, "learning_rate": 4.7156e-05, "loss": 8.6867, "step": 711000 }, { "epoch": 5.69, "learning_rate": 4.7154e-05, "loss": 8.6948, "step": 711500 }, { "epoch": 5.7, "learning_rate": 4.7152e-05, "loss": 8.7156, "step": 712000 }, { "epoch": 5.7, "learning_rate": 4.715e-05, "loss": 8.6833, "step": 712500 }, { "epoch": 5.7, "learning_rate": 4.7148e-05, "loss": 8.6823, "step": 713000 }, { "epoch": 5.71, "learning_rate": 4.7146e-05, "loss": 8.6815, "step": 713500 }, { "epoch": 5.71, "learning_rate": 4.714400000000001e-05, "loss": 8.6914, "step": 714000 }, { "epoch": 5.72, "learning_rate": 4.7142000000000004e-05, "loss": 8.6879, "step": 714500 }, { "epoch": 5.72, "learning_rate": 4.714e-05, "loss": 8.6631, "step": 715000 }, { "epoch": 5.72, "learning_rate": 4.7138e-05, "loss": 8.6916, "step": 715500 }, { "epoch": 5.73, "learning_rate": 4.7136000000000004e-05, "loss": 8.6991, "step": 716000 }, { "epoch": 5.73, "learning_rate": 4.7134e-05, "loss": 8.6888, "step": 716500 }, { "epoch": 5.74, "learning_rate": 4.7132e-05, "loss": 8.6925, "step": 717000 }, { "epoch": 5.74, "learning_rate": 4.7130000000000004e-05, "loss": 8.6895, "step": 717500 }, { "epoch": 5.74, "learning_rate": 4.7128e-05, "loss": 8.6821, "step": 718000 }, { "epoch": 5.75, "learning_rate": 4.7126e-05, "loss": 8.6825, "step": 718500 }, { "epoch": 5.75, "learning_rate": 4.7124000000000005e-05, "loss": 8.6447, "step": 719000 }, { "epoch": 5.76, "learning_rate": 4.712200000000001e-05, "loss": 8.6865, "step": 719500 }, { "epoch": 5.76, "learning_rate": 4.712e-05, "loss": 8.6965, "step": 720000 }, { "epoch": 5.76, "learning_rate": 4.7118e-05, "loss": 8.6994, "step": 720500 }, { "epoch": 5.77, "learning_rate": 4.711600000000001e-05, "loss": 8.7109, "step": 721000 }, { "epoch": 5.77, "learning_rate": 4.7114e-05, "loss": 8.6661, "step": 721500 }, { "epoch": 5.78, "learning_rate": 4.7112e-05, "loss": 8.6944, "step": 722000 }, { "epoch": 5.78, "learning_rate": 4.711e-05, "loss": 8.7156, "step": 722500 }, { "epoch": 5.78, "learning_rate": 4.7108000000000004e-05, "loss": 8.6916, "step": 723000 }, { "epoch": 5.79, "learning_rate": 4.7106e-05, "loss": 8.6661, "step": 723500 }, { "epoch": 5.79, "learning_rate": 4.7104e-05, "loss": 8.6902, "step": 724000 }, { "epoch": 5.8, "learning_rate": 4.7102000000000004e-05, "loss": 8.6957, "step": 724500 }, { "epoch": 5.8, "learning_rate": 4.71e-05, "loss": 8.687, "step": 725000 }, { "epoch": 5.8, "learning_rate": 4.7098e-05, "loss": 8.7003, "step": 725500 }, { "epoch": 5.81, "learning_rate": 4.7096000000000004e-05, "loss": 8.6713, "step": 726000 }, { "epoch": 5.81, "learning_rate": 4.709400000000001e-05, "loss": 8.6699, "step": 726500 }, { "epoch": 5.82, "learning_rate": 4.7092e-05, "loss": 8.6869, "step": 727000 }, { "epoch": 5.82, "learning_rate": 4.709e-05, "loss": 8.6778, "step": 727500 }, { "epoch": 5.82, "learning_rate": 4.708800000000001e-05, "loss": 8.675, "step": 728000 }, { "epoch": 5.83, "learning_rate": 4.7086e-05, "loss": 8.6962, "step": 728500 }, { "epoch": 5.83, "learning_rate": 4.7084e-05, "loss": 8.7078, "step": 729000 }, { "epoch": 5.84, "learning_rate": 4.7082e-05, "loss": 8.6918, "step": 729500 }, { "epoch": 5.84, "learning_rate": 4.708e-05, "loss": 8.7071, "step": 730000 }, { "epoch": 5.84, "learning_rate": 4.7078e-05, "loss": 8.6868, "step": 730500 }, { "epoch": 5.85, "learning_rate": 4.7076e-05, "loss": 8.6995, "step": 731000 }, { "epoch": 5.85, "learning_rate": 4.7074000000000004e-05, "loss": 8.6819, "step": 731500 }, { "epoch": 5.86, "learning_rate": 4.7072000000000006e-05, "loss": 8.6853, "step": 732000 }, { "epoch": 5.86, "learning_rate": 4.707e-05, "loss": 8.695, "step": 732500 }, { "epoch": 5.86, "learning_rate": 4.7068000000000004e-05, "loss": 8.6754, "step": 733000 }, { "epoch": 5.87, "learning_rate": 4.7066000000000006e-05, "loss": 8.7053, "step": 733500 }, { "epoch": 5.87, "learning_rate": 4.7064e-05, "loss": 8.6838, "step": 734000 }, { "epoch": 5.88, "learning_rate": 4.7062e-05, "loss": 8.6893, "step": 734500 }, { "epoch": 5.88, "learning_rate": 4.706000000000001e-05, "loss": 8.7103, "step": 735000 }, { "epoch": 5.88, "learning_rate": 4.7058e-05, "loss": 8.6732, "step": 735500 }, { "epoch": 5.89, "learning_rate": 4.7056e-05, "loss": 8.7037, "step": 736000 }, { "epoch": 5.89, "learning_rate": 4.7054e-05, "loss": 8.6949, "step": 736500 }, { "epoch": 5.9, "learning_rate": 4.7052e-05, "loss": 8.7115, "step": 737000 }, { "epoch": 5.9, "learning_rate": 4.705e-05, "loss": 8.6782, "step": 737500 }, { "epoch": 5.9, "learning_rate": 4.7048e-05, "loss": 8.6852, "step": 738000 }, { "epoch": 5.91, "learning_rate": 4.7046e-05, "loss": 8.6756, "step": 738500 }, { "epoch": 5.91, "learning_rate": 4.7044000000000006e-05, "loss": 8.6945, "step": 739000 }, { "epoch": 5.92, "learning_rate": 4.7042e-05, "loss": 8.6926, "step": 739500 }, { "epoch": 5.92, "learning_rate": 4.7040000000000004e-05, "loss": 8.6959, "step": 740000 }, { "epoch": 5.92, "learning_rate": 4.7038000000000006e-05, "loss": 8.6791, "step": 740500 }, { "epoch": 5.93, "learning_rate": 4.7036e-05, "loss": 8.6837, "step": 741000 }, { "epoch": 5.93, "learning_rate": 4.7034e-05, "loss": 8.6567, "step": 741500 }, { "epoch": 5.94, "learning_rate": 4.7032000000000006e-05, "loss": 8.6724, "step": 742000 }, { "epoch": 5.94, "learning_rate": 4.703e-05, "loss": 8.6793, "step": 742500 }, { "epoch": 5.94, "learning_rate": 4.7028e-05, "loss": 8.7018, "step": 743000 }, { "epoch": 5.95, "learning_rate": 4.7026e-05, "loss": 8.6535, "step": 743500 }, { "epoch": 5.95, "learning_rate": 4.7024e-05, "loss": 8.6763, "step": 744000 }, { "epoch": 5.96, "learning_rate": 4.7022000000000005e-05, "loss": 8.6904, "step": 744500 }, { "epoch": 5.96, "learning_rate": 4.702e-05, "loss": 8.7006, "step": 745000 }, { "epoch": 5.96, "learning_rate": 4.7018e-05, "loss": 8.705, "step": 745500 }, { "epoch": 5.97, "learning_rate": 4.7016000000000005e-05, "loss": 8.685, "step": 746000 }, { "epoch": 5.97, "learning_rate": 4.7014e-05, "loss": 8.6814, "step": 746500 }, { "epoch": 5.98, "learning_rate": 4.7012e-05, "loss": 8.6848, "step": 747000 }, { "epoch": 5.98, "learning_rate": 4.7010000000000006e-05, "loss": 8.6957, "step": 747500 }, { "epoch": 5.98, "learning_rate": 4.7008e-05, "loss": 8.6929, "step": 748000 }, { "epoch": 5.99, "learning_rate": 4.7006e-05, "loss": 8.6925, "step": 748500 }, { "epoch": 5.99, "learning_rate": 4.7004000000000006e-05, "loss": 8.7093, "step": 749000 }, { "epoch": 6.0, "learning_rate": 4.7002e-05, "loss": 8.6777, "step": 749500 }, { "epoch": 6.0, "learning_rate": 4.7e-05, "loss": 8.6913, "step": 750000 }, { "epoch": 6.0, "learning_rate": 4.6998000000000006e-05, "loss": 8.7201, "step": 750500 }, { "epoch": 6.01, "learning_rate": 4.6996e-05, "loss": 8.6985, "step": 751000 }, { "epoch": 6.01, "learning_rate": 4.6994000000000004e-05, "loss": 8.673, "step": 751500 }, { "epoch": 6.02, "learning_rate": 4.6992e-05, "loss": 8.6662, "step": 752000 }, { "epoch": 6.02, "learning_rate": 4.699e-05, "loss": 8.7033, "step": 752500 }, { "epoch": 6.02, "learning_rate": 4.6988000000000005e-05, "loss": 8.7036, "step": 753000 }, { "epoch": 6.03, "learning_rate": 4.6986e-05, "loss": 8.7168, "step": 753500 }, { "epoch": 6.03, "learning_rate": 4.6984e-05, "loss": 8.6793, "step": 754000 }, { "epoch": 6.04, "learning_rate": 4.6982000000000005e-05, "loss": 8.658, "step": 754500 }, { "epoch": 6.04, "learning_rate": 4.698e-05, "loss": 8.6972, "step": 755000 }, { "epoch": 6.04, "learning_rate": 4.6977999999999996e-05, "loss": 8.681, "step": 755500 }, { "epoch": 6.05, "learning_rate": 4.6976000000000006e-05, "loss": 8.6965, "step": 756000 }, { "epoch": 6.05, "learning_rate": 4.6974e-05, "loss": 8.6773, "step": 756500 }, { "epoch": 6.06, "learning_rate": 4.6972000000000004e-05, "loss": 8.6668, "step": 757000 }, { "epoch": 6.06, "learning_rate": 4.6970000000000006e-05, "loss": 8.696, "step": 757500 }, { "epoch": 6.06, "learning_rate": 4.6968e-05, "loss": 8.6799, "step": 758000 }, { "epoch": 6.07, "learning_rate": 4.6966000000000004e-05, "loss": 8.6923, "step": 758500 }, { "epoch": 6.07, "learning_rate": 4.6964e-05, "loss": 8.6856, "step": 759000 }, { "epoch": 6.08, "learning_rate": 4.6962e-05, "loss": 8.6911, "step": 759500 }, { "epoch": 6.08, "learning_rate": 4.6960000000000004e-05, "loss": 8.6814, "step": 760000 }, { "epoch": 6.08, "learning_rate": 4.6958e-05, "loss": 8.6729, "step": 760500 }, { "epoch": 6.09, "learning_rate": 4.6956e-05, "loss": 8.7033, "step": 761000 }, { "epoch": 6.09, "learning_rate": 4.6954000000000005e-05, "loss": 8.6683, "step": 761500 }, { "epoch": 6.1, "learning_rate": 4.6952e-05, "loss": 8.6567, "step": 762000 }, { "epoch": 6.1, "learning_rate": 4.695e-05, "loss": 8.6747, "step": 762500 }, { "epoch": 6.1, "learning_rate": 4.6948000000000005e-05, "loss": 8.6623, "step": 763000 }, { "epoch": 6.11, "learning_rate": 4.6946e-05, "loss": 8.69, "step": 763500 }, { "epoch": 6.11, "learning_rate": 4.6944e-05, "loss": 8.6797, "step": 764000 }, { "epoch": 6.12, "learning_rate": 4.6942000000000006e-05, "loss": 8.694, "step": 764500 }, { "epoch": 6.12, "learning_rate": 4.694e-05, "loss": 8.6899, "step": 765000 }, { "epoch": 6.12, "learning_rate": 4.6938000000000004e-05, "loss": 8.67, "step": 765500 }, { "epoch": 6.13, "learning_rate": 4.6936e-05, "loss": 8.6741, "step": 766000 }, { "epoch": 6.13, "learning_rate": 4.6934e-05, "loss": 8.7204, "step": 766500 }, { "epoch": 6.14, "learning_rate": 4.6932000000000004e-05, "loss": 8.6861, "step": 767000 }, { "epoch": 6.14, "learning_rate": 4.693e-05, "loss": 8.7019, "step": 767500 }, { "epoch": 6.14, "learning_rate": 4.6928e-05, "loss": 8.6781, "step": 768000 }, { "epoch": 6.15, "learning_rate": 4.6926000000000004e-05, "loss": 8.6855, "step": 768500 }, { "epoch": 6.15, "learning_rate": 4.6924e-05, "loss": 8.6801, "step": 769000 }, { "epoch": 6.16, "learning_rate": 4.6922e-05, "loss": 8.7049, "step": 769500 }, { "epoch": 6.16, "learning_rate": 4.6920000000000005e-05, "loss": 8.6789, "step": 770000 }, { "epoch": 6.16, "learning_rate": 4.6918e-05, "loss": 8.6692, "step": 770500 }, { "epoch": 6.17, "learning_rate": 4.6916e-05, "loss": 8.7287, "step": 771000 }, { "epoch": 6.17, "learning_rate": 4.6914000000000005e-05, "loss": 8.6889, "step": 771500 }, { "epoch": 6.18, "learning_rate": 4.6912e-05, "loss": 8.666, "step": 772000 }, { "epoch": 6.18, "learning_rate": 4.691e-05, "loss": 8.6938, "step": 772500 }, { "epoch": 6.18, "learning_rate": 4.6908e-05, "loss": 8.6645, "step": 773000 }, { "epoch": 6.19, "learning_rate": 4.6906e-05, "loss": 8.6783, "step": 773500 }, { "epoch": 6.19, "learning_rate": 4.6904000000000004e-05, "loss": 8.656, "step": 774000 }, { "epoch": 6.2, "learning_rate": 4.6902e-05, "loss": 8.6943, "step": 774500 }, { "epoch": 6.2, "learning_rate": 4.69e-05, "loss": 8.7057, "step": 775000 }, { "epoch": 6.2, "learning_rate": 4.6898000000000004e-05, "loss": 8.6791, "step": 775500 }, { "epoch": 6.21, "learning_rate": 4.6896e-05, "loss": 8.6785, "step": 776000 }, { "epoch": 6.21, "learning_rate": 4.6894e-05, "loss": 8.6989, "step": 776500 }, { "epoch": 6.22, "learning_rate": 4.6892000000000004e-05, "loss": 8.6844, "step": 777000 }, { "epoch": 6.22, "learning_rate": 4.689e-05, "loss": 8.6658, "step": 777500 }, { "epoch": 6.22, "learning_rate": 4.6888e-05, "loss": 8.6335, "step": 778000 }, { "epoch": 6.23, "learning_rate": 4.6886000000000005e-05, "loss": 8.6875, "step": 778500 }, { "epoch": 6.23, "learning_rate": 4.6884e-05, "loss": 8.6967, "step": 779000 }, { "epoch": 6.24, "learning_rate": 4.6882e-05, "loss": 8.66, "step": 779500 }, { "epoch": 6.24, "learning_rate": 4.688e-05, "loss": 8.6612, "step": 780000 }, { "epoch": 6.24, "learning_rate": 4.6878e-05, "loss": 8.6867, "step": 780500 }, { "epoch": 6.25, "learning_rate": 4.6876e-05, "loss": 8.6855, "step": 781000 }, { "epoch": 6.25, "learning_rate": 4.6874e-05, "loss": 8.6826, "step": 781500 }, { "epoch": 6.26, "learning_rate": 4.687200000000001e-05, "loss": 8.6773, "step": 782000 }, { "epoch": 6.26, "learning_rate": 4.6870000000000004e-05, "loss": 8.6837, "step": 782500 }, { "epoch": 6.26, "learning_rate": 4.6868e-05, "loss": 8.6954, "step": 783000 }, { "epoch": 6.27, "learning_rate": 4.6866e-05, "loss": 8.6884, "step": 783500 }, { "epoch": 6.27, "learning_rate": 4.6864000000000004e-05, "loss": 8.7012, "step": 784000 }, { "epoch": 6.28, "learning_rate": 4.6862e-05, "loss": 8.686, "step": 784500 }, { "epoch": 6.28, "learning_rate": 4.686e-05, "loss": 8.6754, "step": 785000 }, { "epoch": 6.28, "learning_rate": 4.6858000000000004e-05, "loss": 8.6742, "step": 785500 }, { "epoch": 6.29, "learning_rate": 4.6856e-05, "loss": 8.7039, "step": 786000 }, { "epoch": 6.29, "learning_rate": 4.6854e-05, "loss": 8.6997, "step": 786500 }, { "epoch": 6.3, "learning_rate": 4.6852000000000005e-05, "loss": 8.6888, "step": 787000 }, { "epoch": 6.3, "learning_rate": 4.685000000000001e-05, "loss": 8.6968, "step": 787500 }, { "epoch": 6.3, "learning_rate": 4.6848e-05, "loss": 8.6881, "step": 788000 }, { "epoch": 6.31, "learning_rate": 4.6846e-05, "loss": 8.7016, "step": 788500 }, { "epoch": 6.31, "learning_rate": 4.684400000000001e-05, "loss": 8.674, "step": 789000 }, { "epoch": 6.32, "learning_rate": 4.6842e-05, "loss": 8.6957, "step": 789500 }, { "epoch": 6.32, "learning_rate": 4.684e-05, "loss": 8.6614, "step": 790000 }, { "epoch": 6.32, "learning_rate": 4.6838e-05, "loss": 8.681, "step": 790500 }, { "epoch": 6.33, "learning_rate": 4.6836000000000004e-05, "loss": 8.6727, "step": 791000 }, { "epoch": 6.33, "learning_rate": 4.6834e-05, "loss": 8.6681, "step": 791500 }, { "epoch": 6.34, "learning_rate": 4.6832e-05, "loss": 8.6964, "step": 792000 }, { "epoch": 6.34, "learning_rate": 4.6830000000000004e-05, "loss": 8.6826, "step": 792500 }, { "epoch": 6.34, "learning_rate": 4.6828e-05, "loss": 8.7072, "step": 793000 }, { "epoch": 6.35, "learning_rate": 4.6826e-05, "loss": 8.6884, "step": 793500 }, { "epoch": 6.35, "learning_rate": 4.6824000000000004e-05, "loss": 8.7096, "step": 794000 }, { "epoch": 6.36, "learning_rate": 4.682200000000001e-05, "loss": 8.6898, "step": 794500 }, { "epoch": 6.36, "learning_rate": 4.682e-05, "loss": 8.6621, "step": 795000 }, { "epoch": 6.36, "learning_rate": 4.6818e-05, "loss": 8.7063, "step": 795500 }, { "epoch": 6.37, "learning_rate": 4.681600000000001e-05, "loss": 8.6877, "step": 796000 }, { "epoch": 6.37, "learning_rate": 4.6814e-05, "loss": 8.691, "step": 796500 }, { "epoch": 6.38, "learning_rate": 4.6812e-05, "loss": 8.6942, "step": 797000 }, { "epoch": 6.38, "learning_rate": 4.681e-05, "loss": 8.6936, "step": 797500 }, { "epoch": 6.38, "learning_rate": 4.6808e-05, "loss": 8.688, "step": 798000 }, { "epoch": 6.39, "learning_rate": 4.6806e-05, "loss": 8.7132, "step": 798500 }, { "epoch": 6.39, "learning_rate": 4.6804e-05, "loss": 8.6968, "step": 799000 }, { "epoch": 6.4, "learning_rate": 4.6802000000000004e-05, "loss": 8.6962, "step": 799500 }, { "epoch": 6.4, "learning_rate": 4.6800000000000006e-05, "loss": 8.6979, "step": 800000 }, { "epoch": 6.4, "learning_rate": 4.6798e-05, "loss": 8.6787, "step": 800500 }, { "epoch": 6.41, "learning_rate": 4.6796000000000004e-05, "loss": 8.6847, "step": 801000 }, { "epoch": 6.41, "learning_rate": 4.6794000000000006e-05, "loss": 8.7164, "step": 801500 }, { "epoch": 6.42, "learning_rate": 4.6792e-05, "loss": 8.6892, "step": 802000 }, { "epoch": 6.42, "learning_rate": 4.679e-05, "loss": 8.6569, "step": 802500 }, { "epoch": 6.42, "learning_rate": 4.678800000000001e-05, "loss": 8.7136, "step": 803000 }, { "epoch": 6.43, "learning_rate": 4.6786e-05, "loss": 8.6801, "step": 803500 }, { "epoch": 6.43, "learning_rate": 4.6784e-05, "loss": 8.6836, "step": 804000 }, { "epoch": 6.44, "learning_rate": 4.6782e-05, "loss": 8.685, "step": 804500 }, { "epoch": 6.44, "learning_rate": 4.678e-05, "loss": 8.7094, "step": 805000 }, { "epoch": 6.44, "learning_rate": 4.6778e-05, "loss": 8.691, "step": 805500 }, { "epoch": 6.45, "learning_rate": 4.6776e-05, "loss": 8.6758, "step": 806000 }, { "epoch": 6.45, "learning_rate": 4.6774e-05, "loss": 8.7133, "step": 806500 }, { "epoch": 6.46, "learning_rate": 4.6772000000000006e-05, "loss": 8.6901, "step": 807000 }, { "epoch": 6.46, "learning_rate": 4.677e-05, "loss": 8.6884, "step": 807500 }, { "epoch": 6.46, "learning_rate": 4.6768000000000004e-05, "loss": 8.6815, "step": 808000 }, { "epoch": 6.47, "learning_rate": 4.6766000000000006e-05, "loss": 8.6945, "step": 808500 }, { "epoch": 6.47, "learning_rate": 4.6764e-05, "loss": 8.6881, "step": 809000 }, { "epoch": 6.48, "learning_rate": 4.6762e-05, "loss": 8.6891, "step": 809500 }, { "epoch": 6.48, "learning_rate": 4.6760000000000006e-05, "loss": 8.6968, "step": 810000 }, { "epoch": 6.48, "learning_rate": 4.6758e-05, "loss": 8.6841, "step": 810500 }, { "epoch": 6.49, "learning_rate": 4.6756e-05, "loss": 8.6876, "step": 811000 }, { "epoch": 6.49, "learning_rate": 4.675400000000001e-05, "loss": 8.6979, "step": 811500 }, { "epoch": 6.5, "learning_rate": 4.6752e-05, "loss": 8.678, "step": 812000 }, { "epoch": 6.5, "learning_rate": 4.6750000000000005e-05, "loss": 8.6972, "step": 812500 }, { "epoch": 6.5, "learning_rate": 4.6748e-05, "loss": 8.6811, "step": 813000 }, { "epoch": 6.51, "learning_rate": 4.6746e-05, "loss": 8.6973, "step": 813500 }, { "epoch": 6.51, "learning_rate": 4.6744000000000005e-05, "loss": 8.6943, "step": 814000 }, { "epoch": 6.52, "learning_rate": 4.6742e-05, "loss": 8.6855, "step": 814500 }, { "epoch": 6.52, "learning_rate": 4.674e-05, "loss": 8.6963, "step": 815000 }, { "epoch": 6.52, "learning_rate": 4.6738000000000006e-05, "loss": 8.7114, "step": 815500 }, { "epoch": 6.53, "learning_rate": 4.6736e-05, "loss": 8.6996, "step": 816000 }, { "epoch": 6.53, "learning_rate": 4.6734e-05, "loss": 8.6951, "step": 816500 }, { "epoch": 6.54, "learning_rate": 4.6732000000000006e-05, "loss": 8.6772, "step": 817000 }, { "epoch": 6.54, "learning_rate": 4.673e-05, "loss": 8.6724, "step": 817500 }, { "epoch": 6.54, "learning_rate": 4.6728e-05, "loss": 8.7174, "step": 818000 }, { "epoch": 6.55, "learning_rate": 4.6726000000000006e-05, "loss": 8.6783, "step": 818500 }, { "epoch": 6.55, "learning_rate": 4.6724e-05, "loss": 8.6842, "step": 819000 }, { "epoch": 6.56, "learning_rate": 4.6722000000000004e-05, "loss": 8.6893, "step": 819500 }, { "epoch": 6.56, "learning_rate": 4.672e-05, "loss": 8.6598, "step": 820000 }, { "epoch": 6.56, "learning_rate": 4.6718e-05, "loss": 8.6785, "step": 820500 }, { "epoch": 6.57, "learning_rate": 4.6716000000000005e-05, "loss": 8.6983, "step": 821000 }, { "epoch": 6.57, "learning_rate": 4.6714e-05, "loss": 8.6691, "step": 821500 }, { "epoch": 6.58, "learning_rate": 4.6712e-05, "loss": 8.6649, "step": 822000 }, { "epoch": 6.58, "learning_rate": 4.6710000000000005e-05, "loss": 8.6774, "step": 822500 }, { "epoch": 6.58, "learning_rate": 4.6708e-05, "loss": 8.662, "step": 823000 }, { "epoch": 6.59, "learning_rate": 4.6706e-05, "loss": 8.7077, "step": 823500 }, { "epoch": 6.59, "learning_rate": 4.6704000000000005e-05, "loss": 8.6697, "step": 824000 }, { "epoch": 6.6, "learning_rate": 4.6702e-05, "loss": 8.6873, "step": 824500 }, { "epoch": 6.6, "learning_rate": 4.6700000000000003e-05, "loss": 8.6606, "step": 825000 }, { "epoch": 6.6, "learning_rate": 4.6698000000000006e-05, "loss": 8.6672, "step": 825500 }, { "epoch": 6.61, "learning_rate": 4.6696e-05, "loss": 8.7246, "step": 826000 }, { "epoch": 6.61, "learning_rate": 4.6694000000000004e-05, "loss": 8.6949, "step": 826500 }, { "epoch": 6.62, "learning_rate": 4.6692e-05, "loss": 8.6766, "step": 827000 }, { "epoch": 6.62, "learning_rate": 4.669e-05, "loss": 8.6795, "step": 827500 }, { "epoch": 6.62, "learning_rate": 4.6688000000000004e-05, "loss": 8.6992, "step": 828000 }, { "epoch": 6.63, "learning_rate": 4.6686e-05, "loss": 8.6833, "step": 828500 }, { "epoch": 6.63, "learning_rate": 4.6684e-05, "loss": 8.692, "step": 829000 }, { "epoch": 6.64, "learning_rate": 4.6682000000000005e-05, "loss": 8.686, "step": 829500 }, { "epoch": 6.64, "learning_rate": 4.668e-05, "loss": 8.695, "step": 830000 }, { "epoch": 6.64, "learning_rate": 4.6678e-05, "loss": 8.6997, "step": 830500 }, { "epoch": 6.65, "learning_rate": 4.6676000000000005e-05, "loss": 8.7094, "step": 831000 }, { "epoch": 6.65, "learning_rate": 4.6674e-05, "loss": 8.7044, "step": 831500 }, { "epoch": 6.66, "learning_rate": 4.6672e-05, "loss": 8.7036, "step": 832000 }, { "epoch": 6.66, "learning_rate": 4.6670000000000005e-05, "loss": 8.6943, "step": 832500 }, { "epoch": 6.66, "learning_rate": 4.6668e-05, "loss": 8.6921, "step": 833000 }, { "epoch": 6.67, "learning_rate": 4.6666000000000003e-05, "loss": 8.6799, "step": 833500 }, { "epoch": 6.67, "learning_rate": 4.6664e-05, "loss": 8.6947, "step": 834000 }, { "epoch": 6.68, "learning_rate": 4.6662e-05, "loss": 8.6906, "step": 834500 }, { "epoch": 6.68, "learning_rate": 4.6660000000000004e-05, "loss": 8.694, "step": 835000 }, { "epoch": 6.68, "learning_rate": 4.6658e-05, "loss": 8.6796, "step": 835500 }, { "epoch": 6.69, "learning_rate": 4.6656e-05, "loss": 8.6982, "step": 836000 }, { "epoch": 6.69, "learning_rate": 4.6654000000000004e-05, "loss": 8.7206, "step": 836500 }, { "epoch": 6.7, "learning_rate": 4.6652e-05, "loss": 8.6691, "step": 837000 }, { "epoch": 6.7, "learning_rate": 4.665e-05, "loss": 8.6577, "step": 837500 }, { "epoch": 6.7, "learning_rate": 4.6648000000000005e-05, "loss": 8.714, "step": 838000 }, { "epoch": 6.71, "learning_rate": 4.6646e-05, "loss": 8.651, "step": 838500 }, { "epoch": 6.71, "learning_rate": 4.6644e-05, "loss": 8.6644, "step": 839000 }, { "epoch": 6.72, "learning_rate": 4.6642000000000005e-05, "loss": 8.684, "step": 839500 }, { "epoch": 6.72, "learning_rate": 4.664e-05, "loss": 8.6915, "step": 840000 }, { "epoch": 6.72, "learning_rate": 4.6638e-05, "loss": 8.6944, "step": 840500 }, { "epoch": 6.73, "learning_rate": 4.6636e-05, "loss": 8.694, "step": 841000 }, { "epoch": 6.73, "learning_rate": 4.6634e-05, "loss": 8.664, "step": 841500 }, { "epoch": 6.74, "learning_rate": 4.6632000000000003e-05, "loss": 8.6761, "step": 842000 }, { "epoch": 6.74, "learning_rate": 4.663e-05, "loss": 8.6971, "step": 842500 }, { "epoch": 6.74, "learning_rate": 4.6628e-05, "loss": 8.6782, "step": 843000 }, { "epoch": 6.75, "learning_rate": 4.6626000000000004e-05, "loss": 8.6581, "step": 843500 }, { "epoch": 6.75, "learning_rate": 4.6624e-05, "loss": 8.6781, "step": 844000 }, { "epoch": 6.76, "learning_rate": 4.6622e-05, "loss": 8.6633, "step": 844500 }, { "epoch": 6.76, "learning_rate": 4.6620000000000004e-05, "loss": 8.6702, "step": 845000 }, { "epoch": 6.76, "learning_rate": 4.6618e-05, "loss": 8.6883, "step": 845500 }, { "epoch": 6.77, "learning_rate": 4.6616e-05, "loss": 8.6945, "step": 846000 }, { "epoch": 6.77, "learning_rate": 4.6614000000000005e-05, "loss": 8.6573, "step": 846500 }, { "epoch": 6.78, "learning_rate": 4.6612e-05, "loss": 8.672, "step": 847000 }, { "epoch": 6.78, "learning_rate": 4.661e-05, "loss": 8.659, "step": 847500 }, { "epoch": 6.78, "learning_rate": 4.6608e-05, "loss": 8.6699, "step": 848000 }, { "epoch": 6.79, "learning_rate": 4.6606e-05, "loss": 8.6829, "step": 848500 }, { "epoch": 6.79, "learning_rate": 4.6604e-05, "loss": 8.6908, "step": 849000 }, { "epoch": 6.8, "learning_rate": 4.6602e-05, "loss": 8.6943, "step": 849500 }, { "epoch": 6.8, "learning_rate": 4.660000000000001e-05, "loss": 8.6991, "step": 850000 }, { "epoch": 6.8, "learning_rate": 4.6598000000000003e-05, "loss": 8.6771, "step": 850500 }, { "epoch": 6.81, "learning_rate": 4.6596e-05, "loss": 8.719, "step": 851000 }, { "epoch": 6.81, "learning_rate": 4.6594e-05, "loss": 8.6948, "step": 851500 }, { "epoch": 6.82, "learning_rate": 4.6592000000000004e-05, "loss": 8.6727, "step": 852000 }, { "epoch": 6.82, "learning_rate": 4.659e-05, "loss": 8.6538, "step": 852500 }, { "epoch": 6.82, "learning_rate": 4.6588e-05, "loss": 8.6852, "step": 853000 }, { "epoch": 6.83, "learning_rate": 4.6586000000000004e-05, "loss": 8.7012, "step": 853500 }, { "epoch": 6.83, "learning_rate": 4.6584e-05, "loss": 8.679, "step": 854000 }, { "epoch": 6.84, "learning_rate": 4.6582e-05, "loss": 8.6946, "step": 854500 }, { "epoch": 6.84, "learning_rate": 4.6580000000000005e-05, "loss": 8.6983, "step": 855000 }, { "epoch": 6.84, "learning_rate": 4.657800000000001e-05, "loss": 8.6911, "step": 855500 }, { "epoch": 6.85, "learning_rate": 4.6576e-05, "loss": 8.7233, "step": 856000 }, { "epoch": 6.85, "learning_rate": 4.6574e-05, "loss": 8.6793, "step": 856500 }, { "epoch": 6.86, "learning_rate": 4.657200000000001e-05, "loss": 8.6707, "step": 857000 }, { "epoch": 6.86, "learning_rate": 4.657e-05, "loss": 8.6818, "step": 857500 }, { "epoch": 6.86, "learning_rate": 4.6568e-05, "loss": 8.6773, "step": 858000 }, { "epoch": 6.87, "learning_rate": 4.6566e-05, "loss": 8.7184, "step": 858500 }, { "epoch": 6.87, "learning_rate": 4.6564000000000003e-05, "loss": 8.6863, "step": 859000 }, { "epoch": 6.88, "learning_rate": 4.6562e-05, "loss": 8.6887, "step": 859500 }, { "epoch": 6.88, "learning_rate": 4.656e-05, "loss": 8.6982, "step": 860000 }, { "epoch": 6.88, "learning_rate": 4.6558000000000004e-05, "loss": 8.7029, "step": 860500 }, { "epoch": 6.89, "learning_rate": 4.6556e-05, "loss": 8.6883, "step": 861000 }, { "epoch": 6.89, "learning_rate": 4.6554e-05, "loss": 8.6869, "step": 861500 }, { "epoch": 6.9, "learning_rate": 4.6552000000000004e-05, "loss": 8.6949, "step": 862000 }, { "epoch": 6.9, "learning_rate": 4.655000000000001e-05, "loss": 8.6647, "step": 862500 }, { "epoch": 6.9, "learning_rate": 4.6548e-05, "loss": 8.6981, "step": 863000 }, { "epoch": 6.91, "learning_rate": 4.6546e-05, "loss": 8.706, "step": 863500 }, { "epoch": 6.91, "learning_rate": 4.654400000000001e-05, "loss": 8.6744, "step": 864000 }, { "epoch": 6.92, "learning_rate": 4.6542e-05, "loss": 8.6722, "step": 864500 }, { "epoch": 6.92, "learning_rate": 4.654e-05, "loss": 8.6963, "step": 865000 }, { "epoch": 6.92, "learning_rate": 4.6538e-05, "loss": 8.6823, "step": 865500 }, { "epoch": 6.93, "learning_rate": 4.6536e-05, "loss": 8.6683, "step": 866000 }, { "epoch": 6.93, "learning_rate": 4.6534e-05, "loss": 8.6902, "step": 866500 }, { "epoch": 6.94, "learning_rate": 4.6532e-05, "loss": 8.679, "step": 867000 }, { "epoch": 6.94, "learning_rate": 4.6530000000000003e-05, "loss": 8.689, "step": 867500 }, { "epoch": 6.94, "learning_rate": 4.6528000000000006e-05, "loss": 8.682, "step": 868000 }, { "epoch": 6.95, "learning_rate": 4.6526e-05, "loss": 8.6504, "step": 868500 }, { "epoch": 6.95, "learning_rate": 4.6524000000000004e-05, "loss": 8.6971, "step": 869000 }, { "epoch": 6.96, "learning_rate": 4.6522000000000006e-05, "loss": 8.695, "step": 869500 }, { "epoch": 6.96, "learning_rate": 4.652e-05, "loss": 8.671, "step": 870000 }, { "epoch": 6.96, "learning_rate": 4.6518e-05, "loss": 8.6795, "step": 870500 }, { "epoch": 6.97, "learning_rate": 4.651600000000001e-05, "loss": 8.6897, "step": 871000 }, { "epoch": 6.97, "learning_rate": 4.6514e-05, "loss": 8.686, "step": 871500 }, { "epoch": 6.98, "learning_rate": 4.6512e-05, "loss": 8.6801, "step": 872000 }, { "epoch": 6.98, "learning_rate": 4.651e-05, "loss": 8.7021, "step": 872500 }, { "epoch": 6.98, "learning_rate": 4.6508e-05, "loss": 8.68, "step": 873000 }, { "epoch": 6.99, "learning_rate": 4.6506e-05, "loss": 8.6635, "step": 873500 }, { "epoch": 6.99, "learning_rate": 4.6504e-05, "loss": 8.6801, "step": 874000 }, { "epoch": 7.0, "learning_rate": 4.6502e-05, "loss": 8.6499, "step": 874500 }, { "epoch": 7.0, "learning_rate": 4.6500000000000005e-05, "loss": 8.7053, "step": 875000 }, { "epoch": 7.0, "learning_rate": 4.6498e-05, "loss": 8.6667, "step": 875500 }, { "epoch": 7.01, "learning_rate": 4.6496000000000003e-05, "loss": 8.6919, "step": 876000 }, { "epoch": 7.01, "learning_rate": 4.6494000000000006e-05, "loss": 8.6901, "step": 876500 }, { "epoch": 7.02, "learning_rate": 4.6492e-05, "loss": 8.7098, "step": 877000 }, { "epoch": 7.02, "learning_rate": 4.649e-05, "loss": 8.6854, "step": 877500 }, { "epoch": 7.02, "learning_rate": 4.6488000000000006e-05, "loss": 8.6944, "step": 878000 }, { "epoch": 7.03, "learning_rate": 4.6486e-05, "loss": 8.704, "step": 878500 }, { "epoch": 7.03, "learning_rate": 4.6484e-05, "loss": 8.6889, "step": 879000 }, { "epoch": 7.04, "learning_rate": 4.6482000000000007e-05, "loss": 8.6797, "step": 879500 }, { "epoch": 7.04, "learning_rate": 4.648e-05, "loss": 8.6706, "step": 880000 }, { "epoch": 7.04, "learning_rate": 4.6478000000000005e-05, "loss": 8.689, "step": 880500 }, { "epoch": 7.05, "learning_rate": 4.6476e-05, "loss": 8.7023, "step": 881000 }, { "epoch": 7.05, "learning_rate": 4.6474e-05, "loss": 8.6985, "step": 881500 }, { "epoch": 7.06, "learning_rate": 4.6472000000000005e-05, "loss": 8.6837, "step": 882000 }, { "epoch": 7.06, "learning_rate": 4.647e-05, "loss": 8.6838, "step": 882500 }, { "epoch": 7.06, "learning_rate": 4.6468e-05, "loss": 8.7124, "step": 883000 }, { "epoch": 7.07, "learning_rate": 4.6466000000000005e-05, "loss": 8.7076, "step": 883500 }, { "epoch": 7.07, "learning_rate": 4.6464e-05, "loss": 8.7046, "step": 884000 }, { "epoch": 7.08, "learning_rate": 4.6462e-05, "loss": 8.6597, "step": 884500 }, { "epoch": 7.08, "learning_rate": 4.6460000000000006e-05, "loss": 8.6988, "step": 885000 }, { "epoch": 7.08, "learning_rate": 4.6458e-05, "loss": 8.6622, "step": 885500 }, { "epoch": 7.09, "learning_rate": 4.6456e-05, "loss": 8.6962, "step": 886000 }, { "epoch": 7.09, "learning_rate": 4.6454000000000006e-05, "loss": 8.6824, "step": 886500 }, { "epoch": 7.1, "learning_rate": 4.6452e-05, "loss": 8.6729, "step": 887000 }, { "epoch": 7.1, "learning_rate": 4.6450000000000004e-05, "loss": 8.7063, "step": 887500 }, { "epoch": 7.1, "learning_rate": 4.6448e-05, "loss": 8.6514, "step": 888000 }, { "epoch": 7.11, "learning_rate": 4.6446e-05, "loss": 8.7005, "step": 888500 }, { "epoch": 7.11, "learning_rate": 4.6444000000000005e-05, "loss": 8.6967, "step": 889000 }, { "epoch": 7.12, "learning_rate": 4.6442e-05, "loss": 8.6805, "step": 889500 }, { "epoch": 7.12, "learning_rate": 4.644e-05, "loss": 8.6851, "step": 890000 }, { "epoch": 7.12, "learning_rate": 4.6438000000000005e-05, "loss": 8.7003, "step": 890500 }, { "epoch": 7.13, "learning_rate": 4.6436e-05, "loss": 8.6987, "step": 891000 }, { "epoch": 7.13, "learning_rate": 4.6434e-05, "loss": 8.694, "step": 891500 }, { "epoch": 7.14, "learning_rate": 4.6432000000000005e-05, "loss": 8.6898, "step": 892000 }, { "epoch": 7.14, "learning_rate": 4.643e-05, "loss": 8.6917, "step": 892500 }, { "epoch": 7.14, "learning_rate": 4.6428000000000003e-05, "loss": 8.6972, "step": 893000 }, { "epoch": 7.15, "learning_rate": 4.6426000000000006e-05, "loss": 8.7035, "step": 893500 }, { "epoch": 7.15, "learning_rate": 4.6424e-05, "loss": 8.7104, "step": 894000 }, { "epoch": 7.16, "learning_rate": 4.6422000000000004e-05, "loss": 8.7077, "step": 894500 }, { "epoch": 7.16, "learning_rate": 4.642e-05, "loss": 8.6918, "step": 895000 }, { "epoch": 7.16, "learning_rate": 4.6418e-05, "loss": 8.6994, "step": 895500 }, { "epoch": 7.17, "learning_rate": 4.6416000000000004e-05, "loss": 8.6486, "step": 896000 }, { "epoch": 7.17, "learning_rate": 4.6414e-05, "loss": 8.6843, "step": 896500 }, { "epoch": 7.18, "learning_rate": 4.6412e-05, "loss": 8.6733, "step": 897000 }, { "epoch": 7.18, "learning_rate": 4.6410000000000005e-05, "loss": 8.6797, "step": 897500 }, { "epoch": 7.18, "learning_rate": 4.6408e-05, "loss": 8.6684, "step": 898000 }, { "epoch": 7.19, "learning_rate": 4.6406e-05, "loss": 8.6925, "step": 898500 }, { "epoch": 7.19, "learning_rate": 4.6404000000000005e-05, "loss": 8.6618, "step": 899000 }, { "epoch": 7.2, "learning_rate": 4.6402e-05, "loss": 8.7055, "step": 899500 }, { "epoch": 7.2, "learning_rate": 4.64e-05, "loss": 8.6637, "step": 900000 }, { "epoch": 7.2, "learning_rate": 4.6398000000000005e-05, "loss": 8.6877, "step": 900500 }, { "epoch": 7.21, "learning_rate": 4.6396e-05, "loss": 8.6848, "step": 901000 }, { "epoch": 7.21, "learning_rate": 4.6394e-05, "loss": 8.6867, "step": 901500 }, { "epoch": 7.22, "learning_rate": 4.6392e-05, "loss": 8.6845, "step": 902000 }, { "epoch": 7.22, "learning_rate": 4.639e-05, "loss": 8.6648, "step": 902500 }, { "epoch": 7.22, "learning_rate": 4.6388000000000004e-05, "loss": 8.6905, "step": 903000 }, { "epoch": 7.23, "learning_rate": 4.6386e-05, "loss": 8.6813, "step": 903500 }, { "epoch": 7.23, "learning_rate": 4.6384e-05, "loss": 8.7004, "step": 904000 }, { "epoch": 7.24, "learning_rate": 4.6382000000000004e-05, "loss": 8.6759, "step": 904500 }, { "epoch": 7.24, "learning_rate": 4.638e-05, "loss": 8.6784, "step": 905000 }, { "epoch": 7.24, "learning_rate": 4.6378e-05, "loss": 8.6783, "step": 905500 }, { "epoch": 7.25, "learning_rate": 4.6376000000000005e-05, "loss": 8.6821, "step": 906000 }, { "epoch": 7.25, "learning_rate": 4.6374e-05, "loss": 8.704, "step": 906500 }, { "epoch": 7.26, "learning_rate": 4.6372e-05, "loss": 8.681, "step": 907000 }, { "epoch": 7.26, "learning_rate": 4.6370000000000005e-05, "loss": 8.664, "step": 907500 }, { "epoch": 7.26, "learning_rate": 4.6368e-05, "loss": 8.6814, "step": 908000 }, { "epoch": 7.27, "learning_rate": 4.6366e-05, "loss": 8.6596, "step": 908500 }, { "epoch": 7.27, "learning_rate": 4.6364e-05, "loss": 8.6903, "step": 909000 }, { "epoch": 7.28, "learning_rate": 4.6362e-05, "loss": 8.696, "step": 909500 }, { "epoch": 7.28, "learning_rate": 4.636e-05, "loss": 8.6709, "step": 910000 }, { "epoch": 7.28, "learning_rate": 4.6358e-05, "loss": 8.6807, "step": 910500 }, { "epoch": 7.29, "learning_rate": 4.635600000000001e-05, "loss": 8.6804, "step": 911000 }, { "epoch": 7.29, "learning_rate": 4.6354000000000004e-05, "loss": 8.6734, "step": 911500 }, { "epoch": 7.3, "learning_rate": 4.6352e-05, "loss": 8.6951, "step": 912000 }, { "epoch": 7.3, "learning_rate": 4.635e-05, "loss": 8.6676, "step": 912500 }, { "epoch": 7.3, "learning_rate": 4.6348000000000004e-05, "loss": 8.6494, "step": 913000 }, { "epoch": 7.31, "learning_rate": 4.6346e-05, "loss": 8.6651, "step": 913500 }, { "epoch": 7.31, "learning_rate": 4.6344e-05, "loss": 8.6902, "step": 914000 }, { "epoch": 7.32, "learning_rate": 4.6342000000000005e-05, "loss": 8.6748, "step": 914500 }, { "epoch": 7.32, "learning_rate": 4.634e-05, "loss": 8.7043, "step": 915000 }, { "epoch": 7.32, "learning_rate": 4.6338e-05, "loss": 8.6911, "step": 915500 }, { "epoch": 7.33, "learning_rate": 4.6336000000000005e-05, "loss": 8.6861, "step": 916000 }, { "epoch": 7.33, "learning_rate": 4.6334e-05, "loss": 8.7087, "step": 916500 }, { "epoch": 7.34, "learning_rate": 4.6332e-05, "loss": 8.6657, "step": 917000 }, { "epoch": 7.34, "learning_rate": 4.633e-05, "loss": 8.6511, "step": 917500 }, { "epoch": 7.34, "learning_rate": 4.632800000000001e-05, "loss": 8.6858, "step": 918000 }, { "epoch": 7.35, "learning_rate": 4.6326e-05, "loss": 8.6662, "step": 918500 }, { "epoch": 7.35, "learning_rate": 4.6324e-05, "loss": 8.6781, "step": 919000 }, { "epoch": 7.36, "learning_rate": 4.6322e-05, "loss": 8.697, "step": 919500 }, { "epoch": 7.36, "learning_rate": 4.6320000000000004e-05, "loss": 8.6628, "step": 920000 }, { "epoch": 7.36, "learning_rate": 4.6318e-05, "loss": 8.6862, "step": 920500 }, { "epoch": 7.37, "learning_rate": 4.6316e-05, "loss": 8.6931, "step": 921000 }, { "epoch": 7.37, "learning_rate": 4.6314000000000004e-05, "loss": 8.6492, "step": 921500 }, { "epoch": 7.38, "learning_rate": 4.6312e-05, "loss": 8.6837, "step": 922000 }, { "epoch": 7.38, "learning_rate": 4.631e-05, "loss": 8.7073, "step": 922500 }, { "epoch": 7.38, "learning_rate": 4.6308000000000005e-05, "loss": 8.6857, "step": 923000 }, { "epoch": 7.39, "learning_rate": 4.630600000000001e-05, "loss": 8.6884, "step": 923500 }, { "epoch": 7.39, "learning_rate": 4.6304e-05, "loss": 8.6743, "step": 924000 }, { "epoch": 7.4, "learning_rate": 4.6302e-05, "loss": 8.6903, "step": 924500 }, { "epoch": 7.4, "learning_rate": 4.630000000000001e-05, "loss": 8.6847, "step": 925000 }, { "epoch": 7.4, "learning_rate": 4.6298e-05, "loss": 8.698, "step": 925500 }, { "epoch": 7.41, "learning_rate": 4.6296e-05, "loss": 8.6978, "step": 926000 }, { "epoch": 7.41, "learning_rate": 4.6294e-05, "loss": 8.6721, "step": 926500 }, { "epoch": 7.42, "learning_rate": 4.6292e-05, "loss": 8.6805, "step": 927000 }, { "epoch": 7.42, "learning_rate": 4.629e-05, "loss": 8.6839, "step": 927500 }, { "epoch": 7.42, "learning_rate": 4.6288e-05, "loss": 8.6973, "step": 928000 }, { "epoch": 7.43, "learning_rate": 4.6286000000000004e-05, "loss": 8.6982, "step": 928500 }, { "epoch": 7.43, "learning_rate": 4.6284e-05, "loss": 8.6642, "step": 929000 }, { "epoch": 7.44, "learning_rate": 4.6282e-05, "loss": 8.6692, "step": 929500 }, { "epoch": 7.44, "learning_rate": 4.6280000000000004e-05, "loss": 8.6817, "step": 930000 }, { "epoch": 7.44, "learning_rate": 4.6278000000000007e-05, "loss": 8.6982, "step": 930500 }, { "epoch": 7.45, "learning_rate": 4.6276e-05, "loss": 8.7005, "step": 931000 }, { "epoch": 7.45, "learning_rate": 4.6274e-05, "loss": 8.6706, "step": 931500 }, { "epoch": 7.46, "learning_rate": 4.627200000000001e-05, "loss": 8.6844, "step": 932000 }, { "epoch": 7.46, "learning_rate": 4.627e-05, "loss": 8.6853, "step": 932500 }, { "epoch": 7.46, "learning_rate": 4.6268e-05, "loss": 8.7154, "step": 933000 }, { "epoch": 7.47, "learning_rate": 4.6266e-05, "loss": 8.6622, "step": 933500 }, { "epoch": 7.47, "learning_rate": 4.6264e-05, "loss": 8.7021, "step": 934000 }, { "epoch": 7.48, "learning_rate": 4.6262e-05, "loss": 8.6999, "step": 934500 }, { "epoch": 7.48, "learning_rate": 4.626e-05, "loss": 8.7232, "step": 935000 }, { "epoch": 7.48, "learning_rate": 4.6258e-05, "loss": 8.6903, "step": 935500 }, { "epoch": 7.49, "learning_rate": 4.6256000000000006e-05, "loss": 8.7058, "step": 936000 }, { "epoch": 7.49, "learning_rate": 4.6254e-05, "loss": 8.678, "step": 936500 }, { "epoch": 7.5, "learning_rate": 4.6252000000000004e-05, "loss": 8.6845, "step": 937000 }, { "epoch": 7.5, "learning_rate": 4.6250000000000006e-05, "loss": 8.7047, "step": 937500 }, { "epoch": 7.5, "learning_rate": 4.6248e-05, "loss": 8.7072, "step": 938000 }, { "epoch": 7.51, "learning_rate": 4.6246e-05, "loss": 8.6988, "step": 938500 }, { "epoch": 7.51, "learning_rate": 4.6244000000000007e-05, "loss": 8.708, "step": 939000 }, { "epoch": 7.52, "learning_rate": 4.6242e-05, "loss": 8.6982, "step": 939500 }, { "epoch": 7.52, "learning_rate": 4.624e-05, "loss": 8.6931, "step": 940000 }, { "epoch": 7.52, "learning_rate": 4.623800000000001e-05, "loss": 8.6934, "step": 940500 }, { "epoch": 7.53, "learning_rate": 4.6236e-05, "loss": 8.7143, "step": 941000 }, { "epoch": 7.53, "learning_rate": 4.6234e-05, "loss": 8.6808, "step": 941500 }, { "epoch": 7.54, "learning_rate": 4.6232e-05, "loss": 8.6717, "step": 942000 }, { "epoch": 7.54, "learning_rate": 4.623e-05, "loss": 8.6726, "step": 942500 }, { "epoch": 7.54, "learning_rate": 4.6228000000000005e-05, "loss": 8.6821, "step": 943000 }, { "epoch": 7.55, "learning_rate": 4.6226e-05, "loss": 8.6614, "step": 943500 }, { "epoch": 7.55, "learning_rate": 4.6224e-05, "loss": 8.6905, "step": 944000 }, { "epoch": 7.56, "learning_rate": 4.6222000000000006e-05, "loss": 8.6846, "step": 944500 }, { "epoch": 7.56, "learning_rate": 4.622e-05, "loss": 8.676, "step": 945000 }, { "epoch": 7.56, "learning_rate": 4.6218e-05, "loss": 8.6599, "step": 945500 }, { "epoch": 7.57, "learning_rate": 4.6216000000000006e-05, "loss": 8.7162, "step": 946000 }, { "epoch": 7.57, "learning_rate": 4.6214e-05, "loss": 8.6802, "step": 946500 }, { "epoch": 7.58, "learning_rate": 4.6212e-05, "loss": 8.6806, "step": 947000 }, { "epoch": 7.58, "learning_rate": 4.6210000000000006e-05, "loss": 8.6999, "step": 947500 }, { "epoch": 7.58, "learning_rate": 4.6208e-05, "loss": 8.7034, "step": 948000 }, { "epoch": 7.59, "learning_rate": 4.6206000000000005e-05, "loss": 8.687, "step": 948500 }, { "epoch": 7.59, "learning_rate": 4.6204e-05, "loss": 8.6834, "step": 949000 }, { "epoch": 7.6, "learning_rate": 4.6202e-05, "loss": 8.6872, "step": 949500 }, { "epoch": 7.6, "learning_rate": 4.6200000000000005e-05, "loss": 8.6548, "step": 950000 }, { "epoch": 7.6, "learning_rate": 4.6198e-05, "loss": 8.6762, "step": 950500 }, { "epoch": 7.61, "learning_rate": 4.6196e-05, "loss": 8.6646, "step": 951000 }, { "epoch": 7.61, "learning_rate": 4.6194000000000005e-05, "loss": 8.6795, "step": 951500 }, { "epoch": 7.62, "learning_rate": 4.6192e-05, "loss": 8.6805, "step": 952000 }, { "epoch": 7.62, "learning_rate": 4.619e-05, "loss": 8.6733, "step": 952500 }, { "epoch": 7.62, "learning_rate": 4.6188000000000006e-05, "loss": 8.6975, "step": 953000 }, { "epoch": 7.63, "learning_rate": 4.6186e-05, "loss": 8.6797, "step": 953500 }, { "epoch": 7.63, "learning_rate": 4.6184e-05, "loss": 8.6787, "step": 954000 }, { "epoch": 7.64, "learning_rate": 4.6182000000000006e-05, "loss": 8.6848, "step": 954500 }, { "epoch": 7.64, "learning_rate": 4.618e-05, "loss": 8.6965, "step": 955000 }, { "epoch": 7.64, "learning_rate": 4.6178000000000004e-05, "loss": 8.6782, "step": 955500 }, { "epoch": 7.65, "learning_rate": 4.6176e-05, "loss": 8.6756, "step": 956000 }, { "epoch": 7.65, "learning_rate": 4.6174e-05, "loss": 8.712, "step": 956500 }, { "epoch": 7.66, "learning_rate": 4.6172000000000004e-05, "loss": 8.6875, "step": 957000 }, { "epoch": 7.66, "learning_rate": 4.617e-05, "loss": 8.6896, "step": 957500 }, { "epoch": 7.66, "learning_rate": 4.6168e-05, "loss": 8.69, "step": 958000 }, { "epoch": 7.67, "learning_rate": 4.6166000000000005e-05, "loss": 8.6729, "step": 958500 }, { "epoch": 7.67, "learning_rate": 4.6164e-05, "loss": 8.6989, "step": 959000 }, { "epoch": 7.68, "learning_rate": 4.6162e-05, "loss": 8.6812, "step": 959500 }, { "epoch": 7.68, "learning_rate": 4.6160000000000005e-05, "loss": 8.7029, "step": 960000 }, { "epoch": 7.68, "learning_rate": 4.6158e-05, "loss": 8.6698, "step": 960500 }, { "epoch": 7.69, "learning_rate": 4.6156e-05, "loss": 8.6891, "step": 961000 }, { "epoch": 7.69, "learning_rate": 4.6154000000000006e-05, "loss": 8.6966, "step": 961500 }, { "epoch": 7.7, "learning_rate": 4.6152e-05, "loss": 8.6876, "step": 962000 }, { "epoch": 7.7, "learning_rate": 4.6150000000000004e-05, "loss": 8.6973, "step": 962500 }, { "epoch": 7.7, "learning_rate": 4.6148e-05, "loss": 8.6773, "step": 963000 }, { "epoch": 7.71, "learning_rate": 4.6146e-05, "loss": 8.6886, "step": 963500 }, { "epoch": 7.71, "learning_rate": 4.6144000000000004e-05, "loss": 8.6869, "step": 964000 }, { "epoch": 7.72, "learning_rate": 4.6142e-05, "loss": 8.6692, "step": 964500 }, { "epoch": 7.72, "learning_rate": 4.614e-05, "loss": 8.6766, "step": 965000 }, { "epoch": 7.72, "learning_rate": 4.6138000000000004e-05, "loss": 8.6666, "step": 965500 }, { "epoch": 7.73, "learning_rate": 4.6136e-05, "loss": 8.687, "step": 966000 }, { "epoch": 7.73, "learning_rate": 4.6134e-05, "loss": 8.6504, "step": 966500 }, { "epoch": 7.74, "learning_rate": 4.6132000000000005e-05, "loss": 8.6796, "step": 967000 }, { "epoch": 7.74, "learning_rate": 4.613e-05, "loss": 8.6913, "step": 967500 }, { "epoch": 7.74, "learning_rate": 4.6128e-05, "loss": 8.6593, "step": 968000 }, { "epoch": 7.75, "learning_rate": 4.6126000000000005e-05, "loss": 8.6936, "step": 968500 }, { "epoch": 7.75, "learning_rate": 4.6124e-05, "loss": 8.6986, "step": 969000 }, { "epoch": 7.76, "learning_rate": 4.6122e-05, "loss": 8.7003, "step": 969500 }, { "epoch": 7.76, "learning_rate": 4.612e-05, "loss": 8.7268, "step": 970000 }, { "epoch": 7.76, "learning_rate": 4.6118e-05, "loss": 8.6642, "step": 970500 }, { "epoch": 7.77, "learning_rate": 4.6116000000000004e-05, "loss": 8.7087, "step": 971000 }, { "epoch": 7.77, "learning_rate": 4.6114e-05, "loss": 8.6864, "step": 971500 }, { "epoch": 7.78, "learning_rate": 4.6112e-05, "loss": 8.6814, "step": 972000 }, { "epoch": 7.78, "learning_rate": 4.6110000000000004e-05, "loss": 8.6834, "step": 972500 }, { "epoch": 7.78, "learning_rate": 4.6108e-05, "loss": 8.6783, "step": 973000 }, { "epoch": 7.79, "learning_rate": 4.6106e-05, "loss": 8.6716, "step": 973500 }, { "epoch": 7.79, "learning_rate": 4.6104000000000004e-05, "loss": 8.691, "step": 974000 }, { "epoch": 7.8, "learning_rate": 4.6102e-05, "loss": 8.6724, "step": 974500 }, { "epoch": 7.8, "learning_rate": 4.61e-05, "loss": 8.6989, "step": 975000 }, { "epoch": 7.8, "learning_rate": 4.6098000000000005e-05, "loss": 8.6891, "step": 975500 }, { "epoch": 7.81, "learning_rate": 4.6096e-05, "loss": 8.7081, "step": 976000 }, { "epoch": 7.81, "learning_rate": 4.6094e-05, "loss": 8.6749, "step": 976500 }, { "epoch": 7.82, "learning_rate": 4.6092e-05, "loss": 8.6825, "step": 977000 }, { "epoch": 7.82, "learning_rate": 4.609e-05, "loss": 8.6799, "step": 977500 }, { "epoch": 7.82, "learning_rate": 4.6088e-05, "loss": 8.6761, "step": 978000 }, { "epoch": 7.83, "learning_rate": 4.6086e-05, "loss": 8.6895, "step": 978500 }, { "epoch": 7.83, "learning_rate": 4.608400000000001e-05, "loss": 8.6833, "step": 979000 }, { "epoch": 7.84, "learning_rate": 4.6082000000000004e-05, "loss": 8.6868, "step": 979500 }, { "epoch": 7.84, "learning_rate": 4.608e-05, "loss": 8.6998, "step": 980000 }, { "epoch": 7.84, "learning_rate": 4.6078e-05, "loss": 8.6742, "step": 980500 }, { "epoch": 7.85, "learning_rate": 4.6076000000000004e-05, "loss": 8.6903, "step": 981000 }, { "epoch": 7.85, "learning_rate": 4.6074e-05, "loss": 8.7028, "step": 981500 }, { "epoch": 7.86, "learning_rate": 4.6072e-05, "loss": 8.6664, "step": 982000 }, { "epoch": 7.86, "learning_rate": 4.6070000000000004e-05, "loss": 8.7171, "step": 982500 }, { "epoch": 7.86, "learning_rate": 4.6068e-05, "loss": 8.6859, "step": 983000 }, { "epoch": 7.87, "learning_rate": 4.6066e-05, "loss": 8.6715, "step": 983500 }, { "epoch": 7.87, "learning_rate": 4.6064000000000005e-05, "loss": 8.6685, "step": 984000 }, { "epoch": 7.88, "learning_rate": 4.6062e-05, "loss": 8.7039, "step": 984500 }, { "epoch": 7.88, "learning_rate": 4.606e-05, "loss": 8.6847, "step": 985000 }, { "epoch": 7.88, "learning_rate": 4.6058e-05, "loss": 8.6586, "step": 985500 }, { "epoch": 7.89, "learning_rate": 4.605600000000001e-05, "loss": 8.7179, "step": 986000 }, { "epoch": 7.89, "learning_rate": 4.6054e-05, "loss": 8.6705, "step": 986500 }, { "epoch": 7.9, "learning_rate": 4.6052e-05, "loss": 8.6893, "step": 987000 }, { "epoch": 7.9, "learning_rate": 4.605e-05, "loss": 8.6609, "step": 987500 }, { "epoch": 7.9, "learning_rate": 4.6048000000000004e-05, "loss": 8.684, "step": 988000 }, { "epoch": 7.91, "learning_rate": 4.6046e-05, "loss": 8.6849, "step": 988500 }, { "epoch": 7.91, "learning_rate": 4.6044e-05, "loss": 8.681, "step": 989000 }, { "epoch": 7.92, "learning_rate": 4.6042000000000004e-05, "loss": 8.6703, "step": 989500 }, { "epoch": 7.92, "learning_rate": 4.604e-05, "loss": 8.6585, "step": 990000 }, { "epoch": 7.92, "learning_rate": 4.6038e-05, "loss": 8.6652, "step": 990500 }, { "epoch": 7.93, "learning_rate": 4.6036000000000004e-05, "loss": 8.6842, "step": 991000 }, { "epoch": 7.93, "learning_rate": 4.603400000000001e-05, "loss": 8.6643, "step": 991500 }, { "epoch": 7.94, "learning_rate": 4.6032e-05, "loss": 8.6786, "step": 992000 }, { "epoch": 7.94, "learning_rate": 4.603e-05, "loss": 8.6923, "step": 992500 }, { "epoch": 7.94, "learning_rate": 4.602800000000001e-05, "loss": 8.687, "step": 993000 }, { "epoch": 7.95, "learning_rate": 4.6026e-05, "loss": 8.6822, "step": 993500 }, { "epoch": 7.95, "learning_rate": 4.6024e-05, "loss": 8.692, "step": 994000 }, { "epoch": 7.96, "learning_rate": 4.6022e-05, "loss": 8.7046, "step": 994500 }, { "epoch": 7.96, "learning_rate": 4.602e-05, "loss": 8.6693, "step": 995000 }, { "epoch": 7.96, "learning_rate": 4.6018e-05, "loss": 8.6736, "step": 995500 }, { "epoch": 7.97, "learning_rate": 4.6016e-05, "loss": 8.6926, "step": 996000 }, { "epoch": 7.97, "learning_rate": 4.6014000000000004e-05, "loss": 8.6698, "step": 996500 }, { "epoch": 7.98, "learning_rate": 4.6012e-05, "loss": 8.6779, "step": 997000 }, { "epoch": 7.98, "learning_rate": 4.601e-05, "loss": 8.7049, "step": 997500 }, { "epoch": 7.98, "learning_rate": 4.6008000000000004e-05, "loss": 8.6701, "step": 998000 }, { "epoch": 7.99, "learning_rate": 4.6006000000000006e-05, "loss": 8.6645, "step": 998500 }, { "epoch": 7.99, "learning_rate": 4.6004e-05, "loss": 8.6496, "step": 999000 }, { "epoch": 8.0, "learning_rate": 4.6002e-05, "loss": 8.6946, "step": 999500 }, { "epoch": 8.0, "learning_rate": 4.600000000000001e-05, "loss": 8.6764, "step": 1000000 }, { "epoch": 8.0, "learning_rate": 4.5998e-05, "loss": 8.697, "step": 1000500 }, { "epoch": 8.01, "learning_rate": 4.5996e-05, "loss": 8.7044, "step": 1001000 }, { "epoch": 8.01, "learning_rate": 4.5994e-05, "loss": 8.7141, "step": 1001500 }, { "epoch": 8.02, "learning_rate": 4.5992e-05, "loss": 8.6652, "step": 1002000 }, { "epoch": 8.02, "learning_rate": 4.599e-05, "loss": 8.6987, "step": 1002500 }, { "epoch": 8.02, "learning_rate": 4.5988e-05, "loss": 8.6945, "step": 1003000 }, { "epoch": 8.03, "learning_rate": 4.5986e-05, "loss": 8.6813, "step": 1003500 }, { "epoch": 8.03, "learning_rate": 4.5984000000000006e-05, "loss": 8.7214, "step": 1004000 }, { "epoch": 8.04, "learning_rate": 4.5982e-05, "loss": 8.6751, "step": 1004500 }, { "epoch": 8.04, "learning_rate": 4.5980000000000004e-05, "loss": 8.6723, "step": 1005000 }, { "epoch": 8.04, "learning_rate": 4.5978000000000006e-05, "loss": 8.6717, "step": 1005500 }, { "epoch": 8.05, "learning_rate": 4.5976e-05, "loss": 8.679, "step": 1006000 }, { "epoch": 8.05, "learning_rate": 4.5974e-05, "loss": 8.658, "step": 1006500 }, { "epoch": 8.06, "learning_rate": 4.5972000000000006e-05, "loss": 8.6902, "step": 1007000 }, { "epoch": 8.06, "learning_rate": 4.597e-05, "loss": 8.7246, "step": 1007500 }, { "epoch": 8.06, "learning_rate": 4.5968e-05, "loss": 8.7052, "step": 1008000 }, { "epoch": 8.07, "learning_rate": 4.596600000000001e-05, "loss": 8.6782, "step": 1008500 }, { "epoch": 8.07, "learning_rate": 4.5964e-05, "loss": 8.6826, "step": 1009000 }, { "epoch": 8.08, "learning_rate": 4.5962e-05, "loss": 8.6854, "step": 1009500 }, { "epoch": 8.08, "learning_rate": 4.596e-05, "loss": 8.6999, "step": 1010000 }, { "epoch": 8.08, "learning_rate": 4.5958e-05, "loss": 8.7048, "step": 1010500 }, { "epoch": 8.09, "learning_rate": 4.5956000000000005e-05, "loss": 8.676, "step": 1011000 }, { "epoch": 8.09, "learning_rate": 4.5954e-05, "loss": 8.6606, "step": 1011500 }, { "epoch": 8.1, "learning_rate": 4.5952e-05, "loss": 8.6777, "step": 1012000 }, { "epoch": 8.1, "learning_rate": 4.5950000000000006e-05, "loss": 8.6933, "step": 1012500 }, { "epoch": 8.1, "learning_rate": 4.5948e-05, "loss": 8.6798, "step": 1013000 }, { "epoch": 8.11, "learning_rate": 4.5946e-05, "loss": 8.7053, "step": 1013500 }, { "epoch": 8.11, "learning_rate": 4.5944000000000006e-05, "loss": 8.6938, "step": 1014000 }, { "epoch": 8.12, "learning_rate": 4.5942e-05, "loss": 8.6784, "step": 1014500 }, { "epoch": 8.12, "learning_rate": 4.594e-05, "loss": 8.676, "step": 1015000 }, { "epoch": 8.12, "learning_rate": 4.5938000000000006e-05, "loss": 8.6565, "step": 1015500 }, { "epoch": 8.13, "learning_rate": 4.5936e-05, "loss": 8.6663, "step": 1016000 }, { "epoch": 8.13, "learning_rate": 4.5934000000000004e-05, "loss": 8.6991, "step": 1016500 }, { "epoch": 8.14, "learning_rate": 4.5932e-05, "loss": 8.6868, "step": 1017000 }, { "epoch": 8.14, "learning_rate": 4.593e-05, "loss": 8.6851, "step": 1017500 }, { "epoch": 8.14, "learning_rate": 4.5928000000000005e-05, "loss": 8.6733, "step": 1018000 }, { "epoch": 8.15, "learning_rate": 4.5926e-05, "loss": 8.6802, "step": 1018500 }, { "epoch": 8.15, "learning_rate": 4.5924e-05, "loss": 8.6988, "step": 1019000 }, { "epoch": 8.16, "learning_rate": 4.5922000000000005e-05, "loss": 8.6794, "step": 1019500 }, { "epoch": 8.16, "learning_rate": 4.592e-05, "loss": 8.7025, "step": 1020000 }, { "epoch": 8.16, "learning_rate": 4.5918e-05, "loss": 8.6893, "step": 1020500 }, { "epoch": 8.17, "learning_rate": 4.5916000000000006e-05, "loss": 8.6904, "step": 1021000 }, { "epoch": 8.17, "learning_rate": 4.5914e-05, "loss": 8.6713, "step": 1021500 }, { "epoch": 8.18, "learning_rate": 4.5912e-05, "loss": 8.6845, "step": 1022000 }, { "epoch": 8.18, "learning_rate": 4.5910000000000006e-05, "loss": 8.6934, "step": 1022500 }, { "epoch": 8.18, "learning_rate": 4.5908e-05, "loss": 8.7045, "step": 1023000 }, { "epoch": 8.19, "learning_rate": 4.5906000000000004e-05, "loss": 8.6809, "step": 1023500 }, { "epoch": 8.19, "learning_rate": 4.5904e-05, "loss": 8.6934, "step": 1024000 }, { "epoch": 8.2, "learning_rate": 4.5902e-05, "loss": 8.7136, "step": 1024500 }, { "epoch": 8.2, "learning_rate": 4.5900000000000004e-05, "loss": 8.7, "step": 1025000 }, { "epoch": 8.2, "learning_rate": 4.5898e-05, "loss": 8.6985, "step": 1025500 }, { "epoch": 8.21, "learning_rate": 4.5896e-05, "loss": 8.6758, "step": 1026000 }, { "epoch": 8.21, "learning_rate": 4.5894000000000005e-05, "loss": 8.679, "step": 1026500 }, { "epoch": 8.22, "learning_rate": 4.5892e-05, "loss": 8.689, "step": 1027000 }, { "epoch": 8.22, "learning_rate": 4.589e-05, "loss": 8.6771, "step": 1027500 }, { "epoch": 8.22, "learning_rate": 4.5888000000000005e-05, "loss": 8.6877, "step": 1028000 }, { "epoch": 8.23, "learning_rate": 4.5886e-05, "loss": 8.7039, "step": 1028500 }, { "epoch": 8.23, "learning_rate": 4.5884e-05, "loss": 8.6799, "step": 1029000 }, { "epoch": 8.24, "learning_rate": 4.5882000000000006e-05, "loss": 8.6733, "step": 1029500 }, { "epoch": 8.24, "learning_rate": 4.588e-05, "loss": 8.6921, "step": 1030000 }, { "epoch": 8.24, "learning_rate": 4.5878000000000004e-05, "loss": 8.7019, "step": 1030500 }, { "epoch": 8.25, "learning_rate": 4.5876e-05, "loss": 8.6973, "step": 1031000 }, { "epoch": 8.25, "learning_rate": 4.5874e-05, "loss": 8.6742, "step": 1031500 }, { "epoch": 8.26, "learning_rate": 4.5872000000000004e-05, "loss": 8.6983, "step": 1032000 }, { "epoch": 8.26, "learning_rate": 4.587e-05, "loss": 8.6928, "step": 1032500 }, { "epoch": 8.26, "learning_rate": 4.5868e-05, "loss": 8.6773, "step": 1033000 }, { "epoch": 8.27, "learning_rate": 4.5866000000000004e-05, "loss": 8.6524, "step": 1033500 }, { "epoch": 8.27, "learning_rate": 4.5864e-05, "loss": 8.6835, "step": 1034000 }, { "epoch": 8.28, "learning_rate": 4.5862e-05, "loss": 8.681, "step": 1034500 }, { "epoch": 8.28, "learning_rate": 4.5860000000000005e-05, "loss": 8.6665, "step": 1035000 }, { "epoch": 8.28, "learning_rate": 4.5858e-05, "loss": 8.6794, "step": 1035500 }, { "epoch": 8.29, "learning_rate": 4.5856e-05, "loss": 8.6984, "step": 1036000 }, { "epoch": 8.29, "learning_rate": 4.5854000000000005e-05, "loss": 8.6684, "step": 1036500 }, { "epoch": 8.3, "learning_rate": 4.5852e-05, "loss": 8.6814, "step": 1037000 }, { "epoch": 8.3, "learning_rate": 4.585e-05, "loss": 8.654, "step": 1037500 }, { "epoch": 8.3, "learning_rate": 4.5848e-05, "loss": 8.6615, "step": 1038000 }, { "epoch": 8.31, "learning_rate": 4.5846e-05, "loss": 8.7002, "step": 1038500 }, { "epoch": 8.31, "learning_rate": 4.5844000000000004e-05, "loss": 8.6894, "step": 1039000 }, { "epoch": 8.32, "learning_rate": 4.5842e-05, "loss": 8.673, "step": 1039500 }, { "epoch": 8.32, "learning_rate": 4.584e-05, "loss": 8.6806, "step": 1040000 }, { "epoch": 8.32, "learning_rate": 4.5838000000000004e-05, "loss": 8.6709, "step": 1040500 }, { "epoch": 8.33, "learning_rate": 4.5836e-05, "loss": 8.6795, "step": 1041000 }, { "epoch": 8.33, "learning_rate": 4.5834e-05, "loss": 8.6674, "step": 1041500 }, { "epoch": 8.34, "learning_rate": 4.5832000000000004e-05, "loss": 8.6833, "step": 1042000 }, { "epoch": 8.34, "learning_rate": 4.583e-05, "loss": 8.669, "step": 1042500 }, { "epoch": 8.34, "learning_rate": 4.5828e-05, "loss": 8.7021, "step": 1043000 }, { "epoch": 8.35, "learning_rate": 4.5826000000000005e-05, "loss": 8.688, "step": 1043500 }, { "epoch": 8.35, "learning_rate": 4.5824e-05, "loss": 8.6934, "step": 1044000 }, { "epoch": 8.36, "learning_rate": 4.5822e-05, "loss": 8.6851, "step": 1044500 }, { "epoch": 8.36, "learning_rate": 4.5820000000000005e-05, "loss": 8.6744, "step": 1045000 }, { "epoch": 8.36, "learning_rate": 4.5818e-05, "loss": 8.6615, "step": 1045500 }, { "epoch": 8.37, "learning_rate": 4.5816e-05, "loss": 8.7014, "step": 1046000 }, { "epoch": 8.37, "learning_rate": 4.5814e-05, "loss": 8.697, "step": 1046500 }, { "epoch": 8.38, "learning_rate": 4.581200000000001e-05, "loss": 8.6944, "step": 1047000 }, { "epoch": 8.38, "learning_rate": 4.5810000000000004e-05, "loss": 8.6951, "step": 1047500 }, { "epoch": 8.38, "learning_rate": 4.5808e-05, "loss": 8.6984, "step": 1048000 }, { "epoch": 8.39, "learning_rate": 4.5806e-05, "loss": 8.6842, "step": 1048500 }, { "epoch": 8.39, "learning_rate": 4.5804000000000004e-05, "loss": 8.6664, "step": 1049000 }, { "epoch": 8.4, "learning_rate": 4.5802e-05, "loss": 8.6947, "step": 1049500 }, { "epoch": 8.4, "learning_rate": 4.58e-05, "loss": 8.701, "step": 1050000 }, { "epoch": 8.4, "learning_rate": 4.5798000000000004e-05, "loss": 8.6766, "step": 1050500 }, { "epoch": 8.41, "learning_rate": 4.5796e-05, "loss": 8.7013, "step": 1051000 }, { "epoch": 8.41, "learning_rate": 4.5794e-05, "loss": 8.687, "step": 1051500 }, { "epoch": 8.42, "learning_rate": 4.5792000000000005e-05, "loss": 8.6919, "step": 1052000 }, { "epoch": 8.42, "learning_rate": 4.579e-05, "loss": 8.6885, "step": 1052500 }, { "epoch": 8.42, "learning_rate": 4.5788e-05, "loss": 8.6599, "step": 1053000 }, { "epoch": 8.43, "learning_rate": 4.5786e-05, "loss": 8.7044, "step": 1053500 }, { "epoch": 8.43, "learning_rate": 4.578400000000001e-05, "loss": 8.6804, "step": 1054000 }, { "epoch": 8.44, "learning_rate": 4.5782e-05, "loss": 8.7054, "step": 1054500 }, { "epoch": 8.44, "learning_rate": 4.578e-05, "loss": 8.6756, "step": 1055000 }, { "epoch": 8.44, "learning_rate": 4.5778e-05, "loss": 8.6731, "step": 1055500 }, { "epoch": 8.45, "learning_rate": 4.5776000000000004e-05, "loss": 8.6934, "step": 1056000 }, { "epoch": 8.45, "learning_rate": 4.5774e-05, "loss": 8.6564, "step": 1056500 }, { "epoch": 8.46, "learning_rate": 4.5772e-05, "loss": 8.6726, "step": 1057000 }, { "epoch": 8.46, "learning_rate": 4.5770000000000004e-05, "loss": 8.6737, "step": 1057500 }, { "epoch": 8.46, "learning_rate": 4.5768e-05, "loss": 8.7018, "step": 1058000 }, { "epoch": 8.47, "learning_rate": 4.5766e-05, "loss": 8.661, "step": 1058500 }, { "epoch": 8.47, "learning_rate": 4.5764000000000004e-05, "loss": 8.6806, "step": 1059000 }, { "epoch": 8.48, "learning_rate": 4.576200000000001e-05, "loss": 8.7031, "step": 1059500 }, { "epoch": 8.48, "learning_rate": 4.576e-05, "loss": 8.6937, "step": 1060000 }, { "epoch": 8.48, "learning_rate": 4.5758e-05, "loss": 8.7177, "step": 1060500 }, { "epoch": 8.49, "learning_rate": 4.575600000000001e-05, "loss": 8.6888, "step": 1061000 }, { "epoch": 8.49, "learning_rate": 4.5754e-05, "loss": 8.704, "step": 1061500 }, { "epoch": 8.5, "learning_rate": 4.5752e-05, "loss": 8.692, "step": 1062000 }, { "epoch": 8.5, "learning_rate": 4.575e-05, "loss": 8.6758, "step": 1062500 }, { "epoch": 8.5, "learning_rate": 4.5748e-05, "loss": 8.6767, "step": 1063000 }, { "epoch": 8.51, "learning_rate": 4.5746e-05, "loss": 8.6824, "step": 1063500 }, { "epoch": 8.51, "learning_rate": 4.5744e-05, "loss": 8.6717, "step": 1064000 }, { "epoch": 8.52, "learning_rate": 4.5742000000000004e-05, "loss": 8.67, "step": 1064500 }, { "epoch": 8.52, "learning_rate": 4.574e-05, "loss": 8.6797, "step": 1065000 }, { "epoch": 8.52, "learning_rate": 4.5738e-05, "loss": 8.6688, "step": 1065500 }, { "epoch": 8.53, "learning_rate": 4.5736000000000004e-05, "loss": 8.6806, "step": 1066000 }, { "epoch": 8.53, "learning_rate": 4.5734000000000006e-05, "loss": 8.6835, "step": 1066500 }, { "epoch": 8.54, "learning_rate": 4.5732e-05, "loss": 8.699, "step": 1067000 }, { "epoch": 8.54, "learning_rate": 4.573e-05, "loss": 8.688, "step": 1067500 }, { "epoch": 8.54, "learning_rate": 4.572800000000001e-05, "loss": 8.6814, "step": 1068000 }, { "epoch": 8.55, "learning_rate": 4.5726e-05, "loss": 8.6903, "step": 1068500 }, { "epoch": 8.55, "learning_rate": 4.5724e-05, "loss": 8.7022, "step": 1069000 }, { "epoch": 8.56, "learning_rate": 4.572200000000001e-05, "loss": 8.7054, "step": 1069500 }, { "epoch": 8.56, "learning_rate": 4.572e-05, "loss": 8.6911, "step": 1070000 }, { "epoch": 8.56, "learning_rate": 4.5718e-05, "loss": 8.6921, "step": 1070500 }, { "epoch": 8.57, "learning_rate": 4.5716e-05, "loss": 8.6771, "step": 1071000 }, { "epoch": 8.57, "learning_rate": 4.5714e-05, "loss": 8.6779, "step": 1071500 }, { "epoch": 8.58, "learning_rate": 4.5712000000000006e-05, "loss": 8.706, "step": 1072000 }, { "epoch": 8.58, "learning_rate": 4.571e-05, "loss": 8.6893, "step": 1072500 }, { "epoch": 8.58, "learning_rate": 4.5708000000000004e-05, "loss": 8.7018, "step": 1073000 }, { "epoch": 8.59, "learning_rate": 4.5706000000000006e-05, "loss": 8.7055, "step": 1073500 }, { "epoch": 8.59, "learning_rate": 4.5704e-05, "loss": 8.7174, "step": 1074000 }, { "epoch": 8.6, "learning_rate": 4.5702e-05, "loss": 8.6836, "step": 1074500 }, { "epoch": 8.6, "learning_rate": 4.5700000000000006e-05, "loss": 8.7157, "step": 1075000 }, { "epoch": 8.6, "learning_rate": 4.5698e-05, "loss": 8.6948, "step": 1075500 }, { "epoch": 8.61, "learning_rate": 4.5696e-05, "loss": 8.6763, "step": 1076000 }, { "epoch": 8.61, "learning_rate": 4.569400000000001e-05, "loss": 8.711, "step": 1076500 }, { "epoch": 8.62, "learning_rate": 4.5692e-05, "loss": 8.6814, "step": 1077000 }, { "epoch": 8.62, "learning_rate": 4.569e-05, "loss": 8.695, "step": 1077500 }, { "epoch": 8.62, "learning_rate": 4.5688e-05, "loss": 8.6985, "step": 1078000 }, { "epoch": 8.63, "learning_rate": 4.5686e-05, "loss": 8.6937, "step": 1078500 }, { "epoch": 8.63, "learning_rate": 4.5684000000000005e-05, "loss": 8.674, "step": 1079000 }, { "epoch": 8.64, "learning_rate": 4.5682e-05, "loss": 8.6655, "step": 1079500 }, { "epoch": 8.64, "learning_rate": 4.568e-05, "loss": 8.6605, "step": 1080000 }, { "epoch": 8.64, "learning_rate": 4.5678000000000005e-05, "loss": 8.6464, "step": 1080500 }, { "epoch": 8.65, "learning_rate": 4.5676e-05, "loss": 8.6928, "step": 1081000 }, { "epoch": 8.65, "learning_rate": 4.5674000000000003e-05, "loss": 8.6973, "step": 1081500 }, { "epoch": 8.66, "learning_rate": 4.5672000000000006e-05, "loss": 8.6953, "step": 1082000 }, { "epoch": 8.66, "learning_rate": 4.567e-05, "loss": 8.7098, "step": 1082500 }, { "epoch": 8.66, "learning_rate": 4.5668e-05, "loss": 8.6848, "step": 1083000 }, { "epoch": 8.67, "learning_rate": 4.5666000000000006e-05, "loss": 8.6531, "step": 1083500 }, { "epoch": 8.67, "learning_rate": 4.5664e-05, "loss": 8.6983, "step": 1084000 }, { "epoch": 8.68, "learning_rate": 4.5662000000000004e-05, "loss": 8.6984, "step": 1084500 }, { "epoch": 8.68, "learning_rate": 4.566e-05, "loss": 8.6849, "step": 1085000 }, { "epoch": 8.68, "learning_rate": 4.5658e-05, "loss": 8.6652, "step": 1085500 }, { "epoch": 8.69, "learning_rate": 4.5656000000000005e-05, "loss": 8.6564, "step": 1086000 }, { "epoch": 8.69, "learning_rate": 4.5654e-05, "loss": 8.7027, "step": 1086500 }, { "epoch": 8.7, "learning_rate": 4.5652e-05, "loss": 8.6681, "step": 1087000 }, { "epoch": 8.7, "learning_rate": 4.5650000000000005e-05, "loss": 8.6785, "step": 1087500 }, { "epoch": 8.7, "learning_rate": 4.5648e-05, "loss": 8.6725, "step": 1088000 }, { "epoch": 8.71, "learning_rate": 4.5646e-05, "loss": 8.6948, "step": 1088500 }, { "epoch": 8.71, "learning_rate": 4.5644000000000005e-05, "loss": 8.672, "step": 1089000 }, { "epoch": 8.72, "learning_rate": 4.5642e-05, "loss": 8.703, "step": 1089500 }, { "epoch": 8.72, "learning_rate": 4.564e-05, "loss": 8.6858, "step": 1090000 }, { "epoch": 8.72, "learning_rate": 4.5638000000000006e-05, "loss": 8.7108, "step": 1090500 }, { "epoch": 8.73, "learning_rate": 4.5636e-05, "loss": 8.6863, "step": 1091000 }, { "epoch": 8.73, "learning_rate": 4.5634000000000004e-05, "loss": 8.6943, "step": 1091500 }, { "epoch": 8.74, "learning_rate": 4.5632e-05, "loss": 8.6723, "step": 1092000 }, { "epoch": 8.74, "learning_rate": 4.563e-05, "loss": 8.6849, "step": 1092500 }, { "epoch": 8.74, "learning_rate": 4.5628000000000004e-05, "loss": 8.6747, "step": 1093000 }, { "epoch": 8.75, "learning_rate": 4.5626e-05, "loss": 8.683, "step": 1093500 }, { "epoch": 8.75, "learning_rate": 4.5624e-05, "loss": 8.6828, "step": 1094000 }, { "epoch": 8.76, "learning_rate": 4.5622000000000005e-05, "loss": 8.668, "step": 1094500 }, { "epoch": 8.76, "learning_rate": 4.562e-05, "loss": 8.7027, "step": 1095000 }, { "epoch": 8.76, "learning_rate": 4.5618e-05, "loss": 8.6762, "step": 1095500 }, { "epoch": 8.77, "learning_rate": 4.5616000000000005e-05, "loss": 8.6559, "step": 1096000 }, { "epoch": 8.77, "learning_rate": 4.5614e-05, "loss": 8.682, "step": 1096500 }, { "epoch": 8.78, "learning_rate": 4.5612e-05, "loss": 8.6751, "step": 1097000 }, { "epoch": 8.78, "learning_rate": 4.5610000000000005e-05, "loss": 8.6736, "step": 1097500 }, { "epoch": 8.78, "learning_rate": 4.5608e-05, "loss": 8.6695, "step": 1098000 }, { "epoch": 8.79, "learning_rate": 4.5606000000000003e-05, "loss": 8.6935, "step": 1098500 }, { "epoch": 8.79, "learning_rate": 4.5604e-05, "loss": 8.6871, "step": 1099000 }, { "epoch": 8.8, "learning_rate": 4.5602e-05, "loss": 8.6717, "step": 1099500 }, { "epoch": 8.8, "learning_rate": 4.5600000000000004e-05, "loss": 8.7084, "step": 1100000 }, { "epoch": 8.8, "learning_rate": 4.5598e-05, "loss": 8.6653, "step": 1100500 }, { "epoch": 8.81, "learning_rate": 4.5596e-05, "loss": 8.6648, "step": 1101000 }, { "epoch": 8.81, "learning_rate": 4.5594000000000004e-05, "loss": 8.695, "step": 1101500 }, { "epoch": 8.82, "learning_rate": 4.5592e-05, "loss": 8.6835, "step": 1102000 }, { "epoch": 8.82, "learning_rate": 4.559e-05, "loss": 8.6787, "step": 1102500 }, { "epoch": 8.82, "learning_rate": 4.5588000000000005e-05, "loss": 8.6911, "step": 1103000 }, { "epoch": 8.83, "learning_rate": 4.5586e-05, "loss": 8.6841, "step": 1103500 }, { "epoch": 8.83, "learning_rate": 4.5584e-05, "loss": 8.6883, "step": 1104000 }, { "epoch": 8.84, "learning_rate": 4.5582000000000005e-05, "loss": 8.6887, "step": 1104500 }, { "epoch": 8.84, "learning_rate": 4.558e-05, "loss": 8.6936, "step": 1105000 }, { "epoch": 8.84, "learning_rate": 4.5578e-05, "loss": 8.6631, "step": 1105500 }, { "epoch": 8.85, "learning_rate": 4.5576e-05, "loss": 8.692, "step": 1106000 }, { "epoch": 8.85, "learning_rate": 4.5574e-05, "loss": 8.6803, "step": 1106500 }, { "epoch": 8.86, "learning_rate": 4.5572000000000003e-05, "loss": 8.6719, "step": 1107000 }, { "epoch": 8.86, "learning_rate": 4.557e-05, "loss": 8.6884, "step": 1107500 }, { "epoch": 8.86, "learning_rate": 4.5568e-05, "loss": 8.7145, "step": 1108000 }, { "epoch": 8.87, "learning_rate": 4.5566000000000004e-05, "loss": 8.6818, "step": 1108500 }, { "epoch": 8.87, "learning_rate": 4.5564e-05, "loss": 8.6981, "step": 1109000 }, { "epoch": 8.88, "learning_rate": 4.5562e-05, "loss": 8.6619, "step": 1109500 }, { "epoch": 8.88, "learning_rate": 4.5560000000000004e-05, "loss": 8.6769, "step": 1110000 }, { "epoch": 8.88, "learning_rate": 4.5558e-05, "loss": 8.7165, "step": 1110500 }, { "epoch": 8.89, "learning_rate": 4.5556e-05, "loss": 8.6883, "step": 1111000 }, { "epoch": 8.89, "learning_rate": 4.5554000000000005e-05, "loss": 8.6881, "step": 1111500 }, { "epoch": 8.9, "learning_rate": 4.5552e-05, "loss": 8.6761, "step": 1112000 }, { "epoch": 8.9, "learning_rate": 4.555e-05, "loss": 8.6914, "step": 1112500 }, { "epoch": 8.9, "learning_rate": 4.5548000000000005e-05, "loss": 8.7039, "step": 1113000 }, { "epoch": 8.91, "learning_rate": 4.5546e-05, "loss": 8.6931, "step": 1113500 }, { "epoch": 8.91, "learning_rate": 4.5544e-05, "loss": 8.6731, "step": 1114000 }, { "epoch": 8.92, "learning_rate": 4.5542e-05, "loss": 8.6768, "step": 1114500 }, { "epoch": 8.92, "learning_rate": 4.554000000000001e-05, "loss": 8.6697, "step": 1115000 }, { "epoch": 8.92, "learning_rate": 4.5538000000000003e-05, "loss": 8.679, "step": 1115500 }, { "epoch": 8.93, "learning_rate": 4.5536e-05, "loss": 8.6865, "step": 1116000 }, { "epoch": 8.93, "learning_rate": 4.5534e-05, "loss": 8.6871, "step": 1116500 }, { "epoch": 8.94, "learning_rate": 4.5532000000000004e-05, "loss": 8.6906, "step": 1117000 }, { "epoch": 8.94, "learning_rate": 4.553e-05, "loss": 8.7003, "step": 1117500 }, { "epoch": 8.94, "learning_rate": 4.5528e-05, "loss": 8.6846, "step": 1118000 }, { "epoch": 8.95, "learning_rate": 4.5526000000000004e-05, "loss": 8.6789, "step": 1118500 }, { "epoch": 8.95, "learning_rate": 4.5524e-05, "loss": 8.667, "step": 1119000 }, { "epoch": 8.96, "learning_rate": 4.5522e-05, "loss": 8.6753, "step": 1119500 }, { "epoch": 8.96, "learning_rate": 4.5520000000000005e-05, "loss": 8.7123, "step": 1120000 }, { "epoch": 8.96, "learning_rate": 4.5518e-05, "loss": 8.6812, "step": 1120500 }, { "epoch": 8.97, "learning_rate": 4.5516e-05, "loss": 8.6849, "step": 1121000 }, { "epoch": 8.97, "learning_rate": 4.5514e-05, "loss": 8.6712, "step": 1121500 }, { "epoch": 8.98, "learning_rate": 4.551200000000001e-05, "loss": 8.6826, "step": 1122000 }, { "epoch": 8.98, "learning_rate": 4.551e-05, "loss": 8.6913, "step": 1122500 }, { "epoch": 8.98, "learning_rate": 4.5508e-05, "loss": 8.6773, "step": 1123000 }, { "epoch": 8.99, "learning_rate": 4.5506e-05, "loss": 8.6498, "step": 1123500 }, { "epoch": 8.99, "learning_rate": 4.5504000000000003e-05, "loss": 8.6851, "step": 1124000 }, { "epoch": 9.0, "learning_rate": 4.5502e-05, "loss": 8.6908, "step": 1124500 }, { "epoch": 9.0, "learning_rate": 4.55e-05, "loss": 8.6865, "step": 1125000 }, { "epoch": 9.0, "learning_rate": 4.5498000000000004e-05, "loss": 8.69, "step": 1125500 }, { "epoch": 9.01, "learning_rate": 4.5496e-05, "loss": 8.6816, "step": 1126000 }, { "epoch": 9.01, "learning_rate": 4.5494e-05, "loss": 8.6707, "step": 1126500 }, { "epoch": 9.02, "learning_rate": 4.5492000000000004e-05, "loss": 8.6813, "step": 1127000 }, { "epoch": 9.02, "learning_rate": 4.549000000000001e-05, "loss": 8.6957, "step": 1127500 }, { "epoch": 9.02, "learning_rate": 4.5488e-05, "loss": 8.6902, "step": 1128000 }, { "epoch": 9.03, "learning_rate": 4.5486e-05, "loss": 8.6867, "step": 1128500 }, { "epoch": 9.03, "learning_rate": 4.548400000000001e-05, "loss": 8.7026, "step": 1129000 }, { "epoch": 9.04, "learning_rate": 4.5482e-05, "loss": 8.7217, "step": 1129500 }, { "epoch": 9.04, "learning_rate": 4.548e-05, "loss": 8.716, "step": 1130000 }, { "epoch": 9.04, "learning_rate": 4.5478e-05, "loss": 8.6752, "step": 1130500 }, { "epoch": 9.05, "learning_rate": 4.5476e-05, "loss": 8.6636, "step": 1131000 }, { "epoch": 9.05, "learning_rate": 4.5474e-05, "loss": 8.6819, "step": 1131500 }, { "epoch": 9.06, "learning_rate": 4.5472e-05, "loss": 8.6725, "step": 1132000 }, { "epoch": 9.06, "learning_rate": 4.5470000000000003e-05, "loss": 8.6871, "step": 1132500 }, { "epoch": 9.06, "learning_rate": 4.5468e-05, "loss": 8.683, "step": 1133000 }, { "epoch": 9.07, "learning_rate": 4.5466e-05, "loss": 8.6964, "step": 1133500 }, { "epoch": 9.07, "learning_rate": 4.5464000000000004e-05, "loss": 8.6957, "step": 1134000 }, { "epoch": 9.08, "learning_rate": 4.5462000000000006e-05, "loss": 8.6726, "step": 1134500 }, { "epoch": 9.08, "learning_rate": 4.546e-05, "loss": 8.6876, "step": 1135000 }, { "epoch": 9.08, "learning_rate": 4.5458e-05, "loss": 8.6787, "step": 1135500 }, { "epoch": 9.09, "learning_rate": 4.5456000000000007e-05, "loss": 8.6688, "step": 1136000 }, { "epoch": 9.09, "learning_rate": 4.5454e-05, "loss": 8.6881, "step": 1136500 }, { "epoch": 9.1, "learning_rate": 4.5452e-05, "loss": 8.6567, "step": 1137000 }, { "epoch": 9.1, "learning_rate": 4.545000000000001e-05, "loss": 8.6703, "step": 1137500 }, { "epoch": 9.1, "learning_rate": 4.5448e-05, "loss": 8.6733, "step": 1138000 }, { "epoch": 9.11, "learning_rate": 4.5446e-05, "loss": 8.7065, "step": 1138500 }, { "epoch": 9.11, "learning_rate": 4.5444e-05, "loss": 8.6827, "step": 1139000 }, { "epoch": 9.12, "learning_rate": 4.5442e-05, "loss": 8.7059, "step": 1139500 }, { "epoch": 9.12, "learning_rate": 4.5440000000000005e-05, "loss": 8.6822, "step": 1140000 }, { "epoch": 9.12, "learning_rate": 4.5438e-05, "loss": 8.673, "step": 1140500 }, { "epoch": 9.13, "learning_rate": 4.5436000000000003e-05, "loss": 8.6909, "step": 1141000 }, { "epoch": 9.13, "learning_rate": 4.5434000000000006e-05, "loss": 8.6873, "step": 1141500 }, { "epoch": 9.14, "learning_rate": 4.5432e-05, "loss": 8.6955, "step": 1142000 }, { "epoch": 9.14, "learning_rate": 4.543e-05, "loss": 8.7092, "step": 1142500 }, { "epoch": 9.14, "learning_rate": 4.5428000000000006e-05, "loss": 8.7171, "step": 1143000 }, { "epoch": 9.15, "learning_rate": 4.5426e-05, "loss": 8.6882, "step": 1143500 }, { "epoch": 9.15, "learning_rate": 4.5424e-05, "loss": 8.7058, "step": 1144000 }, { "epoch": 9.16, "learning_rate": 4.5422000000000007e-05, "loss": 8.6528, "step": 1144500 }, { "epoch": 9.16, "learning_rate": 4.542e-05, "loss": 8.7011, "step": 1145000 }, { "epoch": 9.16, "learning_rate": 4.5418e-05, "loss": 8.6872, "step": 1145500 }, { "epoch": 9.17, "learning_rate": 4.5416e-05, "loss": 8.6969, "step": 1146000 }, { "epoch": 9.17, "learning_rate": 4.5414e-05, "loss": 8.6982, "step": 1146500 }, { "epoch": 9.18, "learning_rate": 4.5412000000000005e-05, "loss": 8.7002, "step": 1147000 }, { "epoch": 9.18, "learning_rate": 4.541e-05, "loss": 8.6873, "step": 1147500 }, { "epoch": 9.18, "learning_rate": 4.5408e-05, "loss": 8.6973, "step": 1148000 }, { "epoch": 9.19, "learning_rate": 4.5406000000000005e-05, "loss": 8.694, "step": 1148500 }, { "epoch": 9.19, "learning_rate": 4.5404e-05, "loss": 8.6527, "step": 1149000 }, { "epoch": 9.2, "learning_rate": 4.5402000000000003e-05, "loss": 8.6799, "step": 1149500 }, { "epoch": 9.2, "learning_rate": 4.5400000000000006e-05, "loss": 8.6838, "step": 1150000 }, { "epoch": 9.2, "learning_rate": 4.5398e-05, "loss": 8.6942, "step": 1150500 }, { "epoch": 9.21, "learning_rate": 4.5396e-05, "loss": 8.6611, "step": 1151000 }, { "epoch": 9.21, "learning_rate": 4.5394000000000006e-05, "loss": 8.6773, "step": 1151500 }, { "epoch": 9.22, "learning_rate": 4.5392e-05, "loss": 8.6724, "step": 1152000 }, { "epoch": 9.22, "learning_rate": 4.5390000000000004e-05, "loss": 8.6972, "step": 1152500 }, { "epoch": 9.22, "learning_rate": 4.5388e-05, "loss": 8.6775, "step": 1153000 }, { "epoch": 9.23, "learning_rate": 4.5386e-05, "loss": 8.6831, "step": 1153500 }, { "epoch": 9.23, "learning_rate": 4.5384000000000005e-05, "loss": 8.6901, "step": 1154000 }, { "epoch": 9.24, "learning_rate": 4.5382e-05, "loss": 8.677, "step": 1154500 }, { "epoch": 9.24, "learning_rate": 4.538e-05, "loss": 8.6739, "step": 1155000 }, { "epoch": 9.24, "learning_rate": 4.5378000000000005e-05, "loss": 8.6889, "step": 1155500 }, { "epoch": 9.25, "learning_rate": 4.5376e-05, "loss": 8.6539, "step": 1156000 }, { "epoch": 9.25, "learning_rate": 4.5374e-05, "loss": 8.6665, "step": 1156500 }, { "epoch": 9.26, "learning_rate": 4.5372000000000005e-05, "loss": 8.6792, "step": 1157000 }, { "epoch": 9.26, "learning_rate": 4.537e-05, "loss": 8.6745, "step": 1157500 }, { "epoch": 9.26, "learning_rate": 4.5368e-05, "loss": 8.6843, "step": 1158000 }, { "epoch": 9.27, "learning_rate": 4.5366000000000006e-05, "loss": 8.6662, "step": 1158500 }, { "epoch": 9.27, "learning_rate": 4.5364e-05, "loss": 8.6862, "step": 1159000 }, { "epoch": 9.28, "learning_rate": 4.5362000000000004e-05, "loss": 8.6739, "step": 1159500 }, { "epoch": 9.28, "learning_rate": 4.536e-05, "loss": 8.6797, "step": 1160000 }, { "epoch": 9.28, "learning_rate": 4.5358e-05, "loss": 8.6775, "step": 1160500 }, { "epoch": 9.29, "learning_rate": 4.5356000000000004e-05, "loss": 8.6791, "step": 1161000 }, { "epoch": 9.29, "learning_rate": 4.5354e-05, "loss": 8.6946, "step": 1161500 }, { "epoch": 9.3, "learning_rate": 4.5352e-05, "loss": 8.6883, "step": 1162000 }, { "epoch": 9.3, "learning_rate": 4.5350000000000005e-05, "loss": 8.6724, "step": 1162500 }, { "epoch": 9.3, "learning_rate": 4.5348e-05, "loss": 8.6816, "step": 1163000 }, { "epoch": 9.31, "learning_rate": 4.5346e-05, "loss": 8.6815, "step": 1163500 }, { "epoch": 9.31, "learning_rate": 4.5344000000000005e-05, "loss": 8.6697, "step": 1164000 }, { "epoch": 9.32, "learning_rate": 4.5342e-05, "loss": 8.7017, "step": 1164500 }, { "epoch": 9.32, "learning_rate": 4.534e-05, "loss": 8.6955, "step": 1165000 }, { "epoch": 9.32, "learning_rate": 4.5338000000000005e-05, "loss": 8.6704, "step": 1165500 }, { "epoch": 9.33, "learning_rate": 4.5336e-05, "loss": 8.6589, "step": 1166000 }, { "epoch": 9.33, "learning_rate": 4.5334e-05, "loss": 8.6468, "step": 1166500 }, { "epoch": 9.34, "learning_rate": 4.5332e-05, "loss": 8.693, "step": 1167000 }, { "epoch": 9.34, "learning_rate": 4.533e-05, "loss": 8.6931, "step": 1167500 }, { "epoch": 9.34, "learning_rate": 4.5328000000000004e-05, "loss": 8.7162, "step": 1168000 }, { "epoch": 9.35, "learning_rate": 4.5326e-05, "loss": 8.6884, "step": 1168500 }, { "epoch": 9.35, "learning_rate": 4.5324e-05, "loss": 8.712, "step": 1169000 }, { "epoch": 9.36, "learning_rate": 4.5322000000000004e-05, "loss": 8.6747, "step": 1169500 }, { "epoch": 9.36, "learning_rate": 4.532e-05, "loss": 8.6712, "step": 1170000 }, { "epoch": 9.36, "learning_rate": 4.5318e-05, "loss": 8.677, "step": 1170500 }, { "epoch": 9.37, "learning_rate": 4.5316000000000005e-05, "loss": 8.6598, "step": 1171000 }, { "epoch": 9.37, "learning_rate": 4.5314e-05, "loss": 8.685, "step": 1171500 }, { "epoch": 9.38, "learning_rate": 4.5312e-05, "loss": 8.6576, "step": 1172000 }, { "epoch": 9.38, "learning_rate": 4.5310000000000005e-05, "loss": 8.6396, "step": 1172500 }, { "epoch": 9.38, "learning_rate": 4.5308e-05, "loss": 8.6703, "step": 1173000 }, { "epoch": 9.39, "learning_rate": 4.5306e-05, "loss": 8.6643, "step": 1173500 }, { "epoch": 9.39, "learning_rate": 4.5304000000000005e-05, "loss": 8.6905, "step": 1174000 }, { "epoch": 9.4, "learning_rate": 4.5302e-05, "loss": 8.7058, "step": 1174500 }, { "epoch": 9.4, "learning_rate": 4.53e-05, "loss": 8.667, "step": 1175000 }, { "epoch": 9.4, "learning_rate": 4.5298e-05, "loss": 8.6675, "step": 1175500 }, { "epoch": 9.41, "learning_rate": 4.5296e-05, "loss": 8.7209, "step": 1176000 }, { "epoch": 9.41, "learning_rate": 4.5294000000000004e-05, "loss": 8.6623, "step": 1176500 }, { "epoch": 9.42, "learning_rate": 4.5292e-05, "loss": 8.6906, "step": 1177000 }, { "epoch": 9.42, "learning_rate": 4.529e-05, "loss": 8.6851, "step": 1177500 }, { "epoch": 9.42, "learning_rate": 4.5288000000000004e-05, "loss": 8.6968, "step": 1178000 }, { "epoch": 9.43, "learning_rate": 4.5286e-05, "loss": 8.6732, "step": 1178500 }, { "epoch": 9.43, "learning_rate": 4.5284e-05, "loss": 8.6564, "step": 1179000 }, { "epoch": 9.44, "learning_rate": 4.5282000000000005e-05, "loss": 8.6834, "step": 1179500 }, { "epoch": 9.44, "learning_rate": 4.528e-05, "loss": 8.6975, "step": 1180000 }, { "epoch": 9.44, "learning_rate": 4.5278e-05, "loss": 8.704, "step": 1180500 }, { "epoch": 9.45, "learning_rate": 4.5276000000000005e-05, "loss": 8.6882, "step": 1181000 }, { "epoch": 9.45, "learning_rate": 4.5274e-05, "loss": 8.6667, "step": 1181500 }, { "epoch": 9.46, "learning_rate": 4.5272e-05, "loss": 8.6596, "step": 1182000 }, { "epoch": 9.46, "learning_rate": 4.527e-05, "loss": 8.6778, "step": 1182500 }, { "epoch": 9.46, "learning_rate": 4.526800000000001e-05, "loss": 8.6934, "step": 1183000 }, { "epoch": 9.47, "learning_rate": 4.5266e-05, "loss": 8.677, "step": 1183500 }, { "epoch": 9.47, "learning_rate": 4.5264e-05, "loss": 8.6835, "step": 1184000 }, { "epoch": 9.48, "learning_rate": 4.5262e-05, "loss": 8.6924, "step": 1184500 }, { "epoch": 9.48, "learning_rate": 4.5260000000000004e-05, "loss": 8.6882, "step": 1185000 }, { "epoch": 9.48, "learning_rate": 4.5258e-05, "loss": 8.6857, "step": 1185500 }, { "epoch": 9.49, "learning_rate": 4.5256e-05, "loss": 8.7064, "step": 1186000 }, { "epoch": 9.49, "learning_rate": 4.5254000000000004e-05, "loss": 8.6854, "step": 1186500 }, { "epoch": 9.5, "learning_rate": 4.5252e-05, "loss": 8.6783, "step": 1187000 }, { "epoch": 9.5, "learning_rate": 4.525e-05, "loss": 8.698, "step": 1187500 }, { "epoch": 9.5, "learning_rate": 4.5248000000000005e-05, "loss": 8.6608, "step": 1188000 }, { "epoch": 9.51, "learning_rate": 4.5246e-05, "loss": 8.6811, "step": 1188500 }, { "epoch": 9.51, "learning_rate": 4.5244e-05, "loss": 8.6703, "step": 1189000 }, { "epoch": 9.52, "learning_rate": 4.5242e-05, "loss": 8.6833, "step": 1189500 }, { "epoch": 9.52, "learning_rate": 4.524000000000001e-05, "loss": 8.6919, "step": 1190000 }, { "epoch": 9.52, "learning_rate": 4.5238e-05, "loss": 8.6848, "step": 1190500 }, { "epoch": 9.53, "learning_rate": 4.5236e-05, "loss": 8.7048, "step": 1191000 }, { "epoch": 9.53, "learning_rate": 4.5234e-05, "loss": 8.6826, "step": 1191500 }, { "epoch": 9.54, "learning_rate": 4.5232e-05, "loss": 8.6761, "step": 1192000 }, { "epoch": 9.54, "learning_rate": 4.523e-05, "loss": 8.6748, "step": 1192500 }, { "epoch": 9.54, "learning_rate": 4.5228e-05, "loss": 8.6893, "step": 1193000 }, { "epoch": 9.55, "learning_rate": 4.5226000000000004e-05, "loss": 8.6839, "step": 1193500 }, { "epoch": 9.55, "learning_rate": 4.5224e-05, "loss": 8.6602, "step": 1194000 }, { "epoch": 9.56, "learning_rate": 4.5222e-05, "loss": 8.6843, "step": 1194500 }, { "epoch": 9.56, "learning_rate": 4.5220000000000004e-05, "loss": 8.6746, "step": 1195000 }, { "epoch": 9.56, "learning_rate": 4.5218000000000007e-05, "loss": 8.6799, "step": 1195500 }, { "epoch": 9.57, "learning_rate": 4.5216e-05, "loss": 8.6897, "step": 1196000 }, { "epoch": 9.57, "learning_rate": 4.5214e-05, "loss": 8.6752, "step": 1196500 }, { "epoch": 9.58, "learning_rate": 4.521200000000001e-05, "loss": 8.6867, "step": 1197000 }, { "epoch": 9.58, "learning_rate": 4.521e-05, "loss": 8.7146, "step": 1197500 }, { "epoch": 9.58, "learning_rate": 4.5208e-05, "loss": 8.6859, "step": 1198000 }, { "epoch": 9.59, "learning_rate": 4.5206e-05, "loss": 8.6733, "step": 1198500 }, { "epoch": 9.59, "learning_rate": 4.5204e-05, "loss": 8.6976, "step": 1199000 }, { "epoch": 9.6, "learning_rate": 4.5202e-05, "loss": 8.7035, "step": 1199500 }, { "epoch": 9.6, "learning_rate": 4.52e-05, "loss": 8.7009, "step": 1200000 }, { "epoch": 9.6, "learning_rate": 4.5198e-05, "loss": 8.6647, "step": 1200500 }, { "epoch": 9.61, "learning_rate": 4.5196e-05, "loss": 8.6873, "step": 1201000 }, { "epoch": 9.61, "learning_rate": 4.5194e-05, "loss": 8.6858, "step": 1201500 }, { "epoch": 9.62, "learning_rate": 4.5192000000000004e-05, "loss": 8.715, "step": 1202000 }, { "epoch": 9.62, "learning_rate": 4.5190000000000006e-05, "loss": 8.7143, "step": 1202500 }, { "epoch": 9.62, "learning_rate": 4.5188e-05, "loss": 8.6628, "step": 1203000 }, { "epoch": 9.63, "learning_rate": 4.5186e-05, "loss": 8.7013, "step": 1203500 }, { "epoch": 9.63, "learning_rate": 4.5184000000000006e-05, "loss": 8.6764, "step": 1204000 }, { "epoch": 9.64, "learning_rate": 4.5182e-05, "loss": 8.6965, "step": 1204500 }, { "epoch": 9.64, "learning_rate": 4.518e-05, "loss": 8.6844, "step": 1205000 }, { "epoch": 9.64, "learning_rate": 4.517800000000001e-05, "loss": 8.6834, "step": 1205500 }, { "epoch": 9.65, "learning_rate": 4.5176e-05, "loss": 8.705, "step": 1206000 }, { "epoch": 9.65, "learning_rate": 4.5174e-05, "loss": 8.683, "step": 1206500 }, { "epoch": 9.66, "learning_rate": 4.5172e-05, "loss": 8.6824, "step": 1207000 }, { "epoch": 9.66, "learning_rate": 4.517e-05, "loss": 8.6833, "step": 1207500 }, { "epoch": 9.66, "learning_rate": 4.5168000000000005e-05, "loss": 8.6748, "step": 1208000 }, { "epoch": 9.67, "learning_rate": 4.5166e-05, "loss": 8.6791, "step": 1208500 }, { "epoch": 9.67, "learning_rate": 4.5164e-05, "loss": 8.6444, "step": 1209000 }, { "epoch": 9.68, "learning_rate": 4.5162000000000006e-05, "loss": 8.6819, "step": 1209500 }, { "epoch": 9.68, "learning_rate": 4.516e-05, "loss": 8.6921, "step": 1210000 }, { "epoch": 9.68, "learning_rate": 4.5158000000000004e-05, "loss": 8.6828, "step": 1210500 }, { "epoch": 9.69, "learning_rate": 4.5156000000000006e-05, "loss": 8.6836, "step": 1211000 }, { "epoch": 9.69, "learning_rate": 4.5154e-05, "loss": 8.7005, "step": 1211500 }, { "epoch": 9.7, "learning_rate": 4.5152e-05, "loss": 8.6894, "step": 1212000 }, { "epoch": 9.7, "learning_rate": 4.5150000000000006e-05, "loss": 8.6928, "step": 1212500 }, { "epoch": 9.7, "learning_rate": 4.5148e-05, "loss": 8.6539, "step": 1213000 }, { "epoch": 9.71, "learning_rate": 4.5146e-05, "loss": 8.6771, "step": 1213500 }, { "epoch": 9.71, "learning_rate": 4.5144e-05, "loss": 8.6776, "step": 1214000 }, { "epoch": 9.72, "learning_rate": 4.5142e-05, "loss": 8.7029, "step": 1214500 }, { "epoch": 9.72, "learning_rate": 4.5140000000000005e-05, "loss": 8.687, "step": 1215000 }, { "epoch": 9.72, "learning_rate": 4.5138e-05, "loss": 8.6748, "step": 1215500 }, { "epoch": 9.73, "learning_rate": 4.5136e-05, "loss": 8.673, "step": 1216000 }, { "epoch": 9.73, "learning_rate": 4.5134000000000005e-05, "loss": 8.6738, "step": 1216500 }, { "epoch": 9.74, "learning_rate": 4.5132e-05, "loss": 8.6695, "step": 1217000 }, { "epoch": 9.74, "learning_rate": 4.513e-05, "loss": 8.6698, "step": 1217500 }, { "epoch": 9.74, "learning_rate": 4.5128000000000006e-05, "loss": 8.6836, "step": 1218000 }, { "epoch": 9.75, "learning_rate": 4.5126e-05, "loss": 8.6994, "step": 1218500 }, { "epoch": 9.75, "learning_rate": 4.5124e-05, "loss": 8.6496, "step": 1219000 }, { "epoch": 9.76, "learning_rate": 4.5122000000000006e-05, "loss": 8.6957, "step": 1219500 }, { "epoch": 9.76, "learning_rate": 4.512e-05, "loss": 8.6748, "step": 1220000 }, { "epoch": 9.76, "learning_rate": 4.5118000000000004e-05, "loss": 8.6937, "step": 1220500 }, { "epoch": 9.77, "learning_rate": 4.5116e-05, "loss": 8.6993, "step": 1221000 }, { "epoch": 9.77, "learning_rate": 4.5114e-05, "loss": 8.6919, "step": 1221500 }, { "epoch": 9.78, "learning_rate": 4.5112000000000004e-05, "loss": 8.6906, "step": 1222000 }, { "epoch": 9.78, "learning_rate": 4.511e-05, "loss": 8.669, "step": 1222500 }, { "epoch": 9.78, "learning_rate": 4.5108e-05, "loss": 8.678, "step": 1223000 }, { "epoch": 9.79, "learning_rate": 4.5106000000000005e-05, "loss": 8.6985, "step": 1223500 }, { "epoch": 9.79, "learning_rate": 4.5104e-05, "loss": 8.6903, "step": 1224000 }, { "epoch": 9.8, "learning_rate": 4.5102e-05, "loss": 8.7035, "step": 1224500 }, { "epoch": 9.8, "learning_rate": 4.5100000000000005e-05, "loss": 8.6777, "step": 1225000 }, { "epoch": 9.8, "learning_rate": 4.5098e-05, "loss": 8.6877, "step": 1225500 }, { "epoch": 9.81, "learning_rate": 4.5096e-05, "loss": 8.6793, "step": 1226000 }, { "epoch": 9.81, "learning_rate": 4.5094000000000006e-05, "loss": 8.6831, "step": 1226500 }, { "epoch": 9.82, "learning_rate": 4.5092e-05, "loss": 8.6731, "step": 1227000 }, { "epoch": 9.82, "learning_rate": 4.5090000000000004e-05, "loss": 8.6718, "step": 1227500 }, { "epoch": 9.82, "learning_rate": 4.5088e-05, "loss": 8.6769, "step": 1228000 }, { "epoch": 9.83, "learning_rate": 4.5086e-05, "loss": 8.6699, "step": 1228500 }, { "epoch": 9.83, "learning_rate": 4.5084000000000004e-05, "loss": 8.7083, "step": 1229000 }, { "epoch": 9.84, "learning_rate": 4.5082e-05, "loss": 8.6809, "step": 1229500 }, { "epoch": 9.84, "learning_rate": 4.508e-05, "loss": 8.6919, "step": 1230000 }, { "epoch": 9.84, "learning_rate": 4.5078000000000004e-05, "loss": 8.6801, "step": 1230500 }, { "epoch": 9.85, "learning_rate": 4.5076e-05, "loss": 8.6558, "step": 1231000 }, { "epoch": 9.85, "learning_rate": 4.5074e-05, "loss": 8.6635, "step": 1231500 }, { "epoch": 9.86, "learning_rate": 4.5072000000000005e-05, "loss": 8.6968, "step": 1232000 }, { "epoch": 9.86, "learning_rate": 4.507e-05, "loss": 8.675, "step": 1232500 }, { "epoch": 9.86, "learning_rate": 4.5068e-05, "loss": 8.6717, "step": 1233000 }, { "epoch": 9.87, "learning_rate": 4.5066000000000005e-05, "loss": 8.6742, "step": 1233500 }, { "epoch": 9.87, "learning_rate": 4.5064e-05, "loss": 8.6858, "step": 1234000 }, { "epoch": 9.88, "learning_rate": 4.5062e-05, "loss": 8.6819, "step": 1234500 }, { "epoch": 9.88, "learning_rate": 4.506e-05, "loss": 8.6757, "step": 1235000 }, { "epoch": 9.88, "learning_rate": 4.5058e-05, "loss": 8.6685, "step": 1235500 }, { "epoch": 9.89, "learning_rate": 4.5056000000000004e-05, "loss": 8.6693, "step": 1236000 }, { "epoch": 9.89, "learning_rate": 4.5054e-05, "loss": 8.6885, "step": 1236500 }, { "epoch": 9.9, "learning_rate": 4.5052e-05, "loss": 8.6897, "step": 1237000 }, { "epoch": 9.9, "learning_rate": 4.5050000000000004e-05, "loss": 8.6522, "step": 1237500 }, { "epoch": 9.9, "learning_rate": 4.5048e-05, "loss": 8.6907, "step": 1238000 }, { "epoch": 9.91, "learning_rate": 4.5046e-05, "loss": 8.6739, "step": 1238500 }, { "epoch": 9.91, "learning_rate": 4.5044000000000004e-05, "loss": 8.7044, "step": 1239000 }, { "epoch": 9.92, "learning_rate": 4.5042e-05, "loss": 8.663, "step": 1239500 }, { "epoch": 9.92, "learning_rate": 4.504e-05, "loss": 8.6775, "step": 1240000 }, { "epoch": 9.92, "learning_rate": 4.5038000000000005e-05, "loss": 8.6476, "step": 1240500 }, { "epoch": 9.93, "learning_rate": 4.5036e-05, "loss": 8.698, "step": 1241000 }, { "epoch": 9.93, "learning_rate": 4.5034e-05, "loss": 8.658, "step": 1241500 }, { "epoch": 9.94, "learning_rate": 4.5032000000000005e-05, "loss": 8.6879, "step": 1242000 }, { "epoch": 9.94, "learning_rate": 4.503e-05, "loss": 8.6903, "step": 1242500 }, { "epoch": 9.94, "learning_rate": 4.5028e-05, "loss": 8.6587, "step": 1243000 }, { "epoch": 9.95, "learning_rate": 4.5026e-05, "loss": 8.6678, "step": 1243500 }, { "epoch": 9.95, "learning_rate": 4.5024e-05, "loss": 8.6691, "step": 1244000 }, { "epoch": 9.96, "learning_rate": 4.5022000000000004e-05, "loss": 8.6914, "step": 1244500 }, { "epoch": 9.96, "learning_rate": 4.502e-05, "loss": 8.6846, "step": 1245000 }, { "epoch": 9.96, "learning_rate": 4.5018e-05, "loss": 8.6758, "step": 1245500 }, { "epoch": 9.97, "learning_rate": 4.5016000000000004e-05, "loss": 8.6839, "step": 1246000 }, { "epoch": 9.97, "learning_rate": 4.5014e-05, "loss": 8.7049, "step": 1246500 }, { "epoch": 9.98, "learning_rate": 4.5012e-05, "loss": 8.6841, "step": 1247000 }, { "epoch": 9.98, "learning_rate": 4.5010000000000004e-05, "loss": 8.702, "step": 1247500 }, { "epoch": 9.98, "learning_rate": 4.5008e-05, "loss": 8.699, "step": 1248000 }, { "epoch": 9.99, "learning_rate": 4.5006e-05, "loss": 8.693, "step": 1248500 }, { "epoch": 9.99, "learning_rate": 4.5004000000000005e-05, "loss": 8.6829, "step": 1249000 }, { "epoch": 10.0, "learning_rate": 4.5002e-05, "loss": 8.7048, "step": 1249500 }, { "epoch": 10.0, "learning_rate": 4.5e-05, "loss": 8.6988, "step": 1250000 }, { "epoch": 10.0, "learning_rate": 4.4998e-05, "loss": 8.6821, "step": 1250500 }, { "epoch": 10.01, "learning_rate": 4.499600000000001e-05, "loss": 8.6763, "step": 1251000 }, { "epoch": 10.01, "learning_rate": 4.4994e-05, "loss": 8.7007, "step": 1251500 }, { "epoch": 10.02, "learning_rate": 4.4992e-05, "loss": 8.694, "step": 1252000 }, { "epoch": 10.02, "learning_rate": 4.499e-05, "loss": 8.6748, "step": 1252500 }, { "epoch": 10.02, "learning_rate": 4.4988000000000004e-05, "loss": 8.6479, "step": 1253000 }, { "epoch": 10.03, "learning_rate": 4.4986e-05, "loss": 8.6867, "step": 1253500 }, { "epoch": 10.03, "learning_rate": 4.4984e-05, "loss": 8.6867, "step": 1254000 }, { "epoch": 10.04, "learning_rate": 4.4982000000000004e-05, "loss": 8.6843, "step": 1254500 }, { "epoch": 10.04, "learning_rate": 4.498e-05, "loss": 8.6788, "step": 1255000 }, { "epoch": 10.04, "learning_rate": 4.4978e-05, "loss": 8.6696, "step": 1255500 }, { "epoch": 10.05, "learning_rate": 4.4976000000000004e-05, "loss": 8.6893, "step": 1256000 }, { "epoch": 10.05, "learning_rate": 4.4974e-05, "loss": 8.6841, "step": 1256500 }, { "epoch": 10.06, "learning_rate": 4.4972e-05, "loss": 8.6759, "step": 1257000 }, { "epoch": 10.06, "learning_rate": 4.497e-05, "loss": 8.7065, "step": 1257500 }, { "epoch": 10.06, "learning_rate": 4.496800000000001e-05, "loss": 8.6793, "step": 1258000 }, { "epoch": 10.07, "learning_rate": 4.4966e-05, "loss": 8.6588, "step": 1258500 }, { "epoch": 10.07, "learning_rate": 4.4964e-05, "loss": 8.6842, "step": 1259000 }, { "epoch": 10.08, "learning_rate": 4.4962e-05, "loss": 8.704, "step": 1259500 }, { "epoch": 10.08, "learning_rate": 4.496e-05, "loss": 8.6781, "step": 1260000 }, { "epoch": 10.08, "learning_rate": 4.4958e-05, "loss": 8.6905, "step": 1260500 }, { "epoch": 10.09, "learning_rate": 4.4956e-05, "loss": 8.6818, "step": 1261000 }, { "epoch": 10.09, "learning_rate": 4.4954000000000004e-05, "loss": 8.6809, "step": 1261500 }, { "epoch": 10.1, "learning_rate": 4.4952e-05, "loss": 8.6735, "step": 1262000 }, { "epoch": 10.1, "learning_rate": 4.495e-05, "loss": 8.6851, "step": 1262500 }, { "epoch": 10.1, "learning_rate": 4.4948000000000004e-05, "loss": 8.6629, "step": 1263000 }, { "epoch": 10.11, "learning_rate": 4.4946000000000006e-05, "loss": 8.6771, "step": 1263500 }, { "epoch": 10.11, "learning_rate": 4.4944e-05, "loss": 8.6778, "step": 1264000 }, { "epoch": 10.12, "learning_rate": 4.4942e-05, "loss": 8.6912, "step": 1264500 }, { "epoch": 10.12, "learning_rate": 4.494000000000001e-05, "loss": 8.6871, "step": 1265000 }, { "epoch": 10.12, "learning_rate": 4.4938e-05, "loss": 8.6768, "step": 1265500 }, { "epoch": 10.13, "learning_rate": 4.4936e-05, "loss": 8.6829, "step": 1266000 }, { "epoch": 10.13, "learning_rate": 4.493400000000001e-05, "loss": 8.6759, "step": 1266500 }, { "epoch": 10.14, "learning_rate": 4.4932e-05, "loss": 8.6955, "step": 1267000 }, { "epoch": 10.14, "learning_rate": 4.493e-05, "loss": 8.6918, "step": 1267500 }, { "epoch": 10.14, "learning_rate": 4.4928e-05, "loss": 8.6779, "step": 1268000 }, { "epoch": 10.15, "learning_rate": 4.4926e-05, "loss": 8.7041, "step": 1268500 }, { "epoch": 10.15, "learning_rate": 4.4924e-05, "loss": 8.6847, "step": 1269000 }, { "epoch": 10.16, "learning_rate": 4.4922e-05, "loss": 8.6974, "step": 1269500 }, { "epoch": 10.16, "learning_rate": 4.4920000000000004e-05, "loss": 8.6668, "step": 1270000 }, { "epoch": 10.16, "learning_rate": 4.4918000000000006e-05, "loss": 8.6764, "step": 1270500 }, { "epoch": 10.17, "learning_rate": 4.4916e-05, "loss": 8.7033, "step": 1271000 }, { "epoch": 10.17, "learning_rate": 4.4914e-05, "loss": 8.6782, "step": 1271500 }, { "epoch": 10.18, "learning_rate": 4.4912000000000006e-05, "loss": 8.6733, "step": 1272000 }, { "epoch": 10.18, "learning_rate": 4.491e-05, "loss": 8.6912, "step": 1272500 }, { "epoch": 10.18, "learning_rate": 4.4908e-05, "loss": 8.6948, "step": 1273000 }, { "epoch": 10.19, "learning_rate": 4.490600000000001e-05, "loss": 8.6829, "step": 1273500 }, { "epoch": 10.19, "learning_rate": 4.4904e-05, "loss": 8.6759, "step": 1274000 }, { "epoch": 10.2, "learning_rate": 4.4902e-05, "loss": 8.6712, "step": 1274500 }, { "epoch": 10.2, "learning_rate": 4.49e-05, "loss": 8.6979, "step": 1275000 }, { "epoch": 10.2, "learning_rate": 4.4898e-05, "loss": 8.677, "step": 1275500 }, { "epoch": 10.21, "learning_rate": 4.4896000000000005e-05, "loss": 8.692, "step": 1276000 }, { "epoch": 10.21, "learning_rate": 4.4894e-05, "loss": 8.6974, "step": 1276500 }, { "epoch": 10.22, "learning_rate": 4.4892e-05, "loss": 8.6646, "step": 1277000 }, { "epoch": 10.22, "learning_rate": 4.4890000000000006e-05, "loss": 8.703, "step": 1277500 }, { "epoch": 10.22, "learning_rate": 4.4888e-05, "loss": 8.6844, "step": 1278000 }, { "epoch": 10.23, "learning_rate": 4.4886000000000004e-05, "loss": 8.6685, "step": 1278500 }, { "epoch": 10.23, "learning_rate": 4.4884000000000006e-05, "loss": 8.6807, "step": 1279000 }, { "epoch": 10.24, "learning_rate": 4.4882e-05, "loss": 8.6909, "step": 1279500 }, { "epoch": 10.24, "learning_rate": 4.488e-05, "loss": 8.6838, "step": 1280000 }, { "epoch": 10.24, "learning_rate": 4.4878000000000006e-05, "loss": 8.6648, "step": 1280500 }, { "epoch": 10.25, "learning_rate": 4.4876e-05, "loss": 8.6703, "step": 1281000 }, { "epoch": 10.25, "learning_rate": 4.4874000000000004e-05, "loss": 8.7103, "step": 1281500 }, { "epoch": 10.26, "learning_rate": 4.4872e-05, "loss": 8.684, "step": 1282000 }, { "epoch": 10.26, "learning_rate": 4.487e-05, "loss": 8.645, "step": 1282500 }, { "epoch": 10.26, "learning_rate": 4.4868000000000005e-05, "loss": 8.6707, "step": 1283000 }, { "epoch": 10.27, "learning_rate": 4.4866e-05, "loss": 8.6758, "step": 1283500 }, { "epoch": 10.27, "learning_rate": 4.4864e-05, "loss": 8.679, "step": 1284000 }, { "epoch": 10.28, "learning_rate": 4.4862000000000005e-05, "loss": 8.672, "step": 1284500 }, { "epoch": 10.28, "learning_rate": 4.486e-05, "loss": 8.6981, "step": 1285000 }, { "epoch": 10.28, "learning_rate": 4.4858e-05, "loss": 8.6861, "step": 1285500 }, { "epoch": 10.29, "learning_rate": 4.4856000000000006e-05, "loss": 8.6675, "step": 1286000 }, { "epoch": 10.29, "learning_rate": 4.4854e-05, "loss": 8.6978, "step": 1286500 }, { "epoch": 10.3, "learning_rate": 4.4852e-05, "loss": 8.7052, "step": 1287000 }, { "epoch": 10.3, "learning_rate": 4.4850000000000006e-05, "loss": 8.6748, "step": 1287500 }, { "epoch": 10.3, "learning_rate": 4.4848e-05, "loss": 8.6747, "step": 1288000 }, { "epoch": 10.31, "learning_rate": 4.4846000000000004e-05, "loss": 8.6947, "step": 1288500 }, { "epoch": 10.31, "learning_rate": 4.4844e-05, "loss": 8.6767, "step": 1289000 }, { "epoch": 10.32, "learning_rate": 4.4842e-05, "loss": 8.6911, "step": 1289500 }, { "epoch": 10.32, "learning_rate": 4.4840000000000004e-05, "loss": 8.6862, "step": 1290000 }, { "epoch": 10.32, "learning_rate": 4.4838e-05, "loss": 8.6846, "step": 1290500 }, { "epoch": 10.33, "learning_rate": 4.4836e-05, "loss": 8.6587, "step": 1291000 }, { "epoch": 10.33, "learning_rate": 4.4834000000000005e-05, "loss": 8.6888, "step": 1291500 }, { "epoch": 10.34, "learning_rate": 4.4832e-05, "loss": 8.7028, "step": 1292000 }, { "epoch": 10.34, "learning_rate": 4.483e-05, "loss": 8.669, "step": 1292500 }, { "epoch": 10.34, "learning_rate": 4.4828000000000005e-05, "loss": 8.6689, "step": 1293000 }, { "epoch": 10.35, "learning_rate": 4.4826e-05, "loss": 8.6661, "step": 1293500 }, { "epoch": 10.35, "learning_rate": 4.4824e-05, "loss": 8.6998, "step": 1294000 }, { "epoch": 10.36, "learning_rate": 4.4822000000000006e-05, "loss": 8.6838, "step": 1294500 }, { "epoch": 10.36, "learning_rate": 4.482e-05, "loss": 8.6834, "step": 1295000 }, { "epoch": 10.36, "learning_rate": 4.4818000000000004e-05, "loss": 8.6713, "step": 1295500 }, { "epoch": 10.37, "learning_rate": 4.4816e-05, "loss": 8.672, "step": 1296000 }, { "epoch": 10.37, "learning_rate": 4.4814e-05, "loss": 8.6941, "step": 1296500 }, { "epoch": 10.38, "learning_rate": 4.4812000000000004e-05, "loss": 8.6948, "step": 1297000 }, { "epoch": 10.38, "learning_rate": 4.481e-05, "loss": 8.6743, "step": 1297500 }, { "epoch": 10.38, "learning_rate": 4.4808e-05, "loss": 8.6913, "step": 1298000 }, { "epoch": 10.39, "learning_rate": 4.4806000000000004e-05, "loss": 8.699, "step": 1298500 }, { "epoch": 10.39, "learning_rate": 4.4804e-05, "loss": 8.6978, "step": 1299000 }, { "epoch": 10.4, "learning_rate": 4.4802e-05, "loss": 8.6837, "step": 1299500 }, { "epoch": 10.4, "learning_rate": 4.4800000000000005e-05, "loss": 8.6797, "step": 1300000 }, { "epoch": 10.4, "learning_rate": 4.4798e-05, "loss": 8.6873, "step": 1300500 }, { "epoch": 10.41, "learning_rate": 4.4796e-05, "loss": 8.6678, "step": 1301000 }, { "epoch": 10.41, "learning_rate": 4.4794000000000005e-05, "loss": 8.6768, "step": 1301500 }, { "epoch": 10.42, "learning_rate": 4.4792e-05, "loss": 8.6882, "step": 1302000 }, { "epoch": 10.42, "learning_rate": 4.479e-05, "loss": 8.6682, "step": 1302500 }, { "epoch": 10.42, "learning_rate": 4.4788000000000006e-05, "loss": 8.6659, "step": 1303000 }, { "epoch": 10.43, "learning_rate": 4.4786e-05, "loss": 8.676, "step": 1303500 }, { "epoch": 10.43, "learning_rate": 4.4784000000000004e-05, "loss": 8.6675, "step": 1304000 }, { "epoch": 10.44, "learning_rate": 4.4782e-05, "loss": 8.6916, "step": 1304500 }, { "epoch": 10.44, "learning_rate": 4.478e-05, "loss": 8.674, "step": 1305000 }, { "epoch": 10.44, "learning_rate": 4.4778000000000004e-05, "loss": 8.7042, "step": 1305500 }, { "epoch": 10.45, "learning_rate": 4.4776e-05, "loss": 8.6648, "step": 1306000 }, { "epoch": 10.45, "learning_rate": 4.4774e-05, "loss": 8.6879, "step": 1306500 }, { "epoch": 10.46, "learning_rate": 4.4772000000000004e-05, "loss": 8.6917, "step": 1307000 }, { "epoch": 10.46, "learning_rate": 4.477e-05, "loss": 8.6734, "step": 1307500 }, { "epoch": 10.46, "learning_rate": 4.4768e-05, "loss": 8.6804, "step": 1308000 }, { "epoch": 10.47, "learning_rate": 4.4766000000000005e-05, "loss": 8.6807, "step": 1308500 }, { "epoch": 10.47, "learning_rate": 4.4764e-05, "loss": 8.6974, "step": 1309000 }, { "epoch": 10.48, "learning_rate": 4.4762e-05, "loss": 8.6956, "step": 1309500 }, { "epoch": 10.48, "learning_rate": 4.4760000000000005e-05, "loss": 8.6606, "step": 1310000 }, { "epoch": 10.48, "learning_rate": 4.4758e-05, "loss": 8.6837, "step": 1310500 }, { "epoch": 10.49, "learning_rate": 4.4756e-05, "loss": 8.6747, "step": 1311000 }, { "epoch": 10.49, "learning_rate": 4.4754e-05, "loss": 8.6753, "step": 1311500 }, { "epoch": 10.5, "learning_rate": 4.4752e-05, "loss": 8.6538, "step": 1312000 }, { "epoch": 10.5, "learning_rate": 4.4750000000000004e-05, "loss": 8.6761, "step": 1312500 }, { "epoch": 10.5, "learning_rate": 4.4748e-05, "loss": 8.68, "step": 1313000 }, { "epoch": 10.51, "learning_rate": 4.4746e-05, "loss": 8.689, "step": 1313500 }, { "epoch": 10.51, "learning_rate": 4.4744000000000004e-05, "loss": 8.6889, "step": 1314000 }, { "epoch": 10.52, "learning_rate": 4.4742e-05, "loss": 8.6552, "step": 1314500 }, { "epoch": 10.52, "learning_rate": 4.474e-05, "loss": 8.6928, "step": 1315000 }, { "epoch": 10.52, "learning_rate": 4.4738000000000004e-05, "loss": 8.6594, "step": 1315500 }, { "epoch": 10.53, "learning_rate": 4.4736e-05, "loss": 8.6797, "step": 1316000 }, { "epoch": 10.53, "learning_rate": 4.4734e-05, "loss": 8.6711, "step": 1316500 }, { "epoch": 10.54, "learning_rate": 4.4732000000000005e-05, "loss": 8.6532, "step": 1317000 }, { "epoch": 10.54, "learning_rate": 4.473e-05, "loss": 8.6831, "step": 1317500 }, { "epoch": 10.54, "learning_rate": 4.4728e-05, "loss": 8.6986, "step": 1318000 }, { "epoch": 10.55, "learning_rate": 4.4726e-05, "loss": 8.6883, "step": 1318500 }, { "epoch": 10.55, "learning_rate": 4.472400000000001e-05, "loss": 8.7141, "step": 1319000 }, { "epoch": 10.56, "learning_rate": 4.4722e-05, "loss": 8.6755, "step": 1319500 }, { "epoch": 10.56, "learning_rate": 4.472e-05, "loss": 8.6628, "step": 1320000 }, { "epoch": 10.56, "learning_rate": 4.4718e-05, "loss": 8.6673, "step": 1320500 }, { "epoch": 10.57, "learning_rate": 4.4716000000000004e-05, "loss": 8.6782, "step": 1321000 }, { "epoch": 10.57, "learning_rate": 4.4714e-05, "loss": 8.6657, "step": 1321500 }, { "epoch": 10.58, "learning_rate": 4.4712e-05, "loss": 8.6986, "step": 1322000 }, { "epoch": 10.58, "learning_rate": 4.4710000000000004e-05, "loss": 8.673, "step": 1322500 }, { "epoch": 10.58, "learning_rate": 4.4708e-05, "loss": 8.6761, "step": 1323000 }, { "epoch": 10.59, "learning_rate": 4.4706e-05, "loss": 8.6888, "step": 1323500 }, { "epoch": 10.59, "learning_rate": 4.4704000000000004e-05, "loss": 8.6774, "step": 1324000 }, { "epoch": 10.6, "learning_rate": 4.4702e-05, "loss": 8.6629, "step": 1324500 }, { "epoch": 10.6, "learning_rate": 4.47e-05, "loss": 8.6626, "step": 1325000 }, { "epoch": 10.6, "learning_rate": 4.4698e-05, "loss": 8.6831, "step": 1325500 }, { "epoch": 10.61, "learning_rate": 4.469600000000001e-05, "loss": 8.6885, "step": 1326000 }, { "epoch": 10.61, "learning_rate": 4.4694e-05, "loss": 8.7342, "step": 1326500 }, { "epoch": 10.62, "learning_rate": 4.4692e-05, "loss": 8.7031, "step": 1327000 }, { "epoch": 10.62, "learning_rate": 4.469e-05, "loss": 8.7079, "step": 1327500 }, { "epoch": 10.62, "learning_rate": 4.4688e-05, "loss": 8.6874, "step": 1328000 }, { "epoch": 10.63, "learning_rate": 4.4686e-05, "loss": 8.7168, "step": 1328500 }, { "epoch": 10.63, "learning_rate": 4.4684e-05, "loss": 8.683, "step": 1329000 }, { "epoch": 10.64, "learning_rate": 4.4682000000000004e-05, "loss": 8.676, "step": 1329500 }, { "epoch": 10.64, "learning_rate": 4.468e-05, "loss": 8.6367, "step": 1330000 }, { "epoch": 10.64, "learning_rate": 4.4678e-05, "loss": 8.6868, "step": 1330500 }, { "epoch": 10.65, "learning_rate": 4.4676000000000004e-05, "loss": 8.7022, "step": 1331000 }, { "epoch": 10.65, "learning_rate": 4.4674000000000006e-05, "loss": 8.7035, "step": 1331500 }, { "epoch": 10.66, "learning_rate": 4.4672e-05, "loss": 8.6841, "step": 1332000 }, { "epoch": 10.66, "learning_rate": 4.467e-05, "loss": 8.6874, "step": 1332500 }, { "epoch": 10.66, "learning_rate": 4.466800000000001e-05, "loss": 8.7151, "step": 1333000 }, { "epoch": 10.67, "learning_rate": 4.4666e-05, "loss": 8.6965, "step": 1333500 }, { "epoch": 10.67, "learning_rate": 4.4664e-05, "loss": 8.6873, "step": 1334000 }, { "epoch": 10.68, "learning_rate": 4.466200000000001e-05, "loss": 8.6843, "step": 1334500 }, { "epoch": 10.68, "learning_rate": 4.466e-05, "loss": 8.6629, "step": 1335000 }, { "epoch": 10.68, "learning_rate": 4.4658e-05, "loss": 8.6947, "step": 1335500 }, { "epoch": 10.69, "learning_rate": 4.4656e-05, "loss": 8.6782, "step": 1336000 }, { "epoch": 10.69, "learning_rate": 4.4654e-05, "loss": 8.6953, "step": 1336500 }, { "epoch": 10.7, "learning_rate": 4.4652e-05, "loss": 8.6806, "step": 1337000 }, { "epoch": 10.7, "learning_rate": 4.465e-05, "loss": 8.6673, "step": 1337500 }, { "epoch": 10.7, "learning_rate": 4.4648000000000003e-05, "loss": 8.6953, "step": 1338000 }, { "epoch": 10.71, "learning_rate": 4.4646000000000006e-05, "loss": 8.6766, "step": 1338500 }, { "epoch": 10.71, "learning_rate": 4.4644e-05, "loss": 8.6922, "step": 1339000 }, { "epoch": 10.72, "learning_rate": 4.4642e-05, "loss": 8.6896, "step": 1339500 }, { "epoch": 10.72, "learning_rate": 4.4640000000000006e-05, "loss": 8.6947, "step": 1340000 }, { "epoch": 10.72, "learning_rate": 4.4638e-05, "loss": 8.6774, "step": 1340500 }, { "epoch": 10.73, "learning_rate": 4.4636e-05, "loss": 8.6984, "step": 1341000 }, { "epoch": 10.73, "learning_rate": 4.463400000000001e-05, "loss": 8.6892, "step": 1341500 }, { "epoch": 10.74, "learning_rate": 4.4632e-05, "loss": 8.6877, "step": 1342000 }, { "epoch": 10.74, "learning_rate": 4.463e-05, "loss": 8.6854, "step": 1342500 }, { "epoch": 10.74, "learning_rate": 4.4628e-05, "loss": 8.7147, "step": 1343000 }, { "epoch": 10.75, "learning_rate": 4.4626e-05, "loss": 8.6754, "step": 1343500 }, { "epoch": 10.75, "learning_rate": 4.4624000000000005e-05, "loss": 8.6926, "step": 1344000 }, { "epoch": 10.76, "learning_rate": 4.4622e-05, "loss": 8.6954, "step": 1344500 }, { "epoch": 10.76, "learning_rate": 4.462e-05, "loss": 8.6694, "step": 1345000 }, { "epoch": 10.76, "learning_rate": 4.4618000000000005e-05, "loss": 8.6997, "step": 1345500 }, { "epoch": 10.77, "learning_rate": 4.4616e-05, "loss": 8.6677, "step": 1346000 }, { "epoch": 10.77, "learning_rate": 4.4614000000000003e-05, "loss": 8.6774, "step": 1346500 }, { "epoch": 10.78, "learning_rate": 4.4612000000000006e-05, "loss": 8.6694, "step": 1347000 }, { "epoch": 10.78, "learning_rate": 4.461e-05, "loss": 8.6681, "step": 1347500 }, { "epoch": 10.78, "learning_rate": 4.4608e-05, "loss": 8.6813, "step": 1348000 }, { "epoch": 10.79, "learning_rate": 4.4606000000000006e-05, "loss": 8.6797, "step": 1348500 }, { "epoch": 10.79, "learning_rate": 4.4604e-05, "loss": 8.6917, "step": 1349000 }, { "epoch": 10.8, "learning_rate": 4.4602000000000004e-05, "loss": 8.6854, "step": 1349500 }, { "epoch": 10.8, "learning_rate": 4.46e-05, "loss": 8.6866, "step": 1350000 }, { "epoch": 10.8, "learning_rate": 4.4598e-05, "loss": 8.6949, "step": 1350500 }, { "epoch": 10.81, "learning_rate": 4.4596000000000005e-05, "loss": 8.6847, "step": 1351000 }, { "epoch": 10.81, "learning_rate": 4.4594e-05, "loss": 8.6616, "step": 1351500 }, { "epoch": 10.82, "learning_rate": 4.4592e-05, "loss": 8.6827, "step": 1352000 }, { "epoch": 10.82, "learning_rate": 4.4590000000000005e-05, "loss": 8.6497, "step": 1352500 }, { "epoch": 10.82, "learning_rate": 4.4588e-05, "loss": 8.685, "step": 1353000 }, { "epoch": 10.83, "learning_rate": 4.4586e-05, "loss": 8.6852, "step": 1353500 }, { "epoch": 10.83, "learning_rate": 4.4584000000000005e-05, "loss": 8.669, "step": 1354000 }, { "epoch": 10.84, "learning_rate": 4.4582e-05, "loss": 8.6911, "step": 1354500 }, { "epoch": 10.84, "learning_rate": 4.458e-05, "loss": 8.6871, "step": 1355000 }, { "epoch": 10.84, "learning_rate": 4.4578000000000006e-05, "loss": 8.6838, "step": 1355500 }, { "epoch": 10.85, "learning_rate": 4.4576e-05, "loss": 8.6989, "step": 1356000 }, { "epoch": 10.85, "learning_rate": 4.4574000000000004e-05, "loss": 8.6843, "step": 1356500 }, { "epoch": 10.86, "learning_rate": 4.4572e-05, "loss": 8.6736, "step": 1357000 }, { "epoch": 10.86, "learning_rate": 4.457e-05, "loss": 8.6755, "step": 1357500 }, { "epoch": 10.86, "learning_rate": 4.4568000000000004e-05, "loss": 8.6829, "step": 1358000 }, { "epoch": 10.87, "learning_rate": 4.4566e-05, "loss": 8.6925, "step": 1358500 }, { "epoch": 10.87, "learning_rate": 4.4564e-05, "loss": 8.6795, "step": 1359000 }, { "epoch": 10.88, "learning_rate": 4.4562000000000005e-05, "loss": 8.6818, "step": 1359500 }, { "epoch": 10.88, "learning_rate": 4.456e-05, "loss": 8.6958, "step": 1360000 }, { "epoch": 10.88, "learning_rate": 4.4558e-05, "loss": 8.6859, "step": 1360500 }, { "epoch": 10.89, "learning_rate": 4.4556000000000005e-05, "loss": 8.6639, "step": 1361000 }, { "epoch": 10.89, "learning_rate": 4.4554e-05, "loss": 8.6765, "step": 1361500 }, { "epoch": 10.9, "learning_rate": 4.4552e-05, "loss": 8.6821, "step": 1362000 }, { "epoch": 10.9, "learning_rate": 4.4550000000000005e-05, "loss": 8.6866, "step": 1362500 }, { "epoch": 10.9, "learning_rate": 4.4548e-05, "loss": 8.6553, "step": 1363000 }, { "epoch": 10.91, "learning_rate": 4.4546000000000003e-05, "loss": 8.6824, "step": 1363500 }, { "epoch": 10.91, "learning_rate": 4.4544e-05, "loss": 8.6674, "step": 1364000 }, { "epoch": 10.92, "learning_rate": 4.4542e-05, "loss": 8.6805, "step": 1364500 }, { "epoch": 10.92, "learning_rate": 4.4540000000000004e-05, "loss": 8.6714, "step": 1365000 }, { "epoch": 10.92, "learning_rate": 4.4538e-05, "loss": 8.7068, "step": 1365500 }, { "epoch": 10.93, "learning_rate": 4.4536e-05, "loss": 8.6629, "step": 1366000 }, { "epoch": 10.93, "learning_rate": 4.4534000000000004e-05, "loss": 8.6774, "step": 1366500 }, { "epoch": 10.94, "learning_rate": 4.4532e-05, "loss": 8.6886, "step": 1367000 }, { "epoch": 10.94, "learning_rate": 4.453e-05, "loss": 8.6808, "step": 1367500 }, { "epoch": 10.94, "learning_rate": 4.4528000000000005e-05, "loss": 8.6658, "step": 1368000 }, { "epoch": 10.95, "learning_rate": 4.4526e-05, "loss": 8.6964, "step": 1368500 }, { "epoch": 10.95, "learning_rate": 4.4524e-05, "loss": 8.6908, "step": 1369000 }, { "epoch": 10.96, "learning_rate": 4.4522000000000005e-05, "loss": 8.6808, "step": 1369500 }, { "epoch": 10.96, "learning_rate": 4.452e-05, "loss": 8.6823, "step": 1370000 }, { "epoch": 10.96, "learning_rate": 4.4518e-05, "loss": 8.6761, "step": 1370500 }, { "epoch": 10.97, "learning_rate": 4.4516000000000005e-05, "loss": 8.685, "step": 1371000 }, { "epoch": 10.97, "learning_rate": 4.4514e-05, "loss": 8.6699, "step": 1371500 }, { "epoch": 10.98, "learning_rate": 4.4512000000000003e-05, "loss": 8.6561, "step": 1372000 }, { "epoch": 10.98, "learning_rate": 4.451e-05, "loss": 8.6768, "step": 1372500 }, { "epoch": 10.98, "learning_rate": 4.4508e-05, "loss": 8.6836, "step": 1373000 }, { "epoch": 10.99, "learning_rate": 4.4506000000000004e-05, "loss": 8.6828, "step": 1373500 }, { "epoch": 10.99, "learning_rate": 4.4504e-05, "loss": 8.6874, "step": 1374000 }, { "epoch": 11.0, "learning_rate": 4.4502e-05, "loss": 8.6723, "step": 1374500 }, { "epoch": 11.0, "learning_rate": 4.4500000000000004e-05, "loss": 8.6568, "step": 1375000 }, { "epoch": 11.0, "learning_rate": 4.4498e-05, "loss": 8.6732, "step": 1375500 }, { "epoch": 11.01, "learning_rate": 4.4496e-05, "loss": 8.6856, "step": 1376000 }, { "epoch": 11.01, "learning_rate": 4.4494000000000005e-05, "loss": 8.6748, "step": 1376500 }, { "epoch": 11.02, "learning_rate": 4.4492e-05, "loss": 8.6798, "step": 1377000 }, { "epoch": 11.02, "learning_rate": 4.449e-05, "loss": 8.6773, "step": 1377500 }, { "epoch": 11.02, "learning_rate": 4.4488000000000005e-05, "loss": 8.686, "step": 1378000 }, { "epoch": 11.03, "learning_rate": 4.4486e-05, "loss": 8.6831, "step": 1378500 }, { "epoch": 11.03, "learning_rate": 4.4484e-05, "loss": 8.6704, "step": 1379000 }, { "epoch": 11.04, "learning_rate": 4.4482e-05, "loss": 8.6722, "step": 1379500 }, { "epoch": 11.04, "learning_rate": 4.448e-05, "loss": 8.6715, "step": 1380000 }, { "epoch": 11.04, "learning_rate": 4.4478000000000003e-05, "loss": 8.6763, "step": 1380500 }, { "epoch": 11.05, "learning_rate": 4.4476e-05, "loss": 8.6949, "step": 1381000 }, { "epoch": 11.05, "learning_rate": 4.4474e-05, "loss": 8.6873, "step": 1381500 }, { "epoch": 11.06, "learning_rate": 4.4472000000000004e-05, "loss": 8.6794, "step": 1382000 }, { "epoch": 11.06, "learning_rate": 4.447e-05, "loss": 8.6865, "step": 1382500 }, { "epoch": 11.06, "learning_rate": 4.4468e-05, "loss": 8.6937, "step": 1383000 }, { "epoch": 11.07, "learning_rate": 4.4466000000000004e-05, "loss": 8.6774, "step": 1383500 }, { "epoch": 11.07, "learning_rate": 4.4464e-05, "loss": 8.671, "step": 1384000 }, { "epoch": 11.08, "learning_rate": 4.4462e-05, "loss": 8.6739, "step": 1384500 }, { "epoch": 11.08, "learning_rate": 4.4460000000000005e-05, "loss": 8.6876, "step": 1385000 }, { "epoch": 11.08, "learning_rate": 4.4458e-05, "loss": 8.6725, "step": 1385500 }, { "epoch": 11.09, "learning_rate": 4.4456e-05, "loss": 8.696, "step": 1386000 }, { "epoch": 11.09, "learning_rate": 4.4454e-05, "loss": 8.6979, "step": 1386500 }, { "epoch": 11.1, "learning_rate": 4.445200000000001e-05, "loss": 8.6636, "step": 1387000 }, { "epoch": 11.1, "learning_rate": 4.445e-05, "loss": 8.6806, "step": 1387500 }, { "epoch": 11.1, "learning_rate": 4.4448e-05, "loss": 8.6741, "step": 1388000 }, { "epoch": 11.11, "learning_rate": 4.4446e-05, "loss": 8.6797, "step": 1388500 }, { "epoch": 11.11, "learning_rate": 4.4444000000000003e-05, "loss": 8.675, "step": 1389000 }, { "epoch": 11.12, "learning_rate": 4.4442e-05, "loss": 8.6788, "step": 1389500 }, { "epoch": 11.12, "learning_rate": 4.444e-05, "loss": 8.6812, "step": 1390000 }, { "epoch": 11.12, "learning_rate": 4.4438000000000004e-05, "loss": 8.7191, "step": 1390500 }, { "epoch": 11.13, "learning_rate": 4.4436e-05, "loss": 8.6671, "step": 1391000 }, { "epoch": 11.13, "learning_rate": 4.4434e-05, "loss": 8.7002, "step": 1391500 }, { "epoch": 11.14, "learning_rate": 4.4432000000000004e-05, "loss": 8.6935, "step": 1392000 }, { "epoch": 11.14, "learning_rate": 4.443e-05, "loss": 8.6774, "step": 1392500 }, { "epoch": 11.14, "learning_rate": 4.4428e-05, "loss": 8.6507, "step": 1393000 }, { "epoch": 11.15, "learning_rate": 4.4426e-05, "loss": 8.7016, "step": 1393500 }, { "epoch": 11.15, "learning_rate": 4.442400000000001e-05, "loss": 8.7073, "step": 1394000 }, { "epoch": 11.16, "learning_rate": 4.4422e-05, "loss": 8.6642, "step": 1394500 }, { "epoch": 11.16, "learning_rate": 4.442e-05, "loss": 8.7026, "step": 1395000 }, { "epoch": 11.16, "learning_rate": 4.441800000000001e-05, "loss": 8.6713, "step": 1395500 }, { "epoch": 11.17, "learning_rate": 4.4416e-05, "loss": 8.6974, "step": 1396000 }, { "epoch": 11.17, "learning_rate": 4.4414e-05, "loss": 8.6899, "step": 1396500 }, { "epoch": 11.18, "learning_rate": 4.4412e-05, "loss": 8.6838, "step": 1397000 }, { "epoch": 11.18, "learning_rate": 4.4410000000000003e-05, "loss": 8.68, "step": 1397500 }, { "epoch": 11.18, "learning_rate": 4.4408e-05, "loss": 8.7208, "step": 1398000 }, { "epoch": 11.19, "learning_rate": 4.4406e-05, "loss": 8.6746, "step": 1398500 }, { "epoch": 11.19, "learning_rate": 4.4404000000000004e-05, "loss": 8.666, "step": 1399000 }, { "epoch": 11.2, "learning_rate": 4.4402000000000006e-05, "loss": 8.6831, "step": 1399500 }, { "epoch": 11.2, "learning_rate": 4.44e-05, "loss": 8.6936, "step": 1400000 }, { "epoch": 11.2, "learning_rate": 4.4398e-05, "loss": 8.6704, "step": 1400500 }, { "epoch": 11.21, "learning_rate": 4.4396000000000007e-05, "loss": 8.6812, "step": 1401000 }, { "epoch": 11.21, "learning_rate": 4.4394e-05, "loss": 8.6934, "step": 1401500 }, { "epoch": 11.22, "learning_rate": 4.4392e-05, "loss": 8.6618, "step": 1402000 }, { "epoch": 11.22, "learning_rate": 4.439000000000001e-05, "loss": 8.689, "step": 1402500 }, { "epoch": 11.22, "learning_rate": 4.4388e-05, "loss": 8.6973, "step": 1403000 }, { "epoch": 11.23, "learning_rate": 4.4386e-05, "loss": 8.677, "step": 1403500 }, { "epoch": 11.23, "learning_rate": 4.4384e-05, "loss": 8.6716, "step": 1404000 }, { "epoch": 11.24, "learning_rate": 4.4382e-05, "loss": 8.6798, "step": 1404500 }, { "epoch": 11.24, "learning_rate": 4.438e-05, "loss": 8.6936, "step": 1405000 }, { "epoch": 11.24, "learning_rate": 4.4378e-05, "loss": 8.7097, "step": 1405500 }, { "epoch": 11.25, "learning_rate": 4.4376e-05, "loss": 8.6674, "step": 1406000 }, { "epoch": 11.25, "learning_rate": 4.4374000000000006e-05, "loss": 8.6778, "step": 1406500 }, { "epoch": 11.26, "learning_rate": 4.4372e-05, "loss": 8.673, "step": 1407000 }, { "epoch": 11.26, "learning_rate": 4.4370000000000004e-05, "loss": 8.6796, "step": 1407500 }, { "epoch": 11.26, "learning_rate": 4.4368000000000006e-05, "loss": 8.6807, "step": 1408000 }, { "epoch": 11.27, "learning_rate": 4.4366e-05, "loss": 8.684, "step": 1408500 }, { "epoch": 11.27, "learning_rate": 4.4364e-05, "loss": 8.6913, "step": 1409000 }, { "epoch": 11.28, "learning_rate": 4.4362000000000007e-05, "loss": 8.717, "step": 1409500 }, { "epoch": 11.28, "learning_rate": 4.436e-05, "loss": 8.6598, "step": 1410000 }, { "epoch": 11.28, "learning_rate": 4.4358e-05, "loss": 8.681, "step": 1410500 }, { "epoch": 11.29, "learning_rate": 4.4356e-05, "loss": 8.6636, "step": 1411000 }, { "epoch": 11.29, "learning_rate": 4.4354e-05, "loss": 8.6663, "step": 1411500 }, { "epoch": 11.3, "learning_rate": 4.4352000000000005e-05, "loss": 8.6876, "step": 1412000 }, { "epoch": 11.3, "learning_rate": 4.435e-05, "loss": 8.6941, "step": 1412500 }, { "epoch": 11.3, "learning_rate": 4.4348e-05, "loss": 8.6862, "step": 1413000 }, { "epoch": 11.31, "learning_rate": 4.4346000000000005e-05, "loss": 8.6784, "step": 1413500 }, { "epoch": 11.31, "learning_rate": 4.4344e-05, "loss": 8.6941, "step": 1414000 }, { "epoch": 11.32, "learning_rate": 4.4342e-05, "loss": 8.6919, "step": 1414500 }, { "epoch": 11.32, "learning_rate": 4.4340000000000006e-05, "loss": 8.6629, "step": 1415000 }, { "epoch": 11.32, "learning_rate": 4.4338e-05, "loss": 8.6665, "step": 1415500 }, { "epoch": 11.33, "learning_rate": 4.4336e-05, "loss": 8.6617, "step": 1416000 }, { "epoch": 11.33, "learning_rate": 4.4334000000000006e-05, "loss": 8.688, "step": 1416500 }, { "epoch": 11.34, "learning_rate": 4.4332e-05, "loss": 8.6783, "step": 1417000 }, { "epoch": 11.34, "learning_rate": 4.4330000000000004e-05, "loss": 8.6857, "step": 1417500 }, { "epoch": 11.34, "learning_rate": 4.4328e-05, "loss": 8.6568, "step": 1418000 }, { "epoch": 11.35, "learning_rate": 4.4326e-05, "loss": 8.7021, "step": 1418500 }, { "epoch": 11.35, "learning_rate": 4.4324000000000005e-05, "loss": 8.6954, "step": 1419000 }, { "epoch": 11.36, "learning_rate": 4.4322e-05, "loss": 8.6757, "step": 1419500 }, { "epoch": 11.36, "learning_rate": 4.432e-05, "loss": 8.6948, "step": 1420000 }, { "epoch": 11.36, "learning_rate": 4.4318000000000005e-05, "loss": 8.6815, "step": 1420500 }, { "epoch": 11.37, "learning_rate": 4.4316e-05, "loss": 8.6836, "step": 1421000 }, { "epoch": 11.37, "learning_rate": 4.4314e-05, "loss": 8.6762, "step": 1421500 }, { "epoch": 11.38, "learning_rate": 4.4312000000000005e-05, "loss": 8.6873, "step": 1422000 }, { "epoch": 11.38, "learning_rate": 4.431e-05, "loss": 8.6883, "step": 1422500 }, { "epoch": 11.38, "learning_rate": 4.4307999999999997e-05, "loss": 8.6727, "step": 1423000 }, { "epoch": 11.39, "learning_rate": 4.4306000000000006e-05, "loss": 8.6866, "step": 1423500 }, { "epoch": 11.39, "learning_rate": 4.4304e-05, "loss": 8.6688, "step": 1424000 }, { "epoch": 11.4, "learning_rate": 4.4302000000000004e-05, "loss": 8.691, "step": 1424500 }, { "epoch": 11.4, "learning_rate": 4.43e-05, "loss": 8.6905, "step": 1425000 }, { "epoch": 11.4, "learning_rate": 4.4298e-05, "loss": 8.6575, "step": 1425500 }, { "epoch": 11.41, "learning_rate": 4.4296000000000004e-05, "loss": 8.6747, "step": 1426000 }, { "epoch": 11.41, "learning_rate": 4.4294e-05, "loss": 8.6863, "step": 1426500 }, { "epoch": 11.42, "learning_rate": 4.4292e-05, "loss": 8.6581, "step": 1427000 }, { "epoch": 11.42, "learning_rate": 4.4290000000000005e-05, "loss": 8.6637, "step": 1427500 }, { "epoch": 11.42, "learning_rate": 4.4288e-05, "loss": 8.6897, "step": 1428000 }, { "epoch": 11.43, "learning_rate": 4.4286e-05, "loss": 8.7029, "step": 1428500 }, { "epoch": 11.43, "learning_rate": 4.4284000000000005e-05, "loss": 8.694, "step": 1429000 }, { "epoch": 11.44, "learning_rate": 4.4282e-05, "loss": 8.6907, "step": 1429500 }, { "epoch": 11.44, "learning_rate": 4.428e-05, "loss": 8.6931, "step": 1430000 }, { "epoch": 11.44, "learning_rate": 4.4278000000000005e-05, "loss": 8.6977, "step": 1430500 }, { "epoch": 11.45, "learning_rate": 4.4276e-05, "loss": 8.68, "step": 1431000 }, { "epoch": 11.45, "learning_rate": 4.4274e-05, "loss": 8.6818, "step": 1431500 }, { "epoch": 11.46, "learning_rate": 4.4272000000000006e-05, "loss": 8.6891, "step": 1432000 }, { "epoch": 11.46, "learning_rate": 4.427e-05, "loss": 8.6873, "step": 1432500 }, { "epoch": 11.46, "learning_rate": 4.4268000000000004e-05, "loss": 8.7087, "step": 1433000 }, { "epoch": 11.47, "learning_rate": 4.4266e-05, "loss": 8.6785, "step": 1433500 }, { "epoch": 11.47, "learning_rate": 4.4264e-05, "loss": 8.6784, "step": 1434000 }, { "epoch": 11.48, "learning_rate": 4.4262000000000004e-05, "loss": 8.6885, "step": 1434500 }, { "epoch": 11.48, "learning_rate": 4.426e-05, "loss": 8.7019, "step": 1435000 }, { "epoch": 11.48, "learning_rate": 4.4258e-05, "loss": 8.698, "step": 1435500 }, { "epoch": 11.49, "learning_rate": 4.4256000000000005e-05, "loss": 8.7079, "step": 1436000 }, { "epoch": 11.49, "learning_rate": 4.4254e-05, "loss": 8.661, "step": 1436500 }, { "epoch": 11.5, "learning_rate": 4.4252e-05, "loss": 8.6553, "step": 1437000 }, { "epoch": 11.5, "learning_rate": 4.4250000000000005e-05, "loss": 8.6629, "step": 1437500 }, { "epoch": 11.5, "learning_rate": 4.4248e-05, "loss": 8.6927, "step": 1438000 }, { "epoch": 11.51, "learning_rate": 4.4246e-05, "loss": 8.6908, "step": 1438500 }, { "epoch": 11.51, "learning_rate": 4.4244000000000005e-05, "loss": 8.6646, "step": 1439000 }, { "epoch": 11.52, "learning_rate": 4.4242e-05, "loss": 8.6639, "step": 1439500 }, { "epoch": 11.52, "learning_rate": 4.424e-05, "loss": 8.6975, "step": 1440000 }, { "epoch": 11.52, "learning_rate": 4.4238e-05, "loss": 8.67, "step": 1440500 }, { "epoch": 11.53, "learning_rate": 4.4236e-05, "loss": 8.6734, "step": 1441000 }, { "epoch": 11.53, "learning_rate": 4.4234000000000004e-05, "loss": 8.6764, "step": 1441500 }, { "epoch": 11.54, "learning_rate": 4.4232e-05, "loss": 8.676, "step": 1442000 }, { "epoch": 11.54, "learning_rate": 4.423e-05, "loss": 8.646, "step": 1442500 }, { "epoch": 11.54, "learning_rate": 4.4228000000000004e-05, "loss": 8.7027, "step": 1443000 }, { "epoch": 11.55, "learning_rate": 4.4226e-05, "loss": 8.6741, "step": 1443500 }, { "epoch": 11.55, "learning_rate": 4.4224e-05, "loss": 8.6565, "step": 1444000 }, { "epoch": 11.56, "learning_rate": 4.4222000000000005e-05, "loss": 8.6644, "step": 1444500 }, { "epoch": 11.56, "learning_rate": 4.422e-05, "loss": 8.6796, "step": 1445000 }, { "epoch": 11.56, "learning_rate": 4.4218e-05, "loss": 8.6761, "step": 1445500 }, { "epoch": 11.57, "learning_rate": 4.4216000000000005e-05, "loss": 8.6797, "step": 1446000 }, { "epoch": 11.57, "learning_rate": 4.4214e-05, "loss": 8.6821, "step": 1446500 }, { "epoch": 11.58, "learning_rate": 4.4212e-05, "loss": 8.6804, "step": 1447000 }, { "epoch": 11.58, "learning_rate": 4.421e-05, "loss": 8.6837, "step": 1447500 }, { "epoch": 11.58, "learning_rate": 4.4208e-05, "loss": 8.6764, "step": 1448000 }, { "epoch": 11.59, "learning_rate": 4.4206e-05, "loss": 8.7004, "step": 1448500 }, { "epoch": 11.59, "learning_rate": 4.4204e-05, "loss": 8.6578, "step": 1449000 }, { "epoch": 11.6, "learning_rate": 4.4202e-05, "loss": 8.657, "step": 1449500 }, { "epoch": 11.6, "learning_rate": 4.4200000000000004e-05, "loss": 8.6699, "step": 1450000 }, { "epoch": 11.6, "learning_rate": 4.4198e-05, "loss": 8.6896, "step": 1450500 }, { "epoch": 11.61, "learning_rate": 4.4196e-05, "loss": 8.6403, "step": 1451000 }, { "epoch": 11.61, "learning_rate": 4.4194000000000004e-05, "loss": 8.6853, "step": 1451500 }, { "epoch": 11.62, "learning_rate": 4.4192e-05, "loss": 8.6746, "step": 1452000 }, { "epoch": 11.62, "learning_rate": 4.419e-05, "loss": 8.6948, "step": 1452500 }, { "epoch": 11.62, "learning_rate": 4.4188000000000005e-05, "loss": 8.6854, "step": 1453000 }, { "epoch": 11.63, "learning_rate": 4.4186e-05, "loss": 8.6629, "step": 1453500 }, { "epoch": 11.63, "learning_rate": 4.4184e-05, "loss": 8.6872, "step": 1454000 }, { "epoch": 11.64, "learning_rate": 4.4182e-05, "loss": 8.673, "step": 1454500 }, { "epoch": 11.64, "learning_rate": 4.418000000000001e-05, "loss": 8.6516, "step": 1455000 }, { "epoch": 11.64, "learning_rate": 4.4178e-05, "loss": 8.669, "step": 1455500 }, { "epoch": 11.65, "learning_rate": 4.4176e-05, "loss": 8.6677, "step": 1456000 }, { "epoch": 11.65, "learning_rate": 4.4174e-05, "loss": 8.6699, "step": 1456500 }, { "epoch": 11.66, "learning_rate": 4.4172e-05, "loss": 8.665, "step": 1457000 }, { "epoch": 11.66, "learning_rate": 4.417e-05, "loss": 8.7142, "step": 1457500 }, { "epoch": 11.66, "learning_rate": 4.4168e-05, "loss": 8.6866, "step": 1458000 }, { "epoch": 11.67, "learning_rate": 4.4166000000000004e-05, "loss": 8.6632, "step": 1458500 }, { "epoch": 11.67, "learning_rate": 4.4164e-05, "loss": 8.6833, "step": 1459000 }, { "epoch": 11.68, "learning_rate": 4.4162e-05, "loss": 8.6737, "step": 1459500 }, { "epoch": 11.68, "learning_rate": 4.4160000000000004e-05, "loss": 8.7098, "step": 1460000 }, { "epoch": 11.68, "learning_rate": 4.4158e-05, "loss": 8.6475, "step": 1460500 }, { "epoch": 11.69, "learning_rate": 4.4156e-05, "loss": 8.6624, "step": 1461000 }, { "epoch": 11.69, "learning_rate": 4.4154e-05, "loss": 8.6969, "step": 1461500 }, { "epoch": 11.7, "learning_rate": 4.415200000000001e-05, "loss": 8.6833, "step": 1462000 }, { "epoch": 11.7, "learning_rate": 4.415e-05, "loss": 8.689, "step": 1462500 }, { "epoch": 11.7, "learning_rate": 4.4148e-05, "loss": 8.6595, "step": 1463000 }, { "epoch": 11.71, "learning_rate": 4.414600000000001e-05, "loss": 8.6789, "step": 1463500 }, { "epoch": 11.71, "learning_rate": 4.4144e-05, "loss": 8.6726, "step": 1464000 }, { "epoch": 11.72, "learning_rate": 4.4142e-05, "loss": 8.6825, "step": 1464500 }, { "epoch": 11.72, "learning_rate": 4.414e-05, "loss": 8.6773, "step": 1465000 }, { "epoch": 11.72, "learning_rate": 4.4138e-05, "loss": 8.6724, "step": 1465500 }, { "epoch": 11.73, "learning_rate": 4.4136e-05, "loss": 8.6929, "step": 1466000 }, { "epoch": 11.73, "learning_rate": 4.4134e-05, "loss": 8.6711, "step": 1466500 }, { "epoch": 11.74, "learning_rate": 4.4132000000000004e-05, "loss": 8.6716, "step": 1467000 }, { "epoch": 11.74, "learning_rate": 4.4130000000000006e-05, "loss": 8.71, "step": 1467500 }, { "epoch": 11.74, "learning_rate": 4.4128e-05, "loss": 8.6614, "step": 1468000 }, { "epoch": 11.75, "learning_rate": 4.4126e-05, "loss": 8.6917, "step": 1468500 }, { "epoch": 11.75, "learning_rate": 4.4124000000000006e-05, "loss": 8.6831, "step": 1469000 }, { "epoch": 11.76, "learning_rate": 4.4122e-05, "loss": 8.668, "step": 1469500 }, { "epoch": 11.76, "learning_rate": 4.412e-05, "loss": 8.667, "step": 1470000 }, { "epoch": 11.76, "learning_rate": 4.411800000000001e-05, "loss": 8.6614, "step": 1470500 }, { "epoch": 11.77, "learning_rate": 4.4116e-05, "loss": 8.6821, "step": 1471000 }, { "epoch": 11.77, "learning_rate": 4.4114e-05, "loss": 8.6653, "step": 1471500 }, { "epoch": 11.78, "learning_rate": 4.4112e-05, "loss": 8.6863, "step": 1472000 }, { "epoch": 11.78, "learning_rate": 4.411e-05, "loss": 8.6759, "step": 1472500 }, { "epoch": 11.78, "learning_rate": 4.4108000000000005e-05, "loss": 8.6805, "step": 1473000 }, { "epoch": 11.79, "learning_rate": 4.4106e-05, "loss": 8.7015, "step": 1473500 }, { "epoch": 11.79, "learning_rate": 4.4104e-05, "loss": 8.6562, "step": 1474000 }, { "epoch": 11.8, "learning_rate": 4.4102000000000006e-05, "loss": 8.6747, "step": 1474500 }, { "epoch": 11.8, "learning_rate": 4.41e-05, "loss": 8.6789, "step": 1475000 }, { "epoch": 11.8, "learning_rate": 4.4098000000000004e-05, "loss": 8.6524, "step": 1475500 }, { "epoch": 11.81, "learning_rate": 4.4096000000000006e-05, "loss": 8.6618, "step": 1476000 }, { "epoch": 11.81, "learning_rate": 4.4094e-05, "loss": 8.6817, "step": 1476500 }, { "epoch": 11.82, "learning_rate": 4.4092e-05, "loss": 8.6948, "step": 1477000 }, { "epoch": 11.82, "learning_rate": 4.4090000000000006e-05, "loss": 8.6682, "step": 1477500 }, { "epoch": 11.82, "learning_rate": 4.4088e-05, "loss": 8.6452, "step": 1478000 }, { "epoch": 11.83, "learning_rate": 4.4086e-05, "loss": 8.6851, "step": 1478500 }, { "epoch": 11.83, "learning_rate": 4.4084e-05, "loss": 8.6777, "step": 1479000 }, { "epoch": 11.84, "learning_rate": 4.4082e-05, "loss": 8.6732, "step": 1479500 }, { "epoch": 11.84, "learning_rate": 4.4080000000000005e-05, "loss": 8.6927, "step": 1480000 }, { "epoch": 11.84, "learning_rate": 4.4078e-05, "loss": 8.6913, "step": 1480500 }, { "epoch": 11.85, "learning_rate": 4.4076e-05, "loss": 8.6612, "step": 1481000 }, { "epoch": 11.85, "learning_rate": 4.4074000000000005e-05, "loss": 8.7139, "step": 1481500 }, { "epoch": 11.86, "learning_rate": 4.4072e-05, "loss": 8.6856, "step": 1482000 }, { "epoch": 11.86, "learning_rate": 4.407e-05, "loss": 8.6723, "step": 1482500 }, { "epoch": 11.86, "learning_rate": 4.4068000000000006e-05, "loss": 8.6858, "step": 1483000 }, { "epoch": 11.87, "learning_rate": 4.4066e-05, "loss": 8.6761, "step": 1483500 }, { "epoch": 11.87, "learning_rate": 4.4064e-05, "loss": 8.6919, "step": 1484000 }, { "epoch": 11.88, "learning_rate": 4.4062000000000006e-05, "loss": 8.6761, "step": 1484500 }, { "epoch": 11.88, "learning_rate": 4.406e-05, "loss": 8.689, "step": 1485000 }, { "epoch": 11.88, "learning_rate": 4.4058000000000004e-05, "loss": 8.6675, "step": 1485500 }, { "epoch": 11.89, "learning_rate": 4.4056e-05, "loss": 8.6809, "step": 1486000 }, { "epoch": 11.89, "learning_rate": 4.4054e-05, "loss": 8.7032, "step": 1486500 }, { "epoch": 11.9, "learning_rate": 4.4052000000000004e-05, "loss": 8.7098, "step": 1487000 }, { "epoch": 11.9, "learning_rate": 4.405e-05, "loss": 8.6612, "step": 1487500 }, { "epoch": 11.9, "learning_rate": 4.4048e-05, "loss": 8.6746, "step": 1488000 }, { "epoch": 11.91, "learning_rate": 4.4046000000000005e-05, "loss": 8.6695, "step": 1488500 }, { "epoch": 11.91, "learning_rate": 4.4044e-05, "loss": 8.6714, "step": 1489000 }, { "epoch": 11.92, "learning_rate": 4.4042e-05, "loss": 8.6852, "step": 1489500 }, { "epoch": 11.92, "learning_rate": 4.4040000000000005e-05, "loss": 8.6772, "step": 1490000 }, { "epoch": 11.92, "learning_rate": 4.4038e-05, "loss": 8.6697, "step": 1490500 }, { "epoch": 11.93, "learning_rate": 4.4035999999999996e-05, "loss": 8.6456, "step": 1491000 }, { "epoch": 11.93, "learning_rate": 4.4034000000000006e-05, "loss": 8.6959, "step": 1491500 }, { "epoch": 11.94, "learning_rate": 4.4032e-05, "loss": 8.6616, "step": 1492000 }, { "epoch": 11.94, "learning_rate": 4.4030000000000004e-05, "loss": 8.6814, "step": 1492500 }, { "epoch": 11.94, "learning_rate": 4.4028e-05, "loss": 8.6567, "step": 1493000 }, { "epoch": 11.95, "learning_rate": 4.4026e-05, "loss": 8.6518, "step": 1493500 }, { "epoch": 11.95, "learning_rate": 4.4024000000000004e-05, "loss": 8.6799, "step": 1494000 }, { "epoch": 11.96, "learning_rate": 4.4022e-05, "loss": 8.6895, "step": 1494500 }, { "epoch": 11.96, "learning_rate": 4.402e-05, "loss": 8.677, "step": 1495000 }, { "epoch": 11.96, "learning_rate": 4.4018000000000004e-05, "loss": 8.6905, "step": 1495500 }, { "epoch": 11.97, "learning_rate": 4.4016e-05, "loss": 8.6613, "step": 1496000 }, { "epoch": 11.97, "learning_rate": 4.4014e-05, "loss": 8.6763, "step": 1496500 }, { "epoch": 11.98, "learning_rate": 4.4012000000000005e-05, "loss": 8.6546, "step": 1497000 }, { "epoch": 11.98, "learning_rate": 4.401e-05, "loss": 8.6658, "step": 1497500 }, { "epoch": 11.98, "learning_rate": 4.4008e-05, "loss": 8.6746, "step": 1498000 }, { "epoch": 11.99, "learning_rate": 4.4006000000000005e-05, "loss": 8.666, "step": 1498500 }, { "epoch": 11.99, "learning_rate": 4.4004e-05, "loss": 8.6715, "step": 1499000 }, { "epoch": 12.0, "learning_rate": 4.4002e-05, "loss": 8.6713, "step": 1499500 }, { "epoch": 12.0, "learning_rate": 4.4000000000000006e-05, "loss": 8.6739, "step": 1500000 }, { "epoch": 12.0, "learning_rate": 4.3998e-05, "loss": 8.6672, "step": 1500500 }, { "epoch": 12.01, "learning_rate": 4.3996000000000004e-05, "loss": 8.6654, "step": 1501000 }, { "epoch": 12.01, "learning_rate": 4.3994e-05, "loss": 8.71, "step": 1501500 }, { "epoch": 12.02, "learning_rate": 4.3992e-05, "loss": 8.6906, "step": 1502000 }, { "epoch": 12.02, "learning_rate": 4.3990000000000004e-05, "loss": 8.7011, "step": 1502500 }, { "epoch": 12.02, "learning_rate": 4.3988e-05, "loss": 8.6885, "step": 1503000 }, { "epoch": 12.03, "learning_rate": 4.3986e-05, "loss": 8.6526, "step": 1503500 }, { "epoch": 12.03, "learning_rate": 4.3984000000000004e-05, "loss": 8.6879, "step": 1504000 }, { "epoch": 12.04, "learning_rate": 4.3982e-05, "loss": 8.6624, "step": 1504500 }, { "epoch": 12.04, "learning_rate": 4.398e-05, "loss": 8.7053, "step": 1505000 }, { "epoch": 12.04, "learning_rate": 4.3978000000000005e-05, "loss": 8.6805, "step": 1505500 }, { "epoch": 12.05, "learning_rate": 4.3976e-05, "loss": 8.6868, "step": 1506000 }, { "epoch": 12.05, "learning_rate": 4.3974e-05, "loss": 8.7021, "step": 1506500 }, { "epoch": 12.06, "learning_rate": 4.3972000000000005e-05, "loss": 8.6909, "step": 1507000 }, { "epoch": 12.06, "learning_rate": 4.397e-05, "loss": 8.6705, "step": 1507500 }, { "epoch": 12.06, "learning_rate": 4.3968e-05, "loss": 8.6963, "step": 1508000 }, { "epoch": 12.07, "learning_rate": 4.3966e-05, "loss": 8.6846, "step": 1508500 }, { "epoch": 12.07, "learning_rate": 4.3964e-05, "loss": 8.6831, "step": 1509000 }, { "epoch": 12.08, "learning_rate": 4.3962000000000004e-05, "loss": 8.6882, "step": 1509500 }, { "epoch": 12.08, "learning_rate": 4.396e-05, "loss": 8.6903, "step": 1510000 }, { "epoch": 12.08, "learning_rate": 4.3958e-05, "loss": 8.6875, "step": 1510500 }, { "epoch": 12.09, "learning_rate": 4.3956000000000004e-05, "loss": 8.6809, "step": 1511000 }, { "epoch": 12.09, "learning_rate": 4.3954e-05, "loss": 8.6633, "step": 1511500 }, { "epoch": 12.1, "learning_rate": 4.3952e-05, "loss": 8.6722, "step": 1512000 }, { "epoch": 12.1, "learning_rate": 4.3950000000000004e-05, "loss": 8.6903, "step": 1512500 }, { "epoch": 12.1, "learning_rate": 4.3948e-05, "loss": 8.6771, "step": 1513000 }, { "epoch": 12.11, "learning_rate": 4.3946e-05, "loss": 8.6747, "step": 1513500 }, { "epoch": 12.11, "learning_rate": 4.3944000000000005e-05, "loss": 8.691, "step": 1514000 }, { "epoch": 12.12, "learning_rate": 4.3942e-05, "loss": 8.6689, "step": 1514500 }, { "epoch": 12.12, "learning_rate": 4.394e-05, "loss": 8.6619, "step": 1515000 }, { "epoch": 12.12, "learning_rate": 4.3938e-05, "loss": 8.6476, "step": 1515500 }, { "epoch": 12.13, "learning_rate": 4.3936e-05, "loss": 8.6996, "step": 1516000 }, { "epoch": 12.13, "learning_rate": 4.3934e-05, "loss": 8.6747, "step": 1516500 }, { "epoch": 12.14, "learning_rate": 4.3932e-05, "loss": 8.6774, "step": 1517000 }, { "epoch": 12.14, "learning_rate": 4.393e-05, "loss": 8.6869, "step": 1517500 }, { "epoch": 12.14, "learning_rate": 4.3928000000000004e-05, "loss": 8.6653, "step": 1518000 }, { "epoch": 12.15, "learning_rate": 4.3926e-05, "loss": 8.6809, "step": 1518500 }, { "epoch": 12.15, "learning_rate": 4.3924e-05, "loss": 8.6973, "step": 1519000 }, { "epoch": 12.16, "learning_rate": 4.3922000000000004e-05, "loss": 8.6989, "step": 1519500 }, { "epoch": 12.16, "learning_rate": 4.392e-05, "loss": 8.6899, "step": 1520000 }, { "epoch": 12.16, "learning_rate": 4.3918e-05, "loss": 8.6943, "step": 1520500 }, { "epoch": 12.17, "learning_rate": 4.3916000000000004e-05, "loss": 8.6592, "step": 1521000 }, { "epoch": 12.17, "learning_rate": 4.3914e-05, "loss": 8.6819, "step": 1521500 }, { "epoch": 12.18, "learning_rate": 4.3912e-05, "loss": 8.6925, "step": 1522000 }, { "epoch": 12.18, "learning_rate": 4.391e-05, "loss": 8.6697, "step": 1522500 }, { "epoch": 12.18, "learning_rate": 4.390800000000001e-05, "loss": 8.6586, "step": 1523000 }, { "epoch": 12.19, "learning_rate": 4.3906e-05, "loss": 8.6722, "step": 1523500 }, { "epoch": 12.19, "learning_rate": 4.3904e-05, "loss": 8.6586, "step": 1524000 }, { "epoch": 12.2, "learning_rate": 4.390200000000001e-05, "loss": 8.6938, "step": 1524500 }, { "epoch": 12.2, "learning_rate": 4.39e-05, "loss": 8.6725, "step": 1525000 }, { "epoch": 12.2, "learning_rate": 4.3898e-05, "loss": 8.6467, "step": 1525500 }, { "epoch": 12.21, "learning_rate": 4.3896e-05, "loss": 8.7079, "step": 1526000 }, { "epoch": 12.21, "learning_rate": 4.3894000000000004e-05, "loss": 8.6854, "step": 1526500 }, { "epoch": 12.22, "learning_rate": 4.3892e-05, "loss": 8.7007, "step": 1527000 }, { "epoch": 12.22, "learning_rate": 4.389e-05, "loss": 8.6889, "step": 1527500 }, { "epoch": 12.22, "learning_rate": 4.3888000000000004e-05, "loss": 8.7038, "step": 1528000 }, { "epoch": 12.23, "learning_rate": 4.3886e-05, "loss": 8.6536, "step": 1528500 }, { "epoch": 12.23, "learning_rate": 4.3884e-05, "loss": 8.6831, "step": 1529000 }, { "epoch": 12.24, "learning_rate": 4.3882e-05, "loss": 8.6792, "step": 1529500 }, { "epoch": 12.24, "learning_rate": 4.388000000000001e-05, "loss": 8.6932, "step": 1530000 }, { "epoch": 12.24, "learning_rate": 4.3878e-05, "loss": 8.6891, "step": 1530500 }, { "epoch": 12.25, "learning_rate": 4.3876e-05, "loss": 8.6866, "step": 1531000 }, { "epoch": 12.25, "learning_rate": 4.387400000000001e-05, "loss": 8.672, "step": 1531500 }, { "epoch": 12.26, "learning_rate": 4.3872e-05, "loss": 8.672, "step": 1532000 }, { "epoch": 12.26, "learning_rate": 4.387e-05, "loss": 8.7002, "step": 1532500 }, { "epoch": 12.26, "learning_rate": 4.3868e-05, "loss": 8.6728, "step": 1533000 }, { "epoch": 12.27, "learning_rate": 4.3866e-05, "loss": 8.6812, "step": 1533500 }, { "epoch": 12.27, "learning_rate": 4.3864e-05, "loss": 8.644, "step": 1534000 }, { "epoch": 12.28, "learning_rate": 4.3862e-05, "loss": 8.6899, "step": 1534500 }, { "epoch": 12.28, "learning_rate": 4.3860000000000004e-05, "loss": 8.688, "step": 1535000 }, { "epoch": 12.28, "learning_rate": 4.3858000000000006e-05, "loss": 8.6622, "step": 1535500 }, { "epoch": 12.29, "learning_rate": 4.3856e-05, "loss": 8.6991, "step": 1536000 }, { "epoch": 12.29, "learning_rate": 4.3854000000000004e-05, "loss": 8.699, "step": 1536500 }, { "epoch": 12.3, "learning_rate": 4.3852000000000006e-05, "loss": 8.6681, "step": 1537000 }, { "epoch": 12.3, "learning_rate": 4.385e-05, "loss": 8.6747, "step": 1537500 }, { "epoch": 12.3, "learning_rate": 4.3848e-05, "loss": 8.6442, "step": 1538000 }, { "epoch": 12.31, "learning_rate": 4.384600000000001e-05, "loss": 8.6469, "step": 1538500 }, { "epoch": 12.31, "learning_rate": 4.3844e-05, "loss": 8.6613, "step": 1539000 }, { "epoch": 12.32, "learning_rate": 4.3842e-05, "loss": 8.6914, "step": 1539500 }, { "epoch": 12.32, "learning_rate": 4.384e-05, "loss": 8.6721, "step": 1540000 }, { "epoch": 12.32, "learning_rate": 4.3838e-05, "loss": 8.6913, "step": 1540500 }, { "epoch": 12.33, "learning_rate": 4.3836000000000005e-05, "loss": 8.6717, "step": 1541000 }, { "epoch": 12.33, "learning_rate": 4.3834e-05, "loss": 8.6817, "step": 1541500 }, { "epoch": 12.34, "learning_rate": 4.3832e-05, "loss": 8.6997, "step": 1542000 }, { "epoch": 12.34, "learning_rate": 4.3830000000000006e-05, "loss": 8.7035, "step": 1542500 }, { "epoch": 12.34, "learning_rate": 4.3828e-05, "loss": 8.6876, "step": 1543000 }, { "epoch": 12.35, "learning_rate": 4.3826000000000004e-05, "loss": 8.6981, "step": 1543500 }, { "epoch": 12.35, "learning_rate": 4.3824000000000006e-05, "loss": 8.7151, "step": 1544000 }, { "epoch": 12.36, "learning_rate": 4.3822e-05, "loss": 8.7211, "step": 1544500 }, { "epoch": 12.36, "learning_rate": 4.382e-05, "loss": 8.699, "step": 1545000 }, { "epoch": 12.36, "learning_rate": 4.3818000000000006e-05, "loss": 8.6745, "step": 1545500 }, { "epoch": 12.37, "learning_rate": 4.3816e-05, "loss": 8.6977, "step": 1546000 }, { "epoch": 12.37, "learning_rate": 4.3814e-05, "loss": 8.6718, "step": 1546500 }, { "epoch": 12.38, "learning_rate": 4.3812e-05, "loss": 8.7128, "step": 1547000 }, { "epoch": 12.38, "learning_rate": 4.381e-05, "loss": 8.6882, "step": 1547500 }, { "epoch": 12.38, "learning_rate": 4.3808000000000005e-05, "loss": 8.6773, "step": 1548000 }, { "epoch": 12.39, "learning_rate": 4.3806e-05, "loss": 8.6646, "step": 1548500 }, { "epoch": 12.39, "learning_rate": 4.3804e-05, "loss": 8.6737, "step": 1549000 }, { "epoch": 12.4, "learning_rate": 4.3802000000000005e-05, "loss": 8.65, "step": 1549500 }, { "epoch": 12.4, "learning_rate": 4.38e-05, "loss": 8.6774, "step": 1550000 }, { "epoch": 12.4, "learning_rate": 4.3798e-05, "loss": 8.6685, "step": 1550500 }, { "epoch": 12.41, "learning_rate": 4.3796000000000006e-05, "loss": 8.6778, "step": 1551000 }, { "epoch": 12.41, "learning_rate": 4.3794e-05, "loss": 8.6895, "step": 1551500 }, { "epoch": 12.42, "learning_rate": 4.3792e-05, "loss": 8.6687, "step": 1552000 }, { "epoch": 12.42, "learning_rate": 4.3790000000000006e-05, "loss": 8.6951, "step": 1552500 }, { "epoch": 12.42, "learning_rate": 4.3788e-05, "loss": 8.6959, "step": 1553000 }, { "epoch": 12.43, "learning_rate": 4.3786000000000004e-05, "loss": 8.67, "step": 1553500 }, { "epoch": 12.43, "learning_rate": 4.3784e-05, "loss": 8.6531, "step": 1554000 }, { "epoch": 12.44, "learning_rate": 4.3782e-05, "loss": 8.7009, "step": 1554500 }, { "epoch": 12.44, "learning_rate": 4.3780000000000004e-05, "loss": 8.6707, "step": 1555000 }, { "epoch": 12.44, "learning_rate": 4.3778e-05, "loss": 8.676, "step": 1555500 }, { "epoch": 12.45, "learning_rate": 4.3776e-05, "loss": 8.6667, "step": 1556000 }, { "epoch": 12.45, "learning_rate": 4.3774000000000005e-05, "loss": 8.7089, "step": 1556500 }, { "epoch": 12.46, "learning_rate": 4.3772e-05, "loss": 8.6605, "step": 1557000 }, { "epoch": 12.46, "learning_rate": 4.377e-05, "loss": 8.7017, "step": 1557500 }, { "epoch": 12.46, "learning_rate": 4.3768000000000005e-05, "loss": 8.6731, "step": 1558000 }, { "epoch": 12.47, "learning_rate": 4.3766e-05, "loss": 8.6783, "step": 1558500 }, { "epoch": 12.47, "learning_rate": 4.3763999999999996e-05, "loss": 8.715, "step": 1559000 }, { "epoch": 12.48, "learning_rate": 4.3762000000000006e-05, "loss": 8.6533, "step": 1559500 }, { "epoch": 12.48, "learning_rate": 4.376e-05, "loss": 8.6847, "step": 1560000 } ], "max_steps": 12500000, "num_train_epochs": 100, "total_flos": 4.871898683434107e+17, "trial_name": null, "trial_params": null }