diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,18736 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 12.48, + "global_step": 1560000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9998e-05, + "loss": 167.4813, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 4.999600000000001e-05, + "loss": 339.0435, + "step": 1000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994e-05, + "loss": 460.3145, + "step": 1500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9992e-05, + "loss": 391.4744, + "step": 2000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999e-05, + "loss": 429.6304, + "step": 2500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9988e-05, + "loss": 402.7589, + "step": 3000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9986000000000006e-05, + "loss": 346.8141, + "step": 3500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9984e-05, + "loss": 269.2469, + "step": 4000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9982000000000004e-05, + "loss": 249.9555, + "step": 4500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9980000000000006e-05, + "loss": 282.5226, + "step": 5000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9978e-05, + "loss": 276.7962, + "step": 5500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9976000000000004e-05, + "loss": 239.1784, + "step": 6000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9974000000000006e-05, + "loss": 214.4642, + "step": 6500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9972e-05, + "loss": 206.2337, + "step": 7000 + }, + { + "epoch": 0.06, + "learning_rate": 4.997e-05, + "loss": 202.7909, + "step": 7500 + }, + { + "epoch": 0.06, + "learning_rate": 4.996800000000001e-05, + "loss": 207.2598, + "step": 8000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9966e-05, + "loss": 198.4287, + "step": 8500 + }, + { + "epoch": 0.07, + "learning_rate": 4.9964e-05, + "loss": 202.6692, + "step": 9000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9962e-05, + "loss": 172.0607, + "step": 9500 + }, + { + "epoch": 0.08, + "learning_rate": 4.996e-05, + "loss": 152.3532, + "step": 10000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9958000000000005e-05, + "loss": 141.7099, + "step": 10500 + }, + { + "epoch": 0.09, + "learning_rate": 4.9956e-05, + "loss": 131.85, + "step": 11000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9954e-05, + "loss": 131.9644, + "step": 11500 + }, + { + "epoch": 0.1, + "learning_rate": 4.9952000000000006e-05, + "loss": 120.3319, + "step": 12000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995e-05, + "loss": 125.1385, + "step": 12500 + }, + { + "epoch": 0.1, + "learning_rate": 4.9948000000000004e-05, + "loss": 102.6195, + "step": 13000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9946000000000006e-05, + "loss": 97.9456, + "step": 13500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9944e-05, + "loss": 90.2301, + "step": 14000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9942e-05, + "loss": 85.6502, + "step": 14500 + }, + { + "epoch": 0.12, + "learning_rate": 4.9940000000000006e-05, + "loss": 71.9912, + "step": 15000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9938e-05, + "loss": 66.2195, + "step": 15500 + }, + { + "epoch": 0.13, + "learning_rate": 4.9936000000000004e-05, + "loss": 59.0722, + "step": 16000 + }, + { + "epoch": 0.13, + "learning_rate": 4.9934e-05, + "loss": 52.456, + "step": 16500 + }, + { + "epoch": 0.14, + "learning_rate": 4.9932e-05, + "loss": 44.7411, + "step": 17000 + }, + { + "epoch": 0.14, + "learning_rate": 4.9930000000000005e-05, + "loss": 37.4041, + "step": 17500 + }, + { + "epoch": 0.14, + "learning_rate": 4.9928e-05, + "loss": 33.0052, + "step": 18000 + }, + { + "epoch": 0.15, + "learning_rate": 4.9926e-05, + "loss": 27.9028, + "step": 18500 + }, + { + "epoch": 0.15, + "learning_rate": 4.9924000000000005e-05, + "loss": 23.4986, + "step": 19000 + }, + { + "epoch": 0.16, + "learning_rate": 4.9922e-05, + "loss": 22.3736, + "step": 19500 + }, + { + "epoch": 0.16, + "learning_rate": 4.992e-05, + "loss": 19.143, + "step": 20000 + }, + { + "epoch": 0.16, + "learning_rate": 4.9918000000000006e-05, + "loss": 16.728, + "step": 20500 + }, + { + "epoch": 0.17, + "learning_rate": 4.9916e-05, + "loss": 14.6861, + "step": 21000 + }, + { + "epoch": 0.17, + "learning_rate": 4.9914e-05, + "loss": 13.893, + "step": 21500 + }, + { + "epoch": 0.18, + "learning_rate": 4.9912000000000006e-05, + "loss": 13.4733, + "step": 22000 + }, + { + "epoch": 0.18, + "learning_rate": 4.991e-05, + "loss": 12.3987, + "step": 22500 + }, + { + "epoch": 0.18, + "learning_rate": 4.9908000000000004e-05, + "loss": 11.5524, + "step": 23000 + }, + { + "epoch": 0.19, + "learning_rate": 4.9906e-05, + "loss": 11.2799, + "step": 23500 + }, + { + "epoch": 0.19, + "learning_rate": 4.9904e-05, + "loss": 10.9152, + "step": 24000 + }, + { + "epoch": 0.2, + "learning_rate": 4.9902000000000004e-05, + "loss": 10.5949, + "step": 24500 + }, + { + "epoch": 0.2, + "learning_rate": 4.99e-05, + "loss": 10.8094, + "step": 25000 + }, + { + "epoch": 0.2, + "learning_rate": 4.9898e-05, + "loss": 10.2322, + "step": 25500 + }, + { + "epoch": 0.21, + "learning_rate": 4.9896000000000005e-05, + "loss": 10.0505, + "step": 26000 + }, + { + "epoch": 0.21, + "learning_rate": 4.9894e-05, + "loss": 10.1343, + "step": 26500 + }, + { + "epoch": 0.22, + "learning_rate": 4.9892e-05, + "loss": 9.9442, + "step": 27000 + }, + { + "epoch": 0.22, + "learning_rate": 4.9890000000000005e-05, + "loss": 9.7331, + "step": 27500 + }, + { + "epoch": 0.22, + "learning_rate": 4.9888e-05, + "loss": 9.7229, + "step": 28000 + }, + { + "epoch": 0.23, + "learning_rate": 4.9886e-05, + "loss": 9.7828, + "step": 28500 + }, + { + "epoch": 0.23, + "learning_rate": 4.9884000000000006e-05, + "loss": 9.7042, + "step": 29000 + }, + { + "epoch": 0.24, + "learning_rate": 4.9882e-05, + "loss": 9.6642, + "step": 29500 + }, + { + "epoch": 0.24, + "learning_rate": 4.9880000000000004e-05, + "loss": 9.8011, + "step": 30000 + }, + { + "epoch": 0.24, + "learning_rate": 4.9878e-05, + "loss": 9.7558, + "step": 30500 + }, + { + "epoch": 0.25, + "learning_rate": 4.9876e-05, + "loss": 9.8933, + "step": 31000 + }, + { + "epoch": 0.25, + "learning_rate": 4.9874000000000004e-05, + "loss": 9.6691, + "step": 31500 + }, + { + "epoch": 0.26, + "learning_rate": 4.9872e-05, + "loss": 9.5504, + "step": 32000 + }, + { + "epoch": 0.26, + "learning_rate": 4.987e-05, + "loss": 9.4497, + "step": 32500 + }, + { + "epoch": 0.26, + "learning_rate": 4.9868000000000004e-05, + "loss": 9.4204, + "step": 33000 + }, + { + "epoch": 0.27, + "learning_rate": 4.9866e-05, + "loss": 9.3652, + "step": 33500 + }, + { + "epoch": 0.27, + "learning_rate": 4.9864e-05, + "loss": 9.7497, + "step": 34000 + }, + { + "epoch": 0.28, + "learning_rate": 4.9862000000000005e-05, + "loss": 9.5389, + "step": 34500 + }, + { + "epoch": 0.28, + "learning_rate": 4.986e-05, + "loss": 9.6565, + "step": 35000 + }, + { + "epoch": 0.28, + "learning_rate": 4.9858e-05, + "loss": 9.3094, + "step": 35500 + }, + { + "epoch": 0.29, + "learning_rate": 4.9856000000000005e-05, + "loss": 9.3693, + "step": 36000 + }, + { + "epoch": 0.29, + "learning_rate": 4.9854e-05, + "loss": 9.3462, + "step": 36500 + }, + { + "epoch": 0.3, + "learning_rate": 4.9852e-05, + "loss": 9.4076, + "step": 37000 + }, + { + "epoch": 0.3, + "learning_rate": 4.9850000000000006e-05, + "loss": 9.2531, + "step": 37500 + }, + { + "epoch": 0.3, + "learning_rate": 4.9848e-05, + "loss": 9.227, + "step": 38000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9846000000000004e-05, + "loss": 9.3106, + "step": 38500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9844e-05, + "loss": 9.2394, + "step": 39000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9842e-05, + "loss": 9.2606, + "step": 39500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9840000000000004e-05, + "loss": 9.2406, + "step": 40000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9838e-05, + "loss": 9.2116, + "step": 40500 + }, + { + "epoch": 0.33, + "learning_rate": 4.9836e-05, + "loss": 9.2192, + "step": 41000 + }, + { + "epoch": 0.33, + "learning_rate": 4.9834000000000004e-05, + "loss": 9.2842, + "step": 41500 + }, + { + "epoch": 0.34, + "learning_rate": 4.9832e-05, + "loss": 9.2024, + "step": 42000 + }, + { + "epoch": 0.34, + "learning_rate": 4.983e-05, + "loss": 9.3669, + "step": 42500 + }, + { + "epoch": 0.34, + "learning_rate": 4.9828000000000005e-05, + "loss": 9.3505, + "step": 43000 + }, + { + "epoch": 0.35, + "learning_rate": 4.9826e-05, + "loss": 9.2114, + "step": 43500 + }, + { + "epoch": 0.35, + "learning_rate": 4.9824e-05, + "loss": 9.1553, + "step": 44000 + }, + { + "epoch": 0.36, + "learning_rate": 4.9822000000000005e-05, + "loss": 9.1217, + "step": 44500 + }, + { + "epoch": 0.36, + "learning_rate": 4.982e-05, + "loss": 9.2315, + "step": 45000 + }, + { + "epoch": 0.36, + "learning_rate": 4.9818e-05, + "loss": 9.3219, + "step": 45500 + }, + { + "epoch": 0.37, + "learning_rate": 4.9816e-05, + "loss": 9.0737, + "step": 46000 + }, + { + "epoch": 0.37, + "learning_rate": 4.981400000000001e-05, + "loss": 9.1281, + "step": 46500 + }, + { + "epoch": 0.38, + "learning_rate": 4.9812000000000004e-05, + "loss": 9.1292, + "step": 47000 + }, + { + "epoch": 0.38, + "learning_rate": 4.981e-05, + "loss": 9.2538, + "step": 47500 + }, + { + "epoch": 0.38, + "learning_rate": 4.9808e-05, + "loss": 9.1272, + "step": 48000 + }, + { + "epoch": 0.39, + "learning_rate": 4.9806000000000004e-05, + "loss": 9.225, + "step": 48500 + }, + { + "epoch": 0.39, + "learning_rate": 4.9804e-05, + "loss": 9.0919, + "step": 49000 + }, + { + "epoch": 0.4, + "learning_rate": 4.9802e-05, + "loss": 9.122, + "step": 49500 + }, + { + "epoch": 0.4, + "learning_rate": 4.9800000000000004e-05, + "loss": 9.1869, + "step": 50000 + }, + { + "epoch": 0.4, + "learning_rate": 4.9798e-05, + "loss": 9.0186, + "step": 50500 + }, + { + "epoch": 0.41, + "learning_rate": 4.9796e-05, + "loss": 9.1333, + "step": 51000 + }, + { + "epoch": 0.41, + "learning_rate": 4.9794000000000005e-05, + "loss": 9.1123, + "step": 51500 + }, + { + "epoch": 0.42, + "learning_rate": 4.9792e-05, + "loss": 9.1951, + "step": 52000 + }, + { + "epoch": 0.42, + "learning_rate": 4.979e-05, + "loss": 9.1012, + "step": 52500 + }, + { + "epoch": 0.42, + "learning_rate": 4.9788e-05, + "loss": 8.9948, + "step": 53000 + }, + { + "epoch": 0.43, + "learning_rate": 4.978600000000001e-05, + "loss": 9.2808, + "step": 53500 + }, + { + "epoch": 0.43, + "learning_rate": 4.9784e-05, + "loss": 9.0797, + "step": 54000 + }, + { + "epoch": 0.44, + "learning_rate": 4.9782e-05, + "loss": 9.0534, + "step": 54500 + }, + { + "epoch": 0.44, + "learning_rate": 4.978e-05, + "loss": 8.995, + "step": 55000 + }, + { + "epoch": 0.44, + "learning_rate": 4.9778000000000004e-05, + "loss": 9.0096, + "step": 55500 + }, + { + "epoch": 0.45, + "learning_rate": 4.9776e-05, + "loss": 8.9898, + "step": 56000 + }, + { + "epoch": 0.45, + "learning_rate": 4.9774e-05, + "loss": 8.9447, + "step": 56500 + }, + { + "epoch": 0.46, + "learning_rate": 4.9772000000000004e-05, + "loss": 9.2217, + "step": 57000 + }, + { + "epoch": 0.46, + "learning_rate": 4.977e-05, + "loss": 9.0623, + "step": 57500 + }, + { + "epoch": 0.46, + "learning_rate": 4.9768e-05, + "loss": 9.0159, + "step": 58000 + }, + { + "epoch": 0.47, + "learning_rate": 4.9766000000000004e-05, + "loss": 9.0159, + "step": 58500 + }, + { + "epoch": 0.47, + "learning_rate": 4.976400000000001e-05, + "loss": 8.938, + "step": 59000 + }, + { + "epoch": 0.48, + "learning_rate": 4.9762e-05, + "loss": 8.9805, + "step": 59500 + }, + { + "epoch": 0.48, + "learning_rate": 4.976e-05, + "loss": 8.9252, + "step": 60000 + }, + { + "epoch": 0.48, + "learning_rate": 4.975800000000001e-05, + "loss": 9.0771, + "step": 60500 + }, + { + "epoch": 0.49, + "learning_rate": 4.9756e-05, + "loss": 9.0443, + "step": 61000 + }, + { + "epoch": 0.49, + "learning_rate": 4.9754e-05, + "loss": 9.1127, + "step": 61500 + }, + { + "epoch": 0.5, + "learning_rate": 4.975200000000001e-05, + "loss": 8.9619, + "step": 62000 + }, + { + "epoch": 0.5, + "learning_rate": 4.975e-05, + "loss": 8.9725, + "step": 62500 + }, + { + "epoch": 0.5, + "learning_rate": 4.9748e-05, + "loss": 8.9863, + "step": 63000 + }, + { + "epoch": 0.51, + "learning_rate": 4.9746e-05, + "loss": 8.9595, + "step": 63500 + }, + { + "epoch": 0.51, + "learning_rate": 4.9744000000000003e-05, + "loss": 8.8928, + "step": 64000 + }, + { + "epoch": 0.52, + "learning_rate": 4.9742e-05, + "loss": 9.0314, + "step": 64500 + }, + { + "epoch": 0.52, + "learning_rate": 4.974e-05, + "loss": 8.9132, + "step": 65000 + }, + { + "epoch": 0.52, + "learning_rate": 4.9738000000000004e-05, + "loss": 8.9137, + "step": 65500 + }, + { + "epoch": 0.53, + "learning_rate": 4.9736000000000006e-05, + "loss": 8.9657, + "step": 66000 + }, + { + "epoch": 0.53, + "learning_rate": 4.9734e-05, + "loss": 8.9497, + "step": 66500 + }, + { + "epoch": 0.54, + "learning_rate": 4.9732e-05, + "loss": 8.8762, + "step": 67000 + }, + { + "epoch": 0.54, + "learning_rate": 4.973000000000001e-05, + "loss": 8.8903, + "step": 67500 + }, + { + "epoch": 0.54, + "learning_rate": 4.9728e-05, + "loss": 8.8748, + "step": 68000 + }, + { + "epoch": 0.55, + "learning_rate": 4.9726e-05, + "loss": 8.8706, + "step": 68500 + }, + { + "epoch": 0.55, + "learning_rate": 4.972400000000001e-05, + "loss": 8.8593, + "step": 69000 + }, + { + "epoch": 0.56, + "learning_rate": 4.9722e-05, + "loss": 8.9197, + "step": 69500 + }, + { + "epoch": 0.56, + "learning_rate": 4.972e-05, + "loss": 8.8718, + "step": 70000 + }, + { + "epoch": 0.56, + "learning_rate": 4.9718e-05, + "loss": 8.8917, + "step": 70500 + }, + { + "epoch": 0.57, + "learning_rate": 4.9716e-05, + "loss": 8.859, + "step": 71000 + }, + { + "epoch": 0.57, + "learning_rate": 4.9714000000000005e-05, + "loss": 9.1695, + "step": 71500 + }, + { + "epoch": 0.58, + "learning_rate": 4.9712e-05, + "loss": 8.8216, + "step": 72000 + }, + { + "epoch": 0.58, + "learning_rate": 4.9710000000000003e-05, + "loss": 8.8896, + "step": 72500 + }, + { + "epoch": 0.58, + "learning_rate": 4.9708000000000006e-05, + "loss": 8.857, + "step": 73000 + }, + { + "epoch": 0.59, + "learning_rate": 4.9706e-05, + "loss": 8.9091, + "step": 73500 + }, + { + "epoch": 0.59, + "learning_rate": 4.9704000000000004e-05, + "loss": 8.9185, + "step": 74000 + }, + { + "epoch": 0.6, + "learning_rate": 4.9702000000000006e-05, + "loss": 8.8406, + "step": 74500 + }, + { + "epoch": 0.6, + "learning_rate": 4.97e-05, + "loss": 8.8969, + "step": 75000 + }, + { + "epoch": 0.6, + "learning_rate": 4.9698e-05, + "loss": 8.8779, + "step": 75500 + }, + { + "epoch": 0.61, + "learning_rate": 4.969600000000001e-05, + "loss": 8.8665, + "step": 76000 + }, + { + "epoch": 0.61, + "learning_rate": 4.9694e-05, + "loss": 8.8258, + "step": 76500 + }, + { + "epoch": 0.62, + "learning_rate": 4.9692e-05, + "loss": 8.9677, + "step": 77000 + }, + { + "epoch": 0.62, + "learning_rate": 4.969e-05, + "loss": 8.8426, + "step": 77500 + }, + { + "epoch": 0.62, + "learning_rate": 4.9688e-05, + "loss": 8.8894, + "step": 78000 + }, + { + "epoch": 0.63, + "learning_rate": 4.9686000000000005e-05, + "loss": 8.81, + "step": 78500 + }, + { + "epoch": 0.63, + "learning_rate": 4.9684e-05, + "loss": 8.8793, + "step": 79000 + }, + { + "epoch": 0.64, + "learning_rate": 4.9682e-05, + "loss": 8.8209, + "step": 79500 + }, + { + "epoch": 0.64, + "learning_rate": 4.9680000000000005e-05, + "loss": 8.8762, + "step": 80000 + }, + { + "epoch": 0.64, + "learning_rate": 4.9678e-05, + "loss": 8.8526, + "step": 80500 + }, + { + "epoch": 0.65, + "learning_rate": 4.9676000000000003e-05, + "loss": 8.8207, + "step": 81000 + }, + { + "epoch": 0.65, + "learning_rate": 4.9674000000000006e-05, + "loss": 8.8084, + "step": 81500 + }, + { + "epoch": 0.66, + "learning_rate": 4.9672e-05, + "loss": 8.8289, + "step": 82000 + }, + { + "epoch": 0.66, + "learning_rate": 4.967e-05, + "loss": 8.8631, + "step": 82500 + }, + { + "epoch": 0.66, + "learning_rate": 4.9668000000000006e-05, + "loss": 8.7972, + "step": 83000 + }, + { + "epoch": 0.67, + "learning_rate": 4.9666e-05, + "loss": 8.8412, + "step": 83500 + }, + { + "epoch": 0.67, + "learning_rate": 4.9664000000000004e-05, + "loss": 8.8133, + "step": 84000 + }, + { + "epoch": 0.68, + "learning_rate": 4.9662e-05, + "loss": 8.8066, + "step": 84500 + }, + { + "epoch": 0.68, + "learning_rate": 4.966e-05, + "loss": 8.8591, + "step": 85000 + }, + { + "epoch": 0.68, + "learning_rate": 4.9658000000000005e-05, + "loss": 8.8006, + "step": 85500 + }, + { + "epoch": 0.69, + "learning_rate": 4.9656e-05, + "loss": 8.8801, + "step": 86000 + }, + { + "epoch": 0.69, + "learning_rate": 4.9654e-05, + "loss": 8.9259, + "step": 86500 + }, + { + "epoch": 0.7, + "learning_rate": 4.9652000000000005e-05, + "loss": 8.8784, + "step": 87000 + }, + { + "epoch": 0.7, + "learning_rate": 4.965e-05, + "loss": 8.8027, + "step": 87500 + }, + { + "epoch": 0.7, + "learning_rate": 4.9648e-05, + "loss": 8.7856, + "step": 88000 + }, + { + "epoch": 0.71, + "learning_rate": 4.9646000000000005e-05, + "loss": 8.8136, + "step": 88500 + }, + { + "epoch": 0.71, + "learning_rate": 4.9644e-05, + "loss": 8.7518, + "step": 89000 + }, + { + "epoch": 0.72, + "learning_rate": 4.9642e-05, + "loss": 9.0135, + "step": 89500 + }, + { + "epoch": 0.72, + "learning_rate": 4.9640000000000006e-05, + "loss": 8.8534, + "step": 90000 + }, + { + "epoch": 0.72, + "learning_rate": 4.9638e-05, + "loss": 8.7789, + "step": 90500 + }, + { + "epoch": 0.73, + "learning_rate": 4.9636000000000004e-05, + "loss": 8.8706, + "step": 91000 + }, + { + "epoch": 0.73, + "learning_rate": 4.9634e-05, + "loss": 8.7578, + "step": 91500 + }, + { + "epoch": 0.74, + "learning_rate": 4.9632e-05, + "loss": 8.8256, + "step": 92000 + }, + { + "epoch": 0.74, + "learning_rate": 4.9630000000000004e-05, + "loss": 8.8399, + "step": 92500 + }, + { + "epoch": 0.74, + "learning_rate": 4.9628e-05, + "loss": 8.9445, + "step": 93000 + }, + { + "epoch": 0.75, + "learning_rate": 4.9626e-05, + "loss": 8.8409, + "step": 93500 + }, + { + "epoch": 0.75, + "learning_rate": 4.9624000000000005e-05, + "loss": 8.8985, + "step": 94000 + }, + { + "epoch": 0.76, + "learning_rate": 4.9622e-05, + "loss": 8.8067, + "step": 94500 + }, + { + "epoch": 0.76, + "learning_rate": 4.962e-05, + "loss": 8.7973, + "step": 95000 + }, + { + "epoch": 0.76, + "learning_rate": 4.9618000000000005e-05, + "loss": 8.8158, + "step": 95500 + }, + { + "epoch": 0.77, + "learning_rate": 4.9616e-05, + "loss": 8.7935, + "step": 96000 + }, + { + "epoch": 0.77, + "learning_rate": 4.9614e-05, + "loss": 8.8262, + "step": 96500 + }, + { + "epoch": 0.78, + "learning_rate": 4.9612000000000005e-05, + "loss": 8.7424, + "step": 97000 + }, + { + "epoch": 0.78, + "learning_rate": 4.961e-05, + "loss": 8.8062, + "step": 97500 + }, + { + "epoch": 0.78, + "learning_rate": 4.9608000000000003e-05, + "loss": 8.821, + "step": 98000 + }, + { + "epoch": 0.79, + "learning_rate": 4.9606000000000006e-05, + "loss": 8.7761, + "step": 98500 + }, + { + "epoch": 0.79, + "learning_rate": 4.9604e-05, + "loss": 8.7752, + "step": 99000 + }, + { + "epoch": 0.8, + "learning_rate": 4.9602000000000004e-05, + "loss": 8.7686, + "step": 99500 + }, + { + "epoch": 0.8, + "learning_rate": 4.96e-05, + "loss": 8.8078, + "step": 100000 + }, + { + "epoch": 0.8, + "learning_rate": 4.9598e-05, + "loss": 8.755, + "step": 100500 + }, + { + "epoch": 0.81, + "learning_rate": 4.9596000000000004e-05, + "loss": 8.8267, + "step": 101000 + }, + { + "epoch": 0.81, + "learning_rate": 4.9594e-05, + "loss": 8.7747, + "step": 101500 + }, + { + "epoch": 0.82, + "learning_rate": 4.9592e-05, + "loss": 8.8316, + "step": 102000 + }, + { + "epoch": 0.82, + "learning_rate": 4.9590000000000005e-05, + "loss": 8.7557, + "step": 102500 + }, + { + "epoch": 0.82, + "learning_rate": 4.9588e-05, + "loss": 8.7278, + "step": 103000 + }, + { + "epoch": 0.83, + "learning_rate": 4.9586e-05, + "loss": 8.7551, + "step": 103500 + }, + { + "epoch": 0.83, + "learning_rate": 4.9584000000000005e-05, + "loss": 8.7773, + "step": 104000 + }, + { + "epoch": 0.84, + "learning_rate": 4.9582e-05, + "loss": 8.7889, + "step": 104500 + }, + { + "epoch": 0.84, + "learning_rate": 4.958e-05, + "loss": 8.769, + "step": 105000 + }, + { + "epoch": 0.84, + "learning_rate": 4.9578000000000005e-05, + "loss": 8.7733, + "step": 105500 + }, + { + "epoch": 0.85, + "learning_rate": 4.9576e-05, + "loss": 8.772, + "step": 106000 + }, + { + "epoch": 0.85, + "learning_rate": 4.9574000000000003e-05, + "loss": 8.7841, + "step": 106500 + }, + { + "epoch": 0.86, + "learning_rate": 4.9572e-05, + "loss": 8.763, + "step": 107000 + }, + { + "epoch": 0.86, + "learning_rate": 4.957e-05, + "loss": 8.7616, + "step": 107500 + }, + { + "epoch": 0.86, + "learning_rate": 4.9568000000000004e-05, + "loss": 8.8227, + "step": 108000 + }, + { + "epoch": 0.87, + "learning_rate": 4.9566e-05, + "loss": 8.7499, + "step": 108500 + }, + { + "epoch": 0.87, + "learning_rate": 4.9564e-05, + "loss": 8.7877, + "step": 109000 + }, + { + "epoch": 0.88, + "learning_rate": 4.9562000000000004e-05, + "loss": 8.7573, + "step": 109500 + }, + { + "epoch": 0.88, + "learning_rate": 4.956e-05, + "loss": 8.7705, + "step": 110000 + }, + { + "epoch": 0.88, + "learning_rate": 4.9558e-05, + "loss": 8.7416, + "step": 110500 + }, + { + "epoch": 0.89, + "learning_rate": 4.9556000000000005e-05, + "loss": 8.7569, + "step": 111000 + }, + { + "epoch": 0.89, + "learning_rate": 4.9554e-05, + "loss": 8.786, + "step": 111500 + }, + { + "epoch": 0.9, + "learning_rate": 4.9552e-05, + "loss": 8.7313, + "step": 112000 + }, + { + "epoch": 0.9, + "learning_rate": 4.9550000000000005e-05, + "loss": 8.7426, + "step": 112500 + }, + { + "epoch": 0.9, + "learning_rate": 4.9548e-05, + "loss": 8.7644, + "step": 113000 + }, + { + "epoch": 0.91, + "learning_rate": 4.9546e-05, + "loss": 8.777, + "step": 113500 + }, + { + "epoch": 0.91, + "learning_rate": 4.9544e-05, + "loss": 8.7138, + "step": 114000 + }, + { + "epoch": 0.92, + "learning_rate": 4.954200000000001e-05, + "loss": 8.745, + "step": 114500 + }, + { + "epoch": 0.92, + "learning_rate": 4.9540000000000003e-05, + "loss": 8.7519, + "step": 115000 + }, + { + "epoch": 0.92, + "learning_rate": 4.9538e-05, + "loss": 8.7538, + "step": 115500 + }, + { + "epoch": 0.93, + "learning_rate": 4.9536e-05, + "loss": 8.7966, + "step": 116000 + }, + { + "epoch": 0.93, + "learning_rate": 4.9534000000000004e-05, + "loss": 8.7846, + "step": 116500 + }, + { + "epoch": 0.94, + "learning_rate": 4.9532e-05, + "loss": 8.739, + "step": 117000 + }, + { + "epoch": 0.94, + "learning_rate": 4.953e-05, + "loss": 8.7326, + "step": 117500 + }, + { + "epoch": 0.94, + "learning_rate": 4.9528000000000004e-05, + "loss": 8.7237, + "step": 118000 + }, + { + "epoch": 0.95, + "learning_rate": 4.9526e-05, + "loss": 8.7634, + "step": 118500 + }, + { + "epoch": 0.95, + "learning_rate": 4.9524e-05, + "loss": 8.7499, + "step": 119000 + }, + { + "epoch": 0.96, + "learning_rate": 4.9522000000000005e-05, + "loss": 8.7802, + "step": 119500 + }, + { + "epoch": 0.96, + "learning_rate": 4.952e-05, + "loss": 8.7407, + "step": 120000 + }, + { + "epoch": 0.96, + "learning_rate": 4.9518e-05, + "loss": 8.7336, + "step": 120500 + }, + { + "epoch": 0.97, + "learning_rate": 4.9516e-05, + "loss": 8.7446, + "step": 121000 + }, + { + "epoch": 0.97, + "learning_rate": 4.951400000000001e-05, + "loss": 8.7434, + "step": 121500 + }, + { + "epoch": 0.98, + "learning_rate": 4.9512e-05, + "loss": 8.7396, + "step": 122000 + }, + { + "epoch": 0.98, + "learning_rate": 4.951e-05, + "loss": 8.7272, + "step": 122500 + }, + { + "epoch": 0.98, + "learning_rate": 4.9508e-05, + "loss": 8.751, + "step": 123000 + }, + { + "epoch": 0.99, + "learning_rate": 4.9506000000000003e-05, + "loss": 8.7366, + "step": 123500 + }, + { + "epoch": 0.99, + "learning_rate": 4.9504e-05, + "loss": 8.761, + "step": 124000 + }, + { + "epoch": 1.0, + "learning_rate": 4.9502e-05, + "loss": 8.7528, + "step": 124500 + }, + { + "epoch": 1.0, + "learning_rate": 4.9500000000000004e-05, + "loss": 8.77, + "step": 125000 + }, + { + "epoch": 1.0, + "learning_rate": 4.9498e-05, + "loss": 8.7479, + "step": 125500 + }, + { + "epoch": 1.01, + "learning_rate": 4.9496e-05, + "loss": 8.7402, + "step": 126000 + }, + { + "epoch": 1.01, + "learning_rate": 4.9494000000000004e-05, + "loss": 8.7317, + "step": 126500 + }, + { + "epoch": 1.02, + "learning_rate": 4.9492000000000007e-05, + "loss": 8.7582, + "step": 127000 + }, + { + "epoch": 1.02, + "learning_rate": 4.949e-05, + "loss": 8.785, + "step": 127500 + }, + { + "epoch": 1.02, + "learning_rate": 4.9488e-05, + "loss": 8.7487, + "step": 128000 + }, + { + "epoch": 1.03, + "learning_rate": 4.948600000000001e-05, + "loss": 8.7275, + "step": 128500 + }, + { + "epoch": 1.03, + "learning_rate": 4.9484e-05, + "loss": 8.7491, + "step": 129000 + }, + { + "epoch": 1.04, + "learning_rate": 4.9482e-05, + "loss": 8.7415, + "step": 129500 + }, + { + "epoch": 1.04, + "learning_rate": 4.948000000000001e-05, + "loss": 8.7348, + "step": 130000 + }, + { + "epoch": 1.04, + "learning_rate": 4.9478e-05, + "loss": 8.7247, + "step": 130500 + }, + { + "epoch": 1.05, + "learning_rate": 4.9476e-05, + "loss": 8.7298, + "step": 131000 + }, + { + "epoch": 1.05, + "learning_rate": 4.9474e-05, + "loss": 8.7433, + "step": 131500 + }, + { + "epoch": 1.06, + "learning_rate": 4.9472e-05, + "loss": 8.7733, + "step": 132000 + }, + { + "epoch": 1.06, + "learning_rate": 4.947e-05, + "loss": 8.7228, + "step": 132500 + }, + { + "epoch": 1.06, + "learning_rate": 4.9468e-05, + "loss": 8.7441, + "step": 133000 + }, + { + "epoch": 1.07, + "learning_rate": 4.9466000000000004e-05, + "loss": 8.7274, + "step": 133500 + }, + { + "epoch": 1.07, + "learning_rate": 4.9464000000000006e-05, + "loss": 8.7556, + "step": 134000 + }, + { + "epoch": 1.08, + "learning_rate": 4.9462e-05, + "loss": 8.7592, + "step": 134500 + }, + { + "epoch": 1.08, + "learning_rate": 4.946e-05, + "loss": 8.7349, + "step": 135000 + }, + { + "epoch": 1.08, + "learning_rate": 4.9458000000000007e-05, + "loss": 8.7432, + "step": 135500 + }, + { + "epoch": 1.09, + "learning_rate": 4.9456e-05, + "loss": 8.7744, + "step": 136000 + }, + { + "epoch": 1.09, + "learning_rate": 4.9454e-05, + "loss": 8.7506, + "step": 136500 + }, + { + "epoch": 1.1, + "learning_rate": 4.945200000000001e-05, + "loss": 8.7566, + "step": 137000 + }, + { + "epoch": 1.1, + "learning_rate": 4.945e-05, + "loss": 8.7312, + "step": 137500 + }, + { + "epoch": 1.1, + "learning_rate": 4.9448e-05, + "loss": 8.7084, + "step": 138000 + }, + { + "epoch": 1.11, + "learning_rate": 4.9446e-05, + "loss": 8.7298, + "step": 138500 + }, + { + "epoch": 1.11, + "learning_rate": 4.9444e-05, + "loss": 8.7195, + "step": 139000 + }, + { + "epoch": 1.12, + "learning_rate": 4.9442000000000005e-05, + "loss": 8.7922, + "step": 139500 + }, + { + "epoch": 1.12, + "learning_rate": 4.944e-05, + "loss": 8.7165, + "step": 140000 + }, + { + "epoch": 1.12, + "learning_rate": 4.9438e-05, + "loss": 8.7332, + "step": 140500 + }, + { + "epoch": 1.13, + "learning_rate": 4.9436000000000006e-05, + "loss": 8.7391, + "step": 141000 + }, + { + "epoch": 1.13, + "learning_rate": 4.9434e-05, + "loss": 8.7532, + "step": 141500 + }, + { + "epoch": 1.14, + "learning_rate": 4.9432000000000004e-05, + "loss": 8.7782, + "step": 142000 + }, + { + "epoch": 1.14, + "learning_rate": 4.9430000000000006e-05, + "loss": 8.7489, + "step": 142500 + }, + { + "epoch": 1.14, + "learning_rate": 4.9428e-05, + "loss": 8.7545, + "step": 143000 + }, + { + "epoch": 1.15, + "learning_rate": 4.9426e-05, + "loss": 8.7656, + "step": 143500 + }, + { + "epoch": 1.15, + "learning_rate": 4.9424000000000007e-05, + "loss": 8.7302, + "step": 144000 + }, + { + "epoch": 1.16, + "learning_rate": 4.9422e-05, + "loss": 8.7296, + "step": 144500 + }, + { + "epoch": 1.16, + "learning_rate": 4.942e-05, + "loss": 8.7566, + "step": 145000 + }, + { + "epoch": 1.16, + "learning_rate": 4.9418e-05, + "loss": 8.7108, + "step": 145500 + }, + { + "epoch": 1.17, + "learning_rate": 4.9416e-05, + "loss": 8.7323, + "step": 146000 + }, + { + "epoch": 1.17, + "learning_rate": 4.9414000000000005e-05, + "loss": 8.7436, + "step": 146500 + }, + { + "epoch": 1.18, + "learning_rate": 4.9412e-05, + "loss": 8.6866, + "step": 147000 + }, + { + "epoch": 1.18, + "learning_rate": 4.941e-05, + "loss": 8.7285, + "step": 147500 + }, + { + "epoch": 1.18, + "learning_rate": 4.9408000000000005e-05, + "loss": 8.7386, + "step": 148000 + }, + { + "epoch": 1.19, + "learning_rate": 4.9406e-05, + "loss": 8.7394, + "step": 148500 + }, + { + "epoch": 1.19, + "learning_rate": 4.9404e-05, + "loss": 8.7729, + "step": 149000 + }, + { + "epoch": 1.2, + "learning_rate": 4.9402000000000006e-05, + "loss": 8.7637, + "step": 149500 + }, + { + "epoch": 1.2, + "learning_rate": 4.94e-05, + "loss": 8.7381, + "step": 150000 + }, + { + "epoch": 1.2, + "learning_rate": 4.9398e-05, + "loss": 8.7235, + "step": 150500 + }, + { + "epoch": 1.21, + "learning_rate": 4.9396000000000006e-05, + "loss": 8.7198, + "step": 151000 + }, + { + "epoch": 1.21, + "learning_rate": 4.9394e-05, + "loss": 8.7512, + "step": 151500 + }, + { + "epoch": 1.22, + "learning_rate": 4.9392000000000004e-05, + "loss": 8.7068, + "step": 152000 + }, + { + "epoch": 1.22, + "learning_rate": 4.939e-05, + "loss": 8.7282, + "step": 152500 + }, + { + "epoch": 1.22, + "learning_rate": 4.9388e-05, + "loss": 8.732, + "step": 153000 + }, + { + "epoch": 1.23, + "learning_rate": 4.9386000000000005e-05, + "loss": 8.7478, + "step": 153500 + }, + { + "epoch": 1.23, + "learning_rate": 4.9384e-05, + "loss": 8.7374, + "step": 154000 + }, + { + "epoch": 1.24, + "learning_rate": 4.9382e-05, + "loss": 8.7333, + "step": 154500 + }, + { + "epoch": 1.24, + "learning_rate": 4.9380000000000005e-05, + "loss": 8.7092, + "step": 155000 + }, + { + "epoch": 1.24, + "learning_rate": 4.9378e-05, + "loss": 8.7646, + "step": 155500 + }, + { + "epoch": 1.25, + "learning_rate": 4.9376e-05, + "loss": 8.7004, + "step": 156000 + }, + { + "epoch": 1.25, + "learning_rate": 4.9374000000000005e-05, + "loss": 8.7022, + "step": 156500 + }, + { + "epoch": 1.26, + "learning_rate": 4.9372e-05, + "loss": 8.7469, + "step": 157000 + }, + { + "epoch": 1.26, + "learning_rate": 4.937e-05, + "loss": 8.7383, + "step": 157500 + }, + { + "epoch": 1.26, + "learning_rate": 4.9368000000000006e-05, + "loss": 8.7374, + "step": 158000 + }, + { + "epoch": 1.27, + "learning_rate": 4.9366e-05, + "loss": 8.7537, + "step": 158500 + }, + { + "epoch": 1.27, + "learning_rate": 4.9364000000000004e-05, + "loss": 8.7513, + "step": 159000 + }, + { + "epoch": 1.28, + "learning_rate": 4.9362e-05, + "loss": 8.7303, + "step": 159500 + }, + { + "epoch": 1.28, + "learning_rate": 4.936e-05, + "loss": 8.734, + "step": 160000 + }, + { + "epoch": 1.28, + "learning_rate": 4.9358000000000004e-05, + "loss": 8.7497, + "step": 160500 + }, + { + "epoch": 1.29, + "learning_rate": 4.9356e-05, + "loss": 8.7377, + "step": 161000 + }, + { + "epoch": 1.29, + "learning_rate": 4.9354e-05, + "loss": 8.7308, + "step": 161500 + }, + { + "epoch": 1.3, + "learning_rate": 4.9352000000000005e-05, + "loss": 8.7586, + "step": 162000 + }, + { + "epoch": 1.3, + "learning_rate": 4.935e-05, + "loss": 8.744, + "step": 162500 + }, + { + "epoch": 1.3, + "learning_rate": 4.9348e-05, + "loss": 8.7429, + "step": 163000 + }, + { + "epoch": 1.31, + "learning_rate": 4.9346000000000005e-05, + "loss": 8.7083, + "step": 163500 + }, + { + "epoch": 1.31, + "learning_rate": 4.9344e-05, + "loss": 8.7558, + "step": 164000 + }, + { + "epoch": 1.32, + "learning_rate": 4.9342e-05, + "loss": 8.7276, + "step": 164500 + }, + { + "epoch": 1.32, + "learning_rate": 4.9340000000000005e-05, + "loss": 8.7388, + "step": 165000 + }, + { + "epoch": 1.32, + "learning_rate": 4.9338e-05, + "loss": 8.7236, + "step": 165500 + }, + { + "epoch": 1.33, + "learning_rate": 4.9336e-05, + "loss": 8.6968, + "step": 166000 + }, + { + "epoch": 1.33, + "learning_rate": 4.9334000000000006e-05, + "loss": 8.738, + "step": 166500 + }, + { + "epoch": 1.34, + "learning_rate": 4.9332e-05, + "loss": 8.6997, + "step": 167000 + }, + { + "epoch": 1.34, + "learning_rate": 4.9330000000000004e-05, + "loss": 8.7297, + "step": 167500 + }, + { + "epoch": 1.34, + "learning_rate": 4.9328e-05, + "loss": 8.7298, + "step": 168000 + }, + { + "epoch": 1.35, + "learning_rate": 4.9326e-05, + "loss": 8.7035, + "step": 168500 + }, + { + "epoch": 1.35, + "learning_rate": 4.9324000000000004e-05, + "loss": 8.7204, + "step": 169000 + }, + { + "epoch": 1.36, + "learning_rate": 4.9322e-05, + "loss": 8.7452, + "step": 169500 + }, + { + "epoch": 1.36, + "learning_rate": 4.932e-05, + "loss": 8.6998, + "step": 170000 + }, + { + "epoch": 1.36, + "learning_rate": 4.9318000000000005e-05, + "loss": 8.7377, + "step": 170500 + }, + { + "epoch": 1.37, + "learning_rate": 4.9316e-05, + "loss": 8.7331, + "step": 171000 + }, + { + "epoch": 1.37, + "learning_rate": 4.9314e-05, + "loss": 8.7163, + "step": 171500 + }, + { + "epoch": 1.38, + "learning_rate": 4.9312000000000005e-05, + "loss": 8.7268, + "step": 172000 + }, + { + "epoch": 1.38, + "learning_rate": 4.931e-05, + "loss": 8.6967, + "step": 172500 + }, + { + "epoch": 1.38, + "learning_rate": 4.9308e-05, + "loss": 8.7252, + "step": 173000 + }, + { + "epoch": 1.39, + "learning_rate": 4.9306000000000005e-05, + "loss": 8.7349, + "step": 173500 + }, + { + "epoch": 1.39, + "learning_rate": 4.9304e-05, + "loss": 8.7176, + "step": 174000 + }, + { + "epoch": 1.4, + "learning_rate": 4.9302e-05, + "loss": 8.7122, + "step": 174500 + }, + { + "epoch": 1.4, + "learning_rate": 4.93e-05, + "loss": 8.7241, + "step": 175000 + }, + { + "epoch": 1.4, + "learning_rate": 4.9298e-05, + "loss": 8.7116, + "step": 175500 + }, + { + "epoch": 1.41, + "learning_rate": 4.9296000000000004e-05, + "loss": 8.7077, + "step": 176000 + }, + { + "epoch": 1.41, + "learning_rate": 4.9294e-05, + "loss": 8.7184, + "step": 176500 + }, + { + "epoch": 1.42, + "learning_rate": 4.9292e-05, + "loss": 8.7507, + "step": 177000 + }, + { + "epoch": 1.42, + "learning_rate": 4.9290000000000004e-05, + "loss": 8.7461, + "step": 177500 + }, + { + "epoch": 1.42, + "learning_rate": 4.9288e-05, + "loss": 8.7107, + "step": 178000 + }, + { + "epoch": 1.43, + "learning_rate": 4.9286e-05, + "loss": 8.7484, + "step": 178500 + }, + { + "epoch": 1.43, + "learning_rate": 4.9284000000000005e-05, + "loss": 8.7338, + "step": 179000 + }, + { + "epoch": 1.44, + "learning_rate": 4.9282e-05, + "loss": 8.7505, + "step": 179500 + }, + { + "epoch": 1.44, + "learning_rate": 4.928e-05, + "loss": 8.6856, + "step": 180000 + }, + { + "epoch": 1.44, + "learning_rate": 4.9278000000000005e-05, + "loss": 8.7148, + "step": 180500 + }, + { + "epoch": 1.45, + "learning_rate": 4.9276e-05, + "loss": 8.727, + "step": 181000 + }, + { + "epoch": 1.45, + "learning_rate": 4.9274e-05, + "loss": 8.7365, + "step": 181500 + }, + { + "epoch": 1.46, + "learning_rate": 4.9272e-05, + "loss": 8.7123, + "step": 182000 + }, + { + "epoch": 1.46, + "learning_rate": 4.927000000000001e-05, + "loss": 8.7501, + "step": 182500 + }, + { + "epoch": 1.46, + "learning_rate": 4.9268e-05, + "loss": 8.7205, + "step": 183000 + }, + { + "epoch": 1.47, + "learning_rate": 4.9266e-05, + "loss": 8.7338, + "step": 183500 + }, + { + "epoch": 1.47, + "learning_rate": 4.9264e-05, + "loss": 8.725, + "step": 184000 + }, + { + "epoch": 1.48, + "learning_rate": 4.9262000000000004e-05, + "loss": 8.7439, + "step": 184500 + }, + { + "epoch": 1.48, + "learning_rate": 4.926e-05, + "loss": 8.7272, + "step": 185000 + }, + { + "epoch": 1.48, + "learning_rate": 4.9258e-05, + "loss": 8.711, + "step": 185500 + }, + { + "epoch": 1.49, + "learning_rate": 4.9256000000000004e-05, + "loss": 8.7415, + "step": 186000 + }, + { + "epoch": 1.49, + "learning_rate": 4.9254e-05, + "loss": 8.7222, + "step": 186500 + }, + { + "epoch": 1.5, + "learning_rate": 4.9252e-05, + "loss": 8.6877, + "step": 187000 + }, + { + "epoch": 1.5, + "learning_rate": 4.9250000000000004e-05, + "loss": 8.718, + "step": 187500 + }, + { + "epoch": 1.5, + "learning_rate": 4.9248e-05, + "loss": 8.7498, + "step": 188000 + }, + { + "epoch": 1.51, + "learning_rate": 4.9246e-05, + "loss": 8.7576, + "step": 188500 + }, + { + "epoch": 1.51, + "learning_rate": 4.9244e-05, + "loss": 8.7261, + "step": 189000 + }, + { + "epoch": 1.52, + "learning_rate": 4.924200000000001e-05, + "loss": 8.6984, + "step": 189500 + }, + { + "epoch": 1.52, + "learning_rate": 4.924e-05, + "loss": 8.7172, + "step": 190000 + }, + { + "epoch": 1.52, + "learning_rate": 4.9238e-05, + "loss": 8.7044, + "step": 190500 + }, + { + "epoch": 1.53, + "learning_rate": 4.923600000000001e-05, + "loss": 8.7316, + "step": 191000 + }, + { + "epoch": 1.53, + "learning_rate": 4.9234e-05, + "loss": 8.7253, + "step": 191500 + }, + { + "epoch": 1.54, + "learning_rate": 4.9232e-05, + "loss": 8.7278, + "step": 192000 + }, + { + "epoch": 1.54, + "learning_rate": 4.923e-05, + "loss": 8.7171, + "step": 192500 + }, + { + "epoch": 1.54, + "learning_rate": 4.9228000000000004e-05, + "loss": 8.7442, + "step": 193000 + }, + { + "epoch": 1.55, + "learning_rate": 4.9226e-05, + "loss": 8.7353, + "step": 193500 + }, + { + "epoch": 1.55, + "learning_rate": 4.9224e-05, + "loss": 8.7023, + "step": 194000 + }, + { + "epoch": 1.56, + "learning_rate": 4.9222000000000004e-05, + "loss": 8.6957, + "step": 194500 + }, + { + "epoch": 1.56, + "learning_rate": 4.9220000000000006e-05, + "loss": 8.7123, + "step": 195000 + }, + { + "epoch": 1.56, + "learning_rate": 4.9218e-05, + "loss": 8.7047, + "step": 195500 + }, + { + "epoch": 1.57, + "learning_rate": 4.9216e-05, + "loss": 8.7271, + "step": 196000 + }, + { + "epoch": 1.57, + "learning_rate": 4.921400000000001e-05, + "loss": 8.7182, + "step": 196500 + }, + { + "epoch": 1.58, + "learning_rate": 4.9212e-05, + "loss": 8.6991, + "step": 197000 + }, + { + "epoch": 1.58, + "learning_rate": 4.921e-05, + "loss": 8.695, + "step": 197500 + }, + { + "epoch": 1.58, + "learning_rate": 4.920800000000001e-05, + "loss": 8.7277, + "step": 198000 + }, + { + "epoch": 1.59, + "learning_rate": 4.9206e-05, + "loss": 8.7258, + "step": 198500 + }, + { + "epoch": 1.59, + "learning_rate": 4.9204e-05, + "loss": 8.7348, + "step": 199000 + }, + { + "epoch": 1.6, + "learning_rate": 4.9202e-05, + "loss": 8.7168, + "step": 199500 + }, + { + "epoch": 1.6, + "learning_rate": 4.92e-05, + "loss": 8.7074, + "step": 200000 + }, + { + "epoch": 1.6, + "learning_rate": 4.9198e-05, + "loss": 8.7199, + "step": 200500 + }, + { + "epoch": 1.61, + "learning_rate": 4.9196e-05, + "loss": 8.7036, + "step": 201000 + }, + { + "epoch": 1.61, + "learning_rate": 4.9194000000000004e-05, + "loss": 8.6906, + "step": 201500 + }, + { + "epoch": 1.62, + "learning_rate": 4.9192000000000006e-05, + "loss": 8.7086, + "step": 202000 + }, + { + "epoch": 1.62, + "learning_rate": 4.919e-05, + "loss": 8.708, + "step": 202500 + }, + { + "epoch": 1.62, + "learning_rate": 4.9188000000000004e-05, + "loss": 8.7123, + "step": 203000 + }, + { + "epoch": 1.63, + "learning_rate": 4.9186000000000006e-05, + "loss": 8.7107, + "step": 203500 + }, + { + "epoch": 1.63, + "learning_rate": 4.9184e-05, + "loss": 8.7279, + "step": 204000 + }, + { + "epoch": 1.64, + "learning_rate": 4.9182e-05, + "loss": 8.7128, + "step": 204500 + }, + { + "epoch": 1.64, + "learning_rate": 4.918000000000001e-05, + "loss": 8.7156, + "step": 205000 + }, + { + "epoch": 1.64, + "learning_rate": 4.9178e-05, + "loss": 8.7054, + "step": 205500 + }, + { + "epoch": 1.65, + "learning_rate": 4.9176e-05, + "loss": 8.6972, + "step": 206000 + }, + { + "epoch": 1.65, + "learning_rate": 4.9174e-05, + "loss": 8.7417, + "step": 206500 + }, + { + "epoch": 1.66, + "learning_rate": 4.9172e-05, + "loss": 8.7225, + "step": 207000 + }, + { + "epoch": 1.66, + "learning_rate": 4.9170000000000005e-05, + "loss": 8.6858, + "step": 207500 + }, + { + "epoch": 1.66, + "learning_rate": 4.9168e-05, + "loss": 8.7073, + "step": 208000 + }, + { + "epoch": 1.67, + "learning_rate": 4.9166e-05, + "loss": 8.7324, + "step": 208500 + }, + { + "epoch": 1.67, + "learning_rate": 4.9164000000000006e-05, + "loss": 8.7009, + "step": 209000 + }, + { + "epoch": 1.68, + "learning_rate": 4.9162e-05, + "loss": 8.7222, + "step": 209500 + }, + { + "epoch": 1.68, + "learning_rate": 4.9160000000000004e-05, + "loss": 8.7189, + "step": 210000 + }, + { + "epoch": 1.68, + "learning_rate": 4.9158000000000006e-05, + "loss": 8.6845, + "step": 210500 + }, + { + "epoch": 1.69, + "learning_rate": 4.9156e-05, + "loss": 8.7141, + "step": 211000 + }, + { + "epoch": 1.69, + "learning_rate": 4.9154e-05, + "loss": 8.6976, + "step": 211500 + }, + { + "epoch": 1.7, + "learning_rate": 4.9152000000000006e-05, + "loss": 8.7004, + "step": 212000 + }, + { + "epoch": 1.7, + "learning_rate": 4.915e-05, + "loss": 8.7042, + "step": 212500 + }, + { + "epoch": 1.7, + "learning_rate": 4.9148e-05, + "loss": 8.7278, + "step": 213000 + }, + { + "epoch": 1.71, + "learning_rate": 4.9146e-05, + "loss": 8.7039, + "step": 213500 + }, + { + "epoch": 1.71, + "learning_rate": 4.9144e-05, + "loss": 8.7104, + "step": 214000 + }, + { + "epoch": 1.72, + "learning_rate": 4.9142000000000005e-05, + "loss": 8.6993, + "step": 214500 + }, + { + "epoch": 1.72, + "learning_rate": 4.914e-05, + "loss": 8.7215, + "step": 215000 + }, + { + "epoch": 1.72, + "learning_rate": 4.9138e-05, + "loss": 8.7066, + "step": 215500 + }, + { + "epoch": 1.73, + "learning_rate": 4.9136000000000005e-05, + "loss": 8.6924, + "step": 216000 + }, + { + "epoch": 1.73, + "learning_rate": 4.9134e-05, + "loss": 8.6996, + "step": 216500 + }, + { + "epoch": 1.74, + "learning_rate": 4.9132e-05, + "loss": 8.7252, + "step": 217000 + }, + { + "epoch": 1.74, + "learning_rate": 4.9130000000000006e-05, + "loss": 8.698, + "step": 217500 + }, + { + "epoch": 1.74, + "learning_rate": 4.9128e-05, + "loss": 8.7188, + "step": 218000 + }, + { + "epoch": 1.75, + "learning_rate": 4.9126e-05, + "loss": 8.7053, + "step": 218500 + }, + { + "epoch": 1.75, + "learning_rate": 4.9124000000000006e-05, + "loss": 8.7161, + "step": 219000 + }, + { + "epoch": 1.76, + "learning_rate": 4.9122e-05, + "loss": 8.711, + "step": 219500 + }, + { + "epoch": 1.76, + "learning_rate": 4.9120000000000004e-05, + "loss": 8.6989, + "step": 220000 + }, + { + "epoch": 1.76, + "learning_rate": 4.9118e-05, + "loss": 8.6944, + "step": 220500 + }, + { + "epoch": 1.77, + "learning_rate": 4.9116e-05, + "loss": 8.7141, + "step": 221000 + }, + { + "epoch": 1.77, + "learning_rate": 4.9114000000000004e-05, + "loss": 8.7241, + "step": 221500 + }, + { + "epoch": 1.78, + "learning_rate": 4.9112e-05, + "loss": 8.736, + "step": 222000 + }, + { + "epoch": 1.78, + "learning_rate": 4.911e-05, + "loss": 8.6733, + "step": 222500 + }, + { + "epoch": 1.78, + "learning_rate": 4.9108000000000005e-05, + "loss": 8.7222, + "step": 223000 + }, + { + "epoch": 1.79, + "learning_rate": 4.9106e-05, + "loss": 8.6865, + "step": 223500 + }, + { + "epoch": 1.79, + "learning_rate": 4.9104e-05, + "loss": 8.7143, + "step": 224000 + }, + { + "epoch": 1.8, + "learning_rate": 4.9102000000000005e-05, + "loss": 8.7387, + "step": 224500 + }, + { + "epoch": 1.8, + "learning_rate": 4.91e-05, + "loss": 8.6927, + "step": 225000 + }, + { + "epoch": 1.8, + "learning_rate": 4.9098e-05, + "loss": 8.7262, + "step": 225500 + }, + { + "epoch": 1.81, + "learning_rate": 4.9096000000000006e-05, + "loss": 8.7573, + "step": 226000 + }, + { + "epoch": 1.81, + "learning_rate": 4.9094e-05, + "loss": 8.7058, + "step": 226500 + }, + { + "epoch": 1.82, + "learning_rate": 4.9092000000000004e-05, + "loss": 8.7144, + "step": 227000 + }, + { + "epoch": 1.82, + "learning_rate": 4.9090000000000006e-05, + "loss": 8.6902, + "step": 227500 + }, + { + "epoch": 1.82, + "learning_rate": 4.9088e-05, + "loss": 8.7049, + "step": 228000 + }, + { + "epoch": 1.83, + "learning_rate": 4.9086000000000004e-05, + "loss": 8.7139, + "step": 228500 + }, + { + "epoch": 1.83, + "learning_rate": 4.9084e-05, + "loss": 8.7206, + "step": 229000 + }, + { + "epoch": 1.84, + "learning_rate": 4.9082e-05, + "loss": 8.7224, + "step": 229500 + }, + { + "epoch": 1.84, + "learning_rate": 4.9080000000000004e-05, + "loss": 8.7396, + "step": 230000 + }, + { + "epoch": 1.84, + "learning_rate": 4.9078e-05, + "loss": 8.7352, + "step": 230500 + }, + { + "epoch": 1.85, + "learning_rate": 4.9076e-05, + "loss": 8.7382, + "step": 231000 + }, + { + "epoch": 1.85, + "learning_rate": 4.9074000000000005e-05, + "loss": 8.7159, + "step": 231500 + }, + { + "epoch": 1.86, + "learning_rate": 4.9072e-05, + "loss": 8.6957, + "step": 232000 + }, + { + "epoch": 1.86, + "learning_rate": 4.907e-05, + "loss": 8.7114, + "step": 232500 + }, + { + "epoch": 1.86, + "learning_rate": 4.9068000000000005e-05, + "loss": 8.7356, + "step": 233000 + }, + { + "epoch": 1.87, + "learning_rate": 4.9066e-05, + "loss": 8.692, + "step": 233500 + }, + { + "epoch": 1.87, + "learning_rate": 4.9064e-05, + "loss": 8.7049, + "step": 234000 + }, + { + "epoch": 1.88, + "learning_rate": 4.9062000000000006e-05, + "loss": 8.7047, + "step": 234500 + }, + { + "epoch": 1.88, + "learning_rate": 4.906e-05, + "loss": 8.7047, + "step": 235000 + }, + { + "epoch": 1.88, + "learning_rate": 4.9058000000000004e-05, + "loss": 8.6655, + "step": 235500 + }, + { + "epoch": 1.89, + "learning_rate": 4.9056e-05, + "loss": 8.7262, + "step": 236000 + }, + { + "epoch": 1.89, + "learning_rate": 4.9054e-05, + "loss": 8.7033, + "step": 236500 + }, + { + "epoch": 1.9, + "learning_rate": 4.9052000000000004e-05, + "loss": 8.7357, + "step": 237000 + }, + { + "epoch": 1.9, + "learning_rate": 4.905e-05, + "loss": 8.719, + "step": 237500 + }, + { + "epoch": 1.9, + "learning_rate": 4.9048e-05, + "loss": 8.6847, + "step": 238000 + }, + { + "epoch": 1.91, + "learning_rate": 4.9046000000000004e-05, + "loss": 8.7089, + "step": 238500 + }, + { + "epoch": 1.91, + "learning_rate": 4.9044e-05, + "loss": 8.7355, + "step": 239000 + }, + { + "epoch": 1.92, + "learning_rate": 4.9042e-05, + "loss": 8.7264, + "step": 239500 + }, + { + "epoch": 1.92, + "learning_rate": 4.9040000000000005e-05, + "loss": 8.7056, + "step": 240000 + }, + { + "epoch": 1.92, + "learning_rate": 4.9038e-05, + "loss": 8.7343, + "step": 240500 + }, + { + "epoch": 1.93, + "learning_rate": 4.9036e-05, + "loss": 8.682, + "step": 241000 + }, + { + "epoch": 1.93, + "learning_rate": 4.9034000000000005e-05, + "loss": 8.717, + "step": 241500 + }, + { + "epoch": 1.94, + "learning_rate": 4.9032e-05, + "loss": 8.7235, + "step": 242000 + }, + { + "epoch": 1.94, + "learning_rate": 4.903e-05, + "loss": 8.7069, + "step": 242500 + }, + { + "epoch": 1.94, + "learning_rate": 4.9028e-05, + "loss": 8.7021, + "step": 243000 + }, + { + "epoch": 1.95, + "learning_rate": 4.9026e-05, + "loss": 8.6757, + "step": 243500 + }, + { + "epoch": 1.95, + "learning_rate": 4.9024000000000004e-05, + "loss": 8.7079, + "step": 244000 + }, + { + "epoch": 1.96, + "learning_rate": 4.9022e-05, + "loss": 8.6958, + "step": 244500 + }, + { + "epoch": 1.96, + "learning_rate": 4.902e-05, + "loss": 8.6831, + "step": 245000 + }, + { + "epoch": 1.96, + "learning_rate": 4.9018000000000004e-05, + "loss": 8.7316, + "step": 245500 + }, + { + "epoch": 1.97, + "learning_rate": 4.9016e-05, + "loss": 8.691, + "step": 246000 + }, + { + "epoch": 1.97, + "learning_rate": 4.9014e-05, + "loss": 8.7349, + "step": 246500 + }, + { + "epoch": 1.98, + "learning_rate": 4.9012000000000004e-05, + "loss": 8.7, + "step": 247000 + }, + { + "epoch": 1.98, + "learning_rate": 4.901e-05, + "loss": 8.7117, + "step": 247500 + }, + { + "epoch": 1.98, + "learning_rate": 4.9008e-05, + "loss": 8.7021, + "step": 248000 + }, + { + "epoch": 1.99, + "learning_rate": 4.9006000000000005e-05, + "loss": 8.7323, + "step": 248500 + }, + { + "epoch": 1.99, + "learning_rate": 4.9004e-05, + "loss": 8.7232, + "step": 249000 + }, + { + "epoch": 2.0, + "learning_rate": 4.9002e-05, + "loss": 8.6874, + "step": 249500 + }, + { + "epoch": 2.0, + "learning_rate": 4.9e-05, + "loss": 8.7074, + "step": 250000 + }, + { + "epoch": 2.0, + "learning_rate": 4.899800000000001e-05, + "loss": 8.7001, + "step": 250500 + }, + { + "epoch": 2.01, + "learning_rate": 4.8996e-05, + "loss": 8.7197, + "step": 251000 + }, + { + "epoch": 2.01, + "learning_rate": 4.8994e-05, + "loss": 8.6888, + "step": 251500 + }, + { + "epoch": 2.02, + "learning_rate": 4.8992e-05, + "loss": 8.6867, + "step": 252000 + }, + { + "epoch": 2.02, + "learning_rate": 4.8990000000000004e-05, + "loss": 8.7181, + "step": 252500 + }, + { + "epoch": 2.02, + "learning_rate": 4.8988e-05, + "loss": 8.7285, + "step": 253000 + }, + { + "epoch": 2.03, + "learning_rate": 4.8986e-05, + "loss": 8.6933, + "step": 253500 + }, + { + "epoch": 2.03, + "learning_rate": 4.8984000000000004e-05, + "loss": 8.7063, + "step": 254000 + }, + { + "epoch": 2.04, + "learning_rate": 4.8982e-05, + "loss": 8.6891, + "step": 254500 + }, + { + "epoch": 2.04, + "learning_rate": 4.898e-05, + "loss": 8.7265, + "step": 255000 + }, + { + "epoch": 2.04, + "learning_rate": 4.8978000000000004e-05, + "loss": 8.7078, + "step": 255500 + }, + { + "epoch": 2.05, + "learning_rate": 4.8976e-05, + "loss": 8.7243, + "step": 256000 + }, + { + "epoch": 2.05, + "learning_rate": 4.8974e-05, + "loss": 8.7045, + "step": 256500 + }, + { + "epoch": 2.06, + "learning_rate": 4.8972e-05, + "loss": 8.681, + "step": 257000 + }, + { + "epoch": 2.06, + "learning_rate": 4.897000000000001e-05, + "loss": 8.7018, + "step": 257500 + }, + { + "epoch": 2.06, + "learning_rate": 4.8968e-05, + "loss": 8.6847, + "step": 258000 + }, + { + "epoch": 2.07, + "learning_rate": 4.8966e-05, + "loss": 8.7208, + "step": 258500 + }, + { + "epoch": 2.07, + "learning_rate": 4.896400000000001e-05, + "loss": 8.7038, + "step": 259000 + }, + { + "epoch": 2.08, + "learning_rate": 4.8962e-05, + "loss": 8.7077, + "step": 259500 + }, + { + "epoch": 2.08, + "learning_rate": 4.896e-05, + "loss": 8.7176, + "step": 260000 + }, + { + "epoch": 2.08, + "learning_rate": 4.8958e-05, + "loss": 8.71, + "step": 260500 + }, + { + "epoch": 2.09, + "learning_rate": 4.8956000000000004e-05, + "loss": 8.6918, + "step": 261000 + }, + { + "epoch": 2.09, + "learning_rate": 4.8954e-05, + "loss": 8.6844, + "step": 261500 + }, + { + "epoch": 2.1, + "learning_rate": 4.8952e-05, + "loss": 8.7114, + "step": 262000 + }, + { + "epoch": 2.1, + "learning_rate": 4.8950000000000004e-05, + "loss": 8.6925, + "step": 262500 + }, + { + "epoch": 2.1, + "learning_rate": 4.8948000000000006e-05, + "loss": 8.7158, + "step": 263000 + }, + { + "epoch": 2.11, + "learning_rate": 4.8946e-05, + "loss": 8.7094, + "step": 263500 + }, + { + "epoch": 2.11, + "learning_rate": 4.8944e-05, + "loss": 8.6932, + "step": 264000 + }, + { + "epoch": 2.12, + "learning_rate": 4.894200000000001e-05, + "loss": 8.7112, + "step": 264500 + }, + { + "epoch": 2.12, + "learning_rate": 4.894e-05, + "loss": 8.7103, + "step": 265000 + }, + { + "epoch": 2.12, + "learning_rate": 4.8938e-05, + "loss": 8.7032, + "step": 265500 + }, + { + "epoch": 2.13, + "learning_rate": 4.893600000000001e-05, + "loss": 8.7108, + "step": 266000 + }, + { + "epoch": 2.13, + "learning_rate": 4.8934e-05, + "loss": 8.7083, + "step": 266500 + }, + { + "epoch": 2.14, + "learning_rate": 4.8932e-05, + "loss": 8.734, + "step": 267000 + }, + { + "epoch": 2.14, + "learning_rate": 4.893e-05, + "loss": 8.7262, + "step": 267500 + }, + { + "epoch": 2.14, + "learning_rate": 4.8928e-05, + "loss": 8.7284, + "step": 268000 + }, + { + "epoch": 2.15, + "learning_rate": 4.8926e-05, + "loss": 8.6903, + "step": 268500 + }, + { + "epoch": 2.15, + "learning_rate": 4.8924e-05, + "loss": 8.7144, + "step": 269000 + }, + { + "epoch": 2.16, + "learning_rate": 4.8922000000000004e-05, + "loss": 8.6904, + "step": 269500 + }, + { + "epoch": 2.16, + "learning_rate": 4.8920000000000006e-05, + "loss": 8.6849, + "step": 270000 + }, + { + "epoch": 2.16, + "learning_rate": 4.8918e-05, + "loss": 8.6964, + "step": 270500 + }, + { + "epoch": 2.17, + "learning_rate": 4.8916000000000004e-05, + "loss": 8.7546, + "step": 271000 + }, + { + "epoch": 2.17, + "learning_rate": 4.8914000000000006e-05, + "loss": 8.72, + "step": 271500 + }, + { + "epoch": 2.18, + "learning_rate": 4.8912e-05, + "loss": 8.7185, + "step": 272000 + }, + { + "epoch": 2.18, + "learning_rate": 4.891e-05, + "loss": 8.7199, + "step": 272500 + }, + { + "epoch": 2.18, + "learning_rate": 4.890800000000001e-05, + "loss": 8.7157, + "step": 273000 + }, + { + "epoch": 2.19, + "learning_rate": 4.8906e-05, + "loss": 8.7014, + "step": 273500 + }, + { + "epoch": 2.19, + "learning_rate": 4.8904e-05, + "loss": 8.7041, + "step": 274000 + }, + { + "epoch": 2.2, + "learning_rate": 4.8902e-05, + "loss": 8.695, + "step": 274500 + }, + { + "epoch": 2.2, + "learning_rate": 4.89e-05, + "loss": 8.7169, + "step": 275000 + }, + { + "epoch": 2.2, + "learning_rate": 4.8898000000000005e-05, + "loss": 8.7167, + "step": 275500 + }, + { + "epoch": 2.21, + "learning_rate": 4.8896e-05, + "loss": 8.6953, + "step": 276000 + }, + { + "epoch": 2.21, + "learning_rate": 4.8894e-05, + "loss": 8.7152, + "step": 276500 + }, + { + "epoch": 2.22, + "learning_rate": 4.8892000000000006e-05, + "loss": 8.71, + "step": 277000 + }, + { + "epoch": 2.22, + "learning_rate": 4.889e-05, + "loss": 8.7259, + "step": 277500 + }, + { + "epoch": 2.22, + "learning_rate": 4.8888000000000004e-05, + "loss": 8.698, + "step": 278000 + }, + { + "epoch": 2.23, + "learning_rate": 4.8886000000000006e-05, + "loss": 8.6828, + "step": 278500 + }, + { + "epoch": 2.23, + "learning_rate": 4.8884e-05, + "loss": 8.6984, + "step": 279000 + }, + { + "epoch": 2.24, + "learning_rate": 4.8882e-05, + "loss": 8.7121, + "step": 279500 + }, + { + "epoch": 2.24, + "learning_rate": 4.8880000000000006e-05, + "loss": 8.6799, + "step": 280000 + }, + { + "epoch": 2.24, + "learning_rate": 4.8878e-05, + "loss": 8.7074, + "step": 280500 + }, + { + "epoch": 2.25, + "learning_rate": 4.8876e-05, + "loss": 8.6898, + "step": 281000 + }, + { + "epoch": 2.25, + "learning_rate": 4.8874e-05, + "loss": 8.7204, + "step": 281500 + }, + { + "epoch": 2.26, + "learning_rate": 4.8872e-05, + "loss": 8.6998, + "step": 282000 + }, + { + "epoch": 2.26, + "learning_rate": 4.8870000000000005e-05, + "loss": 8.7008, + "step": 282500 + }, + { + "epoch": 2.26, + "learning_rate": 4.8868e-05, + "loss": 8.6955, + "step": 283000 + }, + { + "epoch": 2.27, + "learning_rate": 4.8866e-05, + "loss": 8.6972, + "step": 283500 + }, + { + "epoch": 2.27, + "learning_rate": 4.8864000000000005e-05, + "loss": 8.7064, + "step": 284000 + }, + { + "epoch": 2.28, + "learning_rate": 4.8862e-05, + "loss": 8.6976, + "step": 284500 + }, + { + "epoch": 2.28, + "learning_rate": 4.886e-05, + "loss": 8.6878, + "step": 285000 + }, + { + "epoch": 2.28, + "learning_rate": 4.8858000000000006e-05, + "loss": 8.7264, + "step": 285500 + }, + { + "epoch": 2.29, + "learning_rate": 4.8856e-05, + "loss": 8.6855, + "step": 286000 + }, + { + "epoch": 2.29, + "learning_rate": 4.8854e-05, + "loss": 8.7167, + "step": 286500 + }, + { + "epoch": 2.3, + "learning_rate": 4.8852000000000006e-05, + "loss": 8.7011, + "step": 287000 + }, + { + "epoch": 2.3, + "learning_rate": 4.885e-05, + "loss": 8.6987, + "step": 287500 + }, + { + "epoch": 2.3, + "learning_rate": 4.8848000000000004e-05, + "loss": 8.678, + "step": 288000 + }, + { + "epoch": 2.31, + "learning_rate": 4.8846e-05, + "loss": 8.7051, + "step": 288500 + }, + { + "epoch": 2.31, + "learning_rate": 4.8844e-05, + "loss": 8.7251, + "step": 289000 + }, + { + "epoch": 2.32, + "learning_rate": 4.8842000000000004e-05, + "loss": 8.6853, + "step": 289500 + }, + { + "epoch": 2.32, + "learning_rate": 4.884e-05, + "loss": 8.6958, + "step": 290000 + }, + { + "epoch": 2.32, + "learning_rate": 4.8838e-05, + "loss": 8.7028, + "step": 290500 + }, + { + "epoch": 2.33, + "learning_rate": 4.8836000000000005e-05, + "loss": 8.698, + "step": 291000 + }, + { + "epoch": 2.33, + "learning_rate": 4.8834e-05, + "loss": 8.7032, + "step": 291500 + }, + { + "epoch": 2.34, + "learning_rate": 4.8832e-05, + "loss": 8.7055, + "step": 292000 + }, + { + "epoch": 2.34, + "learning_rate": 4.8830000000000005e-05, + "loss": 8.7102, + "step": 292500 + }, + { + "epoch": 2.34, + "learning_rate": 4.8828e-05, + "loss": 8.6988, + "step": 293000 + }, + { + "epoch": 2.35, + "learning_rate": 4.8826e-05, + "loss": 8.7191, + "step": 293500 + }, + { + "epoch": 2.35, + "learning_rate": 4.8824000000000006e-05, + "loss": 8.7392, + "step": 294000 + }, + { + "epoch": 2.36, + "learning_rate": 4.8822e-05, + "loss": 8.7151, + "step": 294500 + }, + { + "epoch": 2.36, + "learning_rate": 4.8820000000000004e-05, + "loss": 8.6793, + "step": 295000 + }, + { + "epoch": 2.36, + "learning_rate": 4.8818000000000006e-05, + "loss": 8.677, + "step": 295500 + }, + { + "epoch": 2.37, + "learning_rate": 4.8816e-05, + "loss": 8.7137, + "step": 296000 + }, + { + "epoch": 2.37, + "learning_rate": 4.8814000000000004e-05, + "loss": 8.6906, + "step": 296500 + }, + { + "epoch": 2.38, + "learning_rate": 4.8812e-05, + "loss": 8.6967, + "step": 297000 + }, + { + "epoch": 2.38, + "learning_rate": 4.881e-05, + "loss": 8.6804, + "step": 297500 + }, + { + "epoch": 2.38, + "learning_rate": 4.8808000000000004e-05, + "loss": 8.7055, + "step": 298000 + }, + { + "epoch": 2.39, + "learning_rate": 4.8806e-05, + "loss": 8.7177, + "step": 298500 + }, + { + "epoch": 2.39, + "learning_rate": 4.8804e-05, + "loss": 8.7065, + "step": 299000 + }, + { + "epoch": 2.4, + "learning_rate": 4.8802000000000005e-05, + "loss": 8.7076, + "step": 299500 + }, + { + "epoch": 2.4, + "learning_rate": 4.88e-05, + "loss": 8.7121, + "step": 300000 + }, + { + "epoch": 2.4, + "learning_rate": 4.8798e-05, + "loss": 8.7141, + "step": 300500 + }, + { + "epoch": 2.41, + "learning_rate": 4.8796000000000005e-05, + "loss": 8.6999, + "step": 301000 + }, + { + "epoch": 2.41, + "learning_rate": 4.8794e-05, + "loss": 8.6949, + "step": 301500 + }, + { + "epoch": 2.42, + "learning_rate": 4.8792e-05, + "loss": 8.6949, + "step": 302000 + }, + { + "epoch": 2.42, + "learning_rate": 4.8790000000000006e-05, + "loss": 8.7159, + "step": 302500 + }, + { + "epoch": 2.42, + "learning_rate": 4.8788e-05, + "loss": 8.7052, + "step": 303000 + }, + { + "epoch": 2.43, + "learning_rate": 4.8786000000000004e-05, + "loss": 8.6986, + "step": 303500 + }, + { + "epoch": 2.43, + "learning_rate": 4.8784e-05, + "loss": 8.7041, + "step": 304000 + }, + { + "epoch": 2.44, + "learning_rate": 4.8782e-05, + "loss": 8.7078, + "step": 304500 + }, + { + "epoch": 2.44, + "learning_rate": 4.8780000000000004e-05, + "loss": 8.6994, + "step": 305000 + }, + { + "epoch": 2.44, + "learning_rate": 4.8778e-05, + "loss": 8.6767, + "step": 305500 + }, + { + "epoch": 2.45, + "learning_rate": 4.8776e-05, + "loss": 8.6984, + "step": 306000 + }, + { + "epoch": 2.45, + "learning_rate": 4.8774000000000004e-05, + "loss": 8.7268, + "step": 306500 + }, + { + "epoch": 2.46, + "learning_rate": 4.8772e-05, + "loss": 8.7017, + "step": 307000 + }, + { + "epoch": 2.46, + "learning_rate": 4.877e-05, + "loss": 8.7056, + "step": 307500 + }, + { + "epoch": 2.46, + "learning_rate": 4.8768000000000005e-05, + "loss": 8.7253, + "step": 308000 + }, + { + "epoch": 2.47, + "learning_rate": 4.8766e-05, + "loss": 8.7167, + "step": 308500 + }, + { + "epoch": 2.47, + "learning_rate": 4.8764e-05, + "loss": 8.6715, + "step": 309000 + }, + { + "epoch": 2.48, + "learning_rate": 4.8762000000000005e-05, + "loss": 8.7112, + "step": 309500 + }, + { + "epoch": 2.48, + "learning_rate": 4.876e-05, + "loss": 8.7181, + "step": 310000 + }, + { + "epoch": 2.48, + "learning_rate": 4.8758e-05, + "loss": 8.6818, + "step": 310500 + }, + { + "epoch": 2.49, + "learning_rate": 4.8756e-05, + "loss": 8.6989, + "step": 311000 + }, + { + "epoch": 2.49, + "learning_rate": 4.8754e-05, + "loss": 8.7037, + "step": 311500 + }, + { + "epoch": 2.5, + "learning_rate": 4.8752000000000004e-05, + "loss": 8.6931, + "step": 312000 + }, + { + "epoch": 2.5, + "learning_rate": 4.875e-05, + "loss": 8.7028, + "step": 312500 + }, + { + "epoch": 2.5, + "learning_rate": 4.8748e-05, + "loss": 8.6934, + "step": 313000 + }, + { + "epoch": 2.51, + "learning_rate": 4.8746000000000004e-05, + "loss": 8.7059, + "step": 313500 + }, + { + "epoch": 2.51, + "learning_rate": 4.8744e-05, + "loss": 8.6945, + "step": 314000 + }, + { + "epoch": 2.52, + "learning_rate": 4.8742e-05, + "loss": 8.67, + "step": 314500 + }, + { + "epoch": 2.52, + "learning_rate": 4.8740000000000004e-05, + "loss": 8.6978, + "step": 315000 + }, + { + "epoch": 2.52, + "learning_rate": 4.8738e-05, + "loss": 8.7081, + "step": 315500 + }, + { + "epoch": 2.53, + "learning_rate": 4.8736e-05, + "loss": 8.6743, + "step": 316000 + }, + { + "epoch": 2.53, + "learning_rate": 4.8734000000000005e-05, + "loss": 8.7259, + "step": 316500 + }, + { + "epoch": 2.54, + "learning_rate": 4.8732e-05, + "loss": 8.7153, + "step": 317000 + }, + { + "epoch": 2.54, + "learning_rate": 4.873e-05, + "loss": 8.7164, + "step": 317500 + }, + { + "epoch": 2.54, + "learning_rate": 4.8728e-05, + "loss": 8.6748, + "step": 318000 + }, + { + "epoch": 2.55, + "learning_rate": 4.872600000000001e-05, + "loss": 8.6927, + "step": 318500 + }, + { + "epoch": 2.55, + "learning_rate": 4.8724e-05, + "loss": 8.7285, + "step": 319000 + }, + { + "epoch": 2.56, + "learning_rate": 4.8722e-05, + "loss": 8.6986, + "step": 319500 + }, + { + "epoch": 2.56, + "learning_rate": 4.872000000000001e-05, + "loss": 8.7152, + "step": 320000 + }, + { + "epoch": 2.56, + "learning_rate": 4.8718000000000003e-05, + "loss": 8.7072, + "step": 320500 + }, + { + "epoch": 2.57, + "learning_rate": 4.8716e-05, + "loss": 8.6971, + "step": 321000 + }, + { + "epoch": 2.57, + "learning_rate": 4.8714e-05, + "loss": 8.714, + "step": 321500 + }, + { + "epoch": 2.58, + "learning_rate": 4.8712000000000004e-05, + "loss": 8.703, + "step": 322000 + }, + { + "epoch": 2.58, + "learning_rate": 4.871e-05, + "loss": 8.7072, + "step": 322500 + }, + { + "epoch": 2.58, + "learning_rate": 4.8708e-05, + "loss": 8.6667, + "step": 323000 + }, + { + "epoch": 2.59, + "learning_rate": 4.8706000000000004e-05, + "loss": 8.7107, + "step": 323500 + }, + { + "epoch": 2.59, + "learning_rate": 4.8704e-05, + "loss": 8.701, + "step": 324000 + }, + { + "epoch": 2.6, + "learning_rate": 4.8702e-05, + "loss": 8.7082, + "step": 324500 + }, + { + "epoch": 2.6, + "learning_rate": 4.87e-05, + "loss": 8.701, + "step": 325000 + }, + { + "epoch": 2.6, + "learning_rate": 4.869800000000001e-05, + "loss": 8.6732, + "step": 325500 + }, + { + "epoch": 2.61, + "learning_rate": 4.8696e-05, + "loss": 8.7238, + "step": 326000 + }, + { + "epoch": 2.61, + "learning_rate": 4.8694e-05, + "loss": 8.7107, + "step": 326500 + }, + { + "epoch": 2.62, + "learning_rate": 4.869200000000001e-05, + "loss": 8.7347, + "step": 327000 + }, + { + "epoch": 2.62, + "learning_rate": 4.869e-05, + "loss": 8.6986, + "step": 327500 + }, + { + "epoch": 2.62, + "learning_rate": 4.8688e-05, + "loss": 8.6949, + "step": 328000 + }, + { + "epoch": 2.63, + "learning_rate": 4.8686e-05, + "loss": 8.6919, + "step": 328500 + }, + { + "epoch": 2.63, + "learning_rate": 4.8684000000000003e-05, + "loss": 8.7026, + "step": 329000 + }, + { + "epoch": 2.64, + "learning_rate": 4.8682e-05, + "loss": 8.6933, + "step": 329500 + }, + { + "epoch": 2.64, + "learning_rate": 4.868e-05, + "loss": 8.7021, + "step": 330000 + }, + { + "epoch": 2.64, + "learning_rate": 4.8678000000000004e-05, + "loss": 8.7339, + "step": 330500 + }, + { + "epoch": 2.65, + "learning_rate": 4.8676000000000006e-05, + "loss": 8.7103, + "step": 331000 + }, + { + "epoch": 2.65, + "learning_rate": 4.8674e-05, + "loss": 8.683, + "step": 331500 + }, + { + "epoch": 2.66, + "learning_rate": 4.8672000000000004e-05, + "loss": 8.707, + "step": 332000 + }, + { + "epoch": 2.66, + "learning_rate": 4.867000000000001e-05, + "loss": 8.713, + "step": 332500 + }, + { + "epoch": 2.66, + "learning_rate": 4.8668e-05, + "loss": 8.7049, + "step": 333000 + }, + { + "epoch": 2.67, + "learning_rate": 4.8666e-05, + "loss": 8.6956, + "step": 333500 + }, + { + "epoch": 2.67, + "learning_rate": 4.866400000000001e-05, + "loss": 8.6998, + "step": 334000 + }, + { + "epoch": 2.68, + "learning_rate": 4.8662e-05, + "loss": 8.7074, + "step": 334500 + }, + { + "epoch": 2.68, + "learning_rate": 4.866e-05, + "loss": 8.7095, + "step": 335000 + }, + { + "epoch": 2.68, + "learning_rate": 4.8658e-05, + "loss": 8.7078, + "step": 335500 + }, + { + "epoch": 2.69, + "learning_rate": 4.8656e-05, + "loss": 8.6895, + "step": 336000 + }, + { + "epoch": 2.69, + "learning_rate": 4.8654e-05, + "loss": 8.6883, + "step": 336500 + }, + { + "epoch": 2.7, + "learning_rate": 4.8652e-05, + "loss": 8.7053, + "step": 337000 + }, + { + "epoch": 2.7, + "learning_rate": 4.8650000000000003e-05, + "loss": 8.6985, + "step": 337500 + }, + { + "epoch": 2.7, + "learning_rate": 4.8648000000000006e-05, + "loss": 8.7011, + "step": 338000 + }, + { + "epoch": 2.71, + "learning_rate": 4.8646e-05, + "loss": 8.6913, + "step": 338500 + }, + { + "epoch": 2.71, + "learning_rate": 4.8644000000000004e-05, + "loss": 8.699, + "step": 339000 + }, + { + "epoch": 2.72, + "learning_rate": 4.8642000000000006e-05, + "loss": 8.7051, + "step": 339500 + }, + { + "epoch": 2.72, + "learning_rate": 4.864e-05, + "loss": 8.6863, + "step": 340000 + }, + { + "epoch": 2.72, + "learning_rate": 4.8638e-05, + "loss": 8.69, + "step": 340500 + }, + { + "epoch": 2.73, + "learning_rate": 4.863600000000001e-05, + "loss": 8.6958, + "step": 341000 + }, + { + "epoch": 2.73, + "learning_rate": 4.8634e-05, + "loss": 8.6927, + "step": 341500 + }, + { + "epoch": 2.74, + "learning_rate": 4.8632e-05, + "loss": 8.6934, + "step": 342000 + }, + { + "epoch": 2.74, + "learning_rate": 4.863e-05, + "loss": 8.7063, + "step": 342500 + }, + { + "epoch": 2.74, + "learning_rate": 4.8628e-05, + "loss": 8.7023, + "step": 343000 + }, + { + "epoch": 2.75, + "learning_rate": 4.8626000000000005e-05, + "loss": 8.7075, + "step": 343500 + }, + { + "epoch": 2.75, + "learning_rate": 4.8624e-05, + "loss": 8.6927, + "step": 344000 + }, + { + "epoch": 2.76, + "learning_rate": 4.8622e-05, + "loss": 8.7182, + "step": 344500 + }, + { + "epoch": 2.76, + "learning_rate": 4.8620000000000005e-05, + "loss": 8.6924, + "step": 345000 + }, + { + "epoch": 2.76, + "learning_rate": 4.8618e-05, + "loss": 8.6963, + "step": 345500 + }, + { + "epoch": 2.77, + "learning_rate": 4.8616000000000003e-05, + "loss": 8.6809, + "step": 346000 + }, + { + "epoch": 2.77, + "learning_rate": 4.8614000000000006e-05, + "loss": 8.6995, + "step": 346500 + }, + { + "epoch": 2.78, + "learning_rate": 4.8612e-05, + "loss": 8.7175, + "step": 347000 + }, + { + "epoch": 2.78, + "learning_rate": 4.861e-05, + "loss": 8.7025, + "step": 347500 + }, + { + "epoch": 2.78, + "learning_rate": 4.8608000000000006e-05, + "loss": 8.7097, + "step": 348000 + }, + { + "epoch": 2.79, + "learning_rate": 4.8606e-05, + "loss": 8.699, + "step": 348500 + }, + { + "epoch": 2.79, + "learning_rate": 4.8604000000000004e-05, + "loss": 8.7232, + "step": 349000 + }, + { + "epoch": 2.8, + "learning_rate": 4.8602e-05, + "loss": 8.713, + "step": 349500 + }, + { + "epoch": 2.8, + "learning_rate": 4.86e-05, + "loss": 8.7031, + "step": 350000 + }, + { + "epoch": 2.8, + "learning_rate": 4.8598000000000005e-05, + "loss": 8.6899, + "step": 350500 + }, + { + "epoch": 2.81, + "learning_rate": 4.8596e-05, + "loss": 8.6683, + "step": 351000 + }, + { + "epoch": 2.81, + "learning_rate": 4.8594e-05, + "loss": 8.6702, + "step": 351500 + }, + { + "epoch": 2.82, + "learning_rate": 4.8592000000000005e-05, + "loss": 8.6919, + "step": 352000 + }, + { + "epoch": 2.82, + "learning_rate": 4.859e-05, + "loss": 8.6994, + "step": 352500 + }, + { + "epoch": 2.82, + "learning_rate": 4.8588e-05, + "loss": 8.7184, + "step": 353000 + }, + { + "epoch": 2.83, + "learning_rate": 4.8586000000000005e-05, + "loss": 8.7062, + "step": 353500 + }, + { + "epoch": 2.83, + "learning_rate": 4.8584e-05, + "loss": 8.7013, + "step": 354000 + }, + { + "epoch": 2.84, + "learning_rate": 4.8582e-05, + "loss": 8.7126, + "step": 354500 + }, + { + "epoch": 2.84, + "learning_rate": 4.8580000000000006e-05, + "loss": 8.6769, + "step": 355000 + }, + { + "epoch": 2.84, + "learning_rate": 4.8578e-05, + "loss": 8.6851, + "step": 355500 + }, + { + "epoch": 2.85, + "learning_rate": 4.8576000000000004e-05, + "loss": 8.6912, + "step": 356000 + }, + { + "epoch": 2.85, + "learning_rate": 4.8574000000000006e-05, + "loss": 8.711, + "step": 356500 + }, + { + "epoch": 2.86, + "learning_rate": 4.8572e-05, + "loss": 8.7133, + "step": 357000 + }, + { + "epoch": 2.86, + "learning_rate": 4.8570000000000004e-05, + "loss": 8.7251, + "step": 357500 + }, + { + "epoch": 2.86, + "learning_rate": 4.8568e-05, + "loss": 8.6906, + "step": 358000 + }, + { + "epoch": 2.87, + "learning_rate": 4.8566e-05, + "loss": 8.688, + "step": 358500 + }, + { + "epoch": 2.87, + "learning_rate": 4.8564000000000005e-05, + "loss": 8.7029, + "step": 359000 + }, + { + "epoch": 2.88, + "learning_rate": 4.8562e-05, + "loss": 8.7185, + "step": 359500 + }, + { + "epoch": 2.88, + "learning_rate": 4.856e-05, + "loss": 8.7108, + "step": 360000 + }, + { + "epoch": 2.88, + "learning_rate": 4.8558000000000005e-05, + "loss": 8.7167, + "step": 360500 + }, + { + "epoch": 2.89, + "learning_rate": 4.8556e-05, + "loss": 8.6894, + "step": 361000 + }, + { + "epoch": 2.89, + "learning_rate": 4.8554e-05, + "loss": 8.6856, + "step": 361500 + }, + { + "epoch": 2.9, + "learning_rate": 4.8552000000000005e-05, + "loss": 8.675, + "step": 362000 + }, + { + "epoch": 2.9, + "learning_rate": 4.855e-05, + "loss": 8.6954, + "step": 362500 + }, + { + "epoch": 2.9, + "learning_rate": 4.8548000000000003e-05, + "loss": 8.6984, + "step": 363000 + }, + { + "epoch": 2.91, + "learning_rate": 4.8546000000000006e-05, + "loss": 8.7377, + "step": 363500 + }, + { + "epoch": 2.91, + "learning_rate": 4.8544e-05, + "loss": 8.6971, + "step": 364000 + }, + { + "epoch": 2.92, + "learning_rate": 4.8542000000000004e-05, + "loss": 8.6765, + "step": 364500 + }, + { + "epoch": 2.92, + "learning_rate": 4.854e-05, + "loss": 8.7073, + "step": 365000 + }, + { + "epoch": 2.92, + "learning_rate": 4.8538e-05, + "loss": 8.6889, + "step": 365500 + }, + { + "epoch": 2.93, + "learning_rate": 4.8536000000000004e-05, + "loss": 8.7079, + "step": 366000 + }, + { + "epoch": 2.93, + "learning_rate": 4.8534e-05, + "loss": 8.6962, + "step": 366500 + }, + { + "epoch": 2.94, + "learning_rate": 4.8532e-05, + "loss": 8.6914, + "step": 367000 + }, + { + "epoch": 2.94, + "learning_rate": 4.8530000000000005e-05, + "loss": 8.7295, + "step": 367500 + }, + { + "epoch": 2.94, + "learning_rate": 4.8528e-05, + "loss": 8.7068, + "step": 368000 + }, + { + "epoch": 2.95, + "learning_rate": 4.8526e-05, + "loss": 8.6933, + "step": 368500 + }, + { + "epoch": 2.95, + "learning_rate": 4.8524000000000005e-05, + "loss": 8.6943, + "step": 369000 + }, + { + "epoch": 2.96, + "learning_rate": 4.8522e-05, + "loss": 8.6945, + "step": 369500 + }, + { + "epoch": 2.96, + "learning_rate": 4.852e-05, + "loss": 8.7121, + "step": 370000 + }, + { + "epoch": 2.96, + "learning_rate": 4.8518000000000005e-05, + "loss": 8.686, + "step": 370500 + }, + { + "epoch": 2.97, + "learning_rate": 4.8516e-05, + "loss": 8.7053, + "step": 371000 + }, + { + "epoch": 2.97, + "learning_rate": 4.8514000000000003e-05, + "loss": 8.6727, + "step": 371500 + }, + { + "epoch": 2.98, + "learning_rate": 4.8512e-05, + "loss": 8.6801, + "step": 372000 + }, + { + "epoch": 2.98, + "learning_rate": 4.851e-05, + "loss": 8.6943, + "step": 372500 + }, + { + "epoch": 2.98, + "learning_rate": 4.8508000000000004e-05, + "loss": 8.687, + "step": 373000 + }, + { + "epoch": 2.99, + "learning_rate": 4.8506e-05, + "loss": 8.7085, + "step": 373500 + }, + { + "epoch": 2.99, + "learning_rate": 4.8504e-05, + "loss": 8.7113, + "step": 374000 + }, + { + "epoch": 3.0, + "learning_rate": 4.8502000000000004e-05, + "loss": 8.7011, + "step": 374500 + }, + { + "epoch": 3.0, + "learning_rate": 4.85e-05, + "loss": 8.699, + "step": 375000 + }, + { + "epoch": 3.0, + "learning_rate": 4.8498e-05, + "loss": 8.6982, + "step": 375500 + }, + { + "epoch": 3.01, + "learning_rate": 4.8496000000000005e-05, + "loss": 8.7074, + "step": 376000 + }, + { + "epoch": 3.01, + "learning_rate": 4.8494e-05, + "loss": 8.6757, + "step": 376500 + }, + { + "epoch": 3.02, + "learning_rate": 4.8492e-05, + "loss": 8.7023, + "step": 377000 + }, + { + "epoch": 3.02, + "learning_rate": 4.8490000000000005e-05, + "loss": 8.6954, + "step": 377500 + }, + { + "epoch": 3.02, + "learning_rate": 4.8488e-05, + "loss": 8.6936, + "step": 378000 + }, + { + "epoch": 3.03, + "learning_rate": 4.8486e-05, + "loss": 8.7095, + "step": 378500 + }, + { + "epoch": 3.03, + "learning_rate": 4.8484e-05, + "loss": 8.7091, + "step": 379000 + }, + { + "epoch": 3.04, + "learning_rate": 4.8482e-05, + "loss": 8.7081, + "step": 379500 + }, + { + "epoch": 3.04, + "learning_rate": 4.8480000000000003e-05, + "loss": 8.6958, + "step": 380000 + }, + { + "epoch": 3.04, + "learning_rate": 4.8478e-05, + "loss": 8.6931, + "step": 380500 + }, + { + "epoch": 3.05, + "learning_rate": 4.8476e-05, + "loss": 8.7034, + "step": 381000 + }, + { + "epoch": 3.05, + "learning_rate": 4.8474000000000004e-05, + "loss": 8.6621, + "step": 381500 + }, + { + "epoch": 3.06, + "learning_rate": 4.8472e-05, + "loss": 8.6779, + "step": 382000 + }, + { + "epoch": 3.06, + "learning_rate": 4.847e-05, + "loss": 8.7087, + "step": 382500 + }, + { + "epoch": 3.06, + "learning_rate": 4.8468000000000004e-05, + "loss": 8.7243, + "step": 383000 + }, + { + "epoch": 3.07, + "learning_rate": 4.8466e-05, + "loss": 8.6768, + "step": 383500 + }, + { + "epoch": 3.07, + "learning_rate": 4.8464e-05, + "loss": 8.6821, + "step": 384000 + }, + { + "epoch": 3.08, + "learning_rate": 4.8462000000000005e-05, + "loss": 8.7114, + "step": 384500 + }, + { + "epoch": 3.08, + "learning_rate": 4.846e-05, + "loss": 8.6725, + "step": 385000 + }, + { + "epoch": 3.08, + "learning_rate": 4.8458e-05, + "loss": 8.7012, + "step": 385500 + }, + { + "epoch": 3.09, + "learning_rate": 4.8456e-05, + "loss": 8.6869, + "step": 386000 + }, + { + "epoch": 3.09, + "learning_rate": 4.845400000000001e-05, + "loss": 8.7117, + "step": 386500 + }, + { + "epoch": 3.1, + "learning_rate": 4.8452e-05, + "loss": 8.715, + "step": 387000 + }, + { + "epoch": 3.1, + "learning_rate": 4.845e-05, + "loss": 8.6688, + "step": 387500 + }, + { + "epoch": 3.1, + "learning_rate": 4.844800000000001e-05, + "loss": 8.6967, + "step": 388000 + }, + { + "epoch": 3.11, + "learning_rate": 4.8446e-05, + "loss": 8.6825, + "step": 388500 + }, + { + "epoch": 3.11, + "learning_rate": 4.8444e-05, + "loss": 8.6951, + "step": 389000 + }, + { + "epoch": 3.12, + "learning_rate": 4.8442e-05, + "loss": 8.6929, + "step": 389500 + }, + { + "epoch": 3.12, + "learning_rate": 4.8440000000000004e-05, + "loss": 8.7185, + "step": 390000 + }, + { + "epoch": 3.12, + "learning_rate": 4.8438e-05, + "loss": 8.6915, + "step": 390500 + }, + { + "epoch": 3.13, + "learning_rate": 4.8436e-05, + "loss": 8.7071, + "step": 391000 + }, + { + "epoch": 3.13, + "learning_rate": 4.8434000000000004e-05, + "loss": 8.6841, + "step": 391500 + }, + { + "epoch": 3.14, + "learning_rate": 4.8432e-05, + "loss": 8.6782, + "step": 392000 + }, + { + "epoch": 3.14, + "learning_rate": 4.843e-05, + "loss": 8.6894, + "step": 392500 + }, + { + "epoch": 3.14, + "learning_rate": 4.8428e-05, + "loss": 8.6945, + "step": 393000 + }, + { + "epoch": 3.15, + "learning_rate": 4.842600000000001e-05, + "loss": 8.684, + "step": 393500 + }, + { + "epoch": 3.15, + "learning_rate": 4.8424e-05, + "loss": 8.654, + "step": 394000 + }, + { + "epoch": 3.16, + "learning_rate": 4.8422e-05, + "loss": 8.6775, + "step": 394500 + }, + { + "epoch": 3.16, + "learning_rate": 4.842000000000001e-05, + "loss": 8.6904, + "step": 395000 + }, + { + "epoch": 3.16, + "learning_rate": 4.8418e-05, + "loss": 8.7069, + "step": 395500 + }, + { + "epoch": 3.17, + "learning_rate": 4.8416e-05, + "loss": 8.691, + "step": 396000 + }, + { + "epoch": 3.17, + "learning_rate": 4.8414e-05, + "loss": 8.6932, + "step": 396500 + }, + { + "epoch": 3.18, + "learning_rate": 4.8412e-05, + "loss": 8.6928, + "step": 397000 + }, + { + "epoch": 3.18, + "learning_rate": 4.841e-05, + "loss": 8.7049, + "step": 397500 + }, + { + "epoch": 3.18, + "learning_rate": 4.8408e-05, + "loss": 8.7108, + "step": 398000 + }, + { + "epoch": 3.19, + "learning_rate": 4.8406000000000004e-05, + "loss": 8.7185, + "step": 398500 + }, + { + "epoch": 3.19, + "learning_rate": 4.8404000000000006e-05, + "loss": 8.718, + "step": 399000 + }, + { + "epoch": 3.2, + "learning_rate": 4.8402e-05, + "loss": 8.678, + "step": 399500 + }, + { + "epoch": 3.2, + "learning_rate": 4.8400000000000004e-05, + "loss": 8.6971, + "step": 400000 + }, + { + "epoch": 3.2, + "learning_rate": 4.8398000000000007e-05, + "loss": 8.6769, + "step": 400500 + }, + { + "epoch": 3.21, + "learning_rate": 4.8396e-05, + "loss": 8.7164, + "step": 401000 + }, + { + "epoch": 3.21, + "learning_rate": 4.8394e-05, + "loss": 8.6986, + "step": 401500 + }, + { + "epoch": 3.22, + "learning_rate": 4.839200000000001e-05, + "loss": 8.6726, + "step": 402000 + }, + { + "epoch": 3.22, + "learning_rate": 4.839e-05, + "loss": 8.6929, + "step": 402500 + }, + { + "epoch": 3.22, + "learning_rate": 4.8388e-05, + "loss": 8.7256, + "step": 403000 + }, + { + "epoch": 3.23, + "learning_rate": 4.8386e-05, + "loss": 8.6922, + "step": 403500 + }, + { + "epoch": 3.23, + "learning_rate": 4.8384e-05, + "loss": 8.6959, + "step": 404000 + }, + { + "epoch": 3.24, + "learning_rate": 4.8382e-05, + "loss": 8.6909, + "step": 404500 + }, + { + "epoch": 3.24, + "learning_rate": 4.838e-05, + "loss": 8.6923, + "step": 405000 + }, + { + "epoch": 3.24, + "learning_rate": 4.8378e-05, + "loss": 8.6927, + "step": 405500 + }, + { + "epoch": 3.25, + "learning_rate": 4.8376000000000006e-05, + "loss": 8.7172, + "step": 406000 + }, + { + "epoch": 3.25, + "learning_rate": 4.8374e-05, + "loss": 8.6758, + "step": 406500 + }, + { + "epoch": 3.26, + "learning_rate": 4.8372000000000004e-05, + "loss": 8.7046, + "step": 407000 + }, + { + "epoch": 3.26, + "learning_rate": 4.8370000000000006e-05, + "loss": 8.7108, + "step": 407500 + }, + { + "epoch": 3.26, + "learning_rate": 4.8368e-05, + "loss": 8.692, + "step": 408000 + }, + { + "epoch": 3.27, + "learning_rate": 4.8366e-05, + "loss": 8.689, + "step": 408500 + }, + { + "epoch": 3.27, + "learning_rate": 4.8364000000000007e-05, + "loss": 8.6759, + "step": 409000 + }, + { + "epoch": 3.28, + "learning_rate": 4.8362e-05, + "loss": 8.6801, + "step": 409500 + }, + { + "epoch": 3.28, + "learning_rate": 4.836e-05, + "loss": 8.6988, + "step": 410000 + }, + { + "epoch": 3.28, + "learning_rate": 4.8358e-05, + "loss": 8.6677, + "step": 410500 + }, + { + "epoch": 3.29, + "learning_rate": 4.8356e-05, + "loss": 8.6791, + "step": 411000 + }, + { + "epoch": 3.29, + "learning_rate": 4.8354000000000005e-05, + "loss": 8.6931, + "step": 411500 + }, + { + "epoch": 3.3, + "learning_rate": 4.8352e-05, + "loss": 8.668, + "step": 412000 + }, + { + "epoch": 3.3, + "learning_rate": 4.835e-05, + "loss": 8.694, + "step": 412500 + }, + { + "epoch": 3.3, + "learning_rate": 4.8348000000000005e-05, + "loss": 8.6952, + "step": 413000 + }, + { + "epoch": 3.31, + "learning_rate": 4.8346e-05, + "loss": 8.6957, + "step": 413500 + }, + { + "epoch": 3.31, + "learning_rate": 4.8344e-05, + "loss": 8.6833, + "step": 414000 + }, + { + "epoch": 3.32, + "learning_rate": 4.8342000000000006e-05, + "loss": 8.7016, + "step": 414500 + }, + { + "epoch": 3.32, + "learning_rate": 4.834e-05, + "loss": 8.7104, + "step": 415000 + }, + { + "epoch": 3.32, + "learning_rate": 4.8338e-05, + "loss": 8.6991, + "step": 415500 + }, + { + "epoch": 3.33, + "learning_rate": 4.8336000000000006e-05, + "loss": 8.6973, + "step": 416000 + }, + { + "epoch": 3.33, + "learning_rate": 4.8334e-05, + "loss": 8.7134, + "step": 416500 + }, + { + "epoch": 3.34, + "learning_rate": 4.8332000000000004e-05, + "loss": 8.6838, + "step": 417000 + }, + { + "epoch": 3.34, + "learning_rate": 4.833e-05, + "loss": 8.7218, + "step": 417500 + }, + { + "epoch": 3.34, + "learning_rate": 4.8328e-05, + "loss": 8.7094, + "step": 418000 + }, + { + "epoch": 3.35, + "learning_rate": 4.8326000000000005e-05, + "loss": 8.684, + "step": 418500 + }, + { + "epoch": 3.35, + "learning_rate": 4.8324e-05, + "loss": 8.7029, + "step": 419000 + }, + { + "epoch": 3.36, + "learning_rate": 4.8322e-05, + "loss": 8.6928, + "step": 419500 + }, + { + "epoch": 3.36, + "learning_rate": 4.8320000000000005e-05, + "loss": 8.6705, + "step": 420000 + }, + { + "epoch": 3.36, + "learning_rate": 4.8318e-05, + "loss": 8.6614, + "step": 420500 + }, + { + "epoch": 3.37, + "learning_rate": 4.8316e-05, + "loss": 8.6882, + "step": 421000 + }, + { + "epoch": 3.37, + "learning_rate": 4.8314000000000005e-05, + "loss": 8.6703, + "step": 421500 + }, + { + "epoch": 3.38, + "learning_rate": 4.8312e-05, + "loss": 8.6985, + "step": 422000 + }, + { + "epoch": 3.38, + "learning_rate": 4.8309999999999997e-05, + "loss": 8.686, + "step": 422500 + }, + { + "epoch": 3.38, + "learning_rate": 4.8308000000000006e-05, + "loss": 8.6813, + "step": 423000 + }, + { + "epoch": 3.39, + "learning_rate": 4.8306e-05, + "loss": 8.7127, + "step": 423500 + }, + { + "epoch": 3.39, + "learning_rate": 4.8304000000000004e-05, + "loss": 8.6972, + "step": 424000 + }, + { + "epoch": 3.4, + "learning_rate": 4.8302000000000006e-05, + "loss": 8.7183, + "step": 424500 + }, + { + "epoch": 3.4, + "learning_rate": 4.83e-05, + "loss": 8.7121, + "step": 425000 + }, + { + "epoch": 3.4, + "learning_rate": 4.8298000000000004e-05, + "loss": 8.7204, + "step": 425500 + }, + { + "epoch": 3.41, + "learning_rate": 4.8296e-05, + "loss": 8.7153, + "step": 426000 + }, + { + "epoch": 3.41, + "learning_rate": 4.8294e-05, + "loss": 8.6959, + "step": 426500 + }, + { + "epoch": 3.42, + "learning_rate": 4.8292000000000005e-05, + "loss": 8.7169, + "step": 427000 + }, + { + "epoch": 3.42, + "learning_rate": 4.829e-05, + "loss": 8.6833, + "step": 427500 + }, + { + "epoch": 3.42, + "learning_rate": 4.8288e-05, + "loss": 8.707, + "step": 428000 + }, + { + "epoch": 3.43, + "learning_rate": 4.8286000000000005e-05, + "loss": 8.6847, + "step": 428500 + }, + { + "epoch": 3.43, + "learning_rate": 4.8284e-05, + "loss": 8.6911, + "step": 429000 + }, + { + "epoch": 3.44, + "learning_rate": 4.8282e-05, + "loss": 8.7043, + "step": 429500 + }, + { + "epoch": 3.44, + "learning_rate": 4.8280000000000005e-05, + "loss": 8.7095, + "step": 430000 + }, + { + "epoch": 3.44, + "learning_rate": 4.8278e-05, + "loss": 8.6738, + "step": 430500 + }, + { + "epoch": 3.45, + "learning_rate": 4.8276e-05, + "loss": 8.7012, + "step": 431000 + }, + { + "epoch": 3.45, + "learning_rate": 4.8274000000000006e-05, + "loss": 8.6963, + "step": 431500 + }, + { + "epoch": 3.46, + "learning_rate": 4.8272e-05, + "loss": 8.7061, + "step": 432000 + }, + { + "epoch": 3.46, + "learning_rate": 4.8270000000000004e-05, + "loss": 8.6929, + "step": 432500 + }, + { + "epoch": 3.46, + "learning_rate": 4.8268e-05, + "loss": 8.6864, + "step": 433000 + }, + { + "epoch": 3.47, + "learning_rate": 4.8266e-05, + "loss": 8.701, + "step": 433500 + }, + { + "epoch": 3.47, + "learning_rate": 4.8264000000000004e-05, + "loss": 8.7049, + "step": 434000 + }, + { + "epoch": 3.48, + "learning_rate": 4.8262e-05, + "loss": 8.6915, + "step": 434500 + }, + { + "epoch": 3.48, + "learning_rate": 4.826e-05, + "loss": 8.7128, + "step": 435000 + }, + { + "epoch": 3.48, + "learning_rate": 4.8258000000000005e-05, + "loss": 8.7074, + "step": 435500 + }, + { + "epoch": 3.49, + "learning_rate": 4.8256e-05, + "loss": 8.6953, + "step": 436000 + }, + { + "epoch": 3.49, + "learning_rate": 4.8254e-05, + "loss": 8.7117, + "step": 436500 + }, + { + "epoch": 3.5, + "learning_rate": 4.8252000000000005e-05, + "loss": 8.6716, + "step": 437000 + }, + { + "epoch": 3.5, + "learning_rate": 4.825e-05, + "loss": 8.7095, + "step": 437500 + }, + { + "epoch": 3.5, + "learning_rate": 4.8248e-05, + "loss": 8.7017, + "step": 438000 + }, + { + "epoch": 3.51, + "learning_rate": 4.8246000000000005e-05, + "loss": 8.6966, + "step": 438500 + }, + { + "epoch": 3.51, + "learning_rate": 4.8244e-05, + "loss": 8.674, + "step": 439000 + }, + { + "epoch": 3.52, + "learning_rate": 4.8242e-05, + "loss": 8.6855, + "step": 439500 + }, + { + "epoch": 3.52, + "learning_rate": 4.824e-05, + "loss": 8.6747, + "step": 440000 + }, + { + "epoch": 3.52, + "learning_rate": 4.8238e-05, + "loss": 8.701, + "step": 440500 + }, + { + "epoch": 3.53, + "learning_rate": 4.8236000000000004e-05, + "loss": 8.6833, + "step": 441000 + }, + { + "epoch": 3.53, + "learning_rate": 4.8234e-05, + "loss": 8.6943, + "step": 441500 + }, + { + "epoch": 3.54, + "learning_rate": 4.8232e-05, + "loss": 8.689, + "step": 442000 + }, + { + "epoch": 3.54, + "learning_rate": 4.8230000000000004e-05, + "loss": 8.6998, + "step": 442500 + }, + { + "epoch": 3.54, + "learning_rate": 4.8228e-05, + "loss": 8.7034, + "step": 443000 + }, + { + "epoch": 3.55, + "learning_rate": 4.8226e-05, + "loss": 8.7114, + "step": 443500 + }, + { + "epoch": 3.55, + "learning_rate": 4.8224000000000004e-05, + "loss": 8.6846, + "step": 444000 + }, + { + "epoch": 3.56, + "learning_rate": 4.8222e-05, + "loss": 8.6767, + "step": 444500 + }, + { + "epoch": 3.56, + "learning_rate": 4.822e-05, + "loss": 8.726, + "step": 445000 + }, + { + "epoch": 3.56, + "learning_rate": 4.8218000000000005e-05, + "loss": 8.6712, + "step": 445500 + }, + { + "epoch": 3.57, + "learning_rate": 4.8216e-05, + "loss": 8.6972, + "step": 446000 + }, + { + "epoch": 3.57, + "learning_rate": 4.8214e-05, + "loss": 8.7112, + "step": 446500 + }, + { + "epoch": 3.58, + "learning_rate": 4.8212e-05, + "loss": 8.6705, + "step": 447000 + }, + { + "epoch": 3.58, + "learning_rate": 4.821e-05, + "loss": 8.7263, + "step": 447500 + }, + { + "epoch": 3.58, + "learning_rate": 4.8208e-05, + "loss": 8.6963, + "step": 448000 + }, + { + "epoch": 3.59, + "learning_rate": 4.8206e-05, + "loss": 8.7303, + "step": 448500 + }, + { + "epoch": 3.59, + "learning_rate": 4.820400000000001e-05, + "loss": 8.707, + "step": 449000 + }, + { + "epoch": 3.6, + "learning_rate": 4.8202000000000004e-05, + "loss": 8.6949, + "step": 449500 + }, + { + "epoch": 3.6, + "learning_rate": 4.82e-05, + "loss": 8.7131, + "step": 450000 + }, + { + "epoch": 3.6, + "learning_rate": 4.8198e-05, + "loss": 8.683, + "step": 450500 + }, + { + "epoch": 3.61, + "learning_rate": 4.8196000000000004e-05, + "loss": 8.6739, + "step": 451000 + }, + { + "epoch": 3.61, + "learning_rate": 4.8194e-05, + "loss": 8.693, + "step": 451500 + }, + { + "epoch": 3.62, + "learning_rate": 4.8192e-05, + "loss": 8.7077, + "step": 452000 + }, + { + "epoch": 3.62, + "learning_rate": 4.8190000000000004e-05, + "loss": 8.6969, + "step": 452500 + }, + { + "epoch": 3.62, + "learning_rate": 4.8188e-05, + "loss": 8.6799, + "step": 453000 + }, + { + "epoch": 3.63, + "learning_rate": 4.8186e-05, + "loss": 8.7057, + "step": 453500 + }, + { + "epoch": 3.63, + "learning_rate": 4.8184e-05, + "loss": 8.7014, + "step": 454000 + }, + { + "epoch": 3.64, + "learning_rate": 4.818200000000001e-05, + "loss": 8.7065, + "step": 454500 + }, + { + "epoch": 3.64, + "learning_rate": 4.818e-05, + "loss": 8.7073, + "step": 455000 + }, + { + "epoch": 3.64, + "learning_rate": 4.8178e-05, + "loss": 8.7097, + "step": 455500 + }, + { + "epoch": 3.65, + "learning_rate": 4.817600000000001e-05, + "loss": 8.7036, + "step": 456000 + }, + { + "epoch": 3.65, + "learning_rate": 4.8174e-05, + "loss": 8.6856, + "step": 456500 + }, + { + "epoch": 3.66, + "learning_rate": 4.8172e-05, + "loss": 8.7143, + "step": 457000 + }, + { + "epoch": 3.66, + "learning_rate": 4.817e-05, + "loss": 8.6936, + "step": 457500 + }, + { + "epoch": 3.66, + "learning_rate": 4.8168000000000004e-05, + "loss": 8.7067, + "step": 458000 + }, + { + "epoch": 3.67, + "learning_rate": 4.8166e-05, + "loss": 8.6913, + "step": 458500 + }, + { + "epoch": 3.67, + "learning_rate": 4.8164e-05, + "loss": 8.6758, + "step": 459000 + }, + { + "epoch": 3.68, + "learning_rate": 4.8162000000000004e-05, + "loss": 8.6876, + "step": 459500 + }, + { + "epoch": 3.68, + "learning_rate": 4.816e-05, + "loss": 8.6895, + "step": 460000 + }, + { + "epoch": 3.68, + "learning_rate": 4.8158e-05, + "loss": 8.7215, + "step": 460500 + }, + { + "epoch": 3.69, + "learning_rate": 4.8156000000000004e-05, + "loss": 8.6853, + "step": 461000 + }, + { + "epoch": 3.69, + "learning_rate": 4.815400000000001e-05, + "loss": 8.6857, + "step": 461500 + }, + { + "epoch": 3.7, + "learning_rate": 4.8152e-05, + "loss": 8.7186, + "step": 462000 + }, + { + "epoch": 3.7, + "learning_rate": 4.815e-05, + "loss": 8.6972, + "step": 462500 + }, + { + "epoch": 3.7, + "learning_rate": 4.814800000000001e-05, + "loss": 8.705, + "step": 463000 + }, + { + "epoch": 3.71, + "learning_rate": 4.8146e-05, + "loss": 8.7002, + "step": 463500 + }, + { + "epoch": 3.71, + "learning_rate": 4.8144e-05, + "loss": 8.6838, + "step": 464000 + }, + { + "epoch": 3.72, + "learning_rate": 4.8142e-05, + "loss": 8.6851, + "step": 464500 + }, + { + "epoch": 3.72, + "learning_rate": 4.814e-05, + "loss": 8.7308, + "step": 465000 + }, + { + "epoch": 3.72, + "learning_rate": 4.8138e-05, + "loss": 8.6843, + "step": 465500 + }, + { + "epoch": 3.73, + "learning_rate": 4.8136e-05, + "loss": 8.678, + "step": 466000 + }, + { + "epoch": 3.73, + "learning_rate": 4.8134000000000004e-05, + "loss": 8.661, + "step": 466500 + }, + { + "epoch": 3.74, + "learning_rate": 4.8132000000000006e-05, + "loss": 8.6863, + "step": 467000 + }, + { + "epoch": 3.74, + "learning_rate": 4.813e-05, + "loss": 8.6986, + "step": 467500 + }, + { + "epoch": 3.74, + "learning_rate": 4.8128000000000004e-05, + "loss": 8.7165, + "step": 468000 + }, + { + "epoch": 3.75, + "learning_rate": 4.8126000000000006e-05, + "loss": 8.6645, + "step": 468500 + }, + { + "epoch": 3.75, + "learning_rate": 4.8124e-05, + "loss": 8.7061, + "step": 469000 + }, + { + "epoch": 3.76, + "learning_rate": 4.8122e-05, + "loss": 8.6844, + "step": 469500 + }, + { + "epoch": 3.76, + "learning_rate": 4.812000000000001e-05, + "loss": 8.6871, + "step": 470000 + }, + { + "epoch": 3.76, + "learning_rate": 4.8118e-05, + "loss": 8.711, + "step": 470500 + }, + { + "epoch": 3.77, + "learning_rate": 4.8116e-05, + "loss": 8.6687, + "step": 471000 + }, + { + "epoch": 3.77, + "learning_rate": 4.8114e-05, + "loss": 8.6982, + "step": 471500 + }, + { + "epoch": 3.78, + "learning_rate": 4.8112e-05, + "loss": 8.6865, + "step": 472000 + }, + { + "epoch": 3.78, + "learning_rate": 4.8110000000000005e-05, + "loss": 8.6927, + "step": 472500 + }, + { + "epoch": 3.78, + "learning_rate": 4.8108e-05, + "loss": 8.6943, + "step": 473000 + }, + { + "epoch": 3.79, + "learning_rate": 4.8106e-05, + "loss": 8.6945, + "step": 473500 + }, + { + "epoch": 3.79, + "learning_rate": 4.8104000000000006e-05, + "loss": 8.6925, + "step": 474000 + }, + { + "epoch": 3.8, + "learning_rate": 4.8102e-05, + "loss": 8.6788, + "step": 474500 + }, + { + "epoch": 3.8, + "learning_rate": 4.8100000000000004e-05, + "loss": 8.7, + "step": 475000 + }, + { + "epoch": 3.8, + "learning_rate": 4.8098000000000006e-05, + "loss": 8.6685, + "step": 475500 + }, + { + "epoch": 3.81, + "learning_rate": 4.8096e-05, + "loss": 8.6779, + "step": 476000 + }, + { + "epoch": 3.81, + "learning_rate": 4.8094e-05, + "loss": 8.6893, + "step": 476500 + }, + { + "epoch": 3.82, + "learning_rate": 4.8092000000000006e-05, + "loss": 8.6897, + "step": 477000 + }, + { + "epoch": 3.82, + "learning_rate": 4.809e-05, + "loss": 8.7191, + "step": 477500 + }, + { + "epoch": 3.82, + "learning_rate": 4.8088e-05, + "loss": 8.663, + "step": 478000 + }, + { + "epoch": 3.83, + "learning_rate": 4.8086e-05, + "loss": 8.6669, + "step": 478500 + }, + { + "epoch": 3.83, + "learning_rate": 4.8084e-05, + "loss": 8.6691, + "step": 479000 + }, + { + "epoch": 3.84, + "learning_rate": 4.8082000000000005e-05, + "loss": 8.6918, + "step": 479500 + }, + { + "epoch": 3.84, + "learning_rate": 4.808e-05, + "loss": 8.6969, + "step": 480000 + }, + { + "epoch": 3.84, + "learning_rate": 4.8078e-05, + "loss": 8.6683, + "step": 480500 + }, + { + "epoch": 3.85, + "learning_rate": 4.8076000000000005e-05, + "loss": 8.7101, + "step": 481000 + }, + { + "epoch": 3.85, + "learning_rate": 4.8074e-05, + "loss": 8.6696, + "step": 481500 + }, + { + "epoch": 3.86, + "learning_rate": 4.8072e-05, + "loss": 8.6914, + "step": 482000 + }, + { + "epoch": 3.86, + "learning_rate": 4.8070000000000006e-05, + "loss": 8.6875, + "step": 482500 + }, + { + "epoch": 3.86, + "learning_rate": 4.8068e-05, + "loss": 8.7111, + "step": 483000 + }, + { + "epoch": 3.87, + "learning_rate": 4.8066e-05, + "loss": 8.708, + "step": 483500 + }, + { + "epoch": 3.87, + "learning_rate": 4.8064000000000006e-05, + "loss": 8.6654, + "step": 484000 + }, + { + "epoch": 3.88, + "learning_rate": 4.8062e-05, + "loss": 8.6937, + "step": 484500 + }, + { + "epoch": 3.88, + "learning_rate": 4.8060000000000004e-05, + "loss": 8.6961, + "step": 485000 + }, + { + "epoch": 3.88, + "learning_rate": 4.8058e-05, + "loss": 8.7011, + "step": 485500 + }, + { + "epoch": 3.89, + "learning_rate": 4.8056e-05, + "loss": 8.7058, + "step": 486000 + }, + { + "epoch": 3.89, + "learning_rate": 4.8054000000000004e-05, + "loss": 8.6886, + "step": 486500 + }, + { + "epoch": 3.9, + "learning_rate": 4.8052e-05, + "loss": 8.7001, + "step": 487000 + }, + { + "epoch": 3.9, + "learning_rate": 4.805e-05, + "loss": 8.7077, + "step": 487500 + }, + { + "epoch": 3.9, + "learning_rate": 4.8048000000000005e-05, + "loss": 8.6888, + "step": 488000 + }, + { + "epoch": 3.91, + "learning_rate": 4.8046e-05, + "loss": 8.7076, + "step": 488500 + }, + { + "epoch": 3.91, + "learning_rate": 4.8044e-05, + "loss": 8.6638, + "step": 489000 + }, + { + "epoch": 3.92, + "learning_rate": 4.8042000000000005e-05, + "loss": 8.6892, + "step": 489500 + }, + { + "epoch": 3.92, + "learning_rate": 4.804e-05, + "loss": 8.676, + "step": 490000 + }, + { + "epoch": 3.92, + "learning_rate": 4.8037999999999996e-05, + "loss": 8.7038, + "step": 490500 + }, + { + "epoch": 3.93, + "learning_rate": 4.8036000000000006e-05, + "loss": 8.6922, + "step": 491000 + }, + { + "epoch": 3.93, + "learning_rate": 4.8034e-05, + "loss": 8.6947, + "step": 491500 + }, + { + "epoch": 3.94, + "learning_rate": 4.8032000000000004e-05, + "loss": 8.6718, + "step": 492000 + }, + { + "epoch": 3.94, + "learning_rate": 4.8030000000000006e-05, + "loss": 8.7013, + "step": 492500 + }, + { + "epoch": 3.94, + "learning_rate": 4.8028e-05, + "loss": 8.6745, + "step": 493000 + }, + { + "epoch": 3.95, + "learning_rate": 4.8026000000000004e-05, + "loss": 8.7123, + "step": 493500 + }, + { + "epoch": 3.95, + "learning_rate": 4.8024e-05, + "loss": 8.7301, + "step": 494000 + }, + { + "epoch": 3.96, + "learning_rate": 4.8022e-05, + "loss": 8.7264, + "step": 494500 + }, + { + "epoch": 3.96, + "learning_rate": 4.8020000000000004e-05, + "loss": 8.6981, + "step": 495000 + }, + { + "epoch": 3.96, + "learning_rate": 4.8018e-05, + "loss": 8.697, + "step": 495500 + }, + { + "epoch": 3.97, + "learning_rate": 4.8016e-05, + "loss": 8.6812, + "step": 496000 + }, + { + "epoch": 3.97, + "learning_rate": 4.8014000000000005e-05, + "loss": 8.7023, + "step": 496500 + }, + { + "epoch": 3.98, + "learning_rate": 4.8012e-05, + "loss": 8.6937, + "step": 497000 + }, + { + "epoch": 3.98, + "learning_rate": 4.801e-05, + "loss": 8.6924, + "step": 497500 + }, + { + "epoch": 3.98, + "learning_rate": 4.8008000000000005e-05, + "loss": 8.7007, + "step": 498000 + }, + { + "epoch": 3.99, + "learning_rate": 4.8006e-05, + "loss": 8.7113, + "step": 498500 + }, + { + "epoch": 3.99, + "learning_rate": 4.8004e-05, + "loss": 8.6855, + "step": 499000 + }, + { + "epoch": 4.0, + "learning_rate": 4.8002000000000006e-05, + "loss": 8.6865, + "step": 499500 + }, + { + "epoch": 4.0, + "learning_rate": 4.8e-05, + "loss": 8.7076, + "step": 500000 + }, + { + "epoch": 4.0, + "learning_rate": 4.7998000000000004e-05, + "loss": 8.6981, + "step": 500500 + }, + { + "epoch": 4.01, + "learning_rate": 4.7996e-05, + "loss": 8.6832, + "step": 501000 + }, + { + "epoch": 4.01, + "learning_rate": 4.7994e-05, + "loss": 8.6853, + "step": 501500 + }, + { + "epoch": 4.02, + "learning_rate": 4.7992000000000004e-05, + "loss": 8.6963, + "step": 502000 + }, + { + "epoch": 4.02, + "learning_rate": 4.799e-05, + "loss": 8.6834, + "step": 502500 + }, + { + "epoch": 4.02, + "learning_rate": 4.7988e-05, + "loss": 8.6712, + "step": 503000 + }, + { + "epoch": 4.03, + "learning_rate": 4.7986000000000004e-05, + "loss": 8.7123, + "step": 503500 + }, + { + "epoch": 4.03, + "learning_rate": 4.7984e-05, + "loss": 8.6588, + "step": 504000 + }, + { + "epoch": 4.04, + "learning_rate": 4.7982e-05, + "loss": 8.6922, + "step": 504500 + }, + { + "epoch": 4.04, + "learning_rate": 4.7980000000000005e-05, + "loss": 8.702, + "step": 505000 + }, + { + "epoch": 4.04, + "learning_rate": 4.7978e-05, + "loss": 8.7003, + "step": 505500 + }, + { + "epoch": 4.05, + "learning_rate": 4.7976e-05, + "loss": 8.6942, + "step": 506000 + }, + { + "epoch": 4.05, + "learning_rate": 4.7974000000000005e-05, + "loss": 8.7065, + "step": 506500 + }, + { + "epoch": 4.06, + "learning_rate": 4.7972e-05, + "loss": 8.6901, + "step": 507000 + }, + { + "epoch": 4.06, + "learning_rate": 4.797e-05, + "loss": 8.6761, + "step": 507500 + }, + { + "epoch": 4.06, + "learning_rate": 4.7968e-05, + "loss": 8.6732, + "step": 508000 + }, + { + "epoch": 4.07, + "learning_rate": 4.7966e-05, + "loss": 8.6666, + "step": 508500 + }, + { + "epoch": 4.07, + "learning_rate": 4.7964000000000004e-05, + "loss": 8.7143, + "step": 509000 + }, + { + "epoch": 4.08, + "learning_rate": 4.7962e-05, + "loss": 8.7032, + "step": 509500 + }, + { + "epoch": 4.08, + "learning_rate": 4.796e-05, + "loss": 8.6971, + "step": 510000 + }, + { + "epoch": 4.08, + "learning_rate": 4.7958000000000004e-05, + "loss": 8.7349, + "step": 510500 + }, + { + "epoch": 4.09, + "learning_rate": 4.7956e-05, + "loss": 8.7171, + "step": 511000 + }, + { + "epoch": 4.09, + "learning_rate": 4.7954e-05, + "loss": 8.6922, + "step": 511500 + }, + { + "epoch": 4.1, + "learning_rate": 4.7952000000000004e-05, + "loss": 8.6794, + "step": 512000 + }, + { + "epoch": 4.1, + "learning_rate": 4.795e-05, + "loss": 8.7029, + "step": 512500 + }, + { + "epoch": 4.1, + "learning_rate": 4.7948e-05, + "loss": 8.6932, + "step": 513000 + }, + { + "epoch": 4.11, + "learning_rate": 4.7946000000000005e-05, + "loss": 8.7035, + "step": 513500 + }, + { + "epoch": 4.11, + "learning_rate": 4.7944e-05, + "loss": 8.6815, + "step": 514000 + }, + { + "epoch": 4.12, + "learning_rate": 4.7942e-05, + "loss": 8.6972, + "step": 514500 + }, + { + "epoch": 4.12, + "learning_rate": 4.794e-05, + "loss": 8.6814, + "step": 515000 + }, + { + "epoch": 4.12, + "learning_rate": 4.7938e-05, + "loss": 8.6591, + "step": 515500 + }, + { + "epoch": 4.13, + "learning_rate": 4.7936e-05, + "loss": 8.6983, + "step": 516000 + }, + { + "epoch": 4.13, + "learning_rate": 4.7934e-05, + "loss": 8.7041, + "step": 516500 + }, + { + "epoch": 4.14, + "learning_rate": 4.793200000000001e-05, + "loss": 8.6969, + "step": 517000 + }, + { + "epoch": 4.14, + "learning_rate": 4.7930000000000004e-05, + "loss": 8.7102, + "step": 517500 + }, + { + "epoch": 4.14, + "learning_rate": 4.7928e-05, + "loss": 8.6954, + "step": 518000 + }, + { + "epoch": 4.15, + "learning_rate": 4.7926e-05, + "loss": 8.6718, + "step": 518500 + }, + { + "epoch": 4.15, + "learning_rate": 4.7924000000000004e-05, + "loss": 8.6878, + "step": 519000 + }, + { + "epoch": 4.16, + "learning_rate": 4.7922e-05, + "loss": 8.6994, + "step": 519500 + }, + { + "epoch": 4.16, + "learning_rate": 4.792e-05, + "loss": 8.6711, + "step": 520000 + }, + { + "epoch": 4.16, + "learning_rate": 4.7918000000000004e-05, + "loss": 8.7005, + "step": 520500 + }, + { + "epoch": 4.17, + "learning_rate": 4.7916e-05, + "loss": 8.6749, + "step": 521000 + }, + { + "epoch": 4.17, + "learning_rate": 4.7914e-05, + "loss": 8.6821, + "step": 521500 + }, + { + "epoch": 4.18, + "learning_rate": 4.7912e-05, + "loss": 8.6771, + "step": 522000 + }, + { + "epoch": 4.18, + "learning_rate": 4.791000000000001e-05, + "loss": 8.6896, + "step": 522500 + }, + { + "epoch": 4.18, + "learning_rate": 4.7908e-05, + "loss": 8.7081, + "step": 523000 + }, + { + "epoch": 4.19, + "learning_rate": 4.7906e-05, + "loss": 8.6892, + "step": 523500 + }, + { + "epoch": 4.19, + "learning_rate": 4.790400000000001e-05, + "loss": 8.6998, + "step": 524000 + }, + { + "epoch": 4.2, + "learning_rate": 4.7902e-05, + "loss": 8.6664, + "step": 524500 + }, + { + "epoch": 4.2, + "learning_rate": 4.79e-05, + "loss": 8.7092, + "step": 525000 + }, + { + "epoch": 4.2, + "learning_rate": 4.7898e-05, + "loss": 8.6845, + "step": 525500 + }, + { + "epoch": 4.21, + "learning_rate": 4.7896000000000004e-05, + "loss": 8.7126, + "step": 526000 + }, + { + "epoch": 4.21, + "learning_rate": 4.7894e-05, + "loss": 8.6845, + "step": 526500 + }, + { + "epoch": 4.22, + "learning_rate": 4.7892e-05, + "loss": 8.721, + "step": 527000 + }, + { + "epoch": 4.22, + "learning_rate": 4.7890000000000004e-05, + "loss": 8.7022, + "step": 527500 + }, + { + "epoch": 4.22, + "learning_rate": 4.7888e-05, + "loss": 8.6862, + "step": 528000 + }, + { + "epoch": 4.23, + "learning_rate": 4.7886e-05, + "loss": 8.6964, + "step": 528500 + }, + { + "epoch": 4.23, + "learning_rate": 4.7884000000000004e-05, + "loss": 8.6632, + "step": 529000 + }, + { + "epoch": 4.24, + "learning_rate": 4.788200000000001e-05, + "loss": 8.7079, + "step": 529500 + }, + { + "epoch": 4.24, + "learning_rate": 4.788e-05, + "loss": 8.7429, + "step": 530000 + }, + { + "epoch": 4.24, + "learning_rate": 4.7878e-05, + "loss": 8.7043, + "step": 530500 + }, + { + "epoch": 4.25, + "learning_rate": 4.787600000000001e-05, + "loss": 8.7092, + "step": 531000 + }, + { + "epoch": 4.25, + "learning_rate": 4.7874e-05, + "loss": 8.6881, + "step": 531500 + }, + { + "epoch": 4.26, + "learning_rate": 4.7872e-05, + "loss": 8.6922, + "step": 532000 + }, + { + "epoch": 4.26, + "learning_rate": 4.787e-05, + "loss": 8.6983, + "step": 532500 + }, + { + "epoch": 4.26, + "learning_rate": 4.7868e-05, + "loss": 8.7213, + "step": 533000 + }, + { + "epoch": 4.27, + "learning_rate": 4.7866e-05, + "loss": 8.6881, + "step": 533500 + }, + { + "epoch": 4.27, + "learning_rate": 4.7864e-05, + "loss": 8.6939, + "step": 534000 + }, + { + "epoch": 4.28, + "learning_rate": 4.7862000000000004e-05, + "loss": 8.7001, + "step": 534500 + }, + { + "epoch": 4.28, + "learning_rate": 4.7860000000000006e-05, + "loss": 8.6931, + "step": 535000 + }, + { + "epoch": 4.28, + "learning_rate": 4.7858e-05, + "loss": 8.7165, + "step": 535500 + }, + { + "epoch": 4.29, + "learning_rate": 4.7856000000000004e-05, + "loss": 8.6589, + "step": 536000 + }, + { + "epoch": 4.29, + "learning_rate": 4.7854000000000006e-05, + "loss": 8.6572, + "step": 536500 + }, + { + "epoch": 4.3, + "learning_rate": 4.7852e-05, + "loss": 8.6591, + "step": 537000 + }, + { + "epoch": 4.3, + "learning_rate": 4.785e-05, + "loss": 8.6989, + "step": 537500 + }, + { + "epoch": 4.3, + "learning_rate": 4.784800000000001e-05, + "loss": 8.6687, + "step": 538000 + }, + { + "epoch": 4.31, + "learning_rate": 4.7846e-05, + "loss": 8.696, + "step": 538500 + }, + { + "epoch": 4.31, + "learning_rate": 4.7844e-05, + "loss": 8.6808, + "step": 539000 + }, + { + "epoch": 4.32, + "learning_rate": 4.7842e-05, + "loss": 8.6872, + "step": 539500 + }, + { + "epoch": 4.32, + "learning_rate": 4.784e-05, + "loss": 8.6958, + "step": 540000 + }, + { + "epoch": 4.32, + "learning_rate": 4.7838000000000005e-05, + "loss": 8.6808, + "step": 540500 + }, + { + "epoch": 4.33, + "learning_rate": 4.7836e-05, + "loss": 8.7062, + "step": 541000 + }, + { + "epoch": 4.33, + "learning_rate": 4.7834e-05, + "loss": 8.7068, + "step": 541500 + }, + { + "epoch": 4.34, + "learning_rate": 4.7832000000000006e-05, + "loss": 8.6692, + "step": 542000 + }, + { + "epoch": 4.34, + "learning_rate": 4.783e-05, + "loss": 8.6858, + "step": 542500 + }, + { + "epoch": 4.34, + "learning_rate": 4.7828000000000004e-05, + "loss": 8.7149, + "step": 543000 + }, + { + "epoch": 4.35, + "learning_rate": 4.7826000000000006e-05, + "loss": 8.6997, + "step": 543500 + }, + { + "epoch": 4.35, + "learning_rate": 4.7824e-05, + "loss": 8.6814, + "step": 544000 + }, + { + "epoch": 4.36, + "learning_rate": 4.7822e-05, + "loss": 8.7158, + "step": 544500 + }, + { + "epoch": 4.36, + "learning_rate": 4.7820000000000006e-05, + "loss": 8.6842, + "step": 545000 + }, + { + "epoch": 4.36, + "learning_rate": 4.7818e-05, + "loss": 8.6719, + "step": 545500 + }, + { + "epoch": 4.37, + "learning_rate": 4.7816e-05, + "loss": 8.6848, + "step": 546000 + }, + { + "epoch": 4.37, + "learning_rate": 4.7814e-05, + "loss": 8.6839, + "step": 546500 + }, + { + "epoch": 4.38, + "learning_rate": 4.7812e-05, + "loss": 8.6997, + "step": 547000 + }, + { + "epoch": 4.38, + "learning_rate": 4.7810000000000005e-05, + "loss": 8.6884, + "step": 547500 + }, + { + "epoch": 4.38, + "learning_rate": 4.7808e-05, + "loss": 8.6709, + "step": 548000 + }, + { + "epoch": 4.39, + "learning_rate": 4.7806e-05, + "loss": 8.7005, + "step": 548500 + }, + { + "epoch": 4.39, + "learning_rate": 4.7804000000000005e-05, + "loss": 8.688, + "step": 549000 + }, + { + "epoch": 4.4, + "learning_rate": 4.7802e-05, + "loss": 8.6819, + "step": 549500 + }, + { + "epoch": 4.4, + "learning_rate": 4.78e-05, + "loss": 8.6875, + "step": 550000 + }, + { + "epoch": 4.4, + "learning_rate": 4.7798000000000006e-05, + "loss": 8.6997, + "step": 550500 + }, + { + "epoch": 4.41, + "learning_rate": 4.7796e-05, + "loss": 8.7242, + "step": 551000 + }, + { + "epoch": 4.41, + "learning_rate": 4.7794e-05, + "loss": 8.7325, + "step": 551500 + }, + { + "epoch": 4.42, + "learning_rate": 4.7792000000000006e-05, + "loss": 8.6911, + "step": 552000 + }, + { + "epoch": 4.42, + "learning_rate": 4.779e-05, + "loss": 8.6963, + "step": 552500 + }, + { + "epoch": 4.42, + "learning_rate": 4.7788000000000004e-05, + "loss": 8.685, + "step": 553000 + }, + { + "epoch": 4.43, + "learning_rate": 4.7786000000000006e-05, + "loss": 8.6815, + "step": 553500 + }, + { + "epoch": 4.43, + "learning_rate": 4.7784e-05, + "loss": 8.7159, + "step": 554000 + }, + { + "epoch": 4.44, + "learning_rate": 4.7782000000000004e-05, + "loss": 8.6886, + "step": 554500 + }, + { + "epoch": 4.44, + "learning_rate": 4.778e-05, + "loss": 8.6965, + "step": 555000 + }, + { + "epoch": 4.44, + "learning_rate": 4.7778e-05, + "loss": 8.6883, + "step": 555500 + }, + { + "epoch": 4.45, + "learning_rate": 4.7776000000000005e-05, + "loss": 8.7095, + "step": 556000 + }, + { + "epoch": 4.45, + "learning_rate": 4.7774e-05, + "loss": 8.686, + "step": 556500 + }, + { + "epoch": 4.46, + "learning_rate": 4.7772e-05, + "loss": 8.6664, + "step": 557000 + }, + { + "epoch": 4.46, + "learning_rate": 4.7770000000000005e-05, + "loss": 8.7078, + "step": 557500 + }, + { + "epoch": 4.46, + "learning_rate": 4.7768e-05, + "loss": 8.6967, + "step": 558000 + }, + { + "epoch": 4.47, + "learning_rate": 4.7765999999999996e-05, + "loss": 8.6748, + "step": 558500 + }, + { + "epoch": 4.47, + "learning_rate": 4.7764000000000006e-05, + "loss": 8.7147, + "step": 559000 + }, + { + "epoch": 4.48, + "learning_rate": 4.7762e-05, + "loss": 8.7062, + "step": 559500 + }, + { + "epoch": 4.48, + "learning_rate": 4.7760000000000004e-05, + "loss": 8.6903, + "step": 560000 + }, + { + "epoch": 4.48, + "learning_rate": 4.7758000000000006e-05, + "loss": 8.725, + "step": 560500 + }, + { + "epoch": 4.49, + "learning_rate": 4.7756e-05, + "loss": 8.695, + "step": 561000 + }, + { + "epoch": 4.49, + "learning_rate": 4.7754000000000004e-05, + "loss": 8.6866, + "step": 561500 + }, + { + "epoch": 4.5, + "learning_rate": 4.7752e-05, + "loss": 8.6951, + "step": 562000 + }, + { + "epoch": 4.5, + "learning_rate": 4.775e-05, + "loss": 8.6861, + "step": 562500 + }, + { + "epoch": 4.5, + "learning_rate": 4.7748000000000004e-05, + "loss": 8.6742, + "step": 563000 + }, + { + "epoch": 4.51, + "learning_rate": 4.7746e-05, + "loss": 8.7243, + "step": 563500 + }, + { + "epoch": 4.51, + "learning_rate": 4.7744e-05, + "loss": 8.6688, + "step": 564000 + }, + { + "epoch": 4.52, + "learning_rate": 4.7742000000000005e-05, + "loss": 8.6872, + "step": 564500 + }, + { + "epoch": 4.52, + "learning_rate": 4.774e-05, + "loss": 8.694, + "step": 565000 + }, + { + "epoch": 4.52, + "learning_rate": 4.7738e-05, + "loss": 8.6958, + "step": 565500 + }, + { + "epoch": 4.53, + "learning_rate": 4.7736000000000005e-05, + "loss": 8.6991, + "step": 566000 + }, + { + "epoch": 4.53, + "learning_rate": 4.7734e-05, + "loss": 8.6851, + "step": 566500 + }, + { + "epoch": 4.54, + "learning_rate": 4.7732e-05, + "loss": 8.6653, + "step": 567000 + }, + { + "epoch": 4.54, + "learning_rate": 4.7730000000000005e-05, + "loss": 8.7074, + "step": 567500 + }, + { + "epoch": 4.54, + "learning_rate": 4.7728e-05, + "loss": 8.7233, + "step": 568000 + }, + { + "epoch": 4.55, + "learning_rate": 4.7726000000000004e-05, + "loss": 8.6912, + "step": 568500 + }, + { + "epoch": 4.55, + "learning_rate": 4.7724e-05, + "loss": 8.6986, + "step": 569000 + }, + { + "epoch": 4.56, + "learning_rate": 4.7722e-05, + "loss": 8.6708, + "step": 569500 + }, + { + "epoch": 4.56, + "learning_rate": 4.7720000000000004e-05, + "loss": 8.6864, + "step": 570000 + }, + { + "epoch": 4.56, + "learning_rate": 4.7718e-05, + "loss": 8.6781, + "step": 570500 + }, + { + "epoch": 4.57, + "learning_rate": 4.7716e-05, + "loss": 8.6778, + "step": 571000 + }, + { + "epoch": 4.57, + "learning_rate": 4.7714000000000004e-05, + "loss": 8.6937, + "step": 571500 + }, + { + "epoch": 4.58, + "learning_rate": 4.7712e-05, + "loss": 8.722, + "step": 572000 + }, + { + "epoch": 4.58, + "learning_rate": 4.771e-05, + "loss": 8.7032, + "step": 572500 + }, + { + "epoch": 4.58, + "learning_rate": 4.7708000000000005e-05, + "loss": 8.6986, + "step": 573000 + }, + { + "epoch": 4.59, + "learning_rate": 4.7706e-05, + "loss": 8.713, + "step": 573500 + }, + { + "epoch": 4.59, + "learning_rate": 4.7704e-05, + "loss": 8.6979, + "step": 574000 + }, + { + "epoch": 4.6, + "learning_rate": 4.7702000000000005e-05, + "loss": 8.6977, + "step": 574500 + }, + { + "epoch": 4.6, + "learning_rate": 4.77e-05, + "loss": 8.7149, + "step": 575000 + }, + { + "epoch": 4.6, + "learning_rate": 4.7698e-05, + "loss": 8.7074, + "step": 575500 + }, + { + "epoch": 4.61, + "learning_rate": 4.7696e-05, + "loss": 8.6984, + "step": 576000 + }, + { + "epoch": 4.61, + "learning_rate": 4.7694e-05, + "loss": 8.7013, + "step": 576500 + }, + { + "epoch": 4.62, + "learning_rate": 4.7692000000000003e-05, + "loss": 8.7002, + "step": 577000 + }, + { + "epoch": 4.62, + "learning_rate": 4.769e-05, + "loss": 8.6604, + "step": 577500 + }, + { + "epoch": 4.62, + "learning_rate": 4.768800000000001e-05, + "loss": 8.6784, + "step": 578000 + }, + { + "epoch": 4.63, + "learning_rate": 4.7686000000000004e-05, + "loss": 8.6893, + "step": 578500 + }, + { + "epoch": 4.63, + "learning_rate": 4.7684e-05, + "loss": 8.7328, + "step": 579000 + }, + { + "epoch": 4.64, + "learning_rate": 4.7682e-05, + "loss": 8.6774, + "step": 579500 + }, + { + "epoch": 4.64, + "learning_rate": 4.7680000000000004e-05, + "loss": 8.7057, + "step": 580000 + }, + { + "epoch": 4.64, + "learning_rate": 4.7678e-05, + "loss": 8.6948, + "step": 580500 + }, + { + "epoch": 4.65, + "learning_rate": 4.7676e-05, + "loss": 8.7093, + "step": 581000 + }, + { + "epoch": 4.65, + "learning_rate": 4.7674000000000005e-05, + "loss": 8.6769, + "step": 581500 + }, + { + "epoch": 4.66, + "learning_rate": 4.7672e-05, + "loss": 8.6765, + "step": 582000 + }, + { + "epoch": 4.66, + "learning_rate": 4.767e-05, + "loss": 8.6831, + "step": 582500 + }, + { + "epoch": 4.66, + "learning_rate": 4.7668e-05, + "loss": 8.6693, + "step": 583000 + }, + { + "epoch": 4.67, + "learning_rate": 4.7666e-05, + "loss": 8.6931, + "step": 583500 + }, + { + "epoch": 4.67, + "learning_rate": 4.7664e-05, + "loss": 8.7038, + "step": 584000 + }, + { + "epoch": 4.68, + "learning_rate": 4.7662e-05, + "loss": 8.7228, + "step": 584500 + }, + { + "epoch": 4.68, + "learning_rate": 4.766000000000001e-05, + "loss": 8.7059, + "step": 585000 + }, + { + "epoch": 4.68, + "learning_rate": 4.7658000000000003e-05, + "loss": 8.7001, + "step": 585500 + }, + { + "epoch": 4.69, + "learning_rate": 4.7656e-05, + "loss": 8.6872, + "step": 586000 + }, + { + "epoch": 4.69, + "learning_rate": 4.7654e-05, + "loss": 8.6756, + "step": 586500 + }, + { + "epoch": 4.7, + "learning_rate": 4.7652000000000004e-05, + "loss": 8.6775, + "step": 587000 + }, + { + "epoch": 4.7, + "learning_rate": 4.765e-05, + "loss": 8.6848, + "step": 587500 + }, + { + "epoch": 4.7, + "learning_rate": 4.7648e-05, + "loss": 8.675, + "step": 588000 + }, + { + "epoch": 4.71, + "learning_rate": 4.7646000000000004e-05, + "loss": 8.6876, + "step": 588500 + }, + { + "epoch": 4.71, + "learning_rate": 4.7644e-05, + "loss": 8.6964, + "step": 589000 + }, + { + "epoch": 4.72, + "learning_rate": 4.7642e-05, + "loss": 8.704, + "step": 589500 + }, + { + "epoch": 4.72, + "learning_rate": 4.7640000000000005e-05, + "loss": 8.6812, + "step": 590000 + }, + { + "epoch": 4.72, + "learning_rate": 4.763800000000001e-05, + "loss": 8.6891, + "step": 590500 + }, + { + "epoch": 4.73, + "learning_rate": 4.7636e-05, + "loss": 8.6733, + "step": 591000 + }, + { + "epoch": 4.73, + "learning_rate": 4.7634e-05, + "loss": 8.6834, + "step": 591500 + }, + { + "epoch": 4.74, + "learning_rate": 4.763200000000001e-05, + "loss": 8.6956, + "step": 592000 + }, + { + "epoch": 4.74, + "learning_rate": 4.763e-05, + "loss": 8.6648, + "step": 592500 + }, + { + "epoch": 4.74, + "learning_rate": 4.7628e-05, + "loss": 8.6825, + "step": 593000 + }, + { + "epoch": 4.75, + "learning_rate": 4.7626e-05, + "loss": 8.6911, + "step": 593500 + }, + { + "epoch": 4.75, + "learning_rate": 4.7624000000000003e-05, + "loss": 8.6713, + "step": 594000 + }, + { + "epoch": 4.76, + "learning_rate": 4.7622e-05, + "loss": 8.6945, + "step": 594500 + }, + { + "epoch": 4.76, + "learning_rate": 4.762e-05, + "loss": 8.6941, + "step": 595000 + }, + { + "epoch": 4.76, + "learning_rate": 4.7618000000000004e-05, + "loss": 8.7096, + "step": 595500 + }, + { + "epoch": 4.77, + "learning_rate": 4.7616000000000006e-05, + "loss": 8.7098, + "step": 596000 + }, + { + "epoch": 4.77, + "learning_rate": 4.7614e-05, + "loss": 8.7069, + "step": 596500 + }, + { + "epoch": 4.78, + "learning_rate": 4.7612000000000004e-05, + "loss": 8.6807, + "step": 597000 + }, + { + "epoch": 4.78, + "learning_rate": 4.761000000000001e-05, + "loss": 8.7137, + "step": 597500 + }, + { + "epoch": 4.78, + "learning_rate": 4.7608e-05, + "loss": 8.6549, + "step": 598000 + }, + { + "epoch": 4.79, + "learning_rate": 4.7606e-05, + "loss": 8.6963, + "step": 598500 + }, + { + "epoch": 4.79, + "learning_rate": 4.760400000000001e-05, + "loss": 8.683, + "step": 599000 + }, + { + "epoch": 4.8, + "learning_rate": 4.7602e-05, + "loss": 8.672, + "step": 599500 + }, + { + "epoch": 4.8, + "learning_rate": 4.76e-05, + "loss": 8.6915, + "step": 600000 + }, + { + "epoch": 4.8, + "learning_rate": 4.7598e-05, + "loss": 8.6956, + "step": 600500 + }, + { + "epoch": 4.81, + "learning_rate": 4.7596e-05, + "loss": 8.7001, + "step": 601000 + }, + { + "epoch": 4.81, + "learning_rate": 4.7594e-05, + "loss": 8.7155, + "step": 601500 + }, + { + "epoch": 4.82, + "learning_rate": 4.7592e-05, + "loss": 8.6802, + "step": 602000 + }, + { + "epoch": 4.82, + "learning_rate": 4.7590000000000003e-05, + "loss": 8.7002, + "step": 602500 + }, + { + "epoch": 4.82, + "learning_rate": 4.7588000000000006e-05, + "loss": 8.6859, + "step": 603000 + }, + { + "epoch": 4.83, + "learning_rate": 4.7586e-05, + "loss": 8.692, + "step": 603500 + }, + { + "epoch": 4.83, + "learning_rate": 4.7584000000000004e-05, + "loss": 8.6832, + "step": 604000 + }, + { + "epoch": 4.84, + "learning_rate": 4.7582000000000006e-05, + "loss": 8.6765, + "step": 604500 + }, + { + "epoch": 4.84, + "learning_rate": 4.758e-05, + "loss": 8.679, + "step": 605000 + }, + { + "epoch": 4.84, + "learning_rate": 4.7578e-05, + "loss": 8.7049, + "step": 605500 + }, + { + "epoch": 4.85, + "learning_rate": 4.757600000000001e-05, + "loss": 8.7157, + "step": 606000 + }, + { + "epoch": 4.85, + "learning_rate": 4.7574e-05, + "loss": 8.7064, + "step": 606500 + }, + { + "epoch": 4.86, + "learning_rate": 4.7572e-05, + "loss": 8.6896, + "step": 607000 + }, + { + "epoch": 4.86, + "learning_rate": 4.757e-05, + "loss": 8.7057, + "step": 607500 + }, + { + "epoch": 4.86, + "learning_rate": 4.7568e-05, + "loss": 8.6853, + "step": 608000 + }, + { + "epoch": 4.87, + "learning_rate": 4.7566000000000005e-05, + "loss": 8.6964, + "step": 608500 + }, + { + "epoch": 4.87, + "learning_rate": 4.7564e-05, + "loss": 8.7125, + "step": 609000 + }, + { + "epoch": 4.88, + "learning_rate": 4.7562e-05, + "loss": 8.7113, + "step": 609500 + }, + { + "epoch": 4.88, + "learning_rate": 4.7560000000000005e-05, + "loss": 8.6883, + "step": 610000 + }, + { + "epoch": 4.88, + "learning_rate": 4.7558e-05, + "loss": 8.7056, + "step": 610500 + }, + { + "epoch": 4.89, + "learning_rate": 4.7556000000000003e-05, + "loss": 8.6759, + "step": 611000 + }, + { + "epoch": 4.89, + "learning_rate": 4.7554000000000006e-05, + "loss": 8.6787, + "step": 611500 + }, + { + "epoch": 4.9, + "learning_rate": 4.7552e-05, + "loss": 8.6963, + "step": 612000 + }, + { + "epoch": 4.9, + "learning_rate": 4.755e-05, + "loss": 8.7135, + "step": 612500 + }, + { + "epoch": 4.9, + "learning_rate": 4.7548000000000006e-05, + "loss": 8.7052, + "step": 613000 + }, + { + "epoch": 4.91, + "learning_rate": 4.7546e-05, + "loss": 8.6763, + "step": 613500 + }, + { + "epoch": 4.91, + "learning_rate": 4.7544e-05, + "loss": 8.6894, + "step": 614000 + }, + { + "epoch": 4.92, + "learning_rate": 4.7542e-05, + "loss": 8.684, + "step": 614500 + }, + { + "epoch": 4.92, + "learning_rate": 4.754e-05, + "loss": 8.7096, + "step": 615000 + }, + { + "epoch": 4.92, + "learning_rate": 4.7538000000000005e-05, + "loss": 8.7041, + "step": 615500 + }, + { + "epoch": 4.93, + "learning_rate": 4.7536e-05, + "loss": 8.7029, + "step": 616000 + }, + { + "epoch": 4.93, + "learning_rate": 4.7534e-05, + "loss": 8.6898, + "step": 616500 + }, + { + "epoch": 4.94, + "learning_rate": 4.7532000000000005e-05, + "loss": 8.701, + "step": 617000 + }, + { + "epoch": 4.94, + "learning_rate": 4.753e-05, + "loss": 8.7274, + "step": 617500 + }, + { + "epoch": 4.94, + "learning_rate": 4.7528e-05, + "loss": 8.7009, + "step": 618000 + }, + { + "epoch": 4.95, + "learning_rate": 4.7526000000000005e-05, + "loss": 8.6851, + "step": 618500 + }, + { + "epoch": 4.95, + "learning_rate": 4.7524e-05, + "loss": 8.6986, + "step": 619000 + }, + { + "epoch": 4.96, + "learning_rate": 4.7522e-05, + "loss": 8.6776, + "step": 619500 + }, + { + "epoch": 4.96, + "learning_rate": 4.7520000000000006e-05, + "loss": 8.6817, + "step": 620000 + }, + { + "epoch": 4.96, + "learning_rate": 4.7518e-05, + "loss": 8.6879, + "step": 620500 + }, + { + "epoch": 4.97, + "learning_rate": 4.7516000000000004e-05, + "loss": 8.6722, + "step": 621000 + }, + { + "epoch": 4.97, + "learning_rate": 4.7514000000000006e-05, + "loss": 8.695, + "step": 621500 + }, + { + "epoch": 4.98, + "learning_rate": 4.7512e-05, + "loss": 8.673, + "step": 622000 + }, + { + "epoch": 4.98, + "learning_rate": 4.7510000000000004e-05, + "loss": 8.699, + "step": 622500 + }, + { + "epoch": 4.98, + "learning_rate": 4.7508e-05, + "loss": 8.6841, + "step": 623000 + }, + { + "epoch": 4.99, + "learning_rate": 4.7506e-05, + "loss": 8.6652, + "step": 623500 + }, + { + "epoch": 4.99, + "learning_rate": 4.7504000000000005e-05, + "loss": 8.7034, + "step": 624000 + }, + { + "epoch": 5.0, + "learning_rate": 4.7502e-05, + "loss": 8.6708, + "step": 624500 + }, + { + "epoch": 5.0, + "learning_rate": 4.75e-05, + "loss": 8.6996, + "step": 625000 + }, + { + "epoch": 5.0, + "learning_rate": 4.7498000000000005e-05, + "loss": 8.6895, + "step": 625500 + }, + { + "epoch": 5.01, + "learning_rate": 4.7496e-05, + "loss": 8.6687, + "step": 626000 + }, + { + "epoch": 5.01, + "learning_rate": 4.7493999999999996e-05, + "loss": 8.6684, + "step": 626500 + }, + { + "epoch": 5.02, + "learning_rate": 4.7492000000000005e-05, + "loss": 8.6835, + "step": 627000 + }, + { + "epoch": 5.02, + "learning_rate": 4.749e-05, + "loss": 8.6745, + "step": 627500 + }, + { + "epoch": 5.02, + "learning_rate": 4.7488000000000003e-05, + "loss": 8.6795, + "step": 628000 + }, + { + "epoch": 5.03, + "learning_rate": 4.7486000000000006e-05, + "loss": 8.6914, + "step": 628500 + }, + { + "epoch": 5.03, + "learning_rate": 4.7484e-05, + "loss": 8.6906, + "step": 629000 + }, + { + "epoch": 5.04, + "learning_rate": 4.7482000000000004e-05, + "loss": 8.7079, + "step": 629500 + }, + { + "epoch": 5.04, + "learning_rate": 4.748e-05, + "loss": 8.6805, + "step": 630000 + }, + { + "epoch": 5.04, + "learning_rate": 4.7478e-05, + "loss": 8.7025, + "step": 630500 + }, + { + "epoch": 5.05, + "learning_rate": 4.7476000000000004e-05, + "loss": 8.6981, + "step": 631000 + }, + { + "epoch": 5.05, + "learning_rate": 4.7474e-05, + "loss": 8.6627, + "step": 631500 + }, + { + "epoch": 5.06, + "learning_rate": 4.7472e-05, + "loss": 8.6893, + "step": 632000 + }, + { + "epoch": 5.06, + "learning_rate": 4.7470000000000005e-05, + "loss": 8.6852, + "step": 632500 + }, + { + "epoch": 5.06, + "learning_rate": 4.7468e-05, + "loss": 8.663, + "step": 633000 + }, + { + "epoch": 5.07, + "learning_rate": 4.7466e-05, + "loss": 8.6674, + "step": 633500 + }, + { + "epoch": 5.07, + "learning_rate": 4.7464000000000005e-05, + "loss": 8.6806, + "step": 634000 + }, + { + "epoch": 5.08, + "learning_rate": 4.7462e-05, + "loss": 8.6666, + "step": 634500 + }, + { + "epoch": 5.08, + "learning_rate": 4.746e-05, + "loss": 8.6728, + "step": 635000 + }, + { + "epoch": 5.08, + "learning_rate": 4.7458000000000005e-05, + "loss": 8.7154, + "step": 635500 + }, + { + "epoch": 5.09, + "learning_rate": 4.7456e-05, + "loss": 8.7059, + "step": 636000 + }, + { + "epoch": 5.09, + "learning_rate": 4.7454000000000003e-05, + "loss": 8.6599, + "step": 636500 + }, + { + "epoch": 5.1, + "learning_rate": 4.7452e-05, + "loss": 8.671, + "step": 637000 + }, + { + "epoch": 5.1, + "learning_rate": 4.745e-05, + "loss": 8.6911, + "step": 637500 + }, + { + "epoch": 5.1, + "learning_rate": 4.7448000000000004e-05, + "loss": 8.7119, + "step": 638000 + }, + { + "epoch": 5.11, + "learning_rate": 4.7446e-05, + "loss": 8.7202, + "step": 638500 + }, + { + "epoch": 5.11, + "learning_rate": 4.7444e-05, + "loss": 8.6662, + "step": 639000 + }, + { + "epoch": 5.12, + "learning_rate": 4.7442000000000004e-05, + "loss": 8.7054, + "step": 639500 + }, + { + "epoch": 5.12, + "learning_rate": 4.744e-05, + "loss": 8.673, + "step": 640000 + }, + { + "epoch": 5.12, + "learning_rate": 4.7438e-05, + "loss": 8.6985, + "step": 640500 + }, + { + "epoch": 5.13, + "learning_rate": 4.7436000000000005e-05, + "loss": 8.6946, + "step": 641000 + }, + { + "epoch": 5.13, + "learning_rate": 4.7434e-05, + "loss": 8.6743, + "step": 641500 + }, + { + "epoch": 5.14, + "learning_rate": 4.7432e-05, + "loss": 8.6483, + "step": 642000 + }, + { + "epoch": 5.14, + "learning_rate": 4.7430000000000005e-05, + "loss": 8.6967, + "step": 642500 + }, + { + "epoch": 5.14, + "learning_rate": 4.7428e-05, + "loss": 8.6999, + "step": 643000 + }, + { + "epoch": 5.15, + "learning_rate": 4.7426e-05, + "loss": 8.6907, + "step": 643500 + }, + { + "epoch": 5.15, + "learning_rate": 4.7424e-05, + "loss": 8.6941, + "step": 644000 + }, + { + "epoch": 5.16, + "learning_rate": 4.7422e-05, + "loss": 8.6523, + "step": 644500 + }, + { + "epoch": 5.16, + "learning_rate": 4.742e-05, + "loss": 8.7051, + "step": 645000 + }, + { + "epoch": 5.16, + "learning_rate": 4.7418e-05, + "loss": 8.6798, + "step": 645500 + }, + { + "epoch": 5.17, + "learning_rate": 4.741600000000001e-05, + "loss": 8.65, + "step": 646000 + }, + { + "epoch": 5.17, + "learning_rate": 4.7414000000000004e-05, + "loss": 8.6642, + "step": 646500 + }, + { + "epoch": 5.18, + "learning_rate": 4.7412e-05, + "loss": 8.6939, + "step": 647000 + }, + { + "epoch": 5.18, + "learning_rate": 4.741e-05, + "loss": 8.706, + "step": 647500 + }, + { + "epoch": 5.18, + "learning_rate": 4.7408000000000004e-05, + "loss": 8.6656, + "step": 648000 + }, + { + "epoch": 5.19, + "learning_rate": 4.7406e-05, + "loss": 8.6735, + "step": 648500 + }, + { + "epoch": 5.19, + "learning_rate": 4.7404e-05, + "loss": 8.7023, + "step": 649000 + }, + { + "epoch": 5.2, + "learning_rate": 4.7402000000000005e-05, + "loss": 8.6907, + "step": 649500 + }, + { + "epoch": 5.2, + "learning_rate": 4.74e-05, + "loss": 8.6788, + "step": 650000 + }, + { + "epoch": 5.2, + "learning_rate": 4.7398e-05, + "loss": 8.6952, + "step": 650500 + }, + { + "epoch": 5.21, + "learning_rate": 4.7396e-05, + "loss": 8.6699, + "step": 651000 + }, + { + "epoch": 5.21, + "learning_rate": 4.7394e-05, + "loss": 8.6877, + "step": 651500 + }, + { + "epoch": 5.22, + "learning_rate": 4.7392e-05, + "loss": 8.6928, + "step": 652000 + }, + { + "epoch": 5.22, + "learning_rate": 4.739e-05, + "loss": 8.6826, + "step": 652500 + }, + { + "epoch": 5.22, + "learning_rate": 4.738800000000001e-05, + "loss": 8.6977, + "step": 653000 + }, + { + "epoch": 5.23, + "learning_rate": 4.7386e-05, + "loss": 8.6922, + "step": 653500 + }, + { + "epoch": 5.23, + "learning_rate": 4.7384e-05, + "loss": 8.6922, + "step": 654000 + }, + { + "epoch": 5.24, + "learning_rate": 4.7382e-05, + "loss": 8.6917, + "step": 654500 + }, + { + "epoch": 5.24, + "learning_rate": 4.7380000000000004e-05, + "loss": 8.7062, + "step": 655000 + }, + { + "epoch": 5.24, + "learning_rate": 4.7378e-05, + "loss": 8.6872, + "step": 655500 + }, + { + "epoch": 5.25, + "learning_rate": 4.7376e-05, + "loss": 8.6742, + "step": 656000 + }, + { + "epoch": 5.25, + "learning_rate": 4.7374000000000004e-05, + "loss": 8.7035, + "step": 656500 + }, + { + "epoch": 5.26, + "learning_rate": 4.7372e-05, + "loss": 8.6762, + "step": 657000 + }, + { + "epoch": 5.26, + "learning_rate": 4.737e-05, + "loss": 8.7042, + "step": 657500 + }, + { + "epoch": 5.26, + "learning_rate": 4.7368000000000005e-05, + "loss": 8.687, + "step": 658000 + }, + { + "epoch": 5.27, + "learning_rate": 4.736600000000001e-05, + "loss": 8.6841, + "step": 658500 + }, + { + "epoch": 5.27, + "learning_rate": 4.7364e-05, + "loss": 8.7086, + "step": 659000 + }, + { + "epoch": 5.28, + "learning_rate": 4.7362e-05, + "loss": 8.6768, + "step": 659500 + }, + { + "epoch": 5.28, + "learning_rate": 4.736000000000001e-05, + "loss": 8.683, + "step": 660000 + }, + { + "epoch": 5.28, + "learning_rate": 4.7358e-05, + "loss": 8.6867, + "step": 660500 + }, + { + "epoch": 5.29, + "learning_rate": 4.7356e-05, + "loss": 8.6706, + "step": 661000 + }, + { + "epoch": 5.29, + "learning_rate": 4.7354e-05, + "loss": 8.6344, + "step": 661500 + }, + { + "epoch": 5.3, + "learning_rate": 4.7352e-05, + "loss": 8.6803, + "step": 662000 + }, + { + "epoch": 5.3, + "learning_rate": 4.735e-05, + "loss": 8.691, + "step": 662500 + }, + { + "epoch": 5.3, + "learning_rate": 4.7348e-05, + "loss": 8.6917, + "step": 663000 + }, + { + "epoch": 5.31, + "learning_rate": 4.7346000000000004e-05, + "loss": 8.6831, + "step": 663500 + }, + { + "epoch": 5.31, + "learning_rate": 4.7344000000000006e-05, + "loss": 8.6985, + "step": 664000 + }, + { + "epoch": 5.32, + "learning_rate": 4.7342e-05, + "loss": 8.6908, + "step": 664500 + }, + { + "epoch": 5.32, + "learning_rate": 4.7340000000000004e-05, + "loss": 8.6704, + "step": 665000 + }, + { + "epoch": 5.32, + "learning_rate": 4.7338000000000007e-05, + "loss": 8.685, + "step": 665500 + }, + { + "epoch": 5.33, + "learning_rate": 4.7336e-05, + "loss": 8.7153, + "step": 666000 + }, + { + "epoch": 5.33, + "learning_rate": 4.7334e-05, + "loss": 8.7006, + "step": 666500 + }, + { + "epoch": 5.34, + "learning_rate": 4.733200000000001e-05, + "loss": 8.6747, + "step": 667000 + }, + { + "epoch": 5.34, + "learning_rate": 4.733e-05, + "loss": 8.6613, + "step": 667500 + }, + { + "epoch": 5.34, + "learning_rate": 4.7328e-05, + "loss": 8.7035, + "step": 668000 + }, + { + "epoch": 5.35, + "learning_rate": 4.7326e-05, + "loss": 8.6896, + "step": 668500 + }, + { + "epoch": 5.35, + "learning_rate": 4.7324e-05, + "loss": 8.6955, + "step": 669000 + }, + { + "epoch": 5.36, + "learning_rate": 4.7322e-05, + "loss": 8.7121, + "step": 669500 + }, + { + "epoch": 5.36, + "learning_rate": 4.732e-05, + "loss": 8.6922, + "step": 670000 + }, + { + "epoch": 5.36, + "learning_rate": 4.7318e-05, + "loss": 8.6892, + "step": 670500 + }, + { + "epoch": 5.37, + "learning_rate": 4.7316000000000006e-05, + "loss": 8.6795, + "step": 671000 + }, + { + "epoch": 5.37, + "learning_rate": 4.7314e-05, + "loss": 8.7076, + "step": 671500 + }, + { + "epoch": 5.38, + "learning_rate": 4.7312000000000004e-05, + "loss": 8.6899, + "step": 672000 + }, + { + "epoch": 5.38, + "learning_rate": 4.7310000000000006e-05, + "loss": 8.7144, + "step": 672500 + }, + { + "epoch": 5.38, + "learning_rate": 4.7308e-05, + "loss": 8.7106, + "step": 673000 + }, + { + "epoch": 5.39, + "learning_rate": 4.7306e-05, + "loss": 8.683, + "step": 673500 + }, + { + "epoch": 5.39, + "learning_rate": 4.7304000000000007e-05, + "loss": 8.7013, + "step": 674000 + }, + { + "epoch": 5.4, + "learning_rate": 4.7302e-05, + "loss": 8.6933, + "step": 674500 + }, + { + "epoch": 5.4, + "learning_rate": 4.73e-05, + "loss": 8.7011, + "step": 675000 + }, + { + "epoch": 5.4, + "learning_rate": 4.7298e-05, + "loss": 8.6761, + "step": 675500 + }, + { + "epoch": 5.41, + "learning_rate": 4.7296e-05, + "loss": 8.676, + "step": 676000 + }, + { + "epoch": 5.41, + "learning_rate": 4.7294000000000005e-05, + "loss": 8.7049, + "step": 676500 + }, + { + "epoch": 5.42, + "learning_rate": 4.7292e-05, + "loss": 8.6862, + "step": 677000 + }, + { + "epoch": 5.42, + "learning_rate": 4.729e-05, + "loss": 8.6861, + "step": 677500 + }, + { + "epoch": 5.42, + "learning_rate": 4.7288000000000005e-05, + "loss": 8.6773, + "step": 678000 + }, + { + "epoch": 5.43, + "learning_rate": 4.7286e-05, + "loss": 8.6915, + "step": 678500 + }, + { + "epoch": 5.43, + "learning_rate": 4.7284e-05, + "loss": 8.6942, + "step": 679000 + }, + { + "epoch": 5.44, + "learning_rate": 4.7282000000000006e-05, + "loss": 8.6837, + "step": 679500 + }, + { + "epoch": 5.44, + "learning_rate": 4.728e-05, + "loss": 8.6891, + "step": 680000 + }, + { + "epoch": 5.44, + "learning_rate": 4.7278e-05, + "loss": 8.6752, + "step": 680500 + }, + { + "epoch": 5.45, + "learning_rate": 4.7276000000000006e-05, + "loss": 8.6769, + "step": 681000 + }, + { + "epoch": 5.45, + "learning_rate": 4.7274e-05, + "loss": 8.6738, + "step": 681500 + }, + { + "epoch": 5.46, + "learning_rate": 4.7272e-05, + "loss": 8.6892, + "step": 682000 + }, + { + "epoch": 5.46, + "learning_rate": 4.7270000000000007e-05, + "loss": 8.7074, + "step": 682500 + }, + { + "epoch": 5.46, + "learning_rate": 4.7268e-05, + "loss": 8.6909, + "step": 683000 + }, + { + "epoch": 5.47, + "learning_rate": 4.7266000000000005e-05, + "loss": 8.6848, + "step": 683500 + }, + { + "epoch": 5.47, + "learning_rate": 4.7264e-05, + "loss": 8.6926, + "step": 684000 + }, + { + "epoch": 5.48, + "learning_rate": 4.7262e-05, + "loss": 8.6606, + "step": 684500 + }, + { + "epoch": 5.48, + "learning_rate": 4.7260000000000005e-05, + "loss": 8.696, + "step": 685000 + }, + { + "epoch": 5.48, + "learning_rate": 4.7258e-05, + "loss": 8.6629, + "step": 685500 + }, + { + "epoch": 5.49, + "learning_rate": 4.7256e-05, + "loss": 8.6887, + "step": 686000 + }, + { + "epoch": 5.49, + "learning_rate": 4.7254000000000005e-05, + "loss": 8.6852, + "step": 686500 + }, + { + "epoch": 5.5, + "learning_rate": 4.7252e-05, + "loss": 8.6677, + "step": 687000 + }, + { + "epoch": 5.5, + "learning_rate": 4.7249999999999997e-05, + "loss": 8.6951, + "step": 687500 + }, + { + "epoch": 5.5, + "learning_rate": 4.7248000000000006e-05, + "loss": 8.6928, + "step": 688000 + }, + { + "epoch": 5.51, + "learning_rate": 4.7246e-05, + "loss": 8.7003, + "step": 688500 + }, + { + "epoch": 5.51, + "learning_rate": 4.7244000000000004e-05, + "loss": 8.6892, + "step": 689000 + }, + { + "epoch": 5.52, + "learning_rate": 4.7242000000000006e-05, + "loss": 8.6872, + "step": 689500 + }, + { + "epoch": 5.52, + "learning_rate": 4.724e-05, + "loss": 8.711, + "step": 690000 + }, + { + "epoch": 5.52, + "learning_rate": 4.7238000000000004e-05, + "loss": 8.7015, + "step": 690500 + }, + { + "epoch": 5.53, + "learning_rate": 4.7236e-05, + "loss": 8.6695, + "step": 691000 + }, + { + "epoch": 5.53, + "learning_rate": 4.7234e-05, + "loss": 8.7176, + "step": 691500 + }, + { + "epoch": 5.54, + "learning_rate": 4.7232000000000005e-05, + "loss": 8.7359, + "step": 692000 + }, + { + "epoch": 5.54, + "learning_rate": 4.723e-05, + "loss": 8.6934, + "step": 692500 + }, + { + "epoch": 5.54, + "learning_rate": 4.7228e-05, + "loss": 8.6977, + "step": 693000 + }, + { + "epoch": 5.55, + "learning_rate": 4.7226000000000005e-05, + "loss": 8.6904, + "step": 693500 + }, + { + "epoch": 5.55, + "learning_rate": 4.7224e-05, + "loss": 8.6897, + "step": 694000 + }, + { + "epoch": 5.56, + "learning_rate": 4.7222e-05, + "loss": 8.6611, + "step": 694500 + }, + { + "epoch": 5.56, + "learning_rate": 4.7220000000000005e-05, + "loss": 8.6864, + "step": 695000 + }, + { + "epoch": 5.56, + "learning_rate": 4.7218e-05, + "loss": 8.6747, + "step": 695500 + }, + { + "epoch": 5.57, + "learning_rate": 4.7216e-05, + "loss": 8.6676, + "step": 696000 + }, + { + "epoch": 5.57, + "learning_rate": 4.7214000000000006e-05, + "loss": 8.7055, + "step": 696500 + }, + { + "epoch": 5.58, + "learning_rate": 4.7212e-05, + "loss": 8.6965, + "step": 697000 + }, + { + "epoch": 5.58, + "learning_rate": 4.7210000000000004e-05, + "loss": 8.6965, + "step": 697500 + }, + { + "epoch": 5.58, + "learning_rate": 4.7208e-05, + "loss": 8.6918, + "step": 698000 + }, + { + "epoch": 5.59, + "learning_rate": 4.7206e-05, + "loss": 8.6898, + "step": 698500 + }, + { + "epoch": 5.59, + "learning_rate": 4.7204000000000004e-05, + "loss": 8.7017, + "step": 699000 + }, + { + "epoch": 5.6, + "learning_rate": 4.7202e-05, + "loss": 8.6897, + "step": 699500 + }, + { + "epoch": 5.6, + "learning_rate": 4.72e-05, + "loss": 8.6989, + "step": 700000 + }, + { + "epoch": 5.6, + "learning_rate": 4.7198000000000004e-05, + "loss": 8.6906, + "step": 700500 + }, + { + "epoch": 5.61, + "learning_rate": 4.7196e-05, + "loss": 8.6889, + "step": 701000 + }, + { + "epoch": 5.61, + "learning_rate": 4.7194e-05, + "loss": 8.6767, + "step": 701500 + }, + { + "epoch": 5.62, + "learning_rate": 4.7192000000000005e-05, + "loss": 8.6921, + "step": 702000 + }, + { + "epoch": 5.62, + "learning_rate": 4.719e-05, + "loss": 8.6512, + "step": 702500 + }, + { + "epoch": 5.62, + "learning_rate": 4.7188e-05, + "loss": 8.6645, + "step": 703000 + }, + { + "epoch": 5.63, + "learning_rate": 4.7186000000000005e-05, + "loss": 8.6867, + "step": 703500 + }, + { + "epoch": 5.63, + "learning_rate": 4.7184e-05, + "loss": 8.6958, + "step": 704000 + }, + { + "epoch": 5.64, + "learning_rate": 4.7182e-05, + "loss": 8.6656, + "step": 704500 + }, + { + "epoch": 5.64, + "learning_rate": 4.718e-05, + "loss": 8.6697, + "step": 705000 + }, + { + "epoch": 5.64, + "learning_rate": 4.7178e-05, + "loss": 8.6867, + "step": 705500 + }, + { + "epoch": 5.65, + "learning_rate": 4.7176000000000004e-05, + "loss": 8.6707, + "step": 706000 + }, + { + "epoch": 5.65, + "learning_rate": 4.7174e-05, + "loss": 8.6832, + "step": 706500 + }, + { + "epoch": 5.66, + "learning_rate": 4.7172e-05, + "loss": 8.6698, + "step": 707000 + }, + { + "epoch": 5.66, + "learning_rate": 4.7170000000000004e-05, + "loss": 8.6951, + "step": 707500 + }, + { + "epoch": 5.66, + "learning_rate": 4.7168e-05, + "loss": 8.6829, + "step": 708000 + }, + { + "epoch": 5.67, + "learning_rate": 4.7166e-05, + "loss": 8.6971, + "step": 708500 + }, + { + "epoch": 5.67, + "learning_rate": 4.7164000000000004e-05, + "loss": 8.6797, + "step": 709000 + }, + { + "epoch": 5.68, + "learning_rate": 4.7162e-05, + "loss": 8.7025, + "step": 709500 + }, + { + "epoch": 5.68, + "learning_rate": 4.716e-05, + "loss": 8.6876, + "step": 710000 + }, + { + "epoch": 5.68, + "learning_rate": 4.7158000000000005e-05, + "loss": 8.6836, + "step": 710500 + }, + { + "epoch": 5.69, + "learning_rate": 4.7156e-05, + "loss": 8.6867, + "step": 711000 + }, + { + "epoch": 5.69, + "learning_rate": 4.7154e-05, + "loss": 8.6948, + "step": 711500 + }, + { + "epoch": 5.7, + "learning_rate": 4.7152e-05, + "loss": 8.7156, + "step": 712000 + }, + { + "epoch": 5.7, + "learning_rate": 4.715e-05, + "loss": 8.6833, + "step": 712500 + }, + { + "epoch": 5.7, + "learning_rate": 4.7148e-05, + "loss": 8.6823, + "step": 713000 + }, + { + "epoch": 5.71, + "learning_rate": 4.7146e-05, + "loss": 8.6815, + "step": 713500 + }, + { + "epoch": 5.71, + "learning_rate": 4.714400000000001e-05, + "loss": 8.6914, + "step": 714000 + }, + { + "epoch": 5.72, + "learning_rate": 4.7142000000000004e-05, + "loss": 8.6879, + "step": 714500 + }, + { + "epoch": 5.72, + "learning_rate": 4.714e-05, + "loss": 8.6631, + "step": 715000 + }, + { + "epoch": 5.72, + "learning_rate": 4.7138e-05, + "loss": 8.6916, + "step": 715500 + }, + { + "epoch": 5.73, + "learning_rate": 4.7136000000000004e-05, + "loss": 8.6991, + "step": 716000 + }, + { + "epoch": 5.73, + "learning_rate": 4.7134e-05, + "loss": 8.6888, + "step": 716500 + }, + { + "epoch": 5.74, + "learning_rate": 4.7132e-05, + "loss": 8.6925, + "step": 717000 + }, + { + "epoch": 5.74, + "learning_rate": 4.7130000000000004e-05, + "loss": 8.6895, + "step": 717500 + }, + { + "epoch": 5.74, + "learning_rate": 4.7128e-05, + "loss": 8.6821, + "step": 718000 + }, + { + "epoch": 5.75, + "learning_rate": 4.7126e-05, + "loss": 8.6825, + "step": 718500 + }, + { + "epoch": 5.75, + "learning_rate": 4.7124000000000005e-05, + "loss": 8.6447, + "step": 719000 + }, + { + "epoch": 5.76, + "learning_rate": 4.712200000000001e-05, + "loss": 8.6865, + "step": 719500 + }, + { + "epoch": 5.76, + "learning_rate": 4.712e-05, + "loss": 8.6965, + "step": 720000 + }, + { + "epoch": 5.76, + "learning_rate": 4.7118e-05, + "loss": 8.6994, + "step": 720500 + }, + { + "epoch": 5.77, + "learning_rate": 4.711600000000001e-05, + "loss": 8.7109, + "step": 721000 + }, + { + "epoch": 5.77, + "learning_rate": 4.7114e-05, + "loss": 8.6661, + "step": 721500 + }, + { + "epoch": 5.78, + "learning_rate": 4.7112e-05, + "loss": 8.6944, + "step": 722000 + }, + { + "epoch": 5.78, + "learning_rate": 4.711e-05, + "loss": 8.7156, + "step": 722500 + }, + { + "epoch": 5.78, + "learning_rate": 4.7108000000000004e-05, + "loss": 8.6916, + "step": 723000 + }, + { + "epoch": 5.79, + "learning_rate": 4.7106e-05, + "loss": 8.6661, + "step": 723500 + }, + { + "epoch": 5.79, + "learning_rate": 4.7104e-05, + "loss": 8.6902, + "step": 724000 + }, + { + "epoch": 5.8, + "learning_rate": 4.7102000000000004e-05, + "loss": 8.6957, + "step": 724500 + }, + { + "epoch": 5.8, + "learning_rate": 4.71e-05, + "loss": 8.687, + "step": 725000 + }, + { + "epoch": 5.8, + "learning_rate": 4.7098e-05, + "loss": 8.7003, + "step": 725500 + }, + { + "epoch": 5.81, + "learning_rate": 4.7096000000000004e-05, + "loss": 8.6713, + "step": 726000 + }, + { + "epoch": 5.81, + "learning_rate": 4.709400000000001e-05, + "loss": 8.6699, + "step": 726500 + }, + { + "epoch": 5.82, + "learning_rate": 4.7092e-05, + "loss": 8.6869, + "step": 727000 + }, + { + "epoch": 5.82, + "learning_rate": 4.709e-05, + "loss": 8.6778, + "step": 727500 + }, + { + "epoch": 5.82, + "learning_rate": 4.708800000000001e-05, + "loss": 8.675, + "step": 728000 + }, + { + "epoch": 5.83, + "learning_rate": 4.7086e-05, + "loss": 8.6962, + "step": 728500 + }, + { + "epoch": 5.83, + "learning_rate": 4.7084e-05, + "loss": 8.7078, + "step": 729000 + }, + { + "epoch": 5.84, + "learning_rate": 4.7082e-05, + "loss": 8.6918, + "step": 729500 + }, + { + "epoch": 5.84, + "learning_rate": 4.708e-05, + "loss": 8.7071, + "step": 730000 + }, + { + "epoch": 5.84, + "learning_rate": 4.7078e-05, + "loss": 8.6868, + "step": 730500 + }, + { + "epoch": 5.85, + "learning_rate": 4.7076e-05, + "loss": 8.6995, + "step": 731000 + }, + { + "epoch": 5.85, + "learning_rate": 4.7074000000000004e-05, + "loss": 8.6819, + "step": 731500 + }, + { + "epoch": 5.86, + "learning_rate": 4.7072000000000006e-05, + "loss": 8.6853, + "step": 732000 + }, + { + "epoch": 5.86, + "learning_rate": 4.707e-05, + "loss": 8.695, + "step": 732500 + }, + { + "epoch": 5.86, + "learning_rate": 4.7068000000000004e-05, + "loss": 8.6754, + "step": 733000 + }, + { + "epoch": 5.87, + "learning_rate": 4.7066000000000006e-05, + "loss": 8.7053, + "step": 733500 + }, + { + "epoch": 5.87, + "learning_rate": 4.7064e-05, + "loss": 8.6838, + "step": 734000 + }, + { + "epoch": 5.88, + "learning_rate": 4.7062e-05, + "loss": 8.6893, + "step": 734500 + }, + { + "epoch": 5.88, + "learning_rate": 4.706000000000001e-05, + "loss": 8.7103, + "step": 735000 + }, + { + "epoch": 5.88, + "learning_rate": 4.7058e-05, + "loss": 8.6732, + "step": 735500 + }, + { + "epoch": 5.89, + "learning_rate": 4.7056e-05, + "loss": 8.7037, + "step": 736000 + }, + { + "epoch": 5.89, + "learning_rate": 4.7054e-05, + "loss": 8.6949, + "step": 736500 + }, + { + "epoch": 5.9, + "learning_rate": 4.7052e-05, + "loss": 8.7115, + "step": 737000 + }, + { + "epoch": 5.9, + "learning_rate": 4.705e-05, + "loss": 8.6782, + "step": 737500 + }, + { + "epoch": 5.9, + "learning_rate": 4.7048e-05, + "loss": 8.6852, + "step": 738000 + }, + { + "epoch": 5.91, + "learning_rate": 4.7046e-05, + "loss": 8.6756, + "step": 738500 + }, + { + "epoch": 5.91, + "learning_rate": 4.7044000000000006e-05, + "loss": 8.6945, + "step": 739000 + }, + { + "epoch": 5.92, + "learning_rate": 4.7042e-05, + "loss": 8.6926, + "step": 739500 + }, + { + "epoch": 5.92, + "learning_rate": 4.7040000000000004e-05, + "loss": 8.6959, + "step": 740000 + }, + { + "epoch": 5.92, + "learning_rate": 4.7038000000000006e-05, + "loss": 8.6791, + "step": 740500 + }, + { + "epoch": 5.93, + "learning_rate": 4.7036e-05, + "loss": 8.6837, + "step": 741000 + }, + { + "epoch": 5.93, + "learning_rate": 4.7034e-05, + "loss": 8.6567, + "step": 741500 + }, + { + "epoch": 5.94, + "learning_rate": 4.7032000000000006e-05, + "loss": 8.6724, + "step": 742000 + }, + { + "epoch": 5.94, + "learning_rate": 4.703e-05, + "loss": 8.6793, + "step": 742500 + }, + { + "epoch": 5.94, + "learning_rate": 4.7028e-05, + "loss": 8.7018, + "step": 743000 + }, + { + "epoch": 5.95, + "learning_rate": 4.7026e-05, + "loss": 8.6535, + "step": 743500 + }, + { + "epoch": 5.95, + "learning_rate": 4.7024e-05, + "loss": 8.6763, + "step": 744000 + }, + { + "epoch": 5.96, + "learning_rate": 4.7022000000000005e-05, + "loss": 8.6904, + "step": 744500 + }, + { + "epoch": 5.96, + "learning_rate": 4.702e-05, + "loss": 8.7006, + "step": 745000 + }, + { + "epoch": 5.96, + "learning_rate": 4.7018e-05, + "loss": 8.705, + "step": 745500 + }, + { + "epoch": 5.97, + "learning_rate": 4.7016000000000005e-05, + "loss": 8.685, + "step": 746000 + }, + { + "epoch": 5.97, + "learning_rate": 4.7014e-05, + "loss": 8.6814, + "step": 746500 + }, + { + "epoch": 5.98, + "learning_rate": 4.7012e-05, + "loss": 8.6848, + "step": 747000 + }, + { + "epoch": 5.98, + "learning_rate": 4.7010000000000006e-05, + "loss": 8.6957, + "step": 747500 + }, + { + "epoch": 5.98, + "learning_rate": 4.7008e-05, + "loss": 8.6929, + "step": 748000 + }, + { + "epoch": 5.99, + "learning_rate": 4.7006e-05, + "loss": 8.6925, + "step": 748500 + }, + { + "epoch": 5.99, + "learning_rate": 4.7004000000000006e-05, + "loss": 8.7093, + "step": 749000 + }, + { + "epoch": 6.0, + "learning_rate": 4.7002e-05, + "loss": 8.6777, + "step": 749500 + }, + { + "epoch": 6.0, + "learning_rate": 4.7e-05, + "loss": 8.6913, + "step": 750000 + }, + { + "epoch": 6.0, + "learning_rate": 4.6998000000000006e-05, + "loss": 8.7201, + "step": 750500 + }, + { + "epoch": 6.01, + "learning_rate": 4.6996e-05, + "loss": 8.6985, + "step": 751000 + }, + { + "epoch": 6.01, + "learning_rate": 4.6994000000000004e-05, + "loss": 8.673, + "step": 751500 + }, + { + "epoch": 6.02, + "learning_rate": 4.6992e-05, + "loss": 8.6662, + "step": 752000 + }, + { + "epoch": 6.02, + "learning_rate": 4.699e-05, + "loss": 8.7033, + "step": 752500 + }, + { + "epoch": 6.02, + "learning_rate": 4.6988000000000005e-05, + "loss": 8.7036, + "step": 753000 + }, + { + "epoch": 6.03, + "learning_rate": 4.6986e-05, + "loss": 8.7168, + "step": 753500 + }, + { + "epoch": 6.03, + "learning_rate": 4.6984e-05, + "loss": 8.6793, + "step": 754000 + }, + { + "epoch": 6.04, + "learning_rate": 4.6982000000000005e-05, + "loss": 8.658, + "step": 754500 + }, + { + "epoch": 6.04, + "learning_rate": 4.698e-05, + "loss": 8.6972, + "step": 755000 + }, + { + "epoch": 6.04, + "learning_rate": 4.6977999999999996e-05, + "loss": 8.681, + "step": 755500 + }, + { + "epoch": 6.05, + "learning_rate": 4.6976000000000006e-05, + "loss": 8.6965, + "step": 756000 + }, + { + "epoch": 6.05, + "learning_rate": 4.6974e-05, + "loss": 8.6773, + "step": 756500 + }, + { + "epoch": 6.06, + "learning_rate": 4.6972000000000004e-05, + "loss": 8.6668, + "step": 757000 + }, + { + "epoch": 6.06, + "learning_rate": 4.6970000000000006e-05, + "loss": 8.696, + "step": 757500 + }, + { + "epoch": 6.06, + "learning_rate": 4.6968e-05, + "loss": 8.6799, + "step": 758000 + }, + { + "epoch": 6.07, + "learning_rate": 4.6966000000000004e-05, + "loss": 8.6923, + "step": 758500 + }, + { + "epoch": 6.07, + "learning_rate": 4.6964e-05, + "loss": 8.6856, + "step": 759000 + }, + { + "epoch": 6.08, + "learning_rate": 4.6962e-05, + "loss": 8.6911, + "step": 759500 + }, + { + "epoch": 6.08, + "learning_rate": 4.6960000000000004e-05, + "loss": 8.6814, + "step": 760000 + }, + { + "epoch": 6.08, + "learning_rate": 4.6958e-05, + "loss": 8.6729, + "step": 760500 + }, + { + "epoch": 6.09, + "learning_rate": 4.6956e-05, + "loss": 8.7033, + "step": 761000 + }, + { + "epoch": 6.09, + "learning_rate": 4.6954000000000005e-05, + "loss": 8.6683, + "step": 761500 + }, + { + "epoch": 6.1, + "learning_rate": 4.6952e-05, + "loss": 8.6567, + "step": 762000 + }, + { + "epoch": 6.1, + "learning_rate": 4.695e-05, + "loss": 8.6747, + "step": 762500 + }, + { + "epoch": 6.1, + "learning_rate": 4.6948000000000005e-05, + "loss": 8.6623, + "step": 763000 + }, + { + "epoch": 6.11, + "learning_rate": 4.6946e-05, + "loss": 8.69, + "step": 763500 + }, + { + "epoch": 6.11, + "learning_rate": 4.6944e-05, + "loss": 8.6797, + "step": 764000 + }, + { + "epoch": 6.12, + "learning_rate": 4.6942000000000006e-05, + "loss": 8.694, + "step": 764500 + }, + { + "epoch": 6.12, + "learning_rate": 4.694e-05, + "loss": 8.6899, + "step": 765000 + }, + { + "epoch": 6.12, + "learning_rate": 4.6938000000000004e-05, + "loss": 8.67, + "step": 765500 + }, + { + "epoch": 6.13, + "learning_rate": 4.6936e-05, + "loss": 8.6741, + "step": 766000 + }, + { + "epoch": 6.13, + "learning_rate": 4.6934e-05, + "loss": 8.7204, + "step": 766500 + }, + { + "epoch": 6.14, + "learning_rate": 4.6932000000000004e-05, + "loss": 8.6861, + "step": 767000 + }, + { + "epoch": 6.14, + "learning_rate": 4.693e-05, + "loss": 8.7019, + "step": 767500 + }, + { + "epoch": 6.14, + "learning_rate": 4.6928e-05, + "loss": 8.6781, + "step": 768000 + }, + { + "epoch": 6.15, + "learning_rate": 4.6926000000000004e-05, + "loss": 8.6855, + "step": 768500 + }, + { + "epoch": 6.15, + "learning_rate": 4.6924e-05, + "loss": 8.6801, + "step": 769000 + }, + { + "epoch": 6.16, + "learning_rate": 4.6922e-05, + "loss": 8.7049, + "step": 769500 + }, + { + "epoch": 6.16, + "learning_rate": 4.6920000000000005e-05, + "loss": 8.6789, + "step": 770000 + }, + { + "epoch": 6.16, + "learning_rate": 4.6918e-05, + "loss": 8.6692, + "step": 770500 + }, + { + "epoch": 6.17, + "learning_rate": 4.6916e-05, + "loss": 8.7287, + "step": 771000 + }, + { + "epoch": 6.17, + "learning_rate": 4.6914000000000005e-05, + "loss": 8.6889, + "step": 771500 + }, + { + "epoch": 6.18, + "learning_rate": 4.6912e-05, + "loss": 8.666, + "step": 772000 + }, + { + "epoch": 6.18, + "learning_rate": 4.691e-05, + "loss": 8.6938, + "step": 772500 + }, + { + "epoch": 6.18, + "learning_rate": 4.6908e-05, + "loss": 8.6645, + "step": 773000 + }, + { + "epoch": 6.19, + "learning_rate": 4.6906e-05, + "loss": 8.6783, + "step": 773500 + }, + { + "epoch": 6.19, + "learning_rate": 4.6904000000000004e-05, + "loss": 8.656, + "step": 774000 + }, + { + "epoch": 6.2, + "learning_rate": 4.6902e-05, + "loss": 8.6943, + "step": 774500 + }, + { + "epoch": 6.2, + "learning_rate": 4.69e-05, + "loss": 8.7057, + "step": 775000 + }, + { + "epoch": 6.2, + "learning_rate": 4.6898000000000004e-05, + "loss": 8.6791, + "step": 775500 + }, + { + "epoch": 6.21, + "learning_rate": 4.6896e-05, + "loss": 8.6785, + "step": 776000 + }, + { + "epoch": 6.21, + "learning_rate": 4.6894e-05, + "loss": 8.6989, + "step": 776500 + }, + { + "epoch": 6.22, + "learning_rate": 4.6892000000000004e-05, + "loss": 8.6844, + "step": 777000 + }, + { + "epoch": 6.22, + "learning_rate": 4.689e-05, + "loss": 8.6658, + "step": 777500 + }, + { + "epoch": 6.22, + "learning_rate": 4.6888e-05, + "loss": 8.6335, + "step": 778000 + }, + { + "epoch": 6.23, + "learning_rate": 4.6886000000000005e-05, + "loss": 8.6875, + "step": 778500 + }, + { + "epoch": 6.23, + "learning_rate": 4.6884e-05, + "loss": 8.6967, + "step": 779000 + }, + { + "epoch": 6.24, + "learning_rate": 4.6882e-05, + "loss": 8.66, + "step": 779500 + }, + { + "epoch": 6.24, + "learning_rate": 4.688e-05, + "loss": 8.6612, + "step": 780000 + }, + { + "epoch": 6.24, + "learning_rate": 4.6878e-05, + "loss": 8.6867, + "step": 780500 + }, + { + "epoch": 6.25, + "learning_rate": 4.6876e-05, + "loss": 8.6855, + "step": 781000 + }, + { + "epoch": 6.25, + "learning_rate": 4.6874e-05, + "loss": 8.6826, + "step": 781500 + }, + { + "epoch": 6.26, + "learning_rate": 4.687200000000001e-05, + "loss": 8.6773, + "step": 782000 + }, + { + "epoch": 6.26, + "learning_rate": 4.6870000000000004e-05, + "loss": 8.6837, + "step": 782500 + }, + { + "epoch": 6.26, + "learning_rate": 4.6868e-05, + "loss": 8.6954, + "step": 783000 + }, + { + "epoch": 6.27, + "learning_rate": 4.6866e-05, + "loss": 8.6884, + "step": 783500 + }, + { + "epoch": 6.27, + "learning_rate": 4.6864000000000004e-05, + "loss": 8.7012, + "step": 784000 + }, + { + "epoch": 6.28, + "learning_rate": 4.6862e-05, + "loss": 8.686, + "step": 784500 + }, + { + "epoch": 6.28, + "learning_rate": 4.686e-05, + "loss": 8.6754, + "step": 785000 + }, + { + "epoch": 6.28, + "learning_rate": 4.6858000000000004e-05, + "loss": 8.6742, + "step": 785500 + }, + { + "epoch": 6.29, + "learning_rate": 4.6856e-05, + "loss": 8.7039, + "step": 786000 + }, + { + "epoch": 6.29, + "learning_rate": 4.6854e-05, + "loss": 8.6997, + "step": 786500 + }, + { + "epoch": 6.3, + "learning_rate": 4.6852000000000005e-05, + "loss": 8.6888, + "step": 787000 + }, + { + "epoch": 6.3, + "learning_rate": 4.685000000000001e-05, + "loss": 8.6968, + "step": 787500 + }, + { + "epoch": 6.3, + "learning_rate": 4.6848e-05, + "loss": 8.6881, + "step": 788000 + }, + { + "epoch": 6.31, + "learning_rate": 4.6846e-05, + "loss": 8.7016, + "step": 788500 + }, + { + "epoch": 6.31, + "learning_rate": 4.684400000000001e-05, + "loss": 8.674, + "step": 789000 + }, + { + "epoch": 6.32, + "learning_rate": 4.6842e-05, + "loss": 8.6957, + "step": 789500 + }, + { + "epoch": 6.32, + "learning_rate": 4.684e-05, + "loss": 8.6614, + "step": 790000 + }, + { + "epoch": 6.32, + "learning_rate": 4.6838e-05, + "loss": 8.681, + "step": 790500 + }, + { + "epoch": 6.33, + "learning_rate": 4.6836000000000004e-05, + "loss": 8.6727, + "step": 791000 + }, + { + "epoch": 6.33, + "learning_rate": 4.6834e-05, + "loss": 8.6681, + "step": 791500 + }, + { + "epoch": 6.34, + "learning_rate": 4.6832e-05, + "loss": 8.6964, + "step": 792000 + }, + { + "epoch": 6.34, + "learning_rate": 4.6830000000000004e-05, + "loss": 8.6826, + "step": 792500 + }, + { + "epoch": 6.34, + "learning_rate": 4.6828e-05, + "loss": 8.7072, + "step": 793000 + }, + { + "epoch": 6.35, + "learning_rate": 4.6826e-05, + "loss": 8.6884, + "step": 793500 + }, + { + "epoch": 6.35, + "learning_rate": 4.6824000000000004e-05, + "loss": 8.7096, + "step": 794000 + }, + { + "epoch": 6.36, + "learning_rate": 4.682200000000001e-05, + "loss": 8.6898, + "step": 794500 + }, + { + "epoch": 6.36, + "learning_rate": 4.682e-05, + "loss": 8.6621, + "step": 795000 + }, + { + "epoch": 6.36, + "learning_rate": 4.6818e-05, + "loss": 8.7063, + "step": 795500 + }, + { + "epoch": 6.37, + "learning_rate": 4.681600000000001e-05, + "loss": 8.6877, + "step": 796000 + }, + { + "epoch": 6.37, + "learning_rate": 4.6814e-05, + "loss": 8.691, + "step": 796500 + }, + { + "epoch": 6.38, + "learning_rate": 4.6812e-05, + "loss": 8.6942, + "step": 797000 + }, + { + "epoch": 6.38, + "learning_rate": 4.681e-05, + "loss": 8.6936, + "step": 797500 + }, + { + "epoch": 6.38, + "learning_rate": 4.6808e-05, + "loss": 8.688, + "step": 798000 + }, + { + "epoch": 6.39, + "learning_rate": 4.6806e-05, + "loss": 8.7132, + "step": 798500 + }, + { + "epoch": 6.39, + "learning_rate": 4.6804e-05, + "loss": 8.6968, + "step": 799000 + }, + { + "epoch": 6.4, + "learning_rate": 4.6802000000000004e-05, + "loss": 8.6962, + "step": 799500 + }, + { + "epoch": 6.4, + "learning_rate": 4.6800000000000006e-05, + "loss": 8.6979, + "step": 800000 + }, + { + "epoch": 6.4, + "learning_rate": 4.6798e-05, + "loss": 8.6787, + "step": 800500 + }, + { + "epoch": 6.41, + "learning_rate": 4.6796000000000004e-05, + "loss": 8.6847, + "step": 801000 + }, + { + "epoch": 6.41, + "learning_rate": 4.6794000000000006e-05, + "loss": 8.7164, + "step": 801500 + }, + { + "epoch": 6.42, + "learning_rate": 4.6792e-05, + "loss": 8.6892, + "step": 802000 + }, + { + "epoch": 6.42, + "learning_rate": 4.679e-05, + "loss": 8.6569, + "step": 802500 + }, + { + "epoch": 6.42, + "learning_rate": 4.678800000000001e-05, + "loss": 8.7136, + "step": 803000 + }, + { + "epoch": 6.43, + "learning_rate": 4.6786e-05, + "loss": 8.6801, + "step": 803500 + }, + { + "epoch": 6.43, + "learning_rate": 4.6784e-05, + "loss": 8.6836, + "step": 804000 + }, + { + "epoch": 6.44, + "learning_rate": 4.6782e-05, + "loss": 8.685, + "step": 804500 + }, + { + "epoch": 6.44, + "learning_rate": 4.678e-05, + "loss": 8.7094, + "step": 805000 + }, + { + "epoch": 6.44, + "learning_rate": 4.6778e-05, + "loss": 8.691, + "step": 805500 + }, + { + "epoch": 6.45, + "learning_rate": 4.6776e-05, + "loss": 8.6758, + "step": 806000 + }, + { + "epoch": 6.45, + "learning_rate": 4.6774e-05, + "loss": 8.7133, + "step": 806500 + }, + { + "epoch": 6.46, + "learning_rate": 4.6772000000000006e-05, + "loss": 8.6901, + "step": 807000 + }, + { + "epoch": 6.46, + "learning_rate": 4.677e-05, + "loss": 8.6884, + "step": 807500 + }, + { + "epoch": 6.46, + "learning_rate": 4.6768000000000004e-05, + "loss": 8.6815, + "step": 808000 + }, + { + "epoch": 6.47, + "learning_rate": 4.6766000000000006e-05, + "loss": 8.6945, + "step": 808500 + }, + { + "epoch": 6.47, + "learning_rate": 4.6764e-05, + "loss": 8.6881, + "step": 809000 + }, + { + "epoch": 6.48, + "learning_rate": 4.6762e-05, + "loss": 8.6891, + "step": 809500 + }, + { + "epoch": 6.48, + "learning_rate": 4.6760000000000006e-05, + "loss": 8.6968, + "step": 810000 + }, + { + "epoch": 6.48, + "learning_rate": 4.6758e-05, + "loss": 8.6841, + "step": 810500 + }, + { + "epoch": 6.49, + "learning_rate": 4.6756e-05, + "loss": 8.6876, + "step": 811000 + }, + { + "epoch": 6.49, + "learning_rate": 4.675400000000001e-05, + "loss": 8.6979, + "step": 811500 + }, + { + "epoch": 6.5, + "learning_rate": 4.6752e-05, + "loss": 8.678, + "step": 812000 + }, + { + "epoch": 6.5, + "learning_rate": 4.6750000000000005e-05, + "loss": 8.6972, + "step": 812500 + }, + { + "epoch": 6.5, + "learning_rate": 4.6748e-05, + "loss": 8.6811, + "step": 813000 + }, + { + "epoch": 6.51, + "learning_rate": 4.6746e-05, + "loss": 8.6973, + "step": 813500 + }, + { + "epoch": 6.51, + "learning_rate": 4.6744000000000005e-05, + "loss": 8.6943, + "step": 814000 + }, + { + "epoch": 6.52, + "learning_rate": 4.6742e-05, + "loss": 8.6855, + "step": 814500 + }, + { + "epoch": 6.52, + "learning_rate": 4.674e-05, + "loss": 8.6963, + "step": 815000 + }, + { + "epoch": 6.52, + "learning_rate": 4.6738000000000006e-05, + "loss": 8.7114, + "step": 815500 + }, + { + "epoch": 6.53, + "learning_rate": 4.6736e-05, + "loss": 8.6996, + "step": 816000 + }, + { + "epoch": 6.53, + "learning_rate": 4.6734e-05, + "loss": 8.6951, + "step": 816500 + }, + { + "epoch": 6.54, + "learning_rate": 4.6732000000000006e-05, + "loss": 8.6772, + "step": 817000 + }, + { + "epoch": 6.54, + "learning_rate": 4.673e-05, + "loss": 8.6724, + "step": 817500 + }, + { + "epoch": 6.54, + "learning_rate": 4.6728e-05, + "loss": 8.7174, + "step": 818000 + }, + { + "epoch": 6.55, + "learning_rate": 4.6726000000000006e-05, + "loss": 8.6783, + "step": 818500 + }, + { + "epoch": 6.55, + "learning_rate": 4.6724e-05, + "loss": 8.6842, + "step": 819000 + }, + { + "epoch": 6.56, + "learning_rate": 4.6722000000000004e-05, + "loss": 8.6893, + "step": 819500 + }, + { + "epoch": 6.56, + "learning_rate": 4.672e-05, + "loss": 8.6598, + "step": 820000 + }, + { + "epoch": 6.56, + "learning_rate": 4.6718e-05, + "loss": 8.6785, + "step": 820500 + }, + { + "epoch": 6.57, + "learning_rate": 4.6716000000000005e-05, + "loss": 8.6983, + "step": 821000 + }, + { + "epoch": 6.57, + "learning_rate": 4.6714e-05, + "loss": 8.6691, + "step": 821500 + }, + { + "epoch": 6.58, + "learning_rate": 4.6712e-05, + "loss": 8.6649, + "step": 822000 + }, + { + "epoch": 6.58, + "learning_rate": 4.6710000000000005e-05, + "loss": 8.6774, + "step": 822500 + }, + { + "epoch": 6.58, + "learning_rate": 4.6708e-05, + "loss": 8.662, + "step": 823000 + }, + { + "epoch": 6.59, + "learning_rate": 4.6706e-05, + "loss": 8.7077, + "step": 823500 + }, + { + "epoch": 6.59, + "learning_rate": 4.6704000000000005e-05, + "loss": 8.6697, + "step": 824000 + }, + { + "epoch": 6.6, + "learning_rate": 4.6702e-05, + "loss": 8.6873, + "step": 824500 + }, + { + "epoch": 6.6, + "learning_rate": 4.6700000000000003e-05, + "loss": 8.6606, + "step": 825000 + }, + { + "epoch": 6.6, + "learning_rate": 4.6698000000000006e-05, + "loss": 8.6672, + "step": 825500 + }, + { + "epoch": 6.61, + "learning_rate": 4.6696e-05, + "loss": 8.7246, + "step": 826000 + }, + { + "epoch": 6.61, + "learning_rate": 4.6694000000000004e-05, + "loss": 8.6949, + "step": 826500 + }, + { + "epoch": 6.62, + "learning_rate": 4.6692e-05, + "loss": 8.6766, + "step": 827000 + }, + { + "epoch": 6.62, + "learning_rate": 4.669e-05, + "loss": 8.6795, + "step": 827500 + }, + { + "epoch": 6.62, + "learning_rate": 4.6688000000000004e-05, + "loss": 8.6992, + "step": 828000 + }, + { + "epoch": 6.63, + "learning_rate": 4.6686e-05, + "loss": 8.6833, + "step": 828500 + }, + { + "epoch": 6.63, + "learning_rate": 4.6684e-05, + "loss": 8.692, + "step": 829000 + }, + { + "epoch": 6.64, + "learning_rate": 4.6682000000000005e-05, + "loss": 8.686, + "step": 829500 + }, + { + "epoch": 6.64, + "learning_rate": 4.668e-05, + "loss": 8.695, + "step": 830000 + }, + { + "epoch": 6.64, + "learning_rate": 4.6678e-05, + "loss": 8.6997, + "step": 830500 + }, + { + "epoch": 6.65, + "learning_rate": 4.6676000000000005e-05, + "loss": 8.7094, + "step": 831000 + }, + { + "epoch": 6.65, + "learning_rate": 4.6674e-05, + "loss": 8.7044, + "step": 831500 + }, + { + "epoch": 6.66, + "learning_rate": 4.6672e-05, + "loss": 8.7036, + "step": 832000 + }, + { + "epoch": 6.66, + "learning_rate": 4.6670000000000005e-05, + "loss": 8.6943, + "step": 832500 + }, + { + "epoch": 6.66, + "learning_rate": 4.6668e-05, + "loss": 8.6921, + "step": 833000 + }, + { + "epoch": 6.67, + "learning_rate": 4.6666000000000003e-05, + "loss": 8.6799, + "step": 833500 + }, + { + "epoch": 6.67, + "learning_rate": 4.6664e-05, + "loss": 8.6947, + "step": 834000 + }, + { + "epoch": 6.68, + "learning_rate": 4.6662e-05, + "loss": 8.6906, + "step": 834500 + }, + { + "epoch": 6.68, + "learning_rate": 4.6660000000000004e-05, + "loss": 8.694, + "step": 835000 + }, + { + "epoch": 6.68, + "learning_rate": 4.6658e-05, + "loss": 8.6796, + "step": 835500 + }, + { + "epoch": 6.69, + "learning_rate": 4.6656e-05, + "loss": 8.6982, + "step": 836000 + }, + { + "epoch": 6.69, + "learning_rate": 4.6654000000000004e-05, + "loss": 8.7206, + "step": 836500 + }, + { + "epoch": 6.7, + "learning_rate": 4.6652e-05, + "loss": 8.6691, + "step": 837000 + }, + { + "epoch": 6.7, + "learning_rate": 4.665e-05, + "loss": 8.6577, + "step": 837500 + }, + { + "epoch": 6.7, + "learning_rate": 4.6648000000000005e-05, + "loss": 8.714, + "step": 838000 + }, + { + "epoch": 6.71, + "learning_rate": 4.6646e-05, + "loss": 8.651, + "step": 838500 + }, + { + "epoch": 6.71, + "learning_rate": 4.6644e-05, + "loss": 8.6644, + "step": 839000 + }, + { + "epoch": 6.72, + "learning_rate": 4.6642000000000005e-05, + "loss": 8.684, + "step": 839500 + }, + { + "epoch": 6.72, + "learning_rate": 4.664e-05, + "loss": 8.6915, + "step": 840000 + }, + { + "epoch": 6.72, + "learning_rate": 4.6638e-05, + "loss": 8.6944, + "step": 840500 + }, + { + "epoch": 6.73, + "learning_rate": 4.6636e-05, + "loss": 8.694, + "step": 841000 + }, + { + "epoch": 6.73, + "learning_rate": 4.6634e-05, + "loss": 8.664, + "step": 841500 + }, + { + "epoch": 6.74, + "learning_rate": 4.6632000000000003e-05, + "loss": 8.6761, + "step": 842000 + }, + { + "epoch": 6.74, + "learning_rate": 4.663e-05, + "loss": 8.6971, + "step": 842500 + }, + { + "epoch": 6.74, + "learning_rate": 4.6628e-05, + "loss": 8.6782, + "step": 843000 + }, + { + "epoch": 6.75, + "learning_rate": 4.6626000000000004e-05, + "loss": 8.6581, + "step": 843500 + }, + { + "epoch": 6.75, + "learning_rate": 4.6624e-05, + "loss": 8.6781, + "step": 844000 + }, + { + "epoch": 6.76, + "learning_rate": 4.6622e-05, + "loss": 8.6633, + "step": 844500 + }, + { + "epoch": 6.76, + "learning_rate": 4.6620000000000004e-05, + "loss": 8.6702, + "step": 845000 + }, + { + "epoch": 6.76, + "learning_rate": 4.6618e-05, + "loss": 8.6883, + "step": 845500 + }, + { + "epoch": 6.77, + "learning_rate": 4.6616e-05, + "loss": 8.6945, + "step": 846000 + }, + { + "epoch": 6.77, + "learning_rate": 4.6614000000000005e-05, + "loss": 8.6573, + "step": 846500 + }, + { + "epoch": 6.78, + "learning_rate": 4.6612e-05, + "loss": 8.672, + "step": 847000 + }, + { + "epoch": 6.78, + "learning_rate": 4.661e-05, + "loss": 8.659, + "step": 847500 + }, + { + "epoch": 6.78, + "learning_rate": 4.6608e-05, + "loss": 8.6699, + "step": 848000 + }, + { + "epoch": 6.79, + "learning_rate": 4.6606e-05, + "loss": 8.6829, + "step": 848500 + }, + { + "epoch": 6.79, + "learning_rate": 4.6604e-05, + "loss": 8.6908, + "step": 849000 + }, + { + "epoch": 6.8, + "learning_rate": 4.6602e-05, + "loss": 8.6943, + "step": 849500 + }, + { + "epoch": 6.8, + "learning_rate": 4.660000000000001e-05, + "loss": 8.6991, + "step": 850000 + }, + { + "epoch": 6.8, + "learning_rate": 4.6598000000000003e-05, + "loss": 8.6771, + "step": 850500 + }, + { + "epoch": 6.81, + "learning_rate": 4.6596e-05, + "loss": 8.719, + "step": 851000 + }, + { + "epoch": 6.81, + "learning_rate": 4.6594e-05, + "loss": 8.6948, + "step": 851500 + }, + { + "epoch": 6.82, + "learning_rate": 4.6592000000000004e-05, + "loss": 8.6727, + "step": 852000 + }, + { + "epoch": 6.82, + "learning_rate": 4.659e-05, + "loss": 8.6538, + "step": 852500 + }, + { + "epoch": 6.82, + "learning_rate": 4.6588e-05, + "loss": 8.6852, + "step": 853000 + }, + { + "epoch": 6.83, + "learning_rate": 4.6586000000000004e-05, + "loss": 8.7012, + "step": 853500 + }, + { + "epoch": 6.83, + "learning_rate": 4.6584e-05, + "loss": 8.679, + "step": 854000 + }, + { + "epoch": 6.84, + "learning_rate": 4.6582e-05, + "loss": 8.6946, + "step": 854500 + }, + { + "epoch": 6.84, + "learning_rate": 4.6580000000000005e-05, + "loss": 8.6983, + "step": 855000 + }, + { + "epoch": 6.84, + "learning_rate": 4.657800000000001e-05, + "loss": 8.6911, + "step": 855500 + }, + { + "epoch": 6.85, + "learning_rate": 4.6576e-05, + "loss": 8.7233, + "step": 856000 + }, + { + "epoch": 6.85, + "learning_rate": 4.6574e-05, + "loss": 8.6793, + "step": 856500 + }, + { + "epoch": 6.86, + "learning_rate": 4.657200000000001e-05, + "loss": 8.6707, + "step": 857000 + }, + { + "epoch": 6.86, + "learning_rate": 4.657e-05, + "loss": 8.6818, + "step": 857500 + }, + { + "epoch": 6.86, + "learning_rate": 4.6568e-05, + "loss": 8.6773, + "step": 858000 + }, + { + "epoch": 6.87, + "learning_rate": 4.6566e-05, + "loss": 8.7184, + "step": 858500 + }, + { + "epoch": 6.87, + "learning_rate": 4.6564000000000003e-05, + "loss": 8.6863, + "step": 859000 + }, + { + "epoch": 6.88, + "learning_rate": 4.6562e-05, + "loss": 8.6887, + "step": 859500 + }, + { + "epoch": 6.88, + "learning_rate": 4.656e-05, + "loss": 8.6982, + "step": 860000 + }, + { + "epoch": 6.88, + "learning_rate": 4.6558000000000004e-05, + "loss": 8.7029, + "step": 860500 + }, + { + "epoch": 6.89, + "learning_rate": 4.6556e-05, + "loss": 8.6883, + "step": 861000 + }, + { + "epoch": 6.89, + "learning_rate": 4.6554e-05, + "loss": 8.6869, + "step": 861500 + }, + { + "epoch": 6.9, + "learning_rate": 4.6552000000000004e-05, + "loss": 8.6949, + "step": 862000 + }, + { + "epoch": 6.9, + "learning_rate": 4.655000000000001e-05, + "loss": 8.6647, + "step": 862500 + }, + { + "epoch": 6.9, + "learning_rate": 4.6548e-05, + "loss": 8.6981, + "step": 863000 + }, + { + "epoch": 6.91, + "learning_rate": 4.6546e-05, + "loss": 8.706, + "step": 863500 + }, + { + "epoch": 6.91, + "learning_rate": 4.654400000000001e-05, + "loss": 8.6744, + "step": 864000 + }, + { + "epoch": 6.92, + "learning_rate": 4.6542e-05, + "loss": 8.6722, + "step": 864500 + }, + { + "epoch": 6.92, + "learning_rate": 4.654e-05, + "loss": 8.6963, + "step": 865000 + }, + { + "epoch": 6.92, + "learning_rate": 4.6538e-05, + "loss": 8.6823, + "step": 865500 + }, + { + "epoch": 6.93, + "learning_rate": 4.6536e-05, + "loss": 8.6683, + "step": 866000 + }, + { + "epoch": 6.93, + "learning_rate": 4.6534e-05, + "loss": 8.6902, + "step": 866500 + }, + { + "epoch": 6.94, + "learning_rate": 4.6532e-05, + "loss": 8.679, + "step": 867000 + }, + { + "epoch": 6.94, + "learning_rate": 4.6530000000000003e-05, + "loss": 8.689, + "step": 867500 + }, + { + "epoch": 6.94, + "learning_rate": 4.6528000000000006e-05, + "loss": 8.682, + "step": 868000 + }, + { + "epoch": 6.95, + "learning_rate": 4.6526e-05, + "loss": 8.6504, + "step": 868500 + }, + { + "epoch": 6.95, + "learning_rate": 4.6524000000000004e-05, + "loss": 8.6971, + "step": 869000 + }, + { + "epoch": 6.96, + "learning_rate": 4.6522000000000006e-05, + "loss": 8.695, + "step": 869500 + }, + { + "epoch": 6.96, + "learning_rate": 4.652e-05, + "loss": 8.671, + "step": 870000 + }, + { + "epoch": 6.96, + "learning_rate": 4.6518e-05, + "loss": 8.6795, + "step": 870500 + }, + { + "epoch": 6.97, + "learning_rate": 4.651600000000001e-05, + "loss": 8.6897, + "step": 871000 + }, + { + "epoch": 6.97, + "learning_rate": 4.6514e-05, + "loss": 8.686, + "step": 871500 + }, + { + "epoch": 6.98, + "learning_rate": 4.6512e-05, + "loss": 8.6801, + "step": 872000 + }, + { + "epoch": 6.98, + "learning_rate": 4.651e-05, + "loss": 8.7021, + "step": 872500 + }, + { + "epoch": 6.98, + "learning_rate": 4.6508e-05, + "loss": 8.68, + "step": 873000 + }, + { + "epoch": 6.99, + "learning_rate": 4.6506e-05, + "loss": 8.6635, + "step": 873500 + }, + { + "epoch": 6.99, + "learning_rate": 4.6504e-05, + "loss": 8.6801, + "step": 874000 + }, + { + "epoch": 7.0, + "learning_rate": 4.6502e-05, + "loss": 8.6499, + "step": 874500 + }, + { + "epoch": 7.0, + "learning_rate": 4.6500000000000005e-05, + "loss": 8.7053, + "step": 875000 + }, + { + "epoch": 7.0, + "learning_rate": 4.6498e-05, + "loss": 8.6667, + "step": 875500 + }, + { + "epoch": 7.01, + "learning_rate": 4.6496000000000003e-05, + "loss": 8.6919, + "step": 876000 + }, + { + "epoch": 7.01, + "learning_rate": 4.6494000000000006e-05, + "loss": 8.6901, + "step": 876500 + }, + { + "epoch": 7.02, + "learning_rate": 4.6492e-05, + "loss": 8.7098, + "step": 877000 + }, + { + "epoch": 7.02, + "learning_rate": 4.649e-05, + "loss": 8.6854, + "step": 877500 + }, + { + "epoch": 7.02, + "learning_rate": 4.6488000000000006e-05, + "loss": 8.6944, + "step": 878000 + }, + { + "epoch": 7.03, + "learning_rate": 4.6486e-05, + "loss": 8.704, + "step": 878500 + }, + { + "epoch": 7.03, + "learning_rate": 4.6484e-05, + "loss": 8.6889, + "step": 879000 + }, + { + "epoch": 7.04, + "learning_rate": 4.6482000000000007e-05, + "loss": 8.6797, + "step": 879500 + }, + { + "epoch": 7.04, + "learning_rate": 4.648e-05, + "loss": 8.6706, + "step": 880000 + }, + { + "epoch": 7.04, + "learning_rate": 4.6478000000000005e-05, + "loss": 8.689, + "step": 880500 + }, + { + "epoch": 7.05, + "learning_rate": 4.6476e-05, + "loss": 8.7023, + "step": 881000 + }, + { + "epoch": 7.05, + "learning_rate": 4.6474e-05, + "loss": 8.6985, + "step": 881500 + }, + { + "epoch": 7.06, + "learning_rate": 4.6472000000000005e-05, + "loss": 8.6837, + "step": 882000 + }, + { + "epoch": 7.06, + "learning_rate": 4.647e-05, + "loss": 8.6838, + "step": 882500 + }, + { + "epoch": 7.06, + "learning_rate": 4.6468e-05, + "loss": 8.7124, + "step": 883000 + }, + { + "epoch": 7.07, + "learning_rate": 4.6466000000000005e-05, + "loss": 8.7076, + "step": 883500 + }, + { + "epoch": 7.07, + "learning_rate": 4.6464e-05, + "loss": 8.7046, + "step": 884000 + }, + { + "epoch": 7.08, + "learning_rate": 4.6462e-05, + "loss": 8.6597, + "step": 884500 + }, + { + "epoch": 7.08, + "learning_rate": 4.6460000000000006e-05, + "loss": 8.6988, + "step": 885000 + }, + { + "epoch": 7.08, + "learning_rate": 4.6458e-05, + "loss": 8.6622, + "step": 885500 + }, + { + "epoch": 7.09, + "learning_rate": 4.6456e-05, + "loss": 8.6962, + "step": 886000 + }, + { + "epoch": 7.09, + "learning_rate": 4.6454000000000006e-05, + "loss": 8.6824, + "step": 886500 + }, + { + "epoch": 7.1, + "learning_rate": 4.6452e-05, + "loss": 8.6729, + "step": 887000 + }, + { + "epoch": 7.1, + "learning_rate": 4.6450000000000004e-05, + "loss": 8.7063, + "step": 887500 + }, + { + "epoch": 7.1, + "learning_rate": 4.6448e-05, + "loss": 8.6514, + "step": 888000 + }, + { + "epoch": 7.11, + "learning_rate": 4.6446e-05, + "loss": 8.7005, + "step": 888500 + }, + { + "epoch": 7.11, + "learning_rate": 4.6444000000000005e-05, + "loss": 8.6967, + "step": 889000 + }, + { + "epoch": 7.12, + "learning_rate": 4.6442e-05, + "loss": 8.6805, + "step": 889500 + }, + { + "epoch": 7.12, + "learning_rate": 4.644e-05, + "loss": 8.6851, + "step": 890000 + }, + { + "epoch": 7.12, + "learning_rate": 4.6438000000000005e-05, + "loss": 8.7003, + "step": 890500 + }, + { + "epoch": 7.13, + "learning_rate": 4.6436e-05, + "loss": 8.6987, + "step": 891000 + }, + { + "epoch": 7.13, + "learning_rate": 4.6434e-05, + "loss": 8.694, + "step": 891500 + }, + { + "epoch": 7.14, + "learning_rate": 4.6432000000000005e-05, + "loss": 8.6898, + "step": 892000 + }, + { + "epoch": 7.14, + "learning_rate": 4.643e-05, + "loss": 8.6917, + "step": 892500 + }, + { + "epoch": 7.14, + "learning_rate": 4.6428000000000003e-05, + "loss": 8.6972, + "step": 893000 + }, + { + "epoch": 7.15, + "learning_rate": 4.6426000000000006e-05, + "loss": 8.7035, + "step": 893500 + }, + { + "epoch": 7.15, + "learning_rate": 4.6424e-05, + "loss": 8.7104, + "step": 894000 + }, + { + "epoch": 7.16, + "learning_rate": 4.6422000000000004e-05, + "loss": 8.7077, + "step": 894500 + }, + { + "epoch": 7.16, + "learning_rate": 4.642e-05, + "loss": 8.6918, + "step": 895000 + }, + { + "epoch": 7.16, + "learning_rate": 4.6418e-05, + "loss": 8.6994, + "step": 895500 + }, + { + "epoch": 7.17, + "learning_rate": 4.6416000000000004e-05, + "loss": 8.6486, + "step": 896000 + }, + { + "epoch": 7.17, + "learning_rate": 4.6414e-05, + "loss": 8.6843, + "step": 896500 + }, + { + "epoch": 7.18, + "learning_rate": 4.6412e-05, + "loss": 8.6733, + "step": 897000 + }, + { + "epoch": 7.18, + "learning_rate": 4.6410000000000005e-05, + "loss": 8.6797, + "step": 897500 + }, + { + "epoch": 7.18, + "learning_rate": 4.6408e-05, + "loss": 8.6684, + "step": 898000 + }, + { + "epoch": 7.19, + "learning_rate": 4.6406e-05, + "loss": 8.6925, + "step": 898500 + }, + { + "epoch": 7.19, + "learning_rate": 4.6404000000000005e-05, + "loss": 8.6618, + "step": 899000 + }, + { + "epoch": 7.2, + "learning_rate": 4.6402e-05, + "loss": 8.7055, + "step": 899500 + }, + { + "epoch": 7.2, + "learning_rate": 4.64e-05, + "loss": 8.6637, + "step": 900000 + }, + { + "epoch": 7.2, + "learning_rate": 4.6398000000000005e-05, + "loss": 8.6877, + "step": 900500 + }, + { + "epoch": 7.21, + "learning_rate": 4.6396e-05, + "loss": 8.6848, + "step": 901000 + }, + { + "epoch": 7.21, + "learning_rate": 4.6394e-05, + "loss": 8.6867, + "step": 901500 + }, + { + "epoch": 7.22, + "learning_rate": 4.6392e-05, + "loss": 8.6845, + "step": 902000 + }, + { + "epoch": 7.22, + "learning_rate": 4.639e-05, + "loss": 8.6648, + "step": 902500 + }, + { + "epoch": 7.22, + "learning_rate": 4.6388000000000004e-05, + "loss": 8.6905, + "step": 903000 + }, + { + "epoch": 7.23, + "learning_rate": 4.6386e-05, + "loss": 8.6813, + "step": 903500 + }, + { + "epoch": 7.23, + "learning_rate": 4.6384e-05, + "loss": 8.7004, + "step": 904000 + }, + { + "epoch": 7.24, + "learning_rate": 4.6382000000000004e-05, + "loss": 8.6759, + "step": 904500 + }, + { + "epoch": 7.24, + "learning_rate": 4.638e-05, + "loss": 8.6784, + "step": 905000 + }, + { + "epoch": 7.24, + "learning_rate": 4.6378e-05, + "loss": 8.6783, + "step": 905500 + }, + { + "epoch": 7.25, + "learning_rate": 4.6376000000000005e-05, + "loss": 8.6821, + "step": 906000 + }, + { + "epoch": 7.25, + "learning_rate": 4.6374e-05, + "loss": 8.704, + "step": 906500 + }, + { + "epoch": 7.26, + "learning_rate": 4.6372e-05, + "loss": 8.681, + "step": 907000 + }, + { + "epoch": 7.26, + "learning_rate": 4.6370000000000005e-05, + "loss": 8.664, + "step": 907500 + }, + { + "epoch": 7.26, + "learning_rate": 4.6368e-05, + "loss": 8.6814, + "step": 908000 + }, + { + "epoch": 7.27, + "learning_rate": 4.6366e-05, + "loss": 8.6596, + "step": 908500 + }, + { + "epoch": 7.27, + "learning_rate": 4.6364e-05, + "loss": 8.6903, + "step": 909000 + }, + { + "epoch": 7.28, + "learning_rate": 4.6362e-05, + "loss": 8.696, + "step": 909500 + }, + { + "epoch": 7.28, + "learning_rate": 4.636e-05, + "loss": 8.6709, + "step": 910000 + }, + { + "epoch": 7.28, + "learning_rate": 4.6358e-05, + "loss": 8.6807, + "step": 910500 + }, + { + "epoch": 7.29, + "learning_rate": 4.635600000000001e-05, + "loss": 8.6804, + "step": 911000 + }, + { + "epoch": 7.29, + "learning_rate": 4.6354000000000004e-05, + "loss": 8.6734, + "step": 911500 + }, + { + "epoch": 7.3, + "learning_rate": 4.6352e-05, + "loss": 8.6951, + "step": 912000 + }, + { + "epoch": 7.3, + "learning_rate": 4.635e-05, + "loss": 8.6676, + "step": 912500 + }, + { + "epoch": 7.3, + "learning_rate": 4.6348000000000004e-05, + "loss": 8.6494, + "step": 913000 + }, + { + "epoch": 7.31, + "learning_rate": 4.6346e-05, + "loss": 8.6651, + "step": 913500 + }, + { + "epoch": 7.31, + "learning_rate": 4.6344e-05, + "loss": 8.6902, + "step": 914000 + }, + { + "epoch": 7.32, + "learning_rate": 4.6342000000000005e-05, + "loss": 8.6748, + "step": 914500 + }, + { + "epoch": 7.32, + "learning_rate": 4.634e-05, + "loss": 8.7043, + "step": 915000 + }, + { + "epoch": 7.32, + "learning_rate": 4.6338e-05, + "loss": 8.6911, + "step": 915500 + }, + { + "epoch": 7.33, + "learning_rate": 4.6336000000000005e-05, + "loss": 8.6861, + "step": 916000 + }, + { + "epoch": 7.33, + "learning_rate": 4.6334e-05, + "loss": 8.7087, + "step": 916500 + }, + { + "epoch": 7.34, + "learning_rate": 4.6332e-05, + "loss": 8.6657, + "step": 917000 + }, + { + "epoch": 7.34, + "learning_rate": 4.633e-05, + "loss": 8.6511, + "step": 917500 + }, + { + "epoch": 7.34, + "learning_rate": 4.632800000000001e-05, + "loss": 8.6858, + "step": 918000 + }, + { + "epoch": 7.35, + "learning_rate": 4.6326e-05, + "loss": 8.6662, + "step": 918500 + }, + { + "epoch": 7.35, + "learning_rate": 4.6324e-05, + "loss": 8.6781, + "step": 919000 + }, + { + "epoch": 7.36, + "learning_rate": 4.6322e-05, + "loss": 8.697, + "step": 919500 + }, + { + "epoch": 7.36, + "learning_rate": 4.6320000000000004e-05, + "loss": 8.6628, + "step": 920000 + }, + { + "epoch": 7.36, + "learning_rate": 4.6318e-05, + "loss": 8.6862, + "step": 920500 + }, + { + "epoch": 7.37, + "learning_rate": 4.6316e-05, + "loss": 8.6931, + "step": 921000 + }, + { + "epoch": 7.37, + "learning_rate": 4.6314000000000004e-05, + "loss": 8.6492, + "step": 921500 + }, + { + "epoch": 7.38, + "learning_rate": 4.6312e-05, + "loss": 8.6837, + "step": 922000 + }, + { + "epoch": 7.38, + "learning_rate": 4.631e-05, + "loss": 8.7073, + "step": 922500 + }, + { + "epoch": 7.38, + "learning_rate": 4.6308000000000005e-05, + "loss": 8.6857, + "step": 923000 + }, + { + "epoch": 7.39, + "learning_rate": 4.630600000000001e-05, + "loss": 8.6884, + "step": 923500 + }, + { + "epoch": 7.39, + "learning_rate": 4.6304e-05, + "loss": 8.6743, + "step": 924000 + }, + { + "epoch": 7.4, + "learning_rate": 4.6302e-05, + "loss": 8.6903, + "step": 924500 + }, + { + "epoch": 7.4, + "learning_rate": 4.630000000000001e-05, + "loss": 8.6847, + "step": 925000 + }, + { + "epoch": 7.4, + "learning_rate": 4.6298e-05, + "loss": 8.698, + "step": 925500 + }, + { + "epoch": 7.41, + "learning_rate": 4.6296e-05, + "loss": 8.6978, + "step": 926000 + }, + { + "epoch": 7.41, + "learning_rate": 4.6294e-05, + "loss": 8.6721, + "step": 926500 + }, + { + "epoch": 7.42, + "learning_rate": 4.6292e-05, + "loss": 8.6805, + "step": 927000 + }, + { + "epoch": 7.42, + "learning_rate": 4.629e-05, + "loss": 8.6839, + "step": 927500 + }, + { + "epoch": 7.42, + "learning_rate": 4.6288e-05, + "loss": 8.6973, + "step": 928000 + }, + { + "epoch": 7.43, + "learning_rate": 4.6286000000000004e-05, + "loss": 8.6982, + "step": 928500 + }, + { + "epoch": 7.43, + "learning_rate": 4.6284e-05, + "loss": 8.6642, + "step": 929000 + }, + { + "epoch": 7.44, + "learning_rate": 4.6282e-05, + "loss": 8.6692, + "step": 929500 + }, + { + "epoch": 7.44, + "learning_rate": 4.6280000000000004e-05, + "loss": 8.6817, + "step": 930000 + }, + { + "epoch": 7.44, + "learning_rate": 4.6278000000000007e-05, + "loss": 8.6982, + "step": 930500 + }, + { + "epoch": 7.45, + "learning_rate": 4.6276e-05, + "loss": 8.7005, + "step": 931000 + }, + { + "epoch": 7.45, + "learning_rate": 4.6274e-05, + "loss": 8.6706, + "step": 931500 + }, + { + "epoch": 7.46, + "learning_rate": 4.627200000000001e-05, + "loss": 8.6844, + "step": 932000 + }, + { + "epoch": 7.46, + "learning_rate": 4.627e-05, + "loss": 8.6853, + "step": 932500 + }, + { + "epoch": 7.46, + "learning_rate": 4.6268e-05, + "loss": 8.7154, + "step": 933000 + }, + { + "epoch": 7.47, + "learning_rate": 4.6266e-05, + "loss": 8.6622, + "step": 933500 + }, + { + "epoch": 7.47, + "learning_rate": 4.6264e-05, + "loss": 8.7021, + "step": 934000 + }, + { + "epoch": 7.48, + "learning_rate": 4.6262e-05, + "loss": 8.6999, + "step": 934500 + }, + { + "epoch": 7.48, + "learning_rate": 4.626e-05, + "loss": 8.7232, + "step": 935000 + }, + { + "epoch": 7.48, + "learning_rate": 4.6258e-05, + "loss": 8.6903, + "step": 935500 + }, + { + "epoch": 7.49, + "learning_rate": 4.6256000000000006e-05, + "loss": 8.7058, + "step": 936000 + }, + { + "epoch": 7.49, + "learning_rate": 4.6254e-05, + "loss": 8.678, + "step": 936500 + }, + { + "epoch": 7.5, + "learning_rate": 4.6252000000000004e-05, + "loss": 8.6845, + "step": 937000 + }, + { + "epoch": 7.5, + "learning_rate": 4.6250000000000006e-05, + "loss": 8.7047, + "step": 937500 + }, + { + "epoch": 7.5, + "learning_rate": 4.6248e-05, + "loss": 8.7072, + "step": 938000 + }, + { + "epoch": 7.51, + "learning_rate": 4.6246e-05, + "loss": 8.6988, + "step": 938500 + }, + { + "epoch": 7.51, + "learning_rate": 4.6244000000000007e-05, + "loss": 8.708, + "step": 939000 + }, + { + "epoch": 7.52, + "learning_rate": 4.6242e-05, + "loss": 8.6982, + "step": 939500 + }, + { + "epoch": 7.52, + "learning_rate": 4.624e-05, + "loss": 8.6931, + "step": 940000 + }, + { + "epoch": 7.52, + "learning_rate": 4.623800000000001e-05, + "loss": 8.6934, + "step": 940500 + }, + { + "epoch": 7.53, + "learning_rate": 4.6236e-05, + "loss": 8.7143, + "step": 941000 + }, + { + "epoch": 7.53, + "learning_rate": 4.6234e-05, + "loss": 8.6808, + "step": 941500 + }, + { + "epoch": 7.54, + "learning_rate": 4.6232e-05, + "loss": 8.6717, + "step": 942000 + }, + { + "epoch": 7.54, + "learning_rate": 4.623e-05, + "loss": 8.6726, + "step": 942500 + }, + { + "epoch": 7.54, + "learning_rate": 4.6228000000000005e-05, + "loss": 8.6821, + "step": 943000 + }, + { + "epoch": 7.55, + "learning_rate": 4.6226e-05, + "loss": 8.6614, + "step": 943500 + }, + { + "epoch": 7.55, + "learning_rate": 4.6224e-05, + "loss": 8.6905, + "step": 944000 + }, + { + "epoch": 7.56, + "learning_rate": 4.6222000000000006e-05, + "loss": 8.6846, + "step": 944500 + }, + { + "epoch": 7.56, + "learning_rate": 4.622e-05, + "loss": 8.676, + "step": 945000 + }, + { + "epoch": 7.56, + "learning_rate": 4.6218e-05, + "loss": 8.6599, + "step": 945500 + }, + { + "epoch": 7.57, + "learning_rate": 4.6216000000000006e-05, + "loss": 8.7162, + "step": 946000 + }, + { + "epoch": 7.57, + "learning_rate": 4.6214e-05, + "loss": 8.6802, + "step": 946500 + }, + { + "epoch": 7.58, + "learning_rate": 4.6212e-05, + "loss": 8.6806, + "step": 947000 + }, + { + "epoch": 7.58, + "learning_rate": 4.6210000000000006e-05, + "loss": 8.6999, + "step": 947500 + }, + { + "epoch": 7.58, + "learning_rate": 4.6208e-05, + "loss": 8.7034, + "step": 948000 + }, + { + "epoch": 7.59, + "learning_rate": 4.6206000000000005e-05, + "loss": 8.687, + "step": 948500 + }, + { + "epoch": 7.59, + "learning_rate": 4.6204e-05, + "loss": 8.6834, + "step": 949000 + }, + { + "epoch": 7.6, + "learning_rate": 4.6202e-05, + "loss": 8.6872, + "step": 949500 + }, + { + "epoch": 7.6, + "learning_rate": 4.6200000000000005e-05, + "loss": 8.6548, + "step": 950000 + }, + { + "epoch": 7.6, + "learning_rate": 4.6198e-05, + "loss": 8.6762, + "step": 950500 + }, + { + "epoch": 7.61, + "learning_rate": 4.6196e-05, + "loss": 8.6646, + "step": 951000 + }, + { + "epoch": 7.61, + "learning_rate": 4.6194000000000005e-05, + "loss": 8.6795, + "step": 951500 + }, + { + "epoch": 7.62, + "learning_rate": 4.6192e-05, + "loss": 8.6805, + "step": 952000 + }, + { + "epoch": 7.62, + "learning_rate": 4.619e-05, + "loss": 8.6733, + "step": 952500 + }, + { + "epoch": 7.62, + "learning_rate": 4.6188000000000006e-05, + "loss": 8.6975, + "step": 953000 + }, + { + "epoch": 7.63, + "learning_rate": 4.6186e-05, + "loss": 8.6797, + "step": 953500 + }, + { + "epoch": 7.63, + "learning_rate": 4.6184e-05, + "loss": 8.6787, + "step": 954000 + }, + { + "epoch": 7.64, + "learning_rate": 4.6182000000000006e-05, + "loss": 8.6848, + "step": 954500 + }, + { + "epoch": 7.64, + "learning_rate": 4.618e-05, + "loss": 8.6965, + "step": 955000 + }, + { + "epoch": 7.64, + "learning_rate": 4.6178000000000004e-05, + "loss": 8.6782, + "step": 955500 + }, + { + "epoch": 7.65, + "learning_rate": 4.6176e-05, + "loss": 8.6756, + "step": 956000 + }, + { + "epoch": 7.65, + "learning_rate": 4.6174e-05, + "loss": 8.712, + "step": 956500 + }, + { + "epoch": 7.66, + "learning_rate": 4.6172000000000004e-05, + "loss": 8.6875, + "step": 957000 + }, + { + "epoch": 7.66, + "learning_rate": 4.617e-05, + "loss": 8.6896, + "step": 957500 + }, + { + "epoch": 7.66, + "learning_rate": 4.6168e-05, + "loss": 8.69, + "step": 958000 + }, + { + "epoch": 7.67, + "learning_rate": 4.6166000000000005e-05, + "loss": 8.6729, + "step": 958500 + }, + { + "epoch": 7.67, + "learning_rate": 4.6164e-05, + "loss": 8.6989, + "step": 959000 + }, + { + "epoch": 7.68, + "learning_rate": 4.6162e-05, + "loss": 8.6812, + "step": 959500 + }, + { + "epoch": 7.68, + "learning_rate": 4.6160000000000005e-05, + "loss": 8.7029, + "step": 960000 + }, + { + "epoch": 7.68, + "learning_rate": 4.6158e-05, + "loss": 8.6698, + "step": 960500 + }, + { + "epoch": 7.69, + "learning_rate": 4.6156e-05, + "loss": 8.6891, + "step": 961000 + }, + { + "epoch": 7.69, + "learning_rate": 4.6154000000000006e-05, + "loss": 8.6966, + "step": 961500 + }, + { + "epoch": 7.7, + "learning_rate": 4.6152e-05, + "loss": 8.6876, + "step": 962000 + }, + { + "epoch": 7.7, + "learning_rate": 4.6150000000000004e-05, + "loss": 8.6973, + "step": 962500 + }, + { + "epoch": 7.7, + "learning_rate": 4.6148e-05, + "loss": 8.6773, + "step": 963000 + }, + { + "epoch": 7.71, + "learning_rate": 4.6146e-05, + "loss": 8.6886, + "step": 963500 + }, + { + "epoch": 7.71, + "learning_rate": 4.6144000000000004e-05, + "loss": 8.6869, + "step": 964000 + }, + { + "epoch": 7.72, + "learning_rate": 4.6142e-05, + "loss": 8.6692, + "step": 964500 + }, + { + "epoch": 7.72, + "learning_rate": 4.614e-05, + "loss": 8.6766, + "step": 965000 + }, + { + "epoch": 7.72, + "learning_rate": 4.6138000000000004e-05, + "loss": 8.6666, + "step": 965500 + }, + { + "epoch": 7.73, + "learning_rate": 4.6136e-05, + "loss": 8.687, + "step": 966000 + }, + { + "epoch": 7.73, + "learning_rate": 4.6134e-05, + "loss": 8.6504, + "step": 966500 + }, + { + "epoch": 7.74, + "learning_rate": 4.6132000000000005e-05, + "loss": 8.6796, + "step": 967000 + }, + { + "epoch": 7.74, + "learning_rate": 4.613e-05, + "loss": 8.6913, + "step": 967500 + }, + { + "epoch": 7.74, + "learning_rate": 4.6128e-05, + "loss": 8.6593, + "step": 968000 + }, + { + "epoch": 7.75, + "learning_rate": 4.6126000000000005e-05, + "loss": 8.6936, + "step": 968500 + }, + { + "epoch": 7.75, + "learning_rate": 4.6124e-05, + "loss": 8.6986, + "step": 969000 + }, + { + "epoch": 7.76, + "learning_rate": 4.6122e-05, + "loss": 8.7003, + "step": 969500 + }, + { + "epoch": 7.76, + "learning_rate": 4.612e-05, + "loss": 8.7268, + "step": 970000 + }, + { + "epoch": 7.76, + "learning_rate": 4.6118e-05, + "loss": 8.6642, + "step": 970500 + }, + { + "epoch": 7.77, + "learning_rate": 4.6116000000000004e-05, + "loss": 8.7087, + "step": 971000 + }, + { + "epoch": 7.77, + "learning_rate": 4.6114e-05, + "loss": 8.6864, + "step": 971500 + }, + { + "epoch": 7.78, + "learning_rate": 4.6112e-05, + "loss": 8.6814, + "step": 972000 + }, + { + "epoch": 7.78, + "learning_rate": 4.6110000000000004e-05, + "loss": 8.6834, + "step": 972500 + }, + { + "epoch": 7.78, + "learning_rate": 4.6108e-05, + "loss": 8.6783, + "step": 973000 + }, + { + "epoch": 7.79, + "learning_rate": 4.6106e-05, + "loss": 8.6716, + "step": 973500 + }, + { + "epoch": 7.79, + "learning_rate": 4.6104000000000004e-05, + "loss": 8.691, + "step": 974000 + }, + { + "epoch": 7.8, + "learning_rate": 4.6102e-05, + "loss": 8.6724, + "step": 974500 + }, + { + "epoch": 7.8, + "learning_rate": 4.61e-05, + "loss": 8.6989, + "step": 975000 + }, + { + "epoch": 7.8, + "learning_rate": 4.6098000000000005e-05, + "loss": 8.6891, + "step": 975500 + }, + { + "epoch": 7.81, + "learning_rate": 4.6096e-05, + "loss": 8.7081, + "step": 976000 + }, + { + "epoch": 7.81, + "learning_rate": 4.6094e-05, + "loss": 8.6749, + "step": 976500 + }, + { + "epoch": 7.82, + "learning_rate": 4.6092e-05, + "loss": 8.6825, + "step": 977000 + }, + { + "epoch": 7.82, + "learning_rate": 4.609e-05, + "loss": 8.6799, + "step": 977500 + }, + { + "epoch": 7.82, + "learning_rate": 4.6088e-05, + "loss": 8.6761, + "step": 978000 + }, + { + "epoch": 7.83, + "learning_rate": 4.6086e-05, + "loss": 8.6895, + "step": 978500 + }, + { + "epoch": 7.83, + "learning_rate": 4.608400000000001e-05, + "loss": 8.6833, + "step": 979000 + }, + { + "epoch": 7.84, + "learning_rate": 4.6082000000000004e-05, + "loss": 8.6868, + "step": 979500 + }, + { + "epoch": 7.84, + "learning_rate": 4.608e-05, + "loss": 8.6998, + "step": 980000 + }, + { + "epoch": 7.84, + "learning_rate": 4.6078e-05, + "loss": 8.6742, + "step": 980500 + }, + { + "epoch": 7.85, + "learning_rate": 4.6076000000000004e-05, + "loss": 8.6903, + "step": 981000 + }, + { + "epoch": 7.85, + "learning_rate": 4.6074e-05, + "loss": 8.7028, + "step": 981500 + }, + { + "epoch": 7.86, + "learning_rate": 4.6072e-05, + "loss": 8.6664, + "step": 982000 + }, + { + "epoch": 7.86, + "learning_rate": 4.6070000000000004e-05, + "loss": 8.7171, + "step": 982500 + }, + { + "epoch": 7.86, + "learning_rate": 4.6068e-05, + "loss": 8.6859, + "step": 983000 + }, + { + "epoch": 7.87, + "learning_rate": 4.6066e-05, + "loss": 8.6715, + "step": 983500 + }, + { + "epoch": 7.87, + "learning_rate": 4.6064000000000005e-05, + "loss": 8.6685, + "step": 984000 + }, + { + "epoch": 7.88, + "learning_rate": 4.6062e-05, + "loss": 8.7039, + "step": 984500 + }, + { + "epoch": 7.88, + "learning_rate": 4.606e-05, + "loss": 8.6847, + "step": 985000 + }, + { + "epoch": 7.88, + "learning_rate": 4.6058e-05, + "loss": 8.6586, + "step": 985500 + }, + { + "epoch": 7.89, + "learning_rate": 4.605600000000001e-05, + "loss": 8.7179, + "step": 986000 + }, + { + "epoch": 7.89, + "learning_rate": 4.6054e-05, + "loss": 8.6705, + "step": 986500 + }, + { + "epoch": 7.9, + "learning_rate": 4.6052e-05, + "loss": 8.6893, + "step": 987000 + }, + { + "epoch": 7.9, + "learning_rate": 4.605e-05, + "loss": 8.6609, + "step": 987500 + }, + { + "epoch": 7.9, + "learning_rate": 4.6048000000000004e-05, + "loss": 8.684, + "step": 988000 + }, + { + "epoch": 7.91, + "learning_rate": 4.6046e-05, + "loss": 8.6849, + "step": 988500 + }, + { + "epoch": 7.91, + "learning_rate": 4.6044e-05, + "loss": 8.681, + "step": 989000 + }, + { + "epoch": 7.92, + "learning_rate": 4.6042000000000004e-05, + "loss": 8.6703, + "step": 989500 + }, + { + "epoch": 7.92, + "learning_rate": 4.604e-05, + "loss": 8.6585, + "step": 990000 + }, + { + "epoch": 7.92, + "learning_rate": 4.6038e-05, + "loss": 8.6652, + "step": 990500 + }, + { + "epoch": 7.93, + "learning_rate": 4.6036000000000004e-05, + "loss": 8.6842, + "step": 991000 + }, + { + "epoch": 7.93, + "learning_rate": 4.603400000000001e-05, + "loss": 8.6643, + "step": 991500 + }, + { + "epoch": 7.94, + "learning_rate": 4.6032e-05, + "loss": 8.6786, + "step": 992000 + }, + { + "epoch": 7.94, + "learning_rate": 4.603e-05, + "loss": 8.6923, + "step": 992500 + }, + { + "epoch": 7.94, + "learning_rate": 4.602800000000001e-05, + "loss": 8.687, + "step": 993000 + }, + { + "epoch": 7.95, + "learning_rate": 4.6026e-05, + "loss": 8.6822, + "step": 993500 + }, + { + "epoch": 7.95, + "learning_rate": 4.6024e-05, + "loss": 8.692, + "step": 994000 + }, + { + "epoch": 7.96, + "learning_rate": 4.6022e-05, + "loss": 8.7046, + "step": 994500 + }, + { + "epoch": 7.96, + "learning_rate": 4.602e-05, + "loss": 8.6693, + "step": 995000 + }, + { + "epoch": 7.96, + "learning_rate": 4.6018e-05, + "loss": 8.6736, + "step": 995500 + }, + { + "epoch": 7.97, + "learning_rate": 4.6016e-05, + "loss": 8.6926, + "step": 996000 + }, + { + "epoch": 7.97, + "learning_rate": 4.6014000000000004e-05, + "loss": 8.6698, + "step": 996500 + }, + { + "epoch": 7.98, + "learning_rate": 4.6012e-05, + "loss": 8.6779, + "step": 997000 + }, + { + "epoch": 7.98, + "learning_rate": 4.601e-05, + "loss": 8.7049, + "step": 997500 + }, + { + "epoch": 7.98, + "learning_rate": 4.6008000000000004e-05, + "loss": 8.6701, + "step": 998000 + }, + { + "epoch": 7.99, + "learning_rate": 4.6006000000000006e-05, + "loss": 8.6645, + "step": 998500 + }, + { + "epoch": 7.99, + "learning_rate": 4.6004e-05, + "loss": 8.6496, + "step": 999000 + }, + { + "epoch": 8.0, + "learning_rate": 4.6002e-05, + "loss": 8.6946, + "step": 999500 + }, + { + "epoch": 8.0, + "learning_rate": 4.600000000000001e-05, + "loss": 8.6764, + "step": 1000000 + }, + { + "epoch": 8.0, + "learning_rate": 4.5998e-05, + "loss": 8.697, + "step": 1000500 + }, + { + "epoch": 8.01, + "learning_rate": 4.5996e-05, + "loss": 8.7044, + "step": 1001000 + }, + { + "epoch": 8.01, + "learning_rate": 4.5994e-05, + "loss": 8.7141, + "step": 1001500 + }, + { + "epoch": 8.02, + "learning_rate": 4.5992e-05, + "loss": 8.6652, + "step": 1002000 + }, + { + "epoch": 8.02, + "learning_rate": 4.599e-05, + "loss": 8.6987, + "step": 1002500 + }, + { + "epoch": 8.02, + "learning_rate": 4.5988e-05, + "loss": 8.6945, + "step": 1003000 + }, + { + "epoch": 8.03, + "learning_rate": 4.5986e-05, + "loss": 8.6813, + "step": 1003500 + }, + { + "epoch": 8.03, + "learning_rate": 4.5984000000000006e-05, + "loss": 8.7214, + "step": 1004000 + }, + { + "epoch": 8.04, + "learning_rate": 4.5982e-05, + "loss": 8.6751, + "step": 1004500 + }, + { + "epoch": 8.04, + "learning_rate": 4.5980000000000004e-05, + "loss": 8.6723, + "step": 1005000 + }, + { + "epoch": 8.04, + "learning_rate": 4.5978000000000006e-05, + "loss": 8.6717, + "step": 1005500 + }, + { + "epoch": 8.05, + "learning_rate": 4.5976e-05, + "loss": 8.679, + "step": 1006000 + }, + { + "epoch": 8.05, + "learning_rate": 4.5974e-05, + "loss": 8.658, + "step": 1006500 + }, + { + "epoch": 8.06, + "learning_rate": 4.5972000000000006e-05, + "loss": 8.6902, + "step": 1007000 + }, + { + "epoch": 8.06, + "learning_rate": 4.597e-05, + "loss": 8.7246, + "step": 1007500 + }, + { + "epoch": 8.06, + "learning_rate": 4.5968e-05, + "loss": 8.7052, + "step": 1008000 + }, + { + "epoch": 8.07, + "learning_rate": 4.596600000000001e-05, + "loss": 8.6782, + "step": 1008500 + }, + { + "epoch": 8.07, + "learning_rate": 4.5964e-05, + "loss": 8.6826, + "step": 1009000 + }, + { + "epoch": 8.08, + "learning_rate": 4.5962e-05, + "loss": 8.6854, + "step": 1009500 + }, + { + "epoch": 8.08, + "learning_rate": 4.596e-05, + "loss": 8.6999, + "step": 1010000 + }, + { + "epoch": 8.08, + "learning_rate": 4.5958e-05, + "loss": 8.7048, + "step": 1010500 + }, + { + "epoch": 8.09, + "learning_rate": 4.5956000000000005e-05, + "loss": 8.676, + "step": 1011000 + }, + { + "epoch": 8.09, + "learning_rate": 4.5954e-05, + "loss": 8.6606, + "step": 1011500 + }, + { + "epoch": 8.1, + "learning_rate": 4.5952e-05, + "loss": 8.6777, + "step": 1012000 + }, + { + "epoch": 8.1, + "learning_rate": 4.5950000000000006e-05, + "loss": 8.6933, + "step": 1012500 + }, + { + "epoch": 8.1, + "learning_rate": 4.5948e-05, + "loss": 8.6798, + "step": 1013000 + }, + { + "epoch": 8.11, + "learning_rate": 4.5946e-05, + "loss": 8.7053, + "step": 1013500 + }, + { + "epoch": 8.11, + "learning_rate": 4.5944000000000006e-05, + "loss": 8.6938, + "step": 1014000 + }, + { + "epoch": 8.12, + "learning_rate": 4.5942e-05, + "loss": 8.6784, + "step": 1014500 + }, + { + "epoch": 8.12, + "learning_rate": 4.594e-05, + "loss": 8.676, + "step": 1015000 + }, + { + "epoch": 8.12, + "learning_rate": 4.5938000000000006e-05, + "loss": 8.6565, + "step": 1015500 + }, + { + "epoch": 8.13, + "learning_rate": 4.5936e-05, + "loss": 8.6663, + "step": 1016000 + }, + { + "epoch": 8.13, + "learning_rate": 4.5934000000000004e-05, + "loss": 8.6991, + "step": 1016500 + }, + { + "epoch": 8.14, + "learning_rate": 4.5932e-05, + "loss": 8.6868, + "step": 1017000 + }, + { + "epoch": 8.14, + "learning_rate": 4.593e-05, + "loss": 8.6851, + "step": 1017500 + }, + { + "epoch": 8.14, + "learning_rate": 4.5928000000000005e-05, + "loss": 8.6733, + "step": 1018000 + }, + { + "epoch": 8.15, + "learning_rate": 4.5926e-05, + "loss": 8.6802, + "step": 1018500 + }, + { + "epoch": 8.15, + "learning_rate": 4.5924e-05, + "loss": 8.6988, + "step": 1019000 + }, + { + "epoch": 8.16, + "learning_rate": 4.5922000000000005e-05, + "loss": 8.6794, + "step": 1019500 + }, + { + "epoch": 8.16, + "learning_rate": 4.592e-05, + "loss": 8.7025, + "step": 1020000 + }, + { + "epoch": 8.16, + "learning_rate": 4.5918e-05, + "loss": 8.6893, + "step": 1020500 + }, + { + "epoch": 8.17, + "learning_rate": 4.5916000000000006e-05, + "loss": 8.6904, + "step": 1021000 + }, + { + "epoch": 8.17, + "learning_rate": 4.5914e-05, + "loss": 8.6713, + "step": 1021500 + }, + { + "epoch": 8.18, + "learning_rate": 4.5912e-05, + "loss": 8.6845, + "step": 1022000 + }, + { + "epoch": 8.18, + "learning_rate": 4.5910000000000006e-05, + "loss": 8.6934, + "step": 1022500 + }, + { + "epoch": 8.18, + "learning_rate": 4.5908e-05, + "loss": 8.7045, + "step": 1023000 + }, + { + "epoch": 8.19, + "learning_rate": 4.5906000000000004e-05, + "loss": 8.6809, + "step": 1023500 + }, + { + "epoch": 8.19, + "learning_rate": 4.5904e-05, + "loss": 8.6934, + "step": 1024000 + }, + { + "epoch": 8.2, + "learning_rate": 4.5902e-05, + "loss": 8.7136, + "step": 1024500 + }, + { + "epoch": 8.2, + "learning_rate": 4.5900000000000004e-05, + "loss": 8.7, + "step": 1025000 + }, + { + "epoch": 8.2, + "learning_rate": 4.5898e-05, + "loss": 8.6985, + "step": 1025500 + }, + { + "epoch": 8.21, + "learning_rate": 4.5896e-05, + "loss": 8.6758, + "step": 1026000 + }, + { + "epoch": 8.21, + "learning_rate": 4.5894000000000005e-05, + "loss": 8.679, + "step": 1026500 + }, + { + "epoch": 8.22, + "learning_rate": 4.5892e-05, + "loss": 8.689, + "step": 1027000 + }, + { + "epoch": 8.22, + "learning_rate": 4.589e-05, + "loss": 8.6771, + "step": 1027500 + }, + { + "epoch": 8.22, + "learning_rate": 4.5888000000000005e-05, + "loss": 8.6877, + "step": 1028000 + }, + { + "epoch": 8.23, + "learning_rate": 4.5886e-05, + "loss": 8.7039, + "step": 1028500 + }, + { + "epoch": 8.23, + "learning_rate": 4.5884e-05, + "loss": 8.6799, + "step": 1029000 + }, + { + "epoch": 8.24, + "learning_rate": 4.5882000000000006e-05, + "loss": 8.6733, + "step": 1029500 + }, + { + "epoch": 8.24, + "learning_rate": 4.588e-05, + "loss": 8.6921, + "step": 1030000 + }, + { + "epoch": 8.24, + "learning_rate": 4.5878000000000004e-05, + "loss": 8.7019, + "step": 1030500 + }, + { + "epoch": 8.25, + "learning_rate": 4.5876e-05, + "loss": 8.6973, + "step": 1031000 + }, + { + "epoch": 8.25, + "learning_rate": 4.5874e-05, + "loss": 8.6742, + "step": 1031500 + }, + { + "epoch": 8.26, + "learning_rate": 4.5872000000000004e-05, + "loss": 8.6983, + "step": 1032000 + }, + { + "epoch": 8.26, + "learning_rate": 4.587e-05, + "loss": 8.6928, + "step": 1032500 + }, + { + "epoch": 8.26, + "learning_rate": 4.5868e-05, + "loss": 8.6773, + "step": 1033000 + }, + { + "epoch": 8.27, + "learning_rate": 4.5866000000000004e-05, + "loss": 8.6524, + "step": 1033500 + }, + { + "epoch": 8.27, + "learning_rate": 4.5864e-05, + "loss": 8.6835, + "step": 1034000 + }, + { + "epoch": 8.28, + "learning_rate": 4.5862e-05, + "loss": 8.681, + "step": 1034500 + }, + { + "epoch": 8.28, + "learning_rate": 4.5860000000000005e-05, + "loss": 8.6665, + "step": 1035000 + }, + { + "epoch": 8.28, + "learning_rate": 4.5858e-05, + "loss": 8.6794, + "step": 1035500 + }, + { + "epoch": 8.29, + "learning_rate": 4.5856e-05, + "loss": 8.6984, + "step": 1036000 + }, + { + "epoch": 8.29, + "learning_rate": 4.5854000000000005e-05, + "loss": 8.6684, + "step": 1036500 + }, + { + "epoch": 8.3, + "learning_rate": 4.5852e-05, + "loss": 8.6814, + "step": 1037000 + }, + { + "epoch": 8.3, + "learning_rate": 4.585e-05, + "loss": 8.654, + "step": 1037500 + }, + { + "epoch": 8.3, + "learning_rate": 4.5848e-05, + "loss": 8.6615, + "step": 1038000 + }, + { + "epoch": 8.31, + "learning_rate": 4.5846e-05, + "loss": 8.7002, + "step": 1038500 + }, + { + "epoch": 8.31, + "learning_rate": 4.5844000000000004e-05, + "loss": 8.6894, + "step": 1039000 + }, + { + "epoch": 8.32, + "learning_rate": 4.5842e-05, + "loss": 8.673, + "step": 1039500 + }, + { + "epoch": 8.32, + "learning_rate": 4.584e-05, + "loss": 8.6806, + "step": 1040000 + }, + { + "epoch": 8.32, + "learning_rate": 4.5838000000000004e-05, + "loss": 8.6709, + "step": 1040500 + }, + { + "epoch": 8.33, + "learning_rate": 4.5836e-05, + "loss": 8.6795, + "step": 1041000 + }, + { + "epoch": 8.33, + "learning_rate": 4.5834e-05, + "loss": 8.6674, + "step": 1041500 + }, + { + "epoch": 8.34, + "learning_rate": 4.5832000000000004e-05, + "loss": 8.6833, + "step": 1042000 + }, + { + "epoch": 8.34, + "learning_rate": 4.583e-05, + "loss": 8.669, + "step": 1042500 + }, + { + "epoch": 8.34, + "learning_rate": 4.5828e-05, + "loss": 8.7021, + "step": 1043000 + }, + { + "epoch": 8.35, + "learning_rate": 4.5826000000000005e-05, + "loss": 8.688, + "step": 1043500 + }, + { + "epoch": 8.35, + "learning_rate": 4.5824e-05, + "loss": 8.6934, + "step": 1044000 + }, + { + "epoch": 8.36, + "learning_rate": 4.5822e-05, + "loss": 8.6851, + "step": 1044500 + }, + { + "epoch": 8.36, + "learning_rate": 4.5820000000000005e-05, + "loss": 8.6744, + "step": 1045000 + }, + { + "epoch": 8.36, + "learning_rate": 4.5818e-05, + "loss": 8.6615, + "step": 1045500 + }, + { + "epoch": 8.37, + "learning_rate": 4.5816e-05, + "loss": 8.7014, + "step": 1046000 + }, + { + "epoch": 8.37, + "learning_rate": 4.5814e-05, + "loss": 8.697, + "step": 1046500 + }, + { + "epoch": 8.38, + "learning_rate": 4.581200000000001e-05, + "loss": 8.6944, + "step": 1047000 + }, + { + "epoch": 8.38, + "learning_rate": 4.5810000000000004e-05, + "loss": 8.6951, + "step": 1047500 + }, + { + "epoch": 8.38, + "learning_rate": 4.5808e-05, + "loss": 8.6984, + "step": 1048000 + }, + { + "epoch": 8.39, + "learning_rate": 4.5806e-05, + "loss": 8.6842, + "step": 1048500 + }, + { + "epoch": 8.39, + "learning_rate": 4.5804000000000004e-05, + "loss": 8.6664, + "step": 1049000 + }, + { + "epoch": 8.4, + "learning_rate": 4.5802e-05, + "loss": 8.6947, + "step": 1049500 + }, + { + "epoch": 8.4, + "learning_rate": 4.58e-05, + "loss": 8.701, + "step": 1050000 + }, + { + "epoch": 8.4, + "learning_rate": 4.5798000000000004e-05, + "loss": 8.6766, + "step": 1050500 + }, + { + "epoch": 8.41, + "learning_rate": 4.5796e-05, + "loss": 8.7013, + "step": 1051000 + }, + { + "epoch": 8.41, + "learning_rate": 4.5794e-05, + "loss": 8.687, + "step": 1051500 + }, + { + "epoch": 8.42, + "learning_rate": 4.5792000000000005e-05, + "loss": 8.6919, + "step": 1052000 + }, + { + "epoch": 8.42, + "learning_rate": 4.579e-05, + "loss": 8.6885, + "step": 1052500 + }, + { + "epoch": 8.42, + "learning_rate": 4.5788e-05, + "loss": 8.6599, + "step": 1053000 + }, + { + "epoch": 8.43, + "learning_rate": 4.5786e-05, + "loss": 8.7044, + "step": 1053500 + }, + { + "epoch": 8.43, + "learning_rate": 4.578400000000001e-05, + "loss": 8.6804, + "step": 1054000 + }, + { + "epoch": 8.44, + "learning_rate": 4.5782e-05, + "loss": 8.7054, + "step": 1054500 + }, + { + "epoch": 8.44, + "learning_rate": 4.578e-05, + "loss": 8.6756, + "step": 1055000 + }, + { + "epoch": 8.44, + "learning_rate": 4.5778e-05, + "loss": 8.6731, + "step": 1055500 + }, + { + "epoch": 8.45, + "learning_rate": 4.5776000000000004e-05, + "loss": 8.6934, + "step": 1056000 + }, + { + "epoch": 8.45, + "learning_rate": 4.5774e-05, + "loss": 8.6564, + "step": 1056500 + }, + { + "epoch": 8.46, + "learning_rate": 4.5772e-05, + "loss": 8.6726, + "step": 1057000 + }, + { + "epoch": 8.46, + "learning_rate": 4.5770000000000004e-05, + "loss": 8.6737, + "step": 1057500 + }, + { + "epoch": 8.46, + "learning_rate": 4.5768e-05, + "loss": 8.7018, + "step": 1058000 + }, + { + "epoch": 8.47, + "learning_rate": 4.5766e-05, + "loss": 8.661, + "step": 1058500 + }, + { + "epoch": 8.47, + "learning_rate": 4.5764000000000004e-05, + "loss": 8.6806, + "step": 1059000 + }, + { + "epoch": 8.48, + "learning_rate": 4.576200000000001e-05, + "loss": 8.7031, + "step": 1059500 + }, + { + "epoch": 8.48, + "learning_rate": 4.576e-05, + "loss": 8.6937, + "step": 1060000 + }, + { + "epoch": 8.48, + "learning_rate": 4.5758e-05, + "loss": 8.7177, + "step": 1060500 + }, + { + "epoch": 8.49, + "learning_rate": 4.575600000000001e-05, + "loss": 8.6888, + "step": 1061000 + }, + { + "epoch": 8.49, + "learning_rate": 4.5754e-05, + "loss": 8.704, + "step": 1061500 + }, + { + "epoch": 8.5, + "learning_rate": 4.5752e-05, + "loss": 8.692, + "step": 1062000 + }, + { + "epoch": 8.5, + "learning_rate": 4.575e-05, + "loss": 8.6758, + "step": 1062500 + }, + { + "epoch": 8.5, + "learning_rate": 4.5748e-05, + "loss": 8.6767, + "step": 1063000 + }, + { + "epoch": 8.51, + "learning_rate": 4.5746e-05, + "loss": 8.6824, + "step": 1063500 + }, + { + "epoch": 8.51, + "learning_rate": 4.5744e-05, + "loss": 8.6717, + "step": 1064000 + }, + { + "epoch": 8.52, + "learning_rate": 4.5742000000000004e-05, + "loss": 8.67, + "step": 1064500 + }, + { + "epoch": 8.52, + "learning_rate": 4.574e-05, + "loss": 8.6797, + "step": 1065000 + }, + { + "epoch": 8.52, + "learning_rate": 4.5738e-05, + "loss": 8.6688, + "step": 1065500 + }, + { + "epoch": 8.53, + "learning_rate": 4.5736000000000004e-05, + "loss": 8.6806, + "step": 1066000 + }, + { + "epoch": 8.53, + "learning_rate": 4.5734000000000006e-05, + "loss": 8.6835, + "step": 1066500 + }, + { + "epoch": 8.54, + "learning_rate": 4.5732e-05, + "loss": 8.699, + "step": 1067000 + }, + { + "epoch": 8.54, + "learning_rate": 4.573e-05, + "loss": 8.688, + "step": 1067500 + }, + { + "epoch": 8.54, + "learning_rate": 4.572800000000001e-05, + "loss": 8.6814, + "step": 1068000 + }, + { + "epoch": 8.55, + "learning_rate": 4.5726e-05, + "loss": 8.6903, + "step": 1068500 + }, + { + "epoch": 8.55, + "learning_rate": 4.5724e-05, + "loss": 8.7022, + "step": 1069000 + }, + { + "epoch": 8.56, + "learning_rate": 4.572200000000001e-05, + "loss": 8.7054, + "step": 1069500 + }, + { + "epoch": 8.56, + "learning_rate": 4.572e-05, + "loss": 8.6911, + "step": 1070000 + }, + { + "epoch": 8.56, + "learning_rate": 4.5718e-05, + "loss": 8.6921, + "step": 1070500 + }, + { + "epoch": 8.57, + "learning_rate": 4.5716e-05, + "loss": 8.6771, + "step": 1071000 + }, + { + "epoch": 8.57, + "learning_rate": 4.5714e-05, + "loss": 8.6779, + "step": 1071500 + }, + { + "epoch": 8.58, + "learning_rate": 4.5712000000000006e-05, + "loss": 8.706, + "step": 1072000 + }, + { + "epoch": 8.58, + "learning_rate": 4.571e-05, + "loss": 8.6893, + "step": 1072500 + }, + { + "epoch": 8.58, + "learning_rate": 4.5708000000000004e-05, + "loss": 8.7018, + "step": 1073000 + }, + { + "epoch": 8.59, + "learning_rate": 4.5706000000000006e-05, + "loss": 8.7055, + "step": 1073500 + }, + { + "epoch": 8.59, + "learning_rate": 4.5704e-05, + "loss": 8.7174, + "step": 1074000 + }, + { + "epoch": 8.6, + "learning_rate": 4.5702e-05, + "loss": 8.6836, + "step": 1074500 + }, + { + "epoch": 8.6, + "learning_rate": 4.5700000000000006e-05, + "loss": 8.7157, + "step": 1075000 + }, + { + "epoch": 8.6, + "learning_rate": 4.5698e-05, + "loss": 8.6948, + "step": 1075500 + }, + { + "epoch": 8.61, + "learning_rate": 4.5696e-05, + "loss": 8.6763, + "step": 1076000 + }, + { + "epoch": 8.61, + "learning_rate": 4.569400000000001e-05, + "loss": 8.711, + "step": 1076500 + }, + { + "epoch": 8.62, + "learning_rate": 4.5692e-05, + "loss": 8.6814, + "step": 1077000 + }, + { + "epoch": 8.62, + "learning_rate": 4.569e-05, + "loss": 8.695, + "step": 1077500 + }, + { + "epoch": 8.62, + "learning_rate": 4.5688e-05, + "loss": 8.6985, + "step": 1078000 + }, + { + "epoch": 8.63, + "learning_rate": 4.5686e-05, + "loss": 8.6937, + "step": 1078500 + }, + { + "epoch": 8.63, + "learning_rate": 4.5684000000000005e-05, + "loss": 8.674, + "step": 1079000 + }, + { + "epoch": 8.64, + "learning_rate": 4.5682e-05, + "loss": 8.6655, + "step": 1079500 + }, + { + "epoch": 8.64, + "learning_rate": 4.568e-05, + "loss": 8.6605, + "step": 1080000 + }, + { + "epoch": 8.64, + "learning_rate": 4.5678000000000005e-05, + "loss": 8.6464, + "step": 1080500 + }, + { + "epoch": 8.65, + "learning_rate": 4.5676e-05, + "loss": 8.6928, + "step": 1081000 + }, + { + "epoch": 8.65, + "learning_rate": 4.5674000000000003e-05, + "loss": 8.6973, + "step": 1081500 + }, + { + "epoch": 8.66, + "learning_rate": 4.5672000000000006e-05, + "loss": 8.6953, + "step": 1082000 + }, + { + "epoch": 8.66, + "learning_rate": 4.567e-05, + "loss": 8.7098, + "step": 1082500 + }, + { + "epoch": 8.66, + "learning_rate": 4.5668e-05, + "loss": 8.6848, + "step": 1083000 + }, + { + "epoch": 8.67, + "learning_rate": 4.5666000000000006e-05, + "loss": 8.6531, + "step": 1083500 + }, + { + "epoch": 8.67, + "learning_rate": 4.5664e-05, + "loss": 8.6983, + "step": 1084000 + }, + { + "epoch": 8.68, + "learning_rate": 4.5662000000000004e-05, + "loss": 8.6984, + "step": 1084500 + }, + { + "epoch": 8.68, + "learning_rate": 4.566e-05, + "loss": 8.6849, + "step": 1085000 + }, + { + "epoch": 8.68, + "learning_rate": 4.5658e-05, + "loss": 8.6652, + "step": 1085500 + }, + { + "epoch": 8.69, + "learning_rate": 4.5656000000000005e-05, + "loss": 8.6564, + "step": 1086000 + }, + { + "epoch": 8.69, + "learning_rate": 4.5654e-05, + "loss": 8.7027, + "step": 1086500 + }, + { + "epoch": 8.7, + "learning_rate": 4.5652e-05, + "loss": 8.6681, + "step": 1087000 + }, + { + "epoch": 8.7, + "learning_rate": 4.5650000000000005e-05, + "loss": 8.6785, + "step": 1087500 + }, + { + "epoch": 8.7, + "learning_rate": 4.5648e-05, + "loss": 8.6725, + "step": 1088000 + }, + { + "epoch": 8.71, + "learning_rate": 4.5646e-05, + "loss": 8.6948, + "step": 1088500 + }, + { + "epoch": 8.71, + "learning_rate": 4.5644000000000005e-05, + "loss": 8.672, + "step": 1089000 + }, + { + "epoch": 8.72, + "learning_rate": 4.5642e-05, + "loss": 8.703, + "step": 1089500 + }, + { + "epoch": 8.72, + "learning_rate": 4.564e-05, + "loss": 8.6858, + "step": 1090000 + }, + { + "epoch": 8.72, + "learning_rate": 4.5638000000000006e-05, + "loss": 8.7108, + "step": 1090500 + }, + { + "epoch": 8.73, + "learning_rate": 4.5636e-05, + "loss": 8.6863, + "step": 1091000 + }, + { + "epoch": 8.73, + "learning_rate": 4.5634000000000004e-05, + "loss": 8.6943, + "step": 1091500 + }, + { + "epoch": 8.74, + "learning_rate": 4.5632e-05, + "loss": 8.6723, + "step": 1092000 + }, + { + "epoch": 8.74, + "learning_rate": 4.563e-05, + "loss": 8.6849, + "step": 1092500 + }, + { + "epoch": 8.74, + "learning_rate": 4.5628000000000004e-05, + "loss": 8.6747, + "step": 1093000 + }, + { + "epoch": 8.75, + "learning_rate": 4.5626e-05, + "loss": 8.683, + "step": 1093500 + }, + { + "epoch": 8.75, + "learning_rate": 4.5624e-05, + "loss": 8.6828, + "step": 1094000 + }, + { + "epoch": 8.76, + "learning_rate": 4.5622000000000005e-05, + "loss": 8.668, + "step": 1094500 + }, + { + "epoch": 8.76, + "learning_rate": 4.562e-05, + "loss": 8.7027, + "step": 1095000 + }, + { + "epoch": 8.76, + "learning_rate": 4.5618e-05, + "loss": 8.6762, + "step": 1095500 + }, + { + "epoch": 8.77, + "learning_rate": 4.5616000000000005e-05, + "loss": 8.6559, + "step": 1096000 + }, + { + "epoch": 8.77, + "learning_rate": 4.5614e-05, + "loss": 8.682, + "step": 1096500 + }, + { + "epoch": 8.78, + "learning_rate": 4.5612e-05, + "loss": 8.6751, + "step": 1097000 + }, + { + "epoch": 8.78, + "learning_rate": 4.5610000000000005e-05, + "loss": 8.6736, + "step": 1097500 + }, + { + "epoch": 8.78, + "learning_rate": 4.5608e-05, + "loss": 8.6695, + "step": 1098000 + }, + { + "epoch": 8.79, + "learning_rate": 4.5606000000000003e-05, + "loss": 8.6935, + "step": 1098500 + }, + { + "epoch": 8.79, + "learning_rate": 4.5604e-05, + "loss": 8.6871, + "step": 1099000 + }, + { + "epoch": 8.8, + "learning_rate": 4.5602e-05, + "loss": 8.6717, + "step": 1099500 + }, + { + "epoch": 8.8, + "learning_rate": 4.5600000000000004e-05, + "loss": 8.7084, + "step": 1100000 + }, + { + "epoch": 8.8, + "learning_rate": 4.5598e-05, + "loss": 8.6653, + "step": 1100500 + }, + { + "epoch": 8.81, + "learning_rate": 4.5596e-05, + "loss": 8.6648, + "step": 1101000 + }, + { + "epoch": 8.81, + "learning_rate": 4.5594000000000004e-05, + "loss": 8.695, + "step": 1101500 + }, + { + "epoch": 8.82, + "learning_rate": 4.5592e-05, + "loss": 8.6835, + "step": 1102000 + }, + { + "epoch": 8.82, + "learning_rate": 4.559e-05, + "loss": 8.6787, + "step": 1102500 + }, + { + "epoch": 8.82, + "learning_rate": 4.5588000000000005e-05, + "loss": 8.6911, + "step": 1103000 + }, + { + "epoch": 8.83, + "learning_rate": 4.5586e-05, + "loss": 8.6841, + "step": 1103500 + }, + { + "epoch": 8.83, + "learning_rate": 4.5584e-05, + "loss": 8.6883, + "step": 1104000 + }, + { + "epoch": 8.84, + "learning_rate": 4.5582000000000005e-05, + "loss": 8.6887, + "step": 1104500 + }, + { + "epoch": 8.84, + "learning_rate": 4.558e-05, + "loss": 8.6936, + "step": 1105000 + }, + { + "epoch": 8.84, + "learning_rate": 4.5578e-05, + "loss": 8.6631, + "step": 1105500 + }, + { + "epoch": 8.85, + "learning_rate": 4.5576e-05, + "loss": 8.692, + "step": 1106000 + }, + { + "epoch": 8.85, + "learning_rate": 4.5574e-05, + "loss": 8.6803, + "step": 1106500 + }, + { + "epoch": 8.86, + "learning_rate": 4.5572000000000003e-05, + "loss": 8.6719, + "step": 1107000 + }, + { + "epoch": 8.86, + "learning_rate": 4.557e-05, + "loss": 8.6884, + "step": 1107500 + }, + { + "epoch": 8.86, + "learning_rate": 4.5568e-05, + "loss": 8.7145, + "step": 1108000 + }, + { + "epoch": 8.87, + "learning_rate": 4.5566000000000004e-05, + "loss": 8.6818, + "step": 1108500 + }, + { + "epoch": 8.87, + "learning_rate": 4.5564e-05, + "loss": 8.6981, + "step": 1109000 + }, + { + "epoch": 8.88, + "learning_rate": 4.5562e-05, + "loss": 8.6619, + "step": 1109500 + }, + { + "epoch": 8.88, + "learning_rate": 4.5560000000000004e-05, + "loss": 8.6769, + "step": 1110000 + }, + { + "epoch": 8.88, + "learning_rate": 4.5558e-05, + "loss": 8.7165, + "step": 1110500 + }, + { + "epoch": 8.89, + "learning_rate": 4.5556e-05, + "loss": 8.6883, + "step": 1111000 + }, + { + "epoch": 8.89, + "learning_rate": 4.5554000000000005e-05, + "loss": 8.6881, + "step": 1111500 + }, + { + "epoch": 8.9, + "learning_rate": 4.5552e-05, + "loss": 8.6761, + "step": 1112000 + }, + { + "epoch": 8.9, + "learning_rate": 4.555e-05, + "loss": 8.6914, + "step": 1112500 + }, + { + "epoch": 8.9, + "learning_rate": 4.5548000000000005e-05, + "loss": 8.7039, + "step": 1113000 + }, + { + "epoch": 8.91, + "learning_rate": 4.5546e-05, + "loss": 8.6931, + "step": 1113500 + }, + { + "epoch": 8.91, + "learning_rate": 4.5544e-05, + "loss": 8.6731, + "step": 1114000 + }, + { + "epoch": 8.92, + "learning_rate": 4.5542e-05, + "loss": 8.6768, + "step": 1114500 + }, + { + "epoch": 8.92, + "learning_rate": 4.554000000000001e-05, + "loss": 8.6697, + "step": 1115000 + }, + { + "epoch": 8.92, + "learning_rate": 4.5538000000000003e-05, + "loss": 8.679, + "step": 1115500 + }, + { + "epoch": 8.93, + "learning_rate": 4.5536e-05, + "loss": 8.6865, + "step": 1116000 + }, + { + "epoch": 8.93, + "learning_rate": 4.5534e-05, + "loss": 8.6871, + "step": 1116500 + }, + { + "epoch": 8.94, + "learning_rate": 4.5532000000000004e-05, + "loss": 8.6906, + "step": 1117000 + }, + { + "epoch": 8.94, + "learning_rate": 4.553e-05, + "loss": 8.7003, + "step": 1117500 + }, + { + "epoch": 8.94, + "learning_rate": 4.5528e-05, + "loss": 8.6846, + "step": 1118000 + }, + { + "epoch": 8.95, + "learning_rate": 4.5526000000000004e-05, + "loss": 8.6789, + "step": 1118500 + }, + { + "epoch": 8.95, + "learning_rate": 4.5524e-05, + "loss": 8.667, + "step": 1119000 + }, + { + "epoch": 8.96, + "learning_rate": 4.5522e-05, + "loss": 8.6753, + "step": 1119500 + }, + { + "epoch": 8.96, + "learning_rate": 4.5520000000000005e-05, + "loss": 8.7123, + "step": 1120000 + }, + { + "epoch": 8.96, + "learning_rate": 4.5518e-05, + "loss": 8.6812, + "step": 1120500 + }, + { + "epoch": 8.97, + "learning_rate": 4.5516e-05, + "loss": 8.6849, + "step": 1121000 + }, + { + "epoch": 8.97, + "learning_rate": 4.5514e-05, + "loss": 8.6712, + "step": 1121500 + }, + { + "epoch": 8.98, + "learning_rate": 4.551200000000001e-05, + "loss": 8.6826, + "step": 1122000 + }, + { + "epoch": 8.98, + "learning_rate": 4.551e-05, + "loss": 8.6913, + "step": 1122500 + }, + { + "epoch": 8.98, + "learning_rate": 4.5508e-05, + "loss": 8.6773, + "step": 1123000 + }, + { + "epoch": 8.99, + "learning_rate": 4.5506e-05, + "loss": 8.6498, + "step": 1123500 + }, + { + "epoch": 8.99, + "learning_rate": 4.5504000000000003e-05, + "loss": 8.6851, + "step": 1124000 + }, + { + "epoch": 9.0, + "learning_rate": 4.5502e-05, + "loss": 8.6908, + "step": 1124500 + }, + { + "epoch": 9.0, + "learning_rate": 4.55e-05, + "loss": 8.6865, + "step": 1125000 + }, + { + "epoch": 9.0, + "learning_rate": 4.5498000000000004e-05, + "loss": 8.69, + "step": 1125500 + }, + { + "epoch": 9.01, + "learning_rate": 4.5496e-05, + "loss": 8.6816, + "step": 1126000 + }, + { + "epoch": 9.01, + "learning_rate": 4.5494e-05, + "loss": 8.6707, + "step": 1126500 + }, + { + "epoch": 9.02, + "learning_rate": 4.5492000000000004e-05, + "loss": 8.6813, + "step": 1127000 + }, + { + "epoch": 9.02, + "learning_rate": 4.549000000000001e-05, + "loss": 8.6957, + "step": 1127500 + }, + { + "epoch": 9.02, + "learning_rate": 4.5488e-05, + "loss": 8.6902, + "step": 1128000 + }, + { + "epoch": 9.03, + "learning_rate": 4.5486e-05, + "loss": 8.6867, + "step": 1128500 + }, + { + "epoch": 9.03, + "learning_rate": 4.548400000000001e-05, + "loss": 8.7026, + "step": 1129000 + }, + { + "epoch": 9.04, + "learning_rate": 4.5482e-05, + "loss": 8.7217, + "step": 1129500 + }, + { + "epoch": 9.04, + "learning_rate": 4.548e-05, + "loss": 8.716, + "step": 1130000 + }, + { + "epoch": 9.04, + "learning_rate": 4.5478e-05, + "loss": 8.6752, + "step": 1130500 + }, + { + "epoch": 9.05, + "learning_rate": 4.5476e-05, + "loss": 8.6636, + "step": 1131000 + }, + { + "epoch": 9.05, + "learning_rate": 4.5474e-05, + "loss": 8.6819, + "step": 1131500 + }, + { + "epoch": 9.06, + "learning_rate": 4.5472e-05, + "loss": 8.6725, + "step": 1132000 + }, + { + "epoch": 9.06, + "learning_rate": 4.5470000000000003e-05, + "loss": 8.6871, + "step": 1132500 + }, + { + "epoch": 9.06, + "learning_rate": 4.5468e-05, + "loss": 8.683, + "step": 1133000 + }, + { + "epoch": 9.07, + "learning_rate": 4.5466e-05, + "loss": 8.6964, + "step": 1133500 + }, + { + "epoch": 9.07, + "learning_rate": 4.5464000000000004e-05, + "loss": 8.6957, + "step": 1134000 + }, + { + "epoch": 9.08, + "learning_rate": 4.5462000000000006e-05, + "loss": 8.6726, + "step": 1134500 + }, + { + "epoch": 9.08, + "learning_rate": 4.546e-05, + "loss": 8.6876, + "step": 1135000 + }, + { + "epoch": 9.08, + "learning_rate": 4.5458e-05, + "loss": 8.6787, + "step": 1135500 + }, + { + "epoch": 9.09, + "learning_rate": 4.5456000000000007e-05, + "loss": 8.6688, + "step": 1136000 + }, + { + "epoch": 9.09, + "learning_rate": 4.5454e-05, + "loss": 8.6881, + "step": 1136500 + }, + { + "epoch": 9.1, + "learning_rate": 4.5452e-05, + "loss": 8.6567, + "step": 1137000 + }, + { + "epoch": 9.1, + "learning_rate": 4.545000000000001e-05, + "loss": 8.6703, + "step": 1137500 + }, + { + "epoch": 9.1, + "learning_rate": 4.5448e-05, + "loss": 8.6733, + "step": 1138000 + }, + { + "epoch": 9.11, + "learning_rate": 4.5446e-05, + "loss": 8.7065, + "step": 1138500 + }, + { + "epoch": 9.11, + "learning_rate": 4.5444e-05, + "loss": 8.6827, + "step": 1139000 + }, + { + "epoch": 9.12, + "learning_rate": 4.5442e-05, + "loss": 8.7059, + "step": 1139500 + }, + { + "epoch": 9.12, + "learning_rate": 4.5440000000000005e-05, + "loss": 8.6822, + "step": 1140000 + }, + { + "epoch": 9.12, + "learning_rate": 4.5438e-05, + "loss": 8.673, + "step": 1140500 + }, + { + "epoch": 9.13, + "learning_rate": 4.5436000000000003e-05, + "loss": 8.6909, + "step": 1141000 + }, + { + "epoch": 9.13, + "learning_rate": 4.5434000000000006e-05, + "loss": 8.6873, + "step": 1141500 + }, + { + "epoch": 9.14, + "learning_rate": 4.5432e-05, + "loss": 8.6955, + "step": 1142000 + }, + { + "epoch": 9.14, + "learning_rate": 4.543e-05, + "loss": 8.7092, + "step": 1142500 + }, + { + "epoch": 9.14, + "learning_rate": 4.5428000000000006e-05, + "loss": 8.7171, + "step": 1143000 + }, + { + "epoch": 9.15, + "learning_rate": 4.5426e-05, + "loss": 8.6882, + "step": 1143500 + }, + { + "epoch": 9.15, + "learning_rate": 4.5424e-05, + "loss": 8.7058, + "step": 1144000 + }, + { + "epoch": 9.16, + "learning_rate": 4.5422000000000007e-05, + "loss": 8.6528, + "step": 1144500 + }, + { + "epoch": 9.16, + "learning_rate": 4.542e-05, + "loss": 8.7011, + "step": 1145000 + }, + { + "epoch": 9.16, + "learning_rate": 4.5418e-05, + "loss": 8.6872, + "step": 1145500 + }, + { + "epoch": 9.17, + "learning_rate": 4.5416e-05, + "loss": 8.6969, + "step": 1146000 + }, + { + "epoch": 9.17, + "learning_rate": 4.5414e-05, + "loss": 8.6982, + "step": 1146500 + }, + { + "epoch": 9.18, + "learning_rate": 4.5412000000000005e-05, + "loss": 8.7002, + "step": 1147000 + }, + { + "epoch": 9.18, + "learning_rate": 4.541e-05, + "loss": 8.6873, + "step": 1147500 + }, + { + "epoch": 9.18, + "learning_rate": 4.5408e-05, + "loss": 8.6973, + "step": 1148000 + }, + { + "epoch": 9.19, + "learning_rate": 4.5406000000000005e-05, + "loss": 8.694, + "step": 1148500 + }, + { + "epoch": 9.19, + "learning_rate": 4.5404e-05, + "loss": 8.6527, + "step": 1149000 + }, + { + "epoch": 9.2, + "learning_rate": 4.5402000000000003e-05, + "loss": 8.6799, + "step": 1149500 + }, + { + "epoch": 9.2, + "learning_rate": 4.5400000000000006e-05, + "loss": 8.6838, + "step": 1150000 + }, + { + "epoch": 9.2, + "learning_rate": 4.5398e-05, + "loss": 8.6942, + "step": 1150500 + }, + { + "epoch": 9.21, + "learning_rate": 4.5396e-05, + "loss": 8.6611, + "step": 1151000 + }, + { + "epoch": 9.21, + "learning_rate": 4.5394000000000006e-05, + "loss": 8.6773, + "step": 1151500 + }, + { + "epoch": 9.22, + "learning_rate": 4.5392e-05, + "loss": 8.6724, + "step": 1152000 + }, + { + "epoch": 9.22, + "learning_rate": 4.5390000000000004e-05, + "loss": 8.6972, + "step": 1152500 + }, + { + "epoch": 9.22, + "learning_rate": 4.5388e-05, + "loss": 8.6775, + "step": 1153000 + }, + { + "epoch": 9.23, + "learning_rate": 4.5386e-05, + "loss": 8.6831, + "step": 1153500 + }, + { + "epoch": 9.23, + "learning_rate": 4.5384000000000005e-05, + "loss": 8.6901, + "step": 1154000 + }, + { + "epoch": 9.24, + "learning_rate": 4.5382e-05, + "loss": 8.677, + "step": 1154500 + }, + { + "epoch": 9.24, + "learning_rate": 4.538e-05, + "loss": 8.6739, + "step": 1155000 + }, + { + "epoch": 9.24, + "learning_rate": 4.5378000000000005e-05, + "loss": 8.6889, + "step": 1155500 + }, + { + "epoch": 9.25, + "learning_rate": 4.5376e-05, + "loss": 8.6539, + "step": 1156000 + }, + { + "epoch": 9.25, + "learning_rate": 4.5374e-05, + "loss": 8.6665, + "step": 1156500 + }, + { + "epoch": 9.26, + "learning_rate": 4.5372000000000005e-05, + "loss": 8.6792, + "step": 1157000 + }, + { + "epoch": 9.26, + "learning_rate": 4.537e-05, + "loss": 8.6745, + "step": 1157500 + }, + { + "epoch": 9.26, + "learning_rate": 4.5368e-05, + "loss": 8.6843, + "step": 1158000 + }, + { + "epoch": 9.27, + "learning_rate": 4.5366000000000006e-05, + "loss": 8.6662, + "step": 1158500 + }, + { + "epoch": 9.27, + "learning_rate": 4.5364e-05, + "loss": 8.6862, + "step": 1159000 + }, + { + "epoch": 9.28, + "learning_rate": 4.5362000000000004e-05, + "loss": 8.6739, + "step": 1159500 + }, + { + "epoch": 9.28, + "learning_rate": 4.536e-05, + "loss": 8.6797, + "step": 1160000 + }, + { + "epoch": 9.28, + "learning_rate": 4.5358e-05, + "loss": 8.6775, + "step": 1160500 + }, + { + "epoch": 9.29, + "learning_rate": 4.5356000000000004e-05, + "loss": 8.6791, + "step": 1161000 + }, + { + "epoch": 9.29, + "learning_rate": 4.5354e-05, + "loss": 8.6946, + "step": 1161500 + }, + { + "epoch": 9.3, + "learning_rate": 4.5352e-05, + "loss": 8.6883, + "step": 1162000 + }, + { + "epoch": 9.3, + "learning_rate": 4.5350000000000005e-05, + "loss": 8.6724, + "step": 1162500 + }, + { + "epoch": 9.3, + "learning_rate": 4.5348e-05, + "loss": 8.6816, + "step": 1163000 + }, + { + "epoch": 9.31, + "learning_rate": 4.5346e-05, + "loss": 8.6815, + "step": 1163500 + }, + { + "epoch": 9.31, + "learning_rate": 4.5344000000000005e-05, + "loss": 8.6697, + "step": 1164000 + }, + { + "epoch": 9.32, + "learning_rate": 4.5342e-05, + "loss": 8.7017, + "step": 1164500 + }, + { + "epoch": 9.32, + "learning_rate": 4.534e-05, + "loss": 8.6955, + "step": 1165000 + }, + { + "epoch": 9.32, + "learning_rate": 4.5338000000000005e-05, + "loss": 8.6704, + "step": 1165500 + }, + { + "epoch": 9.33, + "learning_rate": 4.5336e-05, + "loss": 8.6589, + "step": 1166000 + }, + { + "epoch": 9.33, + "learning_rate": 4.5334e-05, + "loss": 8.6468, + "step": 1166500 + }, + { + "epoch": 9.34, + "learning_rate": 4.5332e-05, + "loss": 8.693, + "step": 1167000 + }, + { + "epoch": 9.34, + "learning_rate": 4.533e-05, + "loss": 8.6931, + "step": 1167500 + }, + { + "epoch": 9.34, + "learning_rate": 4.5328000000000004e-05, + "loss": 8.7162, + "step": 1168000 + }, + { + "epoch": 9.35, + "learning_rate": 4.5326e-05, + "loss": 8.6884, + "step": 1168500 + }, + { + "epoch": 9.35, + "learning_rate": 4.5324e-05, + "loss": 8.712, + "step": 1169000 + }, + { + "epoch": 9.36, + "learning_rate": 4.5322000000000004e-05, + "loss": 8.6747, + "step": 1169500 + }, + { + "epoch": 9.36, + "learning_rate": 4.532e-05, + "loss": 8.6712, + "step": 1170000 + }, + { + "epoch": 9.36, + "learning_rate": 4.5318e-05, + "loss": 8.677, + "step": 1170500 + }, + { + "epoch": 9.37, + "learning_rate": 4.5316000000000005e-05, + "loss": 8.6598, + "step": 1171000 + }, + { + "epoch": 9.37, + "learning_rate": 4.5314e-05, + "loss": 8.685, + "step": 1171500 + }, + { + "epoch": 9.38, + "learning_rate": 4.5312e-05, + "loss": 8.6576, + "step": 1172000 + }, + { + "epoch": 9.38, + "learning_rate": 4.5310000000000005e-05, + "loss": 8.6396, + "step": 1172500 + }, + { + "epoch": 9.38, + "learning_rate": 4.5308e-05, + "loss": 8.6703, + "step": 1173000 + }, + { + "epoch": 9.39, + "learning_rate": 4.5306e-05, + "loss": 8.6643, + "step": 1173500 + }, + { + "epoch": 9.39, + "learning_rate": 4.5304000000000005e-05, + "loss": 8.6905, + "step": 1174000 + }, + { + "epoch": 9.4, + "learning_rate": 4.5302e-05, + "loss": 8.7058, + "step": 1174500 + }, + { + "epoch": 9.4, + "learning_rate": 4.53e-05, + "loss": 8.667, + "step": 1175000 + }, + { + "epoch": 9.4, + "learning_rate": 4.5298e-05, + "loss": 8.6675, + "step": 1175500 + }, + { + "epoch": 9.41, + "learning_rate": 4.5296e-05, + "loss": 8.7209, + "step": 1176000 + }, + { + "epoch": 9.41, + "learning_rate": 4.5294000000000004e-05, + "loss": 8.6623, + "step": 1176500 + }, + { + "epoch": 9.42, + "learning_rate": 4.5292e-05, + "loss": 8.6906, + "step": 1177000 + }, + { + "epoch": 9.42, + "learning_rate": 4.529e-05, + "loss": 8.6851, + "step": 1177500 + }, + { + "epoch": 9.42, + "learning_rate": 4.5288000000000004e-05, + "loss": 8.6968, + "step": 1178000 + }, + { + "epoch": 9.43, + "learning_rate": 4.5286e-05, + "loss": 8.6732, + "step": 1178500 + }, + { + "epoch": 9.43, + "learning_rate": 4.5284e-05, + "loss": 8.6564, + "step": 1179000 + }, + { + "epoch": 9.44, + "learning_rate": 4.5282000000000005e-05, + "loss": 8.6834, + "step": 1179500 + }, + { + "epoch": 9.44, + "learning_rate": 4.528e-05, + "loss": 8.6975, + "step": 1180000 + }, + { + "epoch": 9.44, + "learning_rate": 4.5278e-05, + "loss": 8.704, + "step": 1180500 + }, + { + "epoch": 9.45, + "learning_rate": 4.5276000000000005e-05, + "loss": 8.6882, + "step": 1181000 + }, + { + "epoch": 9.45, + "learning_rate": 4.5274e-05, + "loss": 8.6667, + "step": 1181500 + }, + { + "epoch": 9.46, + "learning_rate": 4.5272e-05, + "loss": 8.6596, + "step": 1182000 + }, + { + "epoch": 9.46, + "learning_rate": 4.527e-05, + "loss": 8.6778, + "step": 1182500 + }, + { + "epoch": 9.46, + "learning_rate": 4.526800000000001e-05, + "loss": 8.6934, + "step": 1183000 + }, + { + "epoch": 9.47, + "learning_rate": 4.5266e-05, + "loss": 8.677, + "step": 1183500 + }, + { + "epoch": 9.47, + "learning_rate": 4.5264e-05, + "loss": 8.6835, + "step": 1184000 + }, + { + "epoch": 9.48, + "learning_rate": 4.5262e-05, + "loss": 8.6924, + "step": 1184500 + }, + { + "epoch": 9.48, + "learning_rate": 4.5260000000000004e-05, + "loss": 8.6882, + "step": 1185000 + }, + { + "epoch": 9.48, + "learning_rate": 4.5258e-05, + "loss": 8.6857, + "step": 1185500 + }, + { + "epoch": 9.49, + "learning_rate": 4.5256e-05, + "loss": 8.7064, + "step": 1186000 + }, + { + "epoch": 9.49, + "learning_rate": 4.5254000000000004e-05, + "loss": 8.6854, + "step": 1186500 + }, + { + "epoch": 9.5, + "learning_rate": 4.5252e-05, + "loss": 8.6783, + "step": 1187000 + }, + { + "epoch": 9.5, + "learning_rate": 4.525e-05, + "loss": 8.698, + "step": 1187500 + }, + { + "epoch": 9.5, + "learning_rate": 4.5248000000000005e-05, + "loss": 8.6608, + "step": 1188000 + }, + { + "epoch": 9.51, + "learning_rate": 4.5246e-05, + "loss": 8.6811, + "step": 1188500 + }, + { + "epoch": 9.51, + "learning_rate": 4.5244e-05, + "loss": 8.6703, + "step": 1189000 + }, + { + "epoch": 9.52, + "learning_rate": 4.5242e-05, + "loss": 8.6833, + "step": 1189500 + }, + { + "epoch": 9.52, + "learning_rate": 4.524000000000001e-05, + "loss": 8.6919, + "step": 1190000 + }, + { + "epoch": 9.52, + "learning_rate": 4.5238e-05, + "loss": 8.6848, + "step": 1190500 + }, + { + "epoch": 9.53, + "learning_rate": 4.5236e-05, + "loss": 8.7048, + "step": 1191000 + }, + { + "epoch": 9.53, + "learning_rate": 4.5234e-05, + "loss": 8.6826, + "step": 1191500 + }, + { + "epoch": 9.54, + "learning_rate": 4.5232e-05, + "loss": 8.6761, + "step": 1192000 + }, + { + "epoch": 9.54, + "learning_rate": 4.523e-05, + "loss": 8.6748, + "step": 1192500 + }, + { + "epoch": 9.54, + "learning_rate": 4.5228e-05, + "loss": 8.6893, + "step": 1193000 + }, + { + "epoch": 9.55, + "learning_rate": 4.5226000000000004e-05, + "loss": 8.6839, + "step": 1193500 + }, + { + "epoch": 9.55, + "learning_rate": 4.5224e-05, + "loss": 8.6602, + "step": 1194000 + }, + { + "epoch": 9.56, + "learning_rate": 4.5222e-05, + "loss": 8.6843, + "step": 1194500 + }, + { + "epoch": 9.56, + "learning_rate": 4.5220000000000004e-05, + "loss": 8.6746, + "step": 1195000 + }, + { + "epoch": 9.56, + "learning_rate": 4.5218000000000007e-05, + "loss": 8.6799, + "step": 1195500 + }, + { + "epoch": 9.57, + "learning_rate": 4.5216e-05, + "loss": 8.6897, + "step": 1196000 + }, + { + "epoch": 9.57, + "learning_rate": 4.5214e-05, + "loss": 8.6752, + "step": 1196500 + }, + { + "epoch": 9.58, + "learning_rate": 4.521200000000001e-05, + "loss": 8.6867, + "step": 1197000 + }, + { + "epoch": 9.58, + "learning_rate": 4.521e-05, + "loss": 8.7146, + "step": 1197500 + }, + { + "epoch": 9.58, + "learning_rate": 4.5208e-05, + "loss": 8.6859, + "step": 1198000 + }, + { + "epoch": 9.59, + "learning_rate": 4.5206e-05, + "loss": 8.6733, + "step": 1198500 + }, + { + "epoch": 9.59, + "learning_rate": 4.5204e-05, + "loss": 8.6976, + "step": 1199000 + }, + { + "epoch": 9.6, + "learning_rate": 4.5202e-05, + "loss": 8.7035, + "step": 1199500 + }, + { + "epoch": 9.6, + "learning_rate": 4.52e-05, + "loss": 8.7009, + "step": 1200000 + }, + { + "epoch": 9.6, + "learning_rate": 4.5198e-05, + "loss": 8.6647, + "step": 1200500 + }, + { + "epoch": 9.61, + "learning_rate": 4.5196e-05, + "loss": 8.6873, + "step": 1201000 + }, + { + "epoch": 9.61, + "learning_rate": 4.5194e-05, + "loss": 8.6858, + "step": 1201500 + }, + { + "epoch": 9.62, + "learning_rate": 4.5192000000000004e-05, + "loss": 8.715, + "step": 1202000 + }, + { + "epoch": 9.62, + "learning_rate": 4.5190000000000006e-05, + "loss": 8.7143, + "step": 1202500 + }, + { + "epoch": 9.62, + "learning_rate": 4.5188e-05, + "loss": 8.6628, + "step": 1203000 + }, + { + "epoch": 9.63, + "learning_rate": 4.5186e-05, + "loss": 8.7013, + "step": 1203500 + }, + { + "epoch": 9.63, + "learning_rate": 4.5184000000000006e-05, + "loss": 8.6764, + "step": 1204000 + }, + { + "epoch": 9.64, + "learning_rate": 4.5182e-05, + "loss": 8.6965, + "step": 1204500 + }, + { + "epoch": 9.64, + "learning_rate": 4.518e-05, + "loss": 8.6844, + "step": 1205000 + }, + { + "epoch": 9.64, + "learning_rate": 4.517800000000001e-05, + "loss": 8.6834, + "step": 1205500 + }, + { + "epoch": 9.65, + "learning_rate": 4.5176e-05, + "loss": 8.705, + "step": 1206000 + }, + { + "epoch": 9.65, + "learning_rate": 4.5174e-05, + "loss": 8.683, + "step": 1206500 + }, + { + "epoch": 9.66, + "learning_rate": 4.5172e-05, + "loss": 8.6824, + "step": 1207000 + }, + { + "epoch": 9.66, + "learning_rate": 4.517e-05, + "loss": 8.6833, + "step": 1207500 + }, + { + "epoch": 9.66, + "learning_rate": 4.5168000000000005e-05, + "loss": 8.6748, + "step": 1208000 + }, + { + "epoch": 9.67, + "learning_rate": 4.5166e-05, + "loss": 8.6791, + "step": 1208500 + }, + { + "epoch": 9.67, + "learning_rate": 4.5164e-05, + "loss": 8.6444, + "step": 1209000 + }, + { + "epoch": 9.68, + "learning_rate": 4.5162000000000006e-05, + "loss": 8.6819, + "step": 1209500 + }, + { + "epoch": 9.68, + "learning_rate": 4.516e-05, + "loss": 8.6921, + "step": 1210000 + }, + { + "epoch": 9.68, + "learning_rate": 4.5158000000000004e-05, + "loss": 8.6828, + "step": 1210500 + }, + { + "epoch": 9.69, + "learning_rate": 4.5156000000000006e-05, + "loss": 8.6836, + "step": 1211000 + }, + { + "epoch": 9.69, + "learning_rate": 4.5154e-05, + "loss": 8.7005, + "step": 1211500 + }, + { + "epoch": 9.7, + "learning_rate": 4.5152e-05, + "loss": 8.6894, + "step": 1212000 + }, + { + "epoch": 9.7, + "learning_rate": 4.5150000000000006e-05, + "loss": 8.6928, + "step": 1212500 + }, + { + "epoch": 9.7, + "learning_rate": 4.5148e-05, + "loss": 8.6539, + "step": 1213000 + }, + { + "epoch": 9.71, + "learning_rate": 4.5146e-05, + "loss": 8.6771, + "step": 1213500 + }, + { + "epoch": 9.71, + "learning_rate": 4.5144e-05, + "loss": 8.6776, + "step": 1214000 + }, + { + "epoch": 9.72, + "learning_rate": 4.5142e-05, + "loss": 8.7029, + "step": 1214500 + }, + { + "epoch": 9.72, + "learning_rate": 4.5140000000000005e-05, + "loss": 8.687, + "step": 1215000 + }, + { + "epoch": 9.72, + "learning_rate": 4.5138e-05, + "loss": 8.6748, + "step": 1215500 + }, + { + "epoch": 9.73, + "learning_rate": 4.5136e-05, + "loss": 8.673, + "step": 1216000 + }, + { + "epoch": 9.73, + "learning_rate": 4.5134000000000005e-05, + "loss": 8.6738, + "step": 1216500 + }, + { + "epoch": 9.74, + "learning_rate": 4.5132e-05, + "loss": 8.6695, + "step": 1217000 + }, + { + "epoch": 9.74, + "learning_rate": 4.513e-05, + "loss": 8.6698, + "step": 1217500 + }, + { + "epoch": 9.74, + "learning_rate": 4.5128000000000006e-05, + "loss": 8.6836, + "step": 1218000 + }, + { + "epoch": 9.75, + "learning_rate": 4.5126e-05, + "loss": 8.6994, + "step": 1218500 + }, + { + "epoch": 9.75, + "learning_rate": 4.5124e-05, + "loss": 8.6496, + "step": 1219000 + }, + { + "epoch": 9.76, + "learning_rate": 4.5122000000000006e-05, + "loss": 8.6957, + "step": 1219500 + }, + { + "epoch": 9.76, + "learning_rate": 4.512e-05, + "loss": 8.6748, + "step": 1220000 + }, + { + "epoch": 9.76, + "learning_rate": 4.5118000000000004e-05, + "loss": 8.6937, + "step": 1220500 + }, + { + "epoch": 9.77, + "learning_rate": 4.5116e-05, + "loss": 8.6993, + "step": 1221000 + }, + { + "epoch": 9.77, + "learning_rate": 4.5114e-05, + "loss": 8.6919, + "step": 1221500 + }, + { + "epoch": 9.78, + "learning_rate": 4.5112000000000004e-05, + "loss": 8.6906, + "step": 1222000 + }, + { + "epoch": 9.78, + "learning_rate": 4.511e-05, + "loss": 8.669, + "step": 1222500 + }, + { + "epoch": 9.78, + "learning_rate": 4.5108e-05, + "loss": 8.678, + "step": 1223000 + }, + { + "epoch": 9.79, + "learning_rate": 4.5106000000000005e-05, + "loss": 8.6985, + "step": 1223500 + }, + { + "epoch": 9.79, + "learning_rate": 4.5104e-05, + "loss": 8.6903, + "step": 1224000 + }, + { + "epoch": 9.8, + "learning_rate": 4.5102e-05, + "loss": 8.7035, + "step": 1224500 + }, + { + "epoch": 9.8, + "learning_rate": 4.5100000000000005e-05, + "loss": 8.6777, + "step": 1225000 + }, + { + "epoch": 9.8, + "learning_rate": 4.5098e-05, + "loss": 8.6877, + "step": 1225500 + }, + { + "epoch": 9.81, + "learning_rate": 4.5096e-05, + "loss": 8.6793, + "step": 1226000 + }, + { + "epoch": 9.81, + "learning_rate": 4.5094000000000006e-05, + "loss": 8.6831, + "step": 1226500 + }, + { + "epoch": 9.82, + "learning_rate": 4.5092e-05, + "loss": 8.6731, + "step": 1227000 + }, + { + "epoch": 9.82, + "learning_rate": 4.5090000000000004e-05, + "loss": 8.6718, + "step": 1227500 + }, + { + "epoch": 9.82, + "learning_rate": 4.5088e-05, + "loss": 8.6769, + "step": 1228000 + }, + { + "epoch": 9.83, + "learning_rate": 4.5086e-05, + "loss": 8.6699, + "step": 1228500 + }, + { + "epoch": 9.83, + "learning_rate": 4.5084000000000004e-05, + "loss": 8.7083, + "step": 1229000 + }, + { + "epoch": 9.84, + "learning_rate": 4.5082e-05, + "loss": 8.6809, + "step": 1229500 + }, + { + "epoch": 9.84, + "learning_rate": 4.508e-05, + "loss": 8.6919, + "step": 1230000 + }, + { + "epoch": 9.84, + "learning_rate": 4.5078000000000004e-05, + "loss": 8.6801, + "step": 1230500 + }, + { + "epoch": 9.85, + "learning_rate": 4.5076e-05, + "loss": 8.6558, + "step": 1231000 + }, + { + "epoch": 9.85, + "learning_rate": 4.5074e-05, + "loss": 8.6635, + "step": 1231500 + }, + { + "epoch": 9.86, + "learning_rate": 4.5072000000000005e-05, + "loss": 8.6968, + "step": 1232000 + }, + { + "epoch": 9.86, + "learning_rate": 4.507e-05, + "loss": 8.675, + "step": 1232500 + }, + { + "epoch": 9.86, + "learning_rate": 4.5068e-05, + "loss": 8.6717, + "step": 1233000 + }, + { + "epoch": 9.87, + "learning_rate": 4.5066000000000005e-05, + "loss": 8.6742, + "step": 1233500 + }, + { + "epoch": 9.87, + "learning_rate": 4.5064e-05, + "loss": 8.6858, + "step": 1234000 + }, + { + "epoch": 9.88, + "learning_rate": 4.5062e-05, + "loss": 8.6819, + "step": 1234500 + }, + { + "epoch": 9.88, + "learning_rate": 4.506e-05, + "loss": 8.6757, + "step": 1235000 + }, + { + "epoch": 9.88, + "learning_rate": 4.5058e-05, + "loss": 8.6685, + "step": 1235500 + }, + { + "epoch": 9.89, + "learning_rate": 4.5056000000000004e-05, + "loss": 8.6693, + "step": 1236000 + }, + { + "epoch": 9.89, + "learning_rate": 4.5054e-05, + "loss": 8.6885, + "step": 1236500 + }, + { + "epoch": 9.9, + "learning_rate": 4.5052e-05, + "loss": 8.6897, + "step": 1237000 + }, + { + "epoch": 9.9, + "learning_rate": 4.5050000000000004e-05, + "loss": 8.6522, + "step": 1237500 + }, + { + "epoch": 9.9, + "learning_rate": 4.5048e-05, + "loss": 8.6907, + "step": 1238000 + }, + { + "epoch": 9.91, + "learning_rate": 4.5046e-05, + "loss": 8.6739, + "step": 1238500 + }, + { + "epoch": 9.91, + "learning_rate": 4.5044000000000004e-05, + "loss": 8.7044, + "step": 1239000 + }, + { + "epoch": 9.92, + "learning_rate": 4.5042e-05, + "loss": 8.663, + "step": 1239500 + }, + { + "epoch": 9.92, + "learning_rate": 4.504e-05, + "loss": 8.6775, + "step": 1240000 + }, + { + "epoch": 9.92, + "learning_rate": 4.5038000000000005e-05, + "loss": 8.6476, + "step": 1240500 + }, + { + "epoch": 9.93, + "learning_rate": 4.5036e-05, + "loss": 8.698, + "step": 1241000 + }, + { + "epoch": 9.93, + "learning_rate": 4.5034e-05, + "loss": 8.658, + "step": 1241500 + }, + { + "epoch": 9.94, + "learning_rate": 4.5032000000000005e-05, + "loss": 8.6879, + "step": 1242000 + }, + { + "epoch": 9.94, + "learning_rate": 4.503e-05, + "loss": 8.6903, + "step": 1242500 + }, + { + "epoch": 9.94, + "learning_rate": 4.5028e-05, + "loss": 8.6587, + "step": 1243000 + }, + { + "epoch": 9.95, + "learning_rate": 4.5026e-05, + "loss": 8.6678, + "step": 1243500 + }, + { + "epoch": 9.95, + "learning_rate": 4.5024e-05, + "loss": 8.6691, + "step": 1244000 + }, + { + "epoch": 9.96, + "learning_rate": 4.5022000000000004e-05, + "loss": 8.6914, + "step": 1244500 + }, + { + "epoch": 9.96, + "learning_rate": 4.502e-05, + "loss": 8.6846, + "step": 1245000 + }, + { + "epoch": 9.96, + "learning_rate": 4.5018e-05, + "loss": 8.6758, + "step": 1245500 + }, + { + "epoch": 9.97, + "learning_rate": 4.5016000000000004e-05, + "loss": 8.6839, + "step": 1246000 + }, + { + "epoch": 9.97, + "learning_rate": 4.5014e-05, + "loss": 8.7049, + "step": 1246500 + }, + { + "epoch": 9.98, + "learning_rate": 4.5012e-05, + "loss": 8.6841, + "step": 1247000 + }, + { + "epoch": 9.98, + "learning_rate": 4.5010000000000004e-05, + "loss": 8.702, + "step": 1247500 + }, + { + "epoch": 9.98, + "learning_rate": 4.5008e-05, + "loss": 8.699, + "step": 1248000 + }, + { + "epoch": 9.99, + "learning_rate": 4.5006e-05, + "loss": 8.693, + "step": 1248500 + }, + { + "epoch": 9.99, + "learning_rate": 4.5004000000000005e-05, + "loss": 8.6829, + "step": 1249000 + }, + { + "epoch": 10.0, + "learning_rate": 4.5002e-05, + "loss": 8.7048, + "step": 1249500 + }, + { + "epoch": 10.0, + "learning_rate": 4.5e-05, + "loss": 8.6988, + "step": 1250000 + }, + { + "epoch": 10.0, + "learning_rate": 4.4998e-05, + "loss": 8.6821, + "step": 1250500 + }, + { + "epoch": 10.01, + "learning_rate": 4.499600000000001e-05, + "loss": 8.6763, + "step": 1251000 + }, + { + "epoch": 10.01, + "learning_rate": 4.4994e-05, + "loss": 8.7007, + "step": 1251500 + }, + { + "epoch": 10.02, + "learning_rate": 4.4992e-05, + "loss": 8.694, + "step": 1252000 + }, + { + "epoch": 10.02, + "learning_rate": 4.499e-05, + "loss": 8.6748, + "step": 1252500 + }, + { + "epoch": 10.02, + "learning_rate": 4.4988000000000004e-05, + "loss": 8.6479, + "step": 1253000 + }, + { + "epoch": 10.03, + "learning_rate": 4.4986e-05, + "loss": 8.6867, + "step": 1253500 + }, + { + "epoch": 10.03, + "learning_rate": 4.4984e-05, + "loss": 8.6867, + "step": 1254000 + }, + { + "epoch": 10.04, + "learning_rate": 4.4982000000000004e-05, + "loss": 8.6843, + "step": 1254500 + }, + { + "epoch": 10.04, + "learning_rate": 4.498e-05, + "loss": 8.6788, + "step": 1255000 + }, + { + "epoch": 10.04, + "learning_rate": 4.4978e-05, + "loss": 8.6696, + "step": 1255500 + }, + { + "epoch": 10.05, + "learning_rate": 4.4976000000000004e-05, + "loss": 8.6893, + "step": 1256000 + }, + { + "epoch": 10.05, + "learning_rate": 4.4974e-05, + "loss": 8.6841, + "step": 1256500 + }, + { + "epoch": 10.06, + "learning_rate": 4.4972e-05, + "loss": 8.6759, + "step": 1257000 + }, + { + "epoch": 10.06, + "learning_rate": 4.497e-05, + "loss": 8.7065, + "step": 1257500 + }, + { + "epoch": 10.06, + "learning_rate": 4.496800000000001e-05, + "loss": 8.6793, + "step": 1258000 + }, + { + "epoch": 10.07, + "learning_rate": 4.4966e-05, + "loss": 8.6588, + "step": 1258500 + }, + { + "epoch": 10.07, + "learning_rate": 4.4964e-05, + "loss": 8.6842, + "step": 1259000 + }, + { + "epoch": 10.08, + "learning_rate": 4.4962e-05, + "loss": 8.704, + "step": 1259500 + }, + { + "epoch": 10.08, + "learning_rate": 4.496e-05, + "loss": 8.6781, + "step": 1260000 + }, + { + "epoch": 10.08, + "learning_rate": 4.4958e-05, + "loss": 8.6905, + "step": 1260500 + }, + { + "epoch": 10.09, + "learning_rate": 4.4956e-05, + "loss": 8.6818, + "step": 1261000 + }, + { + "epoch": 10.09, + "learning_rate": 4.4954000000000004e-05, + "loss": 8.6809, + "step": 1261500 + }, + { + "epoch": 10.1, + "learning_rate": 4.4952e-05, + "loss": 8.6735, + "step": 1262000 + }, + { + "epoch": 10.1, + "learning_rate": 4.495e-05, + "loss": 8.6851, + "step": 1262500 + }, + { + "epoch": 10.1, + "learning_rate": 4.4948000000000004e-05, + "loss": 8.6629, + "step": 1263000 + }, + { + "epoch": 10.11, + "learning_rate": 4.4946000000000006e-05, + "loss": 8.6771, + "step": 1263500 + }, + { + "epoch": 10.11, + "learning_rate": 4.4944e-05, + "loss": 8.6778, + "step": 1264000 + }, + { + "epoch": 10.12, + "learning_rate": 4.4942e-05, + "loss": 8.6912, + "step": 1264500 + }, + { + "epoch": 10.12, + "learning_rate": 4.494000000000001e-05, + "loss": 8.6871, + "step": 1265000 + }, + { + "epoch": 10.12, + "learning_rate": 4.4938e-05, + "loss": 8.6768, + "step": 1265500 + }, + { + "epoch": 10.13, + "learning_rate": 4.4936e-05, + "loss": 8.6829, + "step": 1266000 + }, + { + "epoch": 10.13, + "learning_rate": 4.493400000000001e-05, + "loss": 8.6759, + "step": 1266500 + }, + { + "epoch": 10.14, + "learning_rate": 4.4932e-05, + "loss": 8.6955, + "step": 1267000 + }, + { + "epoch": 10.14, + "learning_rate": 4.493e-05, + "loss": 8.6918, + "step": 1267500 + }, + { + "epoch": 10.14, + "learning_rate": 4.4928e-05, + "loss": 8.6779, + "step": 1268000 + }, + { + "epoch": 10.15, + "learning_rate": 4.4926e-05, + "loss": 8.7041, + "step": 1268500 + }, + { + "epoch": 10.15, + "learning_rate": 4.4924e-05, + "loss": 8.6847, + "step": 1269000 + }, + { + "epoch": 10.16, + "learning_rate": 4.4922e-05, + "loss": 8.6974, + "step": 1269500 + }, + { + "epoch": 10.16, + "learning_rate": 4.4920000000000004e-05, + "loss": 8.6668, + "step": 1270000 + }, + { + "epoch": 10.16, + "learning_rate": 4.4918000000000006e-05, + "loss": 8.6764, + "step": 1270500 + }, + { + "epoch": 10.17, + "learning_rate": 4.4916e-05, + "loss": 8.7033, + "step": 1271000 + }, + { + "epoch": 10.17, + "learning_rate": 4.4914e-05, + "loss": 8.6782, + "step": 1271500 + }, + { + "epoch": 10.18, + "learning_rate": 4.4912000000000006e-05, + "loss": 8.6733, + "step": 1272000 + }, + { + "epoch": 10.18, + "learning_rate": 4.491e-05, + "loss": 8.6912, + "step": 1272500 + }, + { + "epoch": 10.18, + "learning_rate": 4.4908e-05, + "loss": 8.6948, + "step": 1273000 + }, + { + "epoch": 10.19, + "learning_rate": 4.490600000000001e-05, + "loss": 8.6829, + "step": 1273500 + }, + { + "epoch": 10.19, + "learning_rate": 4.4904e-05, + "loss": 8.6759, + "step": 1274000 + }, + { + "epoch": 10.2, + "learning_rate": 4.4902e-05, + "loss": 8.6712, + "step": 1274500 + }, + { + "epoch": 10.2, + "learning_rate": 4.49e-05, + "loss": 8.6979, + "step": 1275000 + }, + { + "epoch": 10.2, + "learning_rate": 4.4898e-05, + "loss": 8.677, + "step": 1275500 + }, + { + "epoch": 10.21, + "learning_rate": 4.4896000000000005e-05, + "loss": 8.692, + "step": 1276000 + }, + { + "epoch": 10.21, + "learning_rate": 4.4894e-05, + "loss": 8.6974, + "step": 1276500 + }, + { + "epoch": 10.22, + "learning_rate": 4.4892e-05, + "loss": 8.6646, + "step": 1277000 + }, + { + "epoch": 10.22, + "learning_rate": 4.4890000000000006e-05, + "loss": 8.703, + "step": 1277500 + }, + { + "epoch": 10.22, + "learning_rate": 4.4888e-05, + "loss": 8.6844, + "step": 1278000 + }, + { + "epoch": 10.23, + "learning_rate": 4.4886000000000004e-05, + "loss": 8.6685, + "step": 1278500 + }, + { + "epoch": 10.23, + "learning_rate": 4.4884000000000006e-05, + "loss": 8.6807, + "step": 1279000 + }, + { + "epoch": 10.24, + "learning_rate": 4.4882e-05, + "loss": 8.6909, + "step": 1279500 + }, + { + "epoch": 10.24, + "learning_rate": 4.488e-05, + "loss": 8.6838, + "step": 1280000 + }, + { + "epoch": 10.24, + "learning_rate": 4.4878000000000006e-05, + "loss": 8.6648, + "step": 1280500 + }, + { + "epoch": 10.25, + "learning_rate": 4.4876e-05, + "loss": 8.6703, + "step": 1281000 + }, + { + "epoch": 10.25, + "learning_rate": 4.4874000000000004e-05, + "loss": 8.7103, + "step": 1281500 + }, + { + "epoch": 10.26, + "learning_rate": 4.4872e-05, + "loss": 8.684, + "step": 1282000 + }, + { + "epoch": 10.26, + "learning_rate": 4.487e-05, + "loss": 8.645, + "step": 1282500 + }, + { + "epoch": 10.26, + "learning_rate": 4.4868000000000005e-05, + "loss": 8.6707, + "step": 1283000 + }, + { + "epoch": 10.27, + "learning_rate": 4.4866e-05, + "loss": 8.6758, + "step": 1283500 + }, + { + "epoch": 10.27, + "learning_rate": 4.4864e-05, + "loss": 8.679, + "step": 1284000 + }, + { + "epoch": 10.28, + "learning_rate": 4.4862000000000005e-05, + "loss": 8.672, + "step": 1284500 + }, + { + "epoch": 10.28, + "learning_rate": 4.486e-05, + "loss": 8.6981, + "step": 1285000 + }, + { + "epoch": 10.28, + "learning_rate": 4.4858e-05, + "loss": 8.6861, + "step": 1285500 + }, + { + "epoch": 10.29, + "learning_rate": 4.4856000000000006e-05, + "loss": 8.6675, + "step": 1286000 + }, + { + "epoch": 10.29, + "learning_rate": 4.4854e-05, + "loss": 8.6978, + "step": 1286500 + }, + { + "epoch": 10.3, + "learning_rate": 4.4852e-05, + "loss": 8.7052, + "step": 1287000 + }, + { + "epoch": 10.3, + "learning_rate": 4.4850000000000006e-05, + "loss": 8.6748, + "step": 1287500 + }, + { + "epoch": 10.3, + "learning_rate": 4.4848e-05, + "loss": 8.6747, + "step": 1288000 + }, + { + "epoch": 10.31, + "learning_rate": 4.4846000000000004e-05, + "loss": 8.6947, + "step": 1288500 + }, + { + "epoch": 10.31, + "learning_rate": 4.4844e-05, + "loss": 8.6767, + "step": 1289000 + }, + { + "epoch": 10.32, + "learning_rate": 4.4842e-05, + "loss": 8.6911, + "step": 1289500 + }, + { + "epoch": 10.32, + "learning_rate": 4.4840000000000004e-05, + "loss": 8.6862, + "step": 1290000 + }, + { + "epoch": 10.32, + "learning_rate": 4.4838e-05, + "loss": 8.6846, + "step": 1290500 + }, + { + "epoch": 10.33, + "learning_rate": 4.4836e-05, + "loss": 8.6587, + "step": 1291000 + }, + { + "epoch": 10.33, + "learning_rate": 4.4834000000000005e-05, + "loss": 8.6888, + "step": 1291500 + }, + { + "epoch": 10.34, + "learning_rate": 4.4832e-05, + "loss": 8.7028, + "step": 1292000 + }, + { + "epoch": 10.34, + "learning_rate": 4.483e-05, + "loss": 8.669, + "step": 1292500 + }, + { + "epoch": 10.34, + "learning_rate": 4.4828000000000005e-05, + "loss": 8.6689, + "step": 1293000 + }, + { + "epoch": 10.35, + "learning_rate": 4.4826e-05, + "loss": 8.6661, + "step": 1293500 + }, + { + "epoch": 10.35, + "learning_rate": 4.4824e-05, + "loss": 8.6998, + "step": 1294000 + }, + { + "epoch": 10.36, + "learning_rate": 4.4822000000000006e-05, + "loss": 8.6838, + "step": 1294500 + }, + { + "epoch": 10.36, + "learning_rate": 4.482e-05, + "loss": 8.6834, + "step": 1295000 + }, + { + "epoch": 10.36, + "learning_rate": 4.4818000000000004e-05, + "loss": 8.6713, + "step": 1295500 + }, + { + "epoch": 10.37, + "learning_rate": 4.4816e-05, + "loss": 8.672, + "step": 1296000 + }, + { + "epoch": 10.37, + "learning_rate": 4.4814e-05, + "loss": 8.6941, + "step": 1296500 + }, + { + "epoch": 10.38, + "learning_rate": 4.4812000000000004e-05, + "loss": 8.6948, + "step": 1297000 + }, + { + "epoch": 10.38, + "learning_rate": 4.481e-05, + "loss": 8.6743, + "step": 1297500 + }, + { + "epoch": 10.38, + "learning_rate": 4.4808e-05, + "loss": 8.6913, + "step": 1298000 + }, + { + "epoch": 10.39, + "learning_rate": 4.4806000000000004e-05, + "loss": 8.699, + "step": 1298500 + }, + { + "epoch": 10.39, + "learning_rate": 4.4804e-05, + "loss": 8.6978, + "step": 1299000 + }, + { + "epoch": 10.4, + "learning_rate": 4.4802e-05, + "loss": 8.6837, + "step": 1299500 + }, + { + "epoch": 10.4, + "learning_rate": 4.4800000000000005e-05, + "loss": 8.6797, + "step": 1300000 + }, + { + "epoch": 10.4, + "learning_rate": 4.4798e-05, + "loss": 8.6873, + "step": 1300500 + }, + { + "epoch": 10.41, + "learning_rate": 4.4796e-05, + "loss": 8.6678, + "step": 1301000 + }, + { + "epoch": 10.41, + "learning_rate": 4.4794000000000005e-05, + "loss": 8.6768, + "step": 1301500 + }, + { + "epoch": 10.42, + "learning_rate": 4.4792e-05, + "loss": 8.6882, + "step": 1302000 + }, + { + "epoch": 10.42, + "learning_rate": 4.479e-05, + "loss": 8.6682, + "step": 1302500 + }, + { + "epoch": 10.42, + "learning_rate": 4.4788000000000006e-05, + "loss": 8.6659, + "step": 1303000 + }, + { + "epoch": 10.43, + "learning_rate": 4.4786e-05, + "loss": 8.676, + "step": 1303500 + }, + { + "epoch": 10.43, + "learning_rate": 4.4784000000000004e-05, + "loss": 8.6675, + "step": 1304000 + }, + { + "epoch": 10.44, + "learning_rate": 4.4782e-05, + "loss": 8.6916, + "step": 1304500 + }, + { + "epoch": 10.44, + "learning_rate": 4.478e-05, + "loss": 8.674, + "step": 1305000 + }, + { + "epoch": 10.44, + "learning_rate": 4.4778000000000004e-05, + "loss": 8.7042, + "step": 1305500 + }, + { + "epoch": 10.45, + "learning_rate": 4.4776e-05, + "loss": 8.6648, + "step": 1306000 + }, + { + "epoch": 10.45, + "learning_rate": 4.4774e-05, + "loss": 8.6879, + "step": 1306500 + }, + { + "epoch": 10.46, + "learning_rate": 4.4772000000000004e-05, + "loss": 8.6917, + "step": 1307000 + }, + { + "epoch": 10.46, + "learning_rate": 4.477e-05, + "loss": 8.6734, + "step": 1307500 + }, + { + "epoch": 10.46, + "learning_rate": 4.4768e-05, + "loss": 8.6804, + "step": 1308000 + }, + { + "epoch": 10.47, + "learning_rate": 4.4766000000000005e-05, + "loss": 8.6807, + "step": 1308500 + }, + { + "epoch": 10.47, + "learning_rate": 4.4764e-05, + "loss": 8.6974, + "step": 1309000 + }, + { + "epoch": 10.48, + "learning_rate": 4.4762e-05, + "loss": 8.6956, + "step": 1309500 + }, + { + "epoch": 10.48, + "learning_rate": 4.4760000000000005e-05, + "loss": 8.6606, + "step": 1310000 + }, + { + "epoch": 10.48, + "learning_rate": 4.4758e-05, + "loss": 8.6837, + "step": 1310500 + }, + { + "epoch": 10.49, + "learning_rate": 4.4756e-05, + "loss": 8.6747, + "step": 1311000 + }, + { + "epoch": 10.49, + "learning_rate": 4.4754e-05, + "loss": 8.6753, + "step": 1311500 + }, + { + "epoch": 10.5, + "learning_rate": 4.4752e-05, + "loss": 8.6538, + "step": 1312000 + }, + { + "epoch": 10.5, + "learning_rate": 4.4750000000000004e-05, + "loss": 8.6761, + "step": 1312500 + }, + { + "epoch": 10.5, + "learning_rate": 4.4748e-05, + "loss": 8.68, + "step": 1313000 + }, + { + "epoch": 10.51, + "learning_rate": 4.4746e-05, + "loss": 8.689, + "step": 1313500 + }, + { + "epoch": 10.51, + "learning_rate": 4.4744000000000004e-05, + "loss": 8.6889, + "step": 1314000 + }, + { + "epoch": 10.52, + "learning_rate": 4.4742e-05, + "loss": 8.6552, + "step": 1314500 + }, + { + "epoch": 10.52, + "learning_rate": 4.474e-05, + "loss": 8.6928, + "step": 1315000 + }, + { + "epoch": 10.52, + "learning_rate": 4.4738000000000004e-05, + "loss": 8.6594, + "step": 1315500 + }, + { + "epoch": 10.53, + "learning_rate": 4.4736e-05, + "loss": 8.6797, + "step": 1316000 + }, + { + "epoch": 10.53, + "learning_rate": 4.4734e-05, + "loss": 8.6711, + "step": 1316500 + }, + { + "epoch": 10.54, + "learning_rate": 4.4732000000000005e-05, + "loss": 8.6532, + "step": 1317000 + }, + { + "epoch": 10.54, + "learning_rate": 4.473e-05, + "loss": 8.6831, + "step": 1317500 + }, + { + "epoch": 10.54, + "learning_rate": 4.4728e-05, + "loss": 8.6986, + "step": 1318000 + }, + { + "epoch": 10.55, + "learning_rate": 4.4726e-05, + "loss": 8.6883, + "step": 1318500 + }, + { + "epoch": 10.55, + "learning_rate": 4.472400000000001e-05, + "loss": 8.7141, + "step": 1319000 + }, + { + "epoch": 10.56, + "learning_rate": 4.4722e-05, + "loss": 8.6755, + "step": 1319500 + }, + { + "epoch": 10.56, + "learning_rate": 4.472e-05, + "loss": 8.6628, + "step": 1320000 + }, + { + "epoch": 10.56, + "learning_rate": 4.4718e-05, + "loss": 8.6673, + "step": 1320500 + }, + { + "epoch": 10.57, + "learning_rate": 4.4716000000000004e-05, + "loss": 8.6782, + "step": 1321000 + }, + { + "epoch": 10.57, + "learning_rate": 4.4714e-05, + "loss": 8.6657, + "step": 1321500 + }, + { + "epoch": 10.58, + "learning_rate": 4.4712e-05, + "loss": 8.6986, + "step": 1322000 + }, + { + "epoch": 10.58, + "learning_rate": 4.4710000000000004e-05, + "loss": 8.673, + "step": 1322500 + }, + { + "epoch": 10.58, + "learning_rate": 4.4708e-05, + "loss": 8.6761, + "step": 1323000 + }, + { + "epoch": 10.59, + "learning_rate": 4.4706e-05, + "loss": 8.6888, + "step": 1323500 + }, + { + "epoch": 10.59, + "learning_rate": 4.4704000000000004e-05, + "loss": 8.6774, + "step": 1324000 + }, + { + "epoch": 10.6, + "learning_rate": 4.4702e-05, + "loss": 8.6629, + "step": 1324500 + }, + { + "epoch": 10.6, + "learning_rate": 4.47e-05, + "loss": 8.6626, + "step": 1325000 + }, + { + "epoch": 10.6, + "learning_rate": 4.4698e-05, + "loss": 8.6831, + "step": 1325500 + }, + { + "epoch": 10.61, + "learning_rate": 4.469600000000001e-05, + "loss": 8.6885, + "step": 1326000 + }, + { + "epoch": 10.61, + "learning_rate": 4.4694e-05, + "loss": 8.7342, + "step": 1326500 + }, + { + "epoch": 10.62, + "learning_rate": 4.4692e-05, + "loss": 8.7031, + "step": 1327000 + }, + { + "epoch": 10.62, + "learning_rate": 4.469e-05, + "loss": 8.7079, + "step": 1327500 + }, + { + "epoch": 10.62, + "learning_rate": 4.4688e-05, + "loss": 8.6874, + "step": 1328000 + }, + { + "epoch": 10.63, + "learning_rate": 4.4686e-05, + "loss": 8.7168, + "step": 1328500 + }, + { + "epoch": 10.63, + "learning_rate": 4.4684e-05, + "loss": 8.683, + "step": 1329000 + }, + { + "epoch": 10.64, + "learning_rate": 4.4682000000000004e-05, + "loss": 8.676, + "step": 1329500 + }, + { + "epoch": 10.64, + "learning_rate": 4.468e-05, + "loss": 8.6367, + "step": 1330000 + }, + { + "epoch": 10.64, + "learning_rate": 4.4678e-05, + "loss": 8.6868, + "step": 1330500 + }, + { + "epoch": 10.65, + "learning_rate": 4.4676000000000004e-05, + "loss": 8.7022, + "step": 1331000 + }, + { + "epoch": 10.65, + "learning_rate": 4.4674000000000006e-05, + "loss": 8.7035, + "step": 1331500 + }, + { + "epoch": 10.66, + "learning_rate": 4.4672e-05, + "loss": 8.6841, + "step": 1332000 + }, + { + "epoch": 10.66, + "learning_rate": 4.467e-05, + "loss": 8.6874, + "step": 1332500 + }, + { + "epoch": 10.66, + "learning_rate": 4.466800000000001e-05, + "loss": 8.7151, + "step": 1333000 + }, + { + "epoch": 10.67, + "learning_rate": 4.4666e-05, + "loss": 8.6965, + "step": 1333500 + }, + { + "epoch": 10.67, + "learning_rate": 4.4664e-05, + "loss": 8.6873, + "step": 1334000 + }, + { + "epoch": 10.68, + "learning_rate": 4.466200000000001e-05, + "loss": 8.6843, + "step": 1334500 + }, + { + "epoch": 10.68, + "learning_rate": 4.466e-05, + "loss": 8.6629, + "step": 1335000 + }, + { + "epoch": 10.68, + "learning_rate": 4.4658e-05, + "loss": 8.6947, + "step": 1335500 + }, + { + "epoch": 10.69, + "learning_rate": 4.4656e-05, + "loss": 8.6782, + "step": 1336000 + }, + { + "epoch": 10.69, + "learning_rate": 4.4654e-05, + "loss": 8.6953, + "step": 1336500 + }, + { + "epoch": 10.7, + "learning_rate": 4.4652e-05, + "loss": 8.6806, + "step": 1337000 + }, + { + "epoch": 10.7, + "learning_rate": 4.465e-05, + "loss": 8.6673, + "step": 1337500 + }, + { + "epoch": 10.7, + "learning_rate": 4.4648000000000003e-05, + "loss": 8.6953, + "step": 1338000 + }, + { + "epoch": 10.71, + "learning_rate": 4.4646000000000006e-05, + "loss": 8.6766, + "step": 1338500 + }, + { + "epoch": 10.71, + "learning_rate": 4.4644e-05, + "loss": 8.6922, + "step": 1339000 + }, + { + "epoch": 10.72, + "learning_rate": 4.4642e-05, + "loss": 8.6896, + "step": 1339500 + }, + { + "epoch": 10.72, + "learning_rate": 4.4640000000000006e-05, + "loss": 8.6947, + "step": 1340000 + }, + { + "epoch": 10.72, + "learning_rate": 4.4638e-05, + "loss": 8.6774, + "step": 1340500 + }, + { + "epoch": 10.73, + "learning_rate": 4.4636e-05, + "loss": 8.6984, + "step": 1341000 + }, + { + "epoch": 10.73, + "learning_rate": 4.463400000000001e-05, + "loss": 8.6892, + "step": 1341500 + }, + { + "epoch": 10.74, + "learning_rate": 4.4632e-05, + "loss": 8.6877, + "step": 1342000 + }, + { + "epoch": 10.74, + "learning_rate": 4.463e-05, + "loss": 8.6854, + "step": 1342500 + }, + { + "epoch": 10.74, + "learning_rate": 4.4628e-05, + "loss": 8.7147, + "step": 1343000 + }, + { + "epoch": 10.75, + "learning_rate": 4.4626e-05, + "loss": 8.6754, + "step": 1343500 + }, + { + "epoch": 10.75, + "learning_rate": 4.4624000000000005e-05, + "loss": 8.6926, + "step": 1344000 + }, + { + "epoch": 10.76, + "learning_rate": 4.4622e-05, + "loss": 8.6954, + "step": 1344500 + }, + { + "epoch": 10.76, + "learning_rate": 4.462e-05, + "loss": 8.6694, + "step": 1345000 + }, + { + "epoch": 10.76, + "learning_rate": 4.4618000000000005e-05, + "loss": 8.6997, + "step": 1345500 + }, + { + "epoch": 10.77, + "learning_rate": 4.4616e-05, + "loss": 8.6677, + "step": 1346000 + }, + { + "epoch": 10.77, + "learning_rate": 4.4614000000000003e-05, + "loss": 8.6774, + "step": 1346500 + }, + { + "epoch": 10.78, + "learning_rate": 4.4612000000000006e-05, + "loss": 8.6694, + "step": 1347000 + }, + { + "epoch": 10.78, + "learning_rate": 4.461e-05, + "loss": 8.6681, + "step": 1347500 + }, + { + "epoch": 10.78, + "learning_rate": 4.4608e-05, + "loss": 8.6813, + "step": 1348000 + }, + { + "epoch": 10.79, + "learning_rate": 4.4606000000000006e-05, + "loss": 8.6797, + "step": 1348500 + }, + { + "epoch": 10.79, + "learning_rate": 4.4604e-05, + "loss": 8.6917, + "step": 1349000 + }, + { + "epoch": 10.8, + "learning_rate": 4.4602000000000004e-05, + "loss": 8.6854, + "step": 1349500 + }, + { + "epoch": 10.8, + "learning_rate": 4.46e-05, + "loss": 8.6866, + "step": 1350000 + }, + { + "epoch": 10.8, + "learning_rate": 4.4598e-05, + "loss": 8.6949, + "step": 1350500 + }, + { + "epoch": 10.81, + "learning_rate": 4.4596000000000005e-05, + "loss": 8.6847, + "step": 1351000 + }, + { + "epoch": 10.81, + "learning_rate": 4.4594e-05, + "loss": 8.6616, + "step": 1351500 + }, + { + "epoch": 10.82, + "learning_rate": 4.4592e-05, + "loss": 8.6827, + "step": 1352000 + }, + { + "epoch": 10.82, + "learning_rate": 4.4590000000000005e-05, + "loss": 8.6497, + "step": 1352500 + }, + { + "epoch": 10.82, + "learning_rate": 4.4588e-05, + "loss": 8.685, + "step": 1353000 + }, + { + "epoch": 10.83, + "learning_rate": 4.4586e-05, + "loss": 8.6852, + "step": 1353500 + }, + { + "epoch": 10.83, + "learning_rate": 4.4584000000000005e-05, + "loss": 8.669, + "step": 1354000 + }, + { + "epoch": 10.84, + "learning_rate": 4.4582e-05, + "loss": 8.6911, + "step": 1354500 + }, + { + "epoch": 10.84, + "learning_rate": 4.458e-05, + "loss": 8.6871, + "step": 1355000 + }, + { + "epoch": 10.84, + "learning_rate": 4.4578000000000006e-05, + "loss": 8.6838, + "step": 1355500 + }, + { + "epoch": 10.85, + "learning_rate": 4.4576e-05, + "loss": 8.6989, + "step": 1356000 + }, + { + "epoch": 10.85, + "learning_rate": 4.4574000000000004e-05, + "loss": 8.6843, + "step": 1356500 + }, + { + "epoch": 10.86, + "learning_rate": 4.4572e-05, + "loss": 8.6736, + "step": 1357000 + }, + { + "epoch": 10.86, + "learning_rate": 4.457e-05, + "loss": 8.6755, + "step": 1357500 + }, + { + "epoch": 10.86, + "learning_rate": 4.4568000000000004e-05, + "loss": 8.6829, + "step": 1358000 + }, + { + "epoch": 10.87, + "learning_rate": 4.4566e-05, + "loss": 8.6925, + "step": 1358500 + }, + { + "epoch": 10.87, + "learning_rate": 4.4564e-05, + "loss": 8.6795, + "step": 1359000 + }, + { + "epoch": 10.88, + "learning_rate": 4.4562000000000005e-05, + "loss": 8.6818, + "step": 1359500 + }, + { + "epoch": 10.88, + "learning_rate": 4.456e-05, + "loss": 8.6958, + "step": 1360000 + }, + { + "epoch": 10.88, + "learning_rate": 4.4558e-05, + "loss": 8.6859, + "step": 1360500 + }, + { + "epoch": 10.89, + "learning_rate": 4.4556000000000005e-05, + "loss": 8.6639, + "step": 1361000 + }, + { + "epoch": 10.89, + "learning_rate": 4.4554e-05, + "loss": 8.6765, + "step": 1361500 + }, + { + "epoch": 10.9, + "learning_rate": 4.4552e-05, + "loss": 8.6821, + "step": 1362000 + }, + { + "epoch": 10.9, + "learning_rate": 4.4550000000000005e-05, + "loss": 8.6866, + "step": 1362500 + }, + { + "epoch": 10.9, + "learning_rate": 4.4548e-05, + "loss": 8.6553, + "step": 1363000 + }, + { + "epoch": 10.91, + "learning_rate": 4.4546000000000003e-05, + "loss": 8.6824, + "step": 1363500 + }, + { + "epoch": 10.91, + "learning_rate": 4.4544e-05, + "loss": 8.6674, + "step": 1364000 + }, + { + "epoch": 10.92, + "learning_rate": 4.4542e-05, + "loss": 8.6805, + "step": 1364500 + }, + { + "epoch": 10.92, + "learning_rate": 4.4540000000000004e-05, + "loss": 8.6714, + "step": 1365000 + }, + { + "epoch": 10.92, + "learning_rate": 4.4538e-05, + "loss": 8.7068, + "step": 1365500 + }, + { + "epoch": 10.93, + "learning_rate": 4.4536e-05, + "loss": 8.6629, + "step": 1366000 + }, + { + "epoch": 10.93, + "learning_rate": 4.4534000000000004e-05, + "loss": 8.6774, + "step": 1366500 + }, + { + "epoch": 10.94, + "learning_rate": 4.4532e-05, + "loss": 8.6886, + "step": 1367000 + }, + { + "epoch": 10.94, + "learning_rate": 4.453e-05, + "loss": 8.6808, + "step": 1367500 + }, + { + "epoch": 10.94, + "learning_rate": 4.4528000000000005e-05, + "loss": 8.6658, + "step": 1368000 + }, + { + "epoch": 10.95, + "learning_rate": 4.4526e-05, + "loss": 8.6964, + "step": 1368500 + }, + { + "epoch": 10.95, + "learning_rate": 4.4524e-05, + "loss": 8.6908, + "step": 1369000 + }, + { + "epoch": 10.96, + "learning_rate": 4.4522000000000005e-05, + "loss": 8.6808, + "step": 1369500 + }, + { + "epoch": 10.96, + "learning_rate": 4.452e-05, + "loss": 8.6823, + "step": 1370000 + }, + { + "epoch": 10.96, + "learning_rate": 4.4518e-05, + "loss": 8.6761, + "step": 1370500 + }, + { + "epoch": 10.97, + "learning_rate": 4.4516000000000005e-05, + "loss": 8.685, + "step": 1371000 + }, + { + "epoch": 10.97, + "learning_rate": 4.4514e-05, + "loss": 8.6699, + "step": 1371500 + }, + { + "epoch": 10.98, + "learning_rate": 4.4512000000000003e-05, + "loss": 8.6561, + "step": 1372000 + }, + { + "epoch": 10.98, + "learning_rate": 4.451e-05, + "loss": 8.6768, + "step": 1372500 + }, + { + "epoch": 10.98, + "learning_rate": 4.4508e-05, + "loss": 8.6836, + "step": 1373000 + }, + { + "epoch": 10.99, + "learning_rate": 4.4506000000000004e-05, + "loss": 8.6828, + "step": 1373500 + }, + { + "epoch": 10.99, + "learning_rate": 4.4504e-05, + "loss": 8.6874, + "step": 1374000 + }, + { + "epoch": 11.0, + "learning_rate": 4.4502e-05, + "loss": 8.6723, + "step": 1374500 + }, + { + "epoch": 11.0, + "learning_rate": 4.4500000000000004e-05, + "loss": 8.6568, + "step": 1375000 + }, + { + "epoch": 11.0, + "learning_rate": 4.4498e-05, + "loss": 8.6732, + "step": 1375500 + }, + { + "epoch": 11.01, + "learning_rate": 4.4496e-05, + "loss": 8.6856, + "step": 1376000 + }, + { + "epoch": 11.01, + "learning_rate": 4.4494000000000005e-05, + "loss": 8.6748, + "step": 1376500 + }, + { + "epoch": 11.02, + "learning_rate": 4.4492e-05, + "loss": 8.6798, + "step": 1377000 + }, + { + "epoch": 11.02, + "learning_rate": 4.449e-05, + "loss": 8.6773, + "step": 1377500 + }, + { + "epoch": 11.02, + "learning_rate": 4.4488000000000005e-05, + "loss": 8.686, + "step": 1378000 + }, + { + "epoch": 11.03, + "learning_rate": 4.4486e-05, + "loss": 8.6831, + "step": 1378500 + }, + { + "epoch": 11.03, + "learning_rate": 4.4484e-05, + "loss": 8.6704, + "step": 1379000 + }, + { + "epoch": 11.04, + "learning_rate": 4.4482e-05, + "loss": 8.6722, + "step": 1379500 + }, + { + "epoch": 11.04, + "learning_rate": 4.448e-05, + "loss": 8.6715, + "step": 1380000 + }, + { + "epoch": 11.04, + "learning_rate": 4.4478000000000003e-05, + "loss": 8.6763, + "step": 1380500 + }, + { + "epoch": 11.05, + "learning_rate": 4.4476e-05, + "loss": 8.6949, + "step": 1381000 + }, + { + "epoch": 11.05, + "learning_rate": 4.4474e-05, + "loss": 8.6873, + "step": 1381500 + }, + { + "epoch": 11.06, + "learning_rate": 4.4472000000000004e-05, + "loss": 8.6794, + "step": 1382000 + }, + { + "epoch": 11.06, + "learning_rate": 4.447e-05, + "loss": 8.6865, + "step": 1382500 + }, + { + "epoch": 11.06, + "learning_rate": 4.4468e-05, + "loss": 8.6937, + "step": 1383000 + }, + { + "epoch": 11.07, + "learning_rate": 4.4466000000000004e-05, + "loss": 8.6774, + "step": 1383500 + }, + { + "epoch": 11.07, + "learning_rate": 4.4464e-05, + "loss": 8.671, + "step": 1384000 + }, + { + "epoch": 11.08, + "learning_rate": 4.4462e-05, + "loss": 8.6739, + "step": 1384500 + }, + { + "epoch": 11.08, + "learning_rate": 4.4460000000000005e-05, + "loss": 8.6876, + "step": 1385000 + }, + { + "epoch": 11.08, + "learning_rate": 4.4458e-05, + "loss": 8.6725, + "step": 1385500 + }, + { + "epoch": 11.09, + "learning_rate": 4.4456e-05, + "loss": 8.696, + "step": 1386000 + }, + { + "epoch": 11.09, + "learning_rate": 4.4454e-05, + "loss": 8.6979, + "step": 1386500 + }, + { + "epoch": 11.1, + "learning_rate": 4.445200000000001e-05, + "loss": 8.6636, + "step": 1387000 + }, + { + "epoch": 11.1, + "learning_rate": 4.445e-05, + "loss": 8.6806, + "step": 1387500 + }, + { + "epoch": 11.1, + "learning_rate": 4.4448e-05, + "loss": 8.6741, + "step": 1388000 + }, + { + "epoch": 11.11, + "learning_rate": 4.4446e-05, + "loss": 8.6797, + "step": 1388500 + }, + { + "epoch": 11.11, + "learning_rate": 4.4444000000000003e-05, + "loss": 8.675, + "step": 1389000 + }, + { + "epoch": 11.12, + "learning_rate": 4.4442e-05, + "loss": 8.6788, + "step": 1389500 + }, + { + "epoch": 11.12, + "learning_rate": 4.444e-05, + "loss": 8.6812, + "step": 1390000 + }, + { + "epoch": 11.12, + "learning_rate": 4.4438000000000004e-05, + "loss": 8.7191, + "step": 1390500 + }, + { + "epoch": 11.13, + "learning_rate": 4.4436e-05, + "loss": 8.6671, + "step": 1391000 + }, + { + "epoch": 11.13, + "learning_rate": 4.4434e-05, + "loss": 8.7002, + "step": 1391500 + }, + { + "epoch": 11.14, + "learning_rate": 4.4432000000000004e-05, + "loss": 8.6935, + "step": 1392000 + }, + { + "epoch": 11.14, + "learning_rate": 4.443e-05, + "loss": 8.6774, + "step": 1392500 + }, + { + "epoch": 11.14, + "learning_rate": 4.4428e-05, + "loss": 8.6507, + "step": 1393000 + }, + { + "epoch": 11.15, + "learning_rate": 4.4426e-05, + "loss": 8.7016, + "step": 1393500 + }, + { + "epoch": 11.15, + "learning_rate": 4.442400000000001e-05, + "loss": 8.7073, + "step": 1394000 + }, + { + "epoch": 11.16, + "learning_rate": 4.4422e-05, + "loss": 8.6642, + "step": 1394500 + }, + { + "epoch": 11.16, + "learning_rate": 4.442e-05, + "loss": 8.7026, + "step": 1395000 + }, + { + "epoch": 11.16, + "learning_rate": 4.441800000000001e-05, + "loss": 8.6713, + "step": 1395500 + }, + { + "epoch": 11.17, + "learning_rate": 4.4416e-05, + "loss": 8.6974, + "step": 1396000 + }, + { + "epoch": 11.17, + "learning_rate": 4.4414e-05, + "loss": 8.6899, + "step": 1396500 + }, + { + "epoch": 11.18, + "learning_rate": 4.4412e-05, + "loss": 8.6838, + "step": 1397000 + }, + { + "epoch": 11.18, + "learning_rate": 4.4410000000000003e-05, + "loss": 8.68, + "step": 1397500 + }, + { + "epoch": 11.18, + "learning_rate": 4.4408e-05, + "loss": 8.7208, + "step": 1398000 + }, + { + "epoch": 11.19, + "learning_rate": 4.4406e-05, + "loss": 8.6746, + "step": 1398500 + }, + { + "epoch": 11.19, + "learning_rate": 4.4404000000000004e-05, + "loss": 8.666, + "step": 1399000 + }, + { + "epoch": 11.2, + "learning_rate": 4.4402000000000006e-05, + "loss": 8.6831, + "step": 1399500 + }, + { + "epoch": 11.2, + "learning_rate": 4.44e-05, + "loss": 8.6936, + "step": 1400000 + }, + { + "epoch": 11.2, + "learning_rate": 4.4398e-05, + "loss": 8.6704, + "step": 1400500 + }, + { + "epoch": 11.21, + "learning_rate": 4.4396000000000007e-05, + "loss": 8.6812, + "step": 1401000 + }, + { + "epoch": 11.21, + "learning_rate": 4.4394e-05, + "loss": 8.6934, + "step": 1401500 + }, + { + "epoch": 11.22, + "learning_rate": 4.4392e-05, + "loss": 8.6618, + "step": 1402000 + }, + { + "epoch": 11.22, + "learning_rate": 4.439000000000001e-05, + "loss": 8.689, + "step": 1402500 + }, + { + "epoch": 11.22, + "learning_rate": 4.4388e-05, + "loss": 8.6973, + "step": 1403000 + }, + { + "epoch": 11.23, + "learning_rate": 4.4386e-05, + "loss": 8.677, + "step": 1403500 + }, + { + "epoch": 11.23, + "learning_rate": 4.4384e-05, + "loss": 8.6716, + "step": 1404000 + }, + { + "epoch": 11.24, + "learning_rate": 4.4382e-05, + "loss": 8.6798, + "step": 1404500 + }, + { + "epoch": 11.24, + "learning_rate": 4.438e-05, + "loss": 8.6936, + "step": 1405000 + }, + { + "epoch": 11.24, + "learning_rate": 4.4378e-05, + "loss": 8.7097, + "step": 1405500 + }, + { + "epoch": 11.25, + "learning_rate": 4.4376e-05, + "loss": 8.6674, + "step": 1406000 + }, + { + "epoch": 11.25, + "learning_rate": 4.4374000000000006e-05, + "loss": 8.6778, + "step": 1406500 + }, + { + "epoch": 11.26, + "learning_rate": 4.4372e-05, + "loss": 8.673, + "step": 1407000 + }, + { + "epoch": 11.26, + "learning_rate": 4.4370000000000004e-05, + "loss": 8.6796, + "step": 1407500 + }, + { + "epoch": 11.26, + "learning_rate": 4.4368000000000006e-05, + "loss": 8.6807, + "step": 1408000 + }, + { + "epoch": 11.27, + "learning_rate": 4.4366e-05, + "loss": 8.684, + "step": 1408500 + }, + { + "epoch": 11.27, + "learning_rate": 4.4364e-05, + "loss": 8.6913, + "step": 1409000 + }, + { + "epoch": 11.28, + "learning_rate": 4.4362000000000007e-05, + "loss": 8.717, + "step": 1409500 + }, + { + "epoch": 11.28, + "learning_rate": 4.436e-05, + "loss": 8.6598, + "step": 1410000 + }, + { + "epoch": 11.28, + "learning_rate": 4.4358e-05, + "loss": 8.681, + "step": 1410500 + }, + { + "epoch": 11.29, + "learning_rate": 4.4356e-05, + "loss": 8.6636, + "step": 1411000 + }, + { + "epoch": 11.29, + "learning_rate": 4.4354e-05, + "loss": 8.6663, + "step": 1411500 + }, + { + "epoch": 11.3, + "learning_rate": 4.4352000000000005e-05, + "loss": 8.6876, + "step": 1412000 + }, + { + "epoch": 11.3, + "learning_rate": 4.435e-05, + "loss": 8.6941, + "step": 1412500 + }, + { + "epoch": 11.3, + "learning_rate": 4.4348e-05, + "loss": 8.6862, + "step": 1413000 + }, + { + "epoch": 11.31, + "learning_rate": 4.4346000000000005e-05, + "loss": 8.6784, + "step": 1413500 + }, + { + "epoch": 11.31, + "learning_rate": 4.4344e-05, + "loss": 8.6941, + "step": 1414000 + }, + { + "epoch": 11.32, + "learning_rate": 4.4342e-05, + "loss": 8.6919, + "step": 1414500 + }, + { + "epoch": 11.32, + "learning_rate": 4.4340000000000006e-05, + "loss": 8.6629, + "step": 1415000 + }, + { + "epoch": 11.32, + "learning_rate": 4.4338e-05, + "loss": 8.6665, + "step": 1415500 + }, + { + "epoch": 11.33, + "learning_rate": 4.4336e-05, + "loss": 8.6617, + "step": 1416000 + }, + { + "epoch": 11.33, + "learning_rate": 4.4334000000000006e-05, + "loss": 8.688, + "step": 1416500 + }, + { + "epoch": 11.34, + "learning_rate": 4.4332e-05, + "loss": 8.6783, + "step": 1417000 + }, + { + "epoch": 11.34, + "learning_rate": 4.4330000000000004e-05, + "loss": 8.6857, + "step": 1417500 + }, + { + "epoch": 11.34, + "learning_rate": 4.4328e-05, + "loss": 8.6568, + "step": 1418000 + }, + { + "epoch": 11.35, + "learning_rate": 4.4326e-05, + "loss": 8.7021, + "step": 1418500 + }, + { + "epoch": 11.35, + "learning_rate": 4.4324000000000005e-05, + "loss": 8.6954, + "step": 1419000 + }, + { + "epoch": 11.36, + "learning_rate": 4.4322e-05, + "loss": 8.6757, + "step": 1419500 + }, + { + "epoch": 11.36, + "learning_rate": 4.432e-05, + "loss": 8.6948, + "step": 1420000 + }, + { + "epoch": 11.36, + "learning_rate": 4.4318000000000005e-05, + "loss": 8.6815, + "step": 1420500 + }, + { + "epoch": 11.37, + "learning_rate": 4.4316e-05, + "loss": 8.6836, + "step": 1421000 + }, + { + "epoch": 11.37, + "learning_rate": 4.4314e-05, + "loss": 8.6762, + "step": 1421500 + }, + { + "epoch": 11.38, + "learning_rate": 4.4312000000000005e-05, + "loss": 8.6873, + "step": 1422000 + }, + { + "epoch": 11.38, + "learning_rate": 4.431e-05, + "loss": 8.6883, + "step": 1422500 + }, + { + "epoch": 11.38, + "learning_rate": 4.4307999999999997e-05, + "loss": 8.6727, + "step": 1423000 + }, + { + "epoch": 11.39, + "learning_rate": 4.4306000000000006e-05, + "loss": 8.6866, + "step": 1423500 + }, + { + "epoch": 11.39, + "learning_rate": 4.4304e-05, + "loss": 8.6688, + "step": 1424000 + }, + { + "epoch": 11.4, + "learning_rate": 4.4302000000000004e-05, + "loss": 8.691, + "step": 1424500 + }, + { + "epoch": 11.4, + "learning_rate": 4.43e-05, + "loss": 8.6905, + "step": 1425000 + }, + { + "epoch": 11.4, + "learning_rate": 4.4298e-05, + "loss": 8.6575, + "step": 1425500 + }, + { + "epoch": 11.41, + "learning_rate": 4.4296000000000004e-05, + "loss": 8.6747, + "step": 1426000 + }, + { + "epoch": 11.41, + "learning_rate": 4.4294e-05, + "loss": 8.6863, + "step": 1426500 + }, + { + "epoch": 11.42, + "learning_rate": 4.4292e-05, + "loss": 8.6581, + "step": 1427000 + }, + { + "epoch": 11.42, + "learning_rate": 4.4290000000000005e-05, + "loss": 8.6637, + "step": 1427500 + }, + { + "epoch": 11.42, + "learning_rate": 4.4288e-05, + "loss": 8.6897, + "step": 1428000 + }, + { + "epoch": 11.43, + "learning_rate": 4.4286e-05, + "loss": 8.7029, + "step": 1428500 + }, + { + "epoch": 11.43, + "learning_rate": 4.4284000000000005e-05, + "loss": 8.694, + "step": 1429000 + }, + { + "epoch": 11.44, + "learning_rate": 4.4282e-05, + "loss": 8.6907, + "step": 1429500 + }, + { + "epoch": 11.44, + "learning_rate": 4.428e-05, + "loss": 8.6931, + "step": 1430000 + }, + { + "epoch": 11.44, + "learning_rate": 4.4278000000000005e-05, + "loss": 8.6977, + "step": 1430500 + }, + { + "epoch": 11.45, + "learning_rate": 4.4276e-05, + "loss": 8.68, + "step": 1431000 + }, + { + "epoch": 11.45, + "learning_rate": 4.4274e-05, + "loss": 8.6818, + "step": 1431500 + }, + { + "epoch": 11.46, + "learning_rate": 4.4272000000000006e-05, + "loss": 8.6891, + "step": 1432000 + }, + { + "epoch": 11.46, + "learning_rate": 4.427e-05, + "loss": 8.6873, + "step": 1432500 + }, + { + "epoch": 11.46, + "learning_rate": 4.4268000000000004e-05, + "loss": 8.7087, + "step": 1433000 + }, + { + "epoch": 11.47, + "learning_rate": 4.4266e-05, + "loss": 8.6785, + "step": 1433500 + }, + { + "epoch": 11.47, + "learning_rate": 4.4264e-05, + "loss": 8.6784, + "step": 1434000 + }, + { + "epoch": 11.48, + "learning_rate": 4.4262000000000004e-05, + "loss": 8.6885, + "step": 1434500 + }, + { + "epoch": 11.48, + "learning_rate": 4.426e-05, + "loss": 8.7019, + "step": 1435000 + }, + { + "epoch": 11.48, + "learning_rate": 4.4258e-05, + "loss": 8.698, + "step": 1435500 + }, + { + "epoch": 11.49, + "learning_rate": 4.4256000000000005e-05, + "loss": 8.7079, + "step": 1436000 + }, + { + "epoch": 11.49, + "learning_rate": 4.4254e-05, + "loss": 8.661, + "step": 1436500 + }, + { + "epoch": 11.5, + "learning_rate": 4.4252e-05, + "loss": 8.6553, + "step": 1437000 + }, + { + "epoch": 11.5, + "learning_rate": 4.4250000000000005e-05, + "loss": 8.6629, + "step": 1437500 + }, + { + "epoch": 11.5, + "learning_rate": 4.4248e-05, + "loss": 8.6927, + "step": 1438000 + }, + { + "epoch": 11.51, + "learning_rate": 4.4246e-05, + "loss": 8.6908, + "step": 1438500 + }, + { + "epoch": 11.51, + "learning_rate": 4.4244000000000005e-05, + "loss": 8.6646, + "step": 1439000 + }, + { + "epoch": 11.52, + "learning_rate": 4.4242e-05, + "loss": 8.6639, + "step": 1439500 + }, + { + "epoch": 11.52, + "learning_rate": 4.424e-05, + "loss": 8.6975, + "step": 1440000 + }, + { + "epoch": 11.52, + "learning_rate": 4.4238e-05, + "loss": 8.67, + "step": 1440500 + }, + { + "epoch": 11.53, + "learning_rate": 4.4236e-05, + "loss": 8.6734, + "step": 1441000 + }, + { + "epoch": 11.53, + "learning_rate": 4.4234000000000004e-05, + "loss": 8.6764, + "step": 1441500 + }, + { + "epoch": 11.54, + "learning_rate": 4.4232e-05, + "loss": 8.676, + "step": 1442000 + }, + { + "epoch": 11.54, + "learning_rate": 4.423e-05, + "loss": 8.646, + "step": 1442500 + }, + { + "epoch": 11.54, + "learning_rate": 4.4228000000000004e-05, + "loss": 8.7027, + "step": 1443000 + }, + { + "epoch": 11.55, + "learning_rate": 4.4226e-05, + "loss": 8.6741, + "step": 1443500 + }, + { + "epoch": 11.55, + "learning_rate": 4.4224e-05, + "loss": 8.6565, + "step": 1444000 + }, + { + "epoch": 11.56, + "learning_rate": 4.4222000000000005e-05, + "loss": 8.6644, + "step": 1444500 + }, + { + "epoch": 11.56, + "learning_rate": 4.422e-05, + "loss": 8.6796, + "step": 1445000 + }, + { + "epoch": 11.56, + "learning_rate": 4.4218e-05, + "loss": 8.6761, + "step": 1445500 + }, + { + "epoch": 11.57, + "learning_rate": 4.4216000000000005e-05, + "loss": 8.6797, + "step": 1446000 + }, + { + "epoch": 11.57, + "learning_rate": 4.4214e-05, + "loss": 8.6821, + "step": 1446500 + }, + { + "epoch": 11.58, + "learning_rate": 4.4212e-05, + "loss": 8.6804, + "step": 1447000 + }, + { + "epoch": 11.58, + "learning_rate": 4.421e-05, + "loss": 8.6837, + "step": 1447500 + }, + { + "epoch": 11.58, + "learning_rate": 4.4208e-05, + "loss": 8.6764, + "step": 1448000 + }, + { + "epoch": 11.59, + "learning_rate": 4.4206e-05, + "loss": 8.7004, + "step": 1448500 + }, + { + "epoch": 11.59, + "learning_rate": 4.4204e-05, + "loss": 8.6578, + "step": 1449000 + }, + { + "epoch": 11.6, + "learning_rate": 4.4202e-05, + "loss": 8.657, + "step": 1449500 + }, + { + "epoch": 11.6, + "learning_rate": 4.4200000000000004e-05, + "loss": 8.6699, + "step": 1450000 + }, + { + "epoch": 11.6, + "learning_rate": 4.4198e-05, + "loss": 8.6896, + "step": 1450500 + }, + { + "epoch": 11.61, + "learning_rate": 4.4196e-05, + "loss": 8.6403, + "step": 1451000 + }, + { + "epoch": 11.61, + "learning_rate": 4.4194000000000004e-05, + "loss": 8.6853, + "step": 1451500 + }, + { + "epoch": 11.62, + "learning_rate": 4.4192e-05, + "loss": 8.6746, + "step": 1452000 + }, + { + "epoch": 11.62, + "learning_rate": 4.419e-05, + "loss": 8.6948, + "step": 1452500 + }, + { + "epoch": 11.62, + "learning_rate": 4.4188000000000005e-05, + "loss": 8.6854, + "step": 1453000 + }, + { + "epoch": 11.63, + "learning_rate": 4.4186e-05, + "loss": 8.6629, + "step": 1453500 + }, + { + "epoch": 11.63, + "learning_rate": 4.4184e-05, + "loss": 8.6872, + "step": 1454000 + }, + { + "epoch": 11.64, + "learning_rate": 4.4182e-05, + "loss": 8.673, + "step": 1454500 + }, + { + "epoch": 11.64, + "learning_rate": 4.418000000000001e-05, + "loss": 8.6516, + "step": 1455000 + }, + { + "epoch": 11.64, + "learning_rate": 4.4178e-05, + "loss": 8.669, + "step": 1455500 + }, + { + "epoch": 11.65, + "learning_rate": 4.4176e-05, + "loss": 8.6677, + "step": 1456000 + }, + { + "epoch": 11.65, + "learning_rate": 4.4174e-05, + "loss": 8.6699, + "step": 1456500 + }, + { + "epoch": 11.66, + "learning_rate": 4.4172e-05, + "loss": 8.665, + "step": 1457000 + }, + { + "epoch": 11.66, + "learning_rate": 4.417e-05, + "loss": 8.7142, + "step": 1457500 + }, + { + "epoch": 11.66, + "learning_rate": 4.4168e-05, + "loss": 8.6866, + "step": 1458000 + }, + { + "epoch": 11.67, + "learning_rate": 4.4166000000000004e-05, + "loss": 8.6632, + "step": 1458500 + }, + { + "epoch": 11.67, + "learning_rate": 4.4164e-05, + "loss": 8.6833, + "step": 1459000 + }, + { + "epoch": 11.68, + "learning_rate": 4.4162e-05, + "loss": 8.6737, + "step": 1459500 + }, + { + "epoch": 11.68, + "learning_rate": 4.4160000000000004e-05, + "loss": 8.7098, + "step": 1460000 + }, + { + "epoch": 11.68, + "learning_rate": 4.4158e-05, + "loss": 8.6475, + "step": 1460500 + }, + { + "epoch": 11.69, + "learning_rate": 4.4156e-05, + "loss": 8.6624, + "step": 1461000 + }, + { + "epoch": 11.69, + "learning_rate": 4.4154e-05, + "loss": 8.6969, + "step": 1461500 + }, + { + "epoch": 11.7, + "learning_rate": 4.415200000000001e-05, + "loss": 8.6833, + "step": 1462000 + }, + { + "epoch": 11.7, + "learning_rate": 4.415e-05, + "loss": 8.689, + "step": 1462500 + }, + { + "epoch": 11.7, + "learning_rate": 4.4148e-05, + "loss": 8.6595, + "step": 1463000 + }, + { + "epoch": 11.71, + "learning_rate": 4.414600000000001e-05, + "loss": 8.6789, + "step": 1463500 + }, + { + "epoch": 11.71, + "learning_rate": 4.4144e-05, + "loss": 8.6726, + "step": 1464000 + }, + { + "epoch": 11.72, + "learning_rate": 4.4142e-05, + "loss": 8.6825, + "step": 1464500 + }, + { + "epoch": 11.72, + "learning_rate": 4.414e-05, + "loss": 8.6773, + "step": 1465000 + }, + { + "epoch": 11.72, + "learning_rate": 4.4138e-05, + "loss": 8.6724, + "step": 1465500 + }, + { + "epoch": 11.73, + "learning_rate": 4.4136e-05, + "loss": 8.6929, + "step": 1466000 + }, + { + "epoch": 11.73, + "learning_rate": 4.4134e-05, + "loss": 8.6711, + "step": 1466500 + }, + { + "epoch": 11.74, + "learning_rate": 4.4132000000000004e-05, + "loss": 8.6716, + "step": 1467000 + }, + { + "epoch": 11.74, + "learning_rate": 4.4130000000000006e-05, + "loss": 8.71, + "step": 1467500 + }, + { + "epoch": 11.74, + "learning_rate": 4.4128e-05, + "loss": 8.6614, + "step": 1468000 + }, + { + "epoch": 11.75, + "learning_rate": 4.4126e-05, + "loss": 8.6917, + "step": 1468500 + }, + { + "epoch": 11.75, + "learning_rate": 4.4124000000000006e-05, + "loss": 8.6831, + "step": 1469000 + }, + { + "epoch": 11.76, + "learning_rate": 4.4122e-05, + "loss": 8.668, + "step": 1469500 + }, + { + "epoch": 11.76, + "learning_rate": 4.412e-05, + "loss": 8.667, + "step": 1470000 + }, + { + "epoch": 11.76, + "learning_rate": 4.411800000000001e-05, + "loss": 8.6614, + "step": 1470500 + }, + { + "epoch": 11.77, + "learning_rate": 4.4116e-05, + "loss": 8.6821, + "step": 1471000 + }, + { + "epoch": 11.77, + "learning_rate": 4.4114e-05, + "loss": 8.6653, + "step": 1471500 + }, + { + "epoch": 11.78, + "learning_rate": 4.4112e-05, + "loss": 8.6863, + "step": 1472000 + }, + { + "epoch": 11.78, + "learning_rate": 4.411e-05, + "loss": 8.6759, + "step": 1472500 + }, + { + "epoch": 11.78, + "learning_rate": 4.4108000000000005e-05, + "loss": 8.6805, + "step": 1473000 + }, + { + "epoch": 11.79, + "learning_rate": 4.4106e-05, + "loss": 8.7015, + "step": 1473500 + }, + { + "epoch": 11.79, + "learning_rate": 4.4104e-05, + "loss": 8.6562, + "step": 1474000 + }, + { + "epoch": 11.8, + "learning_rate": 4.4102000000000006e-05, + "loss": 8.6747, + "step": 1474500 + }, + { + "epoch": 11.8, + "learning_rate": 4.41e-05, + "loss": 8.6789, + "step": 1475000 + }, + { + "epoch": 11.8, + "learning_rate": 4.4098000000000004e-05, + "loss": 8.6524, + "step": 1475500 + }, + { + "epoch": 11.81, + "learning_rate": 4.4096000000000006e-05, + "loss": 8.6618, + "step": 1476000 + }, + { + "epoch": 11.81, + "learning_rate": 4.4094e-05, + "loss": 8.6817, + "step": 1476500 + }, + { + "epoch": 11.82, + "learning_rate": 4.4092e-05, + "loss": 8.6948, + "step": 1477000 + }, + { + "epoch": 11.82, + "learning_rate": 4.4090000000000006e-05, + "loss": 8.6682, + "step": 1477500 + }, + { + "epoch": 11.82, + "learning_rate": 4.4088e-05, + "loss": 8.6452, + "step": 1478000 + }, + { + "epoch": 11.83, + "learning_rate": 4.4086e-05, + "loss": 8.6851, + "step": 1478500 + }, + { + "epoch": 11.83, + "learning_rate": 4.4084e-05, + "loss": 8.6777, + "step": 1479000 + }, + { + "epoch": 11.84, + "learning_rate": 4.4082e-05, + "loss": 8.6732, + "step": 1479500 + }, + { + "epoch": 11.84, + "learning_rate": 4.4080000000000005e-05, + "loss": 8.6927, + "step": 1480000 + }, + { + "epoch": 11.84, + "learning_rate": 4.4078e-05, + "loss": 8.6913, + "step": 1480500 + }, + { + "epoch": 11.85, + "learning_rate": 4.4076e-05, + "loss": 8.6612, + "step": 1481000 + }, + { + "epoch": 11.85, + "learning_rate": 4.4074000000000005e-05, + "loss": 8.7139, + "step": 1481500 + }, + { + "epoch": 11.86, + "learning_rate": 4.4072e-05, + "loss": 8.6856, + "step": 1482000 + }, + { + "epoch": 11.86, + "learning_rate": 4.407e-05, + "loss": 8.6723, + "step": 1482500 + }, + { + "epoch": 11.86, + "learning_rate": 4.4068000000000006e-05, + "loss": 8.6858, + "step": 1483000 + }, + { + "epoch": 11.87, + "learning_rate": 4.4066e-05, + "loss": 8.6761, + "step": 1483500 + }, + { + "epoch": 11.87, + "learning_rate": 4.4064e-05, + "loss": 8.6919, + "step": 1484000 + }, + { + "epoch": 11.88, + "learning_rate": 4.4062000000000006e-05, + "loss": 8.6761, + "step": 1484500 + }, + { + "epoch": 11.88, + "learning_rate": 4.406e-05, + "loss": 8.689, + "step": 1485000 + }, + { + "epoch": 11.88, + "learning_rate": 4.4058000000000004e-05, + "loss": 8.6675, + "step": 1485500 + }, + { + "epoch": 11.89, + "learning_rate": 4.4056e-05, + "loss": 8.6809, + "step": 1486000 + }, + { + "epoch": 11.89, + "learning_rate": 4.4054e-05, + "loss": 8.7032, + "step": 1486500 + }, + { + "epoch": 11.9, + "learning_rate": 4.4052000000000004e-05, + "loss": 8.7098, + "step": 1487000 + }, + { + "epoch": 11.9, + "learning_rate": 4.405e-05, + "loss": 8.6612, + "step": 1487500 + }, + { + "epoch": 11.9, + "learning_rate": 4.4048e-05, + "loss": 8.6746, + "step": 1488000 + }, + { + "epoch": 11.91, + "learning_rate": 4.4046000000000005e-05, + "loss": 8.6695, + "step": 1488500 + }, + { + "epoch": 11.91, + "learning_rate": 4.4044e-05, + "loss": 8.6714, + "step": 1489000 + }, + { + "epoch": 11.92, + "learning_rate": 4.4042e-05, + "loss": 8.6852, + "step": 1489500 + }, + { + "epoch": 11.92, + "learning_rate": 4.4040000000000005e-05, + "loss": 8.6772, + "step": 1490000 + }, + { + "epoch": 11.92, + "learning_rate": 4.4038e-05, + "loss": 8.6697, + "step": 1490500 + }, + { + "epoch": 11.93, + "learning_rate": 4.4035999999999996e-05, + "loss": 8.6456, + "step": 1491000 + }, + { + "epoch": 11.93, + "learning_rate": 4.4034000000000006e-05, + "loss": 8.6959, + "step": 1491500 + }, + { + "epoch": 11.94, + "learning_rate": 4.4032e-05, + "loss": 8.6616, + "step": 1492000 + }, + { + "epoch": 11.94, + "learning_rate": 4.4030000000000004e-05, + "loss": 8.6814, + "step": 1492500 + }, + { + "epoch": 11.94, + "learning_rate": 4.4028e-05, + "loss": 8.6567, + "step": 1493000 + }, + { + "epoch": 11.95, + "learning_rate": 4.4026e-05, + "loss": 8.6518, + "step": 1493500 + }, + { + "epoch": 11.95, + "learning_rate": 4.4024000000000004e-05, + "loss": 8.6799, + "step": 1494000 + }, + { + "epoch": 11.96, + "learning_rate": 4.4022e-05, + "loss": 8.6895, + "step": 1494500 + }, + { + "epoch": 11.96, + "learning_rate": 4.402e-05, + "loss": 8.677, + "step": 1495000 + }, + { + "epoch": 11.96, + "learning_rate": 4.4018000000000004e-05, + "loss": 8.6905, + "step": 1495500 + }, + { + "epoch": 11.97, + "learning_rate": 4.4016e-05, + "loss": 8.6613, + "step": 1496000 + }, + { + "epoch": 11.97, + "learning_rate": 4.4014e-05, + "loss": 8.6763, + "step": 1496500 + }, + { + "epoch": 11.98, + "learning_rate": 4.4012000000000005e-05, + "loss": 8.6546, + "step": 1497000 + }, + { + "epoch": 11.98, + "learning_rate": 4.401e-05, + "loss": 8.6658, + "step": 1497500 + }, + { + "epoch": 11.98, + "learning_rate": 4.4008e-05, + "loss": 8.6746, + "step": 1498000 + }, + { + "epoch": 11.99, + "learning_rate": 4.4006000000000005e-05, + "loss": 8.666, + "step": 1498500 + }, + { + "epoch": 11.99, + "learning_rate": 4.4004e-05, + "loss": 8.6715, + "step": 1499000 + }, + { + "epoch": 12.0, + "learning_rate": 4.4002e-05, + "loss": 8.6713, + "step": 1499500 + }, + { + "epoch": 12.0, + "learning_rate": 4.4000000000000006e-05, + "loss": 8.6739, + "step": 1500000 + }, + { + "epoch": 12.0, + "learning_rate": 4.3998e-05, + "loss": 8.6672, + "step": 1500500 + }, + { + "epoch": 12.01, + "learning_rate": 4.3996000000000004e-05, + "loss": 8.6654, + "step": 1501000 + }, + { + "epoch": 12.01, + "learning_rate": 4.3994e-05, + "loss": 8.71, + "step": 1501500 + }, + { + "epoch": 12.02, + "learning_rate": 4.3992e-05, + "loss": 8.6906, + "step": 1502000 + }, + { + "epoch": 12.02, + "learning_rate": 4.3990000000000004e-05, + "loss": 8.7011, + "step": 1502500 + }, + { + "epoch": 12.02, + "learning_rate": 4.3988e-05, + "loss": 8.6885, + "step": 1503000 + }, + { + "epoch": 12.03, + "learning_rate": 4.3986e-05, + "loss": 8.6526, + "step": 1503500 + }, + { + "epoch": 12.03, + "learning_rate": 4.3984000000000004e-05, + "loss": 8.6879, + "step": 1504000 + }, + { + "epoch": 12.04, + "learning_rate": 4.3982e-05, + "loss": 8.6624, + "step": 1504500 + }, + { + "epoch": 12.04, + "learning_rate": 4.398e-05, + "loss": 8.7053, + "step": 1505000 + }, + { + "epoch": 12.04, + "learning_rate": 4.3978000000000005e-05, + "loss": 8.6805, + "step": 1505500 + }, + { + "epoch": 12.05, + "learning_rate": 4.3976e-05, + "loss": 8.6868, + "step": 1506000 + }, + { + "epoch": 12.05, + "learning_rate": 4.3974e-05, + "loss": 8.7021, + "step": 1506500 + }, + { + "epoch": 12.06, + "learning_rate": 4.3972000000000005e-05, + "loss": 8.6909, + "step": 1507000 + }, + { + "epoch": 12.06, + "learning_rate": 4.397e-05, + "loss": 8.6705, + "step": 1507500 + }, + { + "epoch": 12.06, + "learning_rate": 4.3968e-05, + "loss": 8.6963, + "step": 1508000 + }, + { + "epoch": 12.07, + "learning_rate": 4.3966e-05, + "loss": 8.6846, + "step": 1508500 + }, + { + "epoch": 12.07, + "learning_rate": 4.3964e-05, + "loss": 8.6831, + "step": 1509000 + }, + { + "epoch": 12.08, + "learning_rate": 4.3962000000000004e-05, + "loss": 8.6882, + "step": 1509500 + }, + { + "epoch": 12.08, + "learning_rate": 4.396e-05, + "loss": 8.6903, + "step": 1510000 + }, + { + "epoch": 12.08, + "learning_rate": 4.3958e-05, + "loss": 8.6875, + "step": 1510500 + }, + { + "epoch": 12.09, + "learning_rate": 4.3956000000000004e-05, + "loss": 8.6809, + "step": 1511000 + }, + { + "epoch": 12.09, + "learning_rate": 4.3954e-05, + "loss": 8.6633, + "step": 1511500 + }, + { + "epoch": 12.1, + "learning_rate": 4.3952e-05, + "loss": 8.6722, + "step": 1512000 + }, + { + "epoch": 12.1, + "learning_rate": 4.3950000000000004e-05, + "loss": 8.6903, + "step": 1512500 + }, + { + "epoch": 12.1, + "learning_rate": 4.3948e-05, + "loss": 8.6771, + "step": 1513000 + }, + { + "epoch": 12.11, + "learning_rate": 4.3946e-05, + "loss": 8.6747, + "step": 1513500 + }, + { + "epoch": 12.11, + "learning_rate": 4.3944000000000005e-05, + "loss": 8.691, + "step": 1514000 + }, + { + "epoch": 12.12, + "learning_rate": 4.3942e-05, + "loss": 8.6689, + "step": 1514500 + }, + { + "epoch": 12.12, + "learning_rate": 4.394e-05, + "loss": 8.6619, + "step": 1515000 + }, + { + "epoch": 12.12, + "learning_rate": 4.3938e-05, + "loss": 8.6476, + "step": 1515500 + }, + { + "epoch": 12.13, + "learning_rate": 4.3936e-05, + "loss": 8.6996, + "step": 1516000 + }, + { + "epoch": 12.13, + "learning_rate": 4.3934e-05, + "loss": 8.6747, + "step": 1516500 + }, + { + "epoch": 12.14, + "learning_rate": 4.3932e-05, + "loss": 8.6774, + "step": 1517000 + }, + { + "epoch": 12.14, + "learning_rate": 4.393e-05, + "loss": 8.6869, + "step": 1517500 + }, + { + "epoch": 12.14, + "learning_rate": 4.3928000000000004e-05, + "loss": 8.6653, + "step": 1518000 + }, + { + "epoch": 12.15, + "learning_rate": 4.3926e-05, + "loss": 8.6809, + "step": 1518500 + }, + { + "epoch": 12.15, + "learning_rate": 4.3924e-05, + "loss": 8.6973, + "step": 1519000 + }, + { + "epoch": 12.16, + "learning_rate": 4.3922000000000004e-05, + "loss": 8.6989, + "step": 1519500 + }, + { + "epoch": 12.16, + "learning_rate": 4.392e-05, + "loss": 8.6899, + "step": 1520000 + }, + { + "epoch": 12.16, + "learning_rate": 4.3918e-05, + "loss": 8.6943, + "step": 1520500 + }, + { + "epoch": 12.17, + "learning_rate": 4.3916000000000004e-05, + "loss": 8.6592, + "step": 1521000 + }, + { + "epoch": 12.17, + "learning_rate": 4.3914e-05, + "loss": 8.6819, + "step": 1521500 + }, + { + "epoch": 12.18, + "learning_rate": 4.3912e-05, + "loss": 8.6925, + "step": 1522000 + }, + { + "epoch": 12.18, + "learning_rate": 4.391e-05, + "loss": 8.6697, + "step": 1522500 + }, + { + "epoch": 12.18, + "learning_rate": 4.390800000000001e-05, + "loss": 8.6586, + "step": 1523000 + }, + { + "epoch": 12.19, + "learning_rate": 4.3906e-05, + "loss": 8.6722, + "step": 1523500 + }, + { + "epoch": 12.19, + "learning_rate": 4.3904e-05, + "loss": 8.6586, + "step": 1524000 + }, + { + "epoch": 12.2, + "learning_rate": 4.390200000000001e-05, + "loss": 8.6938, + "step": 1524500 + }, + { + "epoch": 12.2, + "learning_rate": 4.39e-05, + "loss": 8.6725, + "step": 1525000 + }, + { + "epoch": 12.2, + "learning_rate": 4.3898e-05, + "loss": 8.6467, + "step": 1525500 + }, + { + "epoch": 12.21, + "learning_rate": 4.3896e-05, + "loss": 8.7079, + "step": 1526000 + }, + { + "epoch": 12.21, + "learning_rate": 4.3894000000000004e-05, + "loss": 8.6854, + "step": 1526500 + }, + { + "epoch": 12.22, + "learning_rate": 4.3892e-05, + "loss": 8.7007, + "step": 1527000 + }, + { + "epoch": 12.22, + "learning_rate": 4.389e-05, + "loss": 8.6889, + "step": 1527500 + }, + { + "epoch": 12.22, + "learning_rate": 4.3888000000000004e-05, + "loss": 8.7038, + "step": 1528000 + }, + { + "epoch": 12.23, + "learning_rate": 4.3886e-05, + "loss": 8.6536, + "step": 1528500 + }, + { + "epoch": 12.23, + "learning_rate": 4.3884e-05, + "loss": 8.6831, + "step": 1529000 + }, + { + "epoch": 12.24, + "learning_rate": 4.3882e-05, + "loss": 8.6792, + "step": 1529500 + }, + { + "epoch": 12.24, + "learning_rate": 4.388000000000001e-05, + "loss": 8.6932, + "step": 1530000 + }, + { + "epoch": 12.24, + "learning_rate": 4.3878e-05, + "loss": 8.6891, + "step": 1530500 + }, + { + "epoch": 12.25, + "learning_rate": 4.3876e-05, + "loss": 8.6866, + "step": 1531000 + }, + { + "epoch": 12.25, + "learning_rate": 4.387400000000001e-05, + "loss": 8.672, + "step": 1531500 + }, + { + "epoch": 12.26, + "learning_rate": 4.3872e-05, + "loss": 8.672, + "step": 1532000 + }, + { + "epoch": 12.26, + "learning_rate": 4.387e-05, + "loss": 8.7002, + "step": 1532500 + }, + { + "epoch": 12.26, + "learning_rate": 4.3868e-05, + "loss": 8.6728, + "step": 1533000 + }, + { + "epoch": 12.27, + "learning_rate": 4.3866e-05, + "loss": 8.6812, + "step": 1533500 + }, + { + "epoch": 12.27, + "learning_rate": 4.3864e-05, + "loss": 8.644, + "step": 1534000 + }, + { + "epoch": 12.28, + "learning_rate": 4.3862e-05, + "loss": 8.6899, + "step": 1534500 + }, + { + "epoch": 12.28, + "learning_rate": 4.3860000000000004e-05, + "loss": 8.688, + "step": 1535000 + }, + { + "epoch": 12.28, + "learning_rate": 4.3858000000000006e-05, + "loss": 8.6622, + "step": 1535500 + }, + { + "epoch": 12.29, + "learning_rate": 4.3856e-05, + "loss": 8.6991, + "step": 1536000 + }, + { + "epoch": 12.29, + "learning_rate": 4.3854000000000004e-05, + "loss": 8.699, + "step": 1536500 + }, + { + "epoch": 12.3, + "learning_rate": 4.3852000000000006e-05, + "loss": 8.6681, + "step": 1537000 + }, + { + "epoch": 12.3, + "learning_rate": 4.385e-05, + "loss": 8.6747, + "step": 1537500 + }, + { + "epoch": 12.3, + "learning_rate": 4.3848e-05, + "loss": 8.6442, + "step": 1538000 + }, + { + "epoch": 12.31, + "learning_rate": 4.384600000000001e-05, + "loss": 8.6469, + "step": 1538500 + }, + { + "epoch": 12.31, + "learning_rate": 4.3844e-05, + "loss": 8.6613, + "step": 1539000 + }, + { + "epoch": 12.32, + "learning_rate": 4.3842e-05, + "loss": 8.6914, + "step": 1539500 + }, + { + "epoch": 12.32, + "learning_rate": 4.384e-05, + "loss": 8.6721, + "step": 1540000 + }, + { + "epoch": 12.32, + "learning_rate": 4.3838e-05, + "loss": 8.6913, + "step": 1540500 + }, + { + "epoch": 12.33, + "learning_rate": 4.3836000000000005e-05, + "loss": 8.6717, + "step": 1541000 + }, + { + "epoch": 12.33, + "learning_rate": 4.3834e-05, + "loss": 8.6817, + "step": 1541500 + }, + { + "epoch": 12.34, + "learning_rate": 4.3832e-05, + "loss": 8.6997, + "step": 1542000 + }, + { + "epoch": 12.34, + "learning_rate": 4.3830000000000006e-05, + "loss": 8.7035, + "step": 1542500 + }, + { + "epoch": 12.34, + "learning_rate": 4.3828e-05, + "loss": 8.6876, + "step": 1543000 + }, + { + "epoch": 12.35, + "learning_rate": 4.3826000000000004e-05, + "loss": 8.6981, + "step": 1543500 + }, + { + "epoch": 12.35, + "learning_rate": 4.3824000000000006e-05, + "loss": 8.7151, + "step": 1544000 + }, + { + "epoch": 12.36, + "learning_rate": 4.3822e-05, + "loss": 8.7211, + "step": 1544500 + }, + { + "epoch": 12.36, + "learning_rate": 4.382e-05, + "loss": 8.699, + "step": 1545000 + }, + { + "epoch": 12.36, + "learning_rate": 4.3818000000000006e-05, + "loss": 8.6745, + "step": 1545500 + }, + { + "epoch": 12.37, + "learning_rate": 4.3816e-05, + "loss": 8.6977, + "step": 1546000 + }, + { + "epoch": 12.37, + "learning_rate": 4.3814e-05, + "loss": 8.6718, + "step": 1546500 + }, + { + "epoch": 12.38, + "learning_rate": 4.3812e-05, + "loss": 8.7128, + "step": 1547000 + }, + { + "epoch": 12.38, + "learning_rate": 4.381e-05, + "loss": 8.6882, + "step": 1547500 + }, + { + "epoch": 12.38, + "learning_rate": 4.3808000000000005e-05, + "loss": 8.6773, + "step": 1548000 + }, + { + "epoch": 12.39, + "learning_rate": 4.3806e-05, + "loss": 8.6646, + "step": 1548500 + }, + { + "epoch": 12.39, + "learning_rate": 4.3804e-05, + "loss": 8.6737, + "step": 1549000 + }, + { + "epoch": 12.4, + "learning_rate": 4.3802000000000005e-05, + "loss": 8.65, + "step": 1549500 + }, + { + "epoch": 12.4, + "learning_rate": 4.38e-05, + "loss": 8.6774, + "step": 1550000 + }, + { + "epoch": 12.4, + "learning_rate": 4.3798e-05, + "loss": 8.6685, + "step": 1550500 + }, + { + "epoch": 12.41, + "learning_rate": 4.3796000000000006e-05, + "loss": 8.6778, + "step": 1551000 + }, + { + "epoch": 12.41, + "learning_rate": 4.3794e-05, + "loss": 8.6895, + "step": 1551500 + }, + { + "epoch": 12.42, + "learning_rate": 4.3792e-05, + "loss": 8.6687, + "step": 1552000 + }, + { + "epoch": 12.42, + "learning_rate": 4.3790000000000006e-05, + "loss": 8.6951, + "step": 1552500 + }, + { + "epoch": 12.42, + "learning_rate": 4.3788e-05, + "loss": 8.6959, + "step": 1553000 + }, + { + "epoch": 12.43, + "learning_rate": 4.3786000000000004e-05, + "loss": 8.67, + "step": 1553500 + }, + { + "epoch": 12.43, + "learning_rate": 4.3784e-05, + "loss": 8.6531, + "step": 1554000 + }, + { + "epoch": 12.44, + "learning_rate": 4.3782e-05, + "loss": 8.7009, + "step": 1554500 + }, + { + "epoch": 12.44, + "learning_rate": 4.3780000000000004e-05, + "loss": 8.6707, + "step": 1555000 + }, + { + "epoch": 12.44, + "learning_rate": 4.3778e-05, + "loss": 8.676, + "step": 1555500 + }, + { + "epoch": 12.45, + "learning_rate": 4.3776e-05, + "loss": 8.6667, + "step": 1556000 + }, + { + "epoch": 12.45, + "learning_rate": 4.3774000000000005e-05, + "loss": 8.7089, + "step": 1556500 + }, + { + "epoch": 12.46, + "learning_rate": 4.3772e-05, + "loss": 8.6605, + "step": 1557000 + }, + { + "epoch": 12.46, + "learning_rate": 4.377e-05, + "loss": 8.7017, + "step": 1557500 + }, + { + "epoch": 12.46, + "learning_rate": 4.3768000000000005e-05, + "loss": 8.6731, + "step": 1558000 + }, + { + "epoch": 12.47, + "learning_rate": 4.3766e-05, + "loss": 8.6783, + "step": 1558500 + }, + { + "epoch": 12.47, + "learning_rate": 4.3763999999999996e-05, + "loss": 8.715, + "step": 1559000 + }, + { + "epoch": 12.48, + "learning_rate": 4.3762000000000006e-05, + "loss": 8.6533, + "step": 1559500 + }, + { + "epoch": 12.48, + "learning_rate": 4.376e-05, + "loss": 8.6847, + "step": 1560000 + } + ], + "max_steps": 12500000, + "num_train_epochs": 100, + "total_flos": 4.871898683434107e+17, + "trial_name": null, + "trial_params": null +}