{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.582010582010582, "global_step": 500000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.0000000000000002e-06, "loss": 10.2046, "step": 100 }, { "epoch": 0.0, "learning_rate": 2.0000000000000003e-06, "loss": 9.366, "step": 200 }, { "epoch": 0.01, "learning_rate": 3e-06, "loss": 8.9116, "step": 300 }, { "epoch": 0.01, "learning_rate": 4.000000000000001e-06, "loss": 8.5519, "step": 400 }, { "epoch": 0.01, "learning_rate": 5e-06, "loss": 8.1293, "step": 500 }, { "epoch": 0.01, "learning_rate": 6e-06, "loss": 7.6255, "step": 600 }, { "epoch": 0.01, "learning_rate": 7.000000000000001e-06, "loss": 7.1213, "step": 700 }, { "epoch": 0.02, "learning_rate": 8.000000000000001e-06, "loss": 6.7092, "step": 800 }, { "epoch": 0.02, "learning_rate": 9e-06, "loss": 6.4643, "step": 900 }, { "epoch": 0.02, "learning_rate": 1e-05, "loss": 6.311, "step": 1000 }, { "epoch": 0.02, "learning_rate": 1.1000000000000001e-05, "loss": 6.2174, "step": 1100 }, { "epoch": 0.03, "learning_rate": 1.2e-05, "loss": 6.1559, "step": 1200 }, { "epoch": 0.03, "learning_rate": 1.3000000000000001e-05, "loss": 6.1035, "step": 1300 }, { "epoch": 0.03, "learning_rate": 1.4000000000000001e-05, "loss": 6.0585, "step": 1400 }, { "epoch": 0.03, "learning_rate": 1.5e-05, "loss": 6.0161, "step": 1500 }, { "epoch": 0.03, "learning_rate": 1.6000000000000003e-05, "loss": 5.9838, "step": 1600 }, { "epoch": 0.04, "learning_rate": 1.7000000000000003e-05, "loss": 5.9601, "step": 1700 }, { "epoch": 0.04, "learning_rate": 1.8e-05, "loss": 5.9298, "step": 1800 }, { "epoch": 0.04, "learning_rate": 1.9e-05, "loss": 5.9038, "step": 1900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 5.8812, "step": 2000 }, { "epoch": 0.04, "learning_rate": 2.1e-05, "loss": 5.8652, "step": 2100 }, { "epoch": 0.05, "learning_rate": 2.2000000000000003e-05, "loss": 5.8491, "step": 2200 }, { "epoch": 0.05, "learning_rate": 2.3000000000000003e-05, "loss": 5.8159, "step": 2300 }, { "epoch": 0.05, "learning_rate": 2.4e-05, "loss": 5.8046, "step": 2400 }, { "epoch": 0.05, "learning_rate": 2.5e-05, "loss": 5.7949, "step": 2500 }, { "epoch": 0.06, "learning_rate": 2.6000000000000002e-05, "loss": 5.7781, "step": 2600 }, { "epoch": 0.06, "learning_rate": 2.7000000000000002e-05, "loss": 5.7541, "step": 2700 }, { "epoch": 0.06, "learning_rate": 2.8000000000000003e-05, "loss": 5.7444, "step": 2800 }, { "epoch": 0.06, "learning_rate": 2.9e-05, "loss": 5.7199, "step": 2900 }, { "epoch": 0.06, "learning_rate": 3e-05, "loss": 5.7058, "step": 3000 }, { "epoch": 0.07, "learning_rate": 3.1e-05, "loss": 5.6956, "step": 3100 }, { "epoch": 0.07, "learning_rate": 3.2000000000000005e-05, "loss": 5.6666, "step": 3200 }, { "epoch": 0.07, "learning_rate": 3.3e-05, "loss": 5.6611, "step": 3300 }, { "epoch": 0.07, "learning_rate": 3.4000000000000007e-05, "loss": 5.6509, "step": 3400 }, { "epoch": 0.07, "learning_rate": 3.5e-05, "loss": 5.6268, "step": 3500 }, { "epoch": 0.08, "learning_rate": 3.6e-05, "loss": 5.6069, "step": 3600 }, { "epoch": 0.08, "learning_rate": 3.7e-05, "loss": 5.595, "step": 3700 }, { "epoch": 0.08, "learning_rate": 3.8e-05, "loss": 5.5835, "step": 3800 }, { "epoch": 0.08, "learning_rate": 3.9000000000000006e-05, "loss": 5.5517, "step": 3900 }, { "epoch": 0.08, "learning_rate": 4e-05, "loss": 5.538, "step": 4000 }, { "epoch": 0.09, "learning_rate": 4.1e-05, "loss": 5.508, "step": 4100 }, { "epoch": 0.09, "learning_rate": 4.2e-05, "loss": 5.4989, "step": 4200 }, { "epoch": 0.09, "learning_rate": 4.3e-05, "loss": 5.4678, "step": 4300 }, { "epoch": 0.09, "learning_rate": 4.4000000000000006e-05, "loss": 5.4355, "step": 4400 }, { "epoch": 0.1, "learning_rate": 4.5e-05, "loss": 5.3605, "step": 4500 }, { "epoch": 0.1, "learning_rate": 4.600000000000001e-05, "loss": 5.2785, "step": 4600 }, { "epoch": 0.1, "learning_rate": 4.7e-05, "loss": 5.2045, "step": 4700 }, { "epoch": 0.1, "learning_rate": 4.8e-05, "loss": 5.1103, "step": 4800 }, { "epoch": 0.1, "learning_rate": 4.9e-05, "loss": 5.0325, "step": 4900 }, { "epoch": 0.11, "learning_rate": 5e-05, "loss": 4.9641, "step": 5000 }, { "epoch": 0.11, "learning_rate": 5.1000000000000006e-05, "loss": 4.8674, "step": 5100 }, { "epoch": 0.11, "learning_rate": 5.2000000000000004e-05, "loss": 4.7503, "step": 5200 }, { "epoch": 0.11, "learning_rate": 5.300000000000001e-05, "loss": 4.6245, "step": 5300 }, { "epoch": 0.11, "learning_rate": 5.4000000000000005e-05, "loss": 4.5214, "step": 5400 }, { "epoch": 0.12, "learning_rate": 5.500000000000001e-05, "loss": 4.4142, "step": 5500 }, { "epoch": 0.12, "learning_rate": 5.6000000000000006e-05, "loss": 4.3115, "step": 5600 }, { "epoch": 0.12, "learning_rate": 5.6999999999999996e-05, "loss": 4.2359, "step": 5700 }, { "epoch": 0.12, "learning_rate": 5.8e-05, "loss": 4.1521, "step": 5800 }, { "epoch": 0.12, "learning_rate": 5.9e-05, "loss": 4.0841, "step": 5900 }, { "epoch": 0.13, "learning_rate": 6e-05, "loss": 4.0192, "step": 6000 }, { "epoch": 0.13, "learning_rate": 6.1e-05, "loss": 3.9532, "step": 6100 }, { "epoch": 0.13, "learning_rate": 6.2e-05, "loss": 3.8956, "step": 6200 }, { "epoch": 0.13, "learning_rate": 6.3e-05, "loss": 3.8454, "step": 6300 }, { "epoch": 0.14, "learning_rate": 6.400000000000001e-05, "loss": 3.7992, "step": 6400 }, { "epoch": 0.14, "learning_rate": 6.500000000000001e-05, "loss": 3.753, "step": 6500 }, { "epoch": 0.14, "learning_rate": 6.6e-05, "loss": 3.7035, "step": 6600 }, { "epoch": 0.14, "learning_rate": 6.7e-05, "loss": 3.6755, "step": 6700 }, { "epoch": 0.14, "learning_rate": 6.800000000000001e-05, "loss": 3.6067, "step": 6800 }, { "epoch": 0.15, "learning_rate": 6.9e-05, "loss": 3.5867, "step": 6900 }, { "epoch": 0.15, "learning_rate": 7e-05, "loss": 3.5409, "step": 7000 }, { "epoch": 0.15, "learning_rate": 7.1e-05, "loss": 3.4989, "step": 7100 }, { "epoch": 0.15, "learning_rate": 7.2e-05, "loss": 3.463, "step": 7200 }, { "epoch": 0.15, "learning_rate": 7.3e-05, "loss": 3.4175, "step": 7300 }, { "epoch": 0.16, "learning_rate": 7.4e-05, "loss": 3.3896, "step": 7400 }, { "epoch": 0.16, "learning_rate": 7.500000000000001e-05, "loss": 3.3591, "step": 7500 }, { "epoch": 0.16, "learning_rate": 7.6e-05, "loss": 3.3243, "step": 7600 }, { "epoch": 0.16, "learning_rate": 7.7e-05, "loss": 3.284, "step": 7700 }, { "epoch": 0.17, "learning_rate": 7.800000000000001e-05, "loss": 3.2571, "step": 7800 }, { "epoch": 0.17, "learning_rate": 7.900000000000001e-05, "loss": 3.2228, "step": 7900 }, { "epoch": 0.17, "learning_rate": 8e-05, "loss": 3.1848, "step": 8000 }, { "epoch": 0.17, "learning_rate": 8.1e-05, "loss": 3.1748, "step": 8100 }, { "epoch": 0.17, "learning_rate": 8.2e-05, "loss": 3.1406, "step": 8200 }, { "epoch": 0.18, "learning_rate": 8.3e-05, "loss": 3.123, "step": 8300 }, { "epoch": 0.18, "learning_rate": 8.4e-05, "loss": 3.1, "step": 8400 }, { "epoch": 0.18, "learning_rate": 8.5e-05, "loss": 3.0738, "step": 8500 }, { "epoch": 0.18, "learning_rate": 8.6e-05, "loss": 3.05, "step": 8600 }, { "epoch": 0.18, "learning_rate": 8.7e-05, "loss": 3.0246, "step": 8700 }, { "epoch": 0.19, "learning_rate": 8.800000000000001e-05, "loss": 2.9948, "step": 8800 }, { "epoch": 0.19, "learning_rate": 8.900000000000001e-05, "loss": 2.9928, "step": 8900 }, { "epoch": 0.19, "learning_rate": 9e-05, "loss": 2.9668, "step": 9000 }, { "epoch": 0.19, "learning_rate": 9.1e-05, "loss": 2.9411, "step": 9100 }, { "epoch": 0.19, "learning_rate": 9.200000000000001e-05, "loss": 2.9251, "step": 9200 }, { "epoch": 0.2, "learning_rate": 9.300000000000001e-05, "loss": 2.9019, "step": 9300 }, { "epoch": 0.2, "learning_rate": 9.4e-05, "loss": 2.8918, "step": 9400 }, { "epoch": 0.2, "learning_rate": 9.5e-05, "loss": 2.8718, "step": 9500 }, { "epoch": 0.2, "learning_rate": 9.6e-05, "loss": 2.8649, "step": 9600 }, { "epoch": 0.21, "learning_rate": 9.7e-05, "loss": 2.8558, "step": 9700 }, { "epoch": 0.21, "learning_rate": 9.8e-05, "loss": 2.8363, "step": 9800 }, { "epoch": 0.21, "learning_rate": 9.900000000000001e-05, "loss": 2.8145, "step": 9900 }, { "epoch": 0.21, "learning_rate": 0.0001, "loss": 2.8047, "step": 10000 }, { "epoch": 0.21, "learning_rate": 9.99795918367347e-05, "loss": 2.785, "step": 10100 }, { "epoch": 0.22, "learning_rate": 9.995918367346939e-05, "loss": 2.7711, "step": 10200 }, { "epoch": 0.22, "learning_rate": 9.993877551020409e-05, "loss": 2.7626, "step": 10300 }, { "epoch": 0.22, "learning_rate": 9.991836734693878e-05, "loss": 2.7413, "step": 10400 }, { "epoch": 0.22, "learning_rate": 9.989795918367347e-05, "loss": 2.7286, "step": 10500 }, { "epoch": 0.22, "learning_rate": 9.987755102040817e-05, "loss": 2.7203, "step": 10600 }, { "epoch": 0.23, "learning_rate": 9.985714285714287e-05, "loss": 2.6978, "step": 10700 }, { "epoch": 0.23, "learning_rate": 9.983673469387755e-05, "loss": 2.6935, "step": 10800 }, { "epoch": 0.23, "learning_rate": 9.981632653061225e-05, "loss": 2.6829, "step": 10900 }, { "epoch": 0.23, "learning_rate": 9.979591836734695e-05, "loss": 2.6686, "step": 11000 }, { "epoch": 0.23, "learning_rate": 9.977551020408163e-05, "loss": 2.6699, "step": 11100 }, { "epoch": 0.24, "learning_rate": 9.975510204081633e-05, "loss": 2.6482, "step": 11200 }, { "epoch": 0.24, "learning_rate": 9.973469387755102e-05, "loss": 2.637, "step": 11300 }, { "epoch": 0.24, "learning_rate": 9.971428571428571e-05, "loss": 2.6153, "step": 11400 }, { "epoch": 0.24, "learning_rate": 9.969387755102041e-05, "loss": 2.612, "step": 11500 }, { "epoch": 0.25, "learning_rate": 9.96734693877551e-05, "loss": 2.6096, "step": 11600 }, { "epoch": 0.25, "learning_rate": 9.96530612244898e-05, "loss": 2.5922, "step": 11700 }, { "epoch": 0.25, "learning_rate": 9.96326530612245e-05, "loss": 2.5857, "step": 11800 }, { "epoch": 0.25, "learning_rate": 9.961224489795918e-05, "loss": 2.5749, "step": 11900 }, { "epoch": 0.25, "learning_rate": 9.959183673469388e-05, "loss": 2.5663, "step": 12000 }, { "epoch": 0.26, "learning_rate": 9.957142857142858e-05, "loss": 2.5588, "step": 12100 }, { "epoch": 0.26, "learning_rate": 9.955102040816326e-05, "loss": 2.5513, "step": 12200 }, { "epoch": 0.26, "learning_rate": 9.953061224489797e-05, "loss": 2.5398, "step": 12300 }, { "epoch": 0.26, "learning_rate": 9.951020408163266e-05, "loss": 2.5352, "step": 12400 }, { "epoch": 0.26, "learning_rate": 9.948979591836736e-05, "loss": 2.5278, "step": 12500 }, { "epoch": 0.27, "learning_rate": 9.946938775510205e-05, "loss": 2.5242, "step": 12600 }, { "epoch": 0.27, "learning_rate": 9.944897959183674e-05, "loss": 2.5046, "step": 12700 }, { "epoch": 0.27, "learning_rate": 9.942857142857144e-05, "loss": 2.4943, "step": 12800 }, { "epoch": 0.27, "learning_rate": 9.940816326530614e-05, "loss": 2.4989, "step": 12900 }, { "epoch": 0.28, "learning_rate": 9.938775510204082e-05, "loss": 2.4866, "step": 13000 }, { "epoch": 0.28, "learning_rate": 9.936734693877552e-05, "loss": 2.4764, "step": 13100 }, { "epoch": 0.28, "learning_rate": 9.934693877551022e-05, "loss": 2.4728, "step": 13200 }, { "epoch": 0.28, "learning_rate": 9.93265306122449e-05, "loss": 2.4532, "step": 13300 }, { "epoch": 0.28, "learning_rate": 9.93061224489796e-05, "loss": 2.4532, "step": 13400 }, { "epoch": 0.29, "learning_rate": 9.92857142857143e-05, "loss": 2.464, "step": 13500 }, { "epoch": 0.29, "learning_rate": 9.926530612244898e-05, "loss": 2.4405, "step": 13600 }, { "epoch": 0.29, "learning_rate": 9.924489795918368e-05, "loss": 2.4397, "step": 13700 }, { "epoch": 0.29, "learning_rate": 9.922448979591838e-05, "loss": 2.4286, "step": 13800 }, { "epoch": 0.29, "learning_rate": 9.920408163265306e-05, "loss": 2.4184, "step": 13900 }, { "epoch": 0.3, "learning_rate": 9.91838775510204e-05, "loss": 2.4207, "step": 14000 }, { "epoch": 0.3, "learning_rate": 9.91634693877551e-05, "loss": 2.4132, "step": 14100 }, { "epoch": 0.3, "learning_rate": 9.91430612244898e-05, "loss": 2.4126, "step": 14200 }, { "epoch": 0.3, "learning_rate": 9.912265306122449e-05, "loss": 2.4032, "step": 14300 }, { "epoch": 0.3, "learning_rate": 9.910224489795919e-05, "loss": 2.3902, "step": 14400 }, { "epoch": 0.31, "learning_rate": 9.908183673469388e-05, "loss": 2.3934, "step": 14500 }, { "epoch": 0.31, "learning_rate": 9.906142857142857e-05, "loss": 2.3873, "step": 14600 }, { "epoch": 0.31, "learning_rate": 9.904102040816327e-05, "loss": 2.3686, "step": 14700 }, { "epoch": 0.31, "learning_rate": 9.902061224489797e-05, "loss": 2.3741, "step": 14800 }, { "epoch": 0.32, "learning_rate": 9.900020408163265e-05, "loss": 2.3662, "step": 14900 }, { "epoch": 0.32, "learning_rate": 9.897979591836735e-05, "loss": 2.3625, "step": 15000 }, { "epoch": 0.32, "learning_rate": 9.895938775510205e-05, "loss": 2.3601, "step": 15100 }, { "epoch": 0.32, "learning_rate": 9.893897959183675e-05, "loss": 2.36, "step": 15200 }, { "epoch": 0.32, "learning_rate": 9.891857142857144e-05, "loss": 2.3391, "step": 15300 }, { "epoch": 0.33, "learning_rate": 9.889816326530613e-05, "loss": 2.3479, "step": 15400 }, { "epoch": 0.33, "learning_rate": 9.887775510204083e-05, "loss": 2.3399, "step": 15500 }, { "epoch": 0.33, "learning_rate": 9.885734693877553e-05, "loss": 2.3281, "step": 15600 }, { "epoch": 0.33, "learning_rate": 9.883693877551021e-05, "loss": 2.3202, "step": 15700 }, { "epoch": 0.33, "learning_rate": 9.881653061224491e-05, "loss": 2.3204, "step": 15800 }, { "epoch": 0.34, "learning_rate": 9.87961224489796e-05, "loss": 2.3178, "step": 15900 }, { "epoch": 0.34, "learning_rate": 9.877571428571429e-05, "loss": 2.3155, "step": 16000 }, { "epoch": 0.34, "learning_rate": 9.875530612244899e-05, "loss": 2.3132, "step": 16100 }, { "epoch": 0.34, "learning_rate": 9.873489795918367e-05, "loss": 2.3096, "step": 16200 }, { "epoch": 0.34, "learning_rate": 9.871448979591837e-05, "loss": 2.298, "step": 16300 }, { "epoch": 0.35, "learning_rate": 9.869408163265307e-05, "loss": 2.3049, "step": 16400 }, { "epoch": 0.35, "learning_rate": 9.867367346938776e-05, "loss": 2.2992, "step": 16500 }, { "epoch": 0.35, "learning_rate": 9.865326530612245e-05, "loss": 2.2926, "step": 16600 }, { "epoch": 0.35, "learning_rate": 9.863285714285715e-05, "loss": 2.2843, "step": 16700 }, { "epoch": 0.36, "learning_rate": 9.861244897959184e-05, "loss": 2.2873, "step": 16800 }, { "epoch": 0.36, "learning_rate": 9.859204081632654e-05, "loss": 2.2797, "step": 16900 }, { "epoch": 0.36, "learning_rate": 9.857163265306123e-05, "loss": 2.2736, "step": 17000 }, { "epoch": 0.36, "learning_rate": 9.855122448979592e-05, "loss": 2.2744, "step": 17100 }, { "epoch": 0.36, "learning_rate": 9.853081632653062e-05, "loss": 2.2719, "step": 17200 }, { "epoch": 0.37, "learning_rate": 9.851040816326532e-05, "loss": 2.2618, "step": 17300 }, { "epoch": 0.37, "learning_rate": 9.849e-05, "loss": 2.2613, "step": 17400 }, { "epoch": 0.37, "learning_rate": 9.84695918367347e-05, "loss": 2.2648, "step": 17500 }, { "epoch": 0.37, "learning_rate": 9.844938775510204e-05, "loss": 2.2479, "step": 17600 }, { "epoch": 0.37, "learning_rate": 9.842897959183674e-05, "loss": 2.2456, "step": 17700 }, { "epoch": 0.38, "learning_rate": 9.840857142857142e-05, "loss": 2.2376, "step": 17800 }, { "epoch": 0.38, "learning_rate": 9.838816326530613e-05, "loss": 2.2329, "step": 17900 }, { "epoch": 0.38, "learning_rate": 9.836775510204082e-05, "loss": 2.2304, "step": 18000 }, { "epoch": 0.38, "learning_rate": 9.834734693877552e-05, "loss": 2.2331, "step": 18100 }, { "epoch": 0.39, "learning_rate": 9.832693877551022e-05, "loss": 2.2272, "step": 18200 }, { "epoch": 0.39, "learning_rate": 9.83065306122449e-05, "loss": 2.2291, "step": 18300 }, { "epoch": 0.39, "learning_rate": 9.82861224489796e-05, "loss": 2.2169, "step": 18400 }, { "epoch": 0.39, "learning_rate": 9.82657142857143e-05, "loss": 2.2227, "step": 18500 }, { "epoch": 0.39, "learning_rate": 9.824530612244898e-05, "loss": 2.2115, "step": 18600 }, { "epoch": 0.4, "learning_rate": 9.822489795918368e-05, "loss": 2.2094, "step": 18700 }, { "epoch": 0.4, "learning_rate": 9.820448979591838e-05, "loss": 2.2061, "step": 18800 }, { "epoch": 0.4, "learning_rate": 9.818408163265306e-05, "loss": 2.1997, "step": 18900 }, { "epoch": 0.4, "learning_rate": 9.816367346938776e-05, "loss": 2.1995, "step": 19000 }, { "epoch": 0.4, "learning_rate": 9.814326530612246e-05, "loss": 2.1977, "step": 19100 }, { "epoch": 0.41, "learning_rate": 9.812285714285715e-05, "loss": 2.1991, "step": 19200 }, { "epoch": 0.41, "learning_rate": 9.810244897959184e-05, "loss": 2.1947, "step": 19300 }, { "epoch": 0.41, "learning_rate": 9.808204081632654e-05, "loss": 2.1963, "step": 19400 }, { "epoch": 0.41, "learning_rate": 9.806163265306123e-05, "loss": 2.1837, "step": 19500 }, { "epoch": 0.41, "learning_rate": 9.804122448979593e-05, "loss": 2.1804, "step": 19600 }, { "epoch": 0.42, "learning_rate": 9.802081632653062e-05, "loss": 2.1775, "step": 19700 }, { "epoch": 0.42, "learning_rate": 9.800040816326531e-05, "loss": 2.1752, "step": 19800 }, { "epoch": 0.42, "learning_rate": 9.798000000000001e-05, "loss": 2.1774, "step": 19900 }, { "epoch": 0.42, "learning_rate": 9.795959183673469e-05, "loss": 2.1665, "step": 20000 }, { "epoch": 0.43, "learning_rate": 9.793918367346939e-05, "loss": 2.1627, "step": 20100 }, { "epoch": 0.43, "learning_rate": 9.791877551020409e-05, "loss": 2.1643, "step": 20200 }, { "epoch": 0.43, "learning_rate": 9.789857142857143e-05, "loss": 2.1728, "step": 20300 }, { "epoch": 0.43, "learning_rate": 9.787816326530613e-05, "loss": 2.1629, "step": 20400 }, { "epoch": 0.43, "learning_rate": 9.785775510204081e-05, "loss": 2.1569, "step": 20500 }, { "epoch": 0.44, "learning_rate": 9.783734693877552e-05, "loss": 2.1567, "step": 20600 }, { "epoch": 0.44, "learning_rate": 9.781693877551021e-05, "loss": 2.1566, "step": 20700 }, { "epoch": 0.44, "learning_rate": 9.779673469387755e-05, "loss": 2.1454, "step": 20800 }, { "epoch": 0.44, "learning_rate": 9.777632653061225e-05, "loss": 2.157, "step": 20900 }, { "epoch": 0.44, "learning_rate": 9.775591836734695e-05, "loss": 2.1419, "step": 21000 }, { "epoch": 0.45, "learning_rate": 9.773551020408163e-05, "loss": 2.1389, "step": 21100 }, { "epoch": 0.45, "learning_rate": 9.771510204081633e-05, "loss": 2.1391, "step": 21200 }, { "epoch": 0.45, "learning_rate": 9.769469387755103e-05, "loss": 2.1343, "step": 21300 }, { "epoch": 0.45, "learning_rate": 9.767428571428571e-05, "loss": 2.1381, "step": 21400 }, { "epoch": 0.46, "learning_rate": 9.765387755102041e-05, "loss": 2.1352, "step": 21500 }, { "epoch": 0.46, "learning_rate": 9.763346938775511e-05, "loss": 2.1352, "step": 21600 }, { "epoch": 0.46, "learning_rate": 9.76130612244898e-05, "loss": 2.1262, "step": 21700 }, { "epoch": 0.46, "learning_rate": 9.759265306122449e-05, "loss": 2.133, "step": 21800 }, { "epoch": 0.46, "learning_rate": 9.757224489795919e-05, "loss": 2.1228, "step": 21900 }, { "epoch": 0.47, "learning_rate": 9.755183673469388e-05, "loss": 2.1245, "step": 22000 }, { "epoch": 0.47, "learning_rate": 9.753142857142857e-05, "loss": 2.1176, "step": 22100 }, { "epoch": 0.47, "learning_rate": 9.751102040816327e-05, "loss": 2.1164, "step": 22200 }, { "epoch": 0.47, "learning_rate": 9.749061224489796e-05, "loss": 2.1119, "step": 22300 }, { "epoch": 0.47, "learning_rate": 9.747020408163266e-05, "loss": 2.1172, "step": 22400 }, { "epoch": 0.48, "learning_rate": 9.744979591836735e-05, "loss": 2.1059, "step": 22500 }, { "epoch": 0.48, "learning_rate": 9.742938775510204e-05, "loss": 2.1087, "step": 22600 }, { "epoch": 0.48, "learning_rate": 9.740897959183674e-05, "loss": 2.1014, "step": 22700 }, { "epoch": 0.48, "learning_rate": 9.738857142857142e-05, "loss": 2.11, "step": 22800 }, { "epoch": 0.48, "learning_rate": 9.736816326530612e-05, "loss": 2.0921, "step": 22900 }, { "epoch": 0.49, "learning_rate": 9.734775510204082e-05, "loss": 2.0993, "step": 23000 }, { "epoch": 0.49, "learning_rate": 9.73273469387755e-05, "loss": 2.1137, "step": 23100 }, { "epoch": 0.49, "learning_rate": 9.73069387755102e-05, "loss": 2.0831, "step": 23200 }, { "epoch": 0.49, "learning_rate": 9.72865306122449e-05, "loss": 2.0913, "step": 23300 }, { "epoch": 0.5, "learning_rate": 9.72661224489796e-05, "loss": 2.0882, "step": 23400 }, { "epoch": 0.5, "learning_rate": 9.72457142857143e-05, "loss": 2.0866, "step": 23500 }, { "epoch": 0.5, "learning_rate": 9.7225306122449e-05, "loss": 2.0787, "step": 23600 }, { "epoch": 0.5, "learning_rate": 9.720489795918368e-05, "loss": 2.0838, "step": 23700 }, { "epoch": 0.5, "learning_rate": 9.718448979591838e-05, "loss": 2.0832, "step": 23800 }, { "epoch": 0.51, "learning_rate": 9.716408163265306e-05, "loss": 2.0752, "step": 23900 }, { "epoch": 0.51, "learning_rate": 9.714387755102042e-05, "loss": 2.0825, "step": 24000 }, { "epoch": 0.51, "learning_rate": 9.71234693877551e-05, "loss": 2.0727, "step": 24100 }, { "epoch": 0.51, "learning_rate": 9.71030612244898e-05, "loss": 2.0677, "step": 24200 }, { "epoch": 0.51, "learning_rate": 9.70826530612245e-05, "loss": 2.0616, "step": 24300 }, { "epoch": 0.52, "learning_rate": 9.706224489795918e-05, "loss": 2.0599, "step": 24400 }, { "epoch": 0.52, "learning_rate": 9.704183673469388e-05, "loss": 2.0761, "step": 24500 }, { "epoch": 0.52, "learning_rate": 9.702142857142857e-05, "loss": 2.0647, "step": 24600 }, { "epoch": 0.52, "learning_rate": 9.700102040816327e-05, "loss": 2.064, "step": 24700 }, { "epoch": 0.52, "learning_rate": 9.698061224489796e-05, "loss": 2.0585, "step": 24800 }, { "epoch": 0.53, "learning_rate": 9.696020408163265e-05, "loss": 2.058, "step": 24900 }, { "epoch": 0.53, "learning_rate": 9.693979591836735e-05, "loss": 2.0575, "step": 25000 }, { "epoch": 0.53, "learning_rate": 9.691938775510205e-05, "loss": 2.0467, "step": 25100 }, { "epoch": 0.53, "learning_rate": 9.689897959183673e-05, "loss": 2.0557, "step": 25200 }, { "epoch": 0.54, "learning_rate": 9.687857142857143e-05, "loss": 2.0391, "step": 25300 }, { "epoch": 0.54, "learning_rate": 9.685816326530613e-05, "loss": 2.0377, "step": 25400 }, { "epoch": 0.54, "learning_rate": 9.683775510204081e-05, "loss": 2.0564, "step": 25500 }, { "epoch": 0.54, "learning_rate": 9.681734693877551e-05, "loss": 2.0432, "step": 25600 }, { "epoch": 0.54, "learning_rate": 9.679693877551021e-05, "loss": 2.0504, "step": 25700 }, { "epoch": 0.55, "learning_rate": 9.67765306122449e-05, "loss": 2.0465, "step": 25800 }, { "epoch": 0.55, "learning_rate": 9.675612244897959e-05, "loss": 2.0407, "step": 25900 }, { "epoch": 0.55, "learning_rate": 9.673571428571429e-05, "loss": 2.0432, "step": 26000 }, { "epoch": 0.55, "learning_rate": 9.671530612244898e-05, "loss": 2.0386, "step": 26100 }, { "epoch": 0.55, "learning_rate": 9.669489795918369e-05, "loss": 2.023, "step": 26200 }, { "epoch": 0.56, "learning_rate": 9.667469387755103e-05, "loss": 2.0236, "step": 26300 }, { "epoch": 0.56, "learning_rate": 9.665428571428571e-05, "loss": 2.023, "step": 26400 }, { "epoch": 0.56, "learning_rate": 9.663387755102041e-05, "loss": 2.032, "step": 26500 }, { "epoch": 0.56, "learning_rate": 9.661367346938775e-05, "loss": 2.0356, "step": 26600 }, { "epoch": 0.57, "learning_rate": 9.659326530612245e-05, "loss": 2.0287, "step": 26700 }, { "epoch": 0.57, "learning_rate": 9.657285714285715e-05, "loss": 2.0249, "step": 26800 }, { "epoch": 0.57, "learning_rate": 9.655244897959183e-05, "loss": 2.0241, "step": 26900 }, { "epoch": 0.57, "learning_rate": 9.653204081632653e-05, "loss": 2.0162, "step": 27000 }, { "epoch": 0.57, "learning_rate": 9.651163265306123e-05, "loss": 2.0054, "step": 27100 }, { "epoch": 0.58, "learning_rate": 9.649122448979593e-05, "loss": 2.0278, "step": 27200 }, { "epoch": 0.58, "learning_rate": 9.647081632653063e-05, "loss": 2.0101, "step": 27300 }, { "epoch": 0.58, "learning_rate": 9.645040816326531e-05, "loss": 2.0098, "step": 27400 }, { "epoch": 0.58, "learning_rate": 9.643000000000001e-05, "loss": 2.0176, "step": 27500 }, { "epoch": 0.58, "learning_rate": 9.640959183673471e-05, "loss": 2.0118, "step": 27600 }, { "epoch": 0.59, "learning_rate": 9.63891836734694e-05, "loss": 1.999, "step": 27700 }, { "epoch": 0.59, "learning_rate": 9.636877551020409e-05, "loss": 2.0061, "step": 27800 }, { "epoch": 0.59, "learning_rate": 9.634836734693879e-05, "loss": 1.9981, "step": 27900 }, { "epoch": 0.59, "learning_rate": 9.632795918367347e-05, "loss": 2.0041, "step": 28000 }, { "epoch": 0.59, "learning_rate": 9.630755102040817e-05, "loss": 1.9992, "step": 28100 }, { "epoch": 0.6, "learning_rate": 9.628714285714286e-05, "loss": 2.0004, "step": 28200 }, { "epoch": 0.6, "learning_rate": 9.626673469387756e-05, "loss": 2.0035, "step": 28300 }, { "epoch": 0.6, "learning_rate": 9.624632653061225e-05, "loss": 1.99, "step": 28400 }, { "epoch": 0.6, "learning_rate": 9.622591836734694e-05, "loss": 2.0057, "step": 28500 }, { "epoch": 0.61, "learning_rate": 9.620551020408164e-05, "loss": 1.9969, "step": 28600 }, { "epoch": 0.61, "learning_rate": 9.618510204081634e-05, "loss": 2.0034, "step": 28700 }, { "epoch": 0.61, "learning_rate": 9.616469387755102e-05, "loss": 1.9936, "step": 28800 }, { "epoch": 0.61, "learning_rate": 9.614428571428572e-05, "loss": 1.9894, "step": 28900 }, { "epoch": 0.61, "learning_rate": 9.612387755102042e-05, "loss": 1.9868, "step": 29000 }, { "epoch": 0.62, "learning_rate": 9.61034693877551e-05, "loss": 1.986, "step": 29100 }, { "epoch": 0.62, "learning_rate": 9.60830612244898e-05, "loss": 1.9815, "step": 29200 }, { "epoch": 0.62, "learning_rate": 9.60626530612245e-05, "loss": 1.9786, "step": 29300 }, { "epoch": 0.62, "learning_rate": 9.604224489795918e-05, "loss": 1.9814, "step": 29400 }, { "epoch": 0.62, "learning_rate": 9.602183673469388e-05, "loss": 1.9706, "step": 29500 }, { "epoch": 0.63, "learning_rate": 9.600142857142858e-05, "loss": 1.9836, "step": 29600 }, { "epoch": 0.63, "learning_rate": 9.598102040816327e-05, "loss": 1.9775, "step": 29700 }, { "epoch": 0.63, "learning_rate": 9.596061224489796e-05, "loss": 1.9759, "step": 29800 }, { "epoch": 0.63, "learning_rate": 9.594020408163266e-05, "loss": 1.9742, "step": 29900 }, { "epoch": 0.63, "learning_rate": 9.591979591836735e-05, "loss": 1.9741, "step": 30000 }, { "epoch": 0.64, "learning_rate": 9.589938775510205e-05, "loss": 1.9729, "step": 30100 }, { "epoch": 0.64, "learning_rate": 9.587897959183674e-05, "loss": 1.9715, "step": 30200 }, { "epoch": 0.64, "learning_rate": 9.585857142857143e-05, "loss": 1.9599, "step": 30300 }, { "epoch": 0.64, "learning_rate": 9.583816326530613e-05, "loss": 1.969, "step": 30400 }, { "epoch": 0.65, "learning_rate": 9.581775510204081e-05, "loss": 1.9579, "step": 30500 }, { "epoch": 0.65, "learning_rate": 9.579734693877551e-05, "loss": 1.9747, "step": 30600 }, { "epoch": 0.65, "learning_rate": 9.577693877551021e-05, "loss": 1.9598, "step": 30700 }, { "epoch": 0.65, "learning_rate": 9.57565306122449e-05, "loss": 1.9604, "step": 30800 }, { "epoch": 0.65, "learning_rate": 9.573612244897959e-05, "loss": 1.9619, "step": 30900 }, { "epoch": 0.66, "learning_rate": 9.571591836734695e-05, "loss": 1.9631, "step": 31000 }, { "epoch": 0.66, "learning_rate": 9.569551020408164e-05, "loss": 1.9627, "step": 31100 }, { "epoch": 0.66, "learning_rate": 9.567510204081633e-05, "loss": 1.9624, "step": 31200 }, { "epoch": 0.66, "learning_rate": 9.565469387755103e-05, "loss": 1.957, "step": 31300 }, { "epoch": 0.66, "learning_rate": 9.563428571428573e-05, "loss": 1.9582, "step": 31400 }, { "epoch": 0.67, "learning_rate": 9.561387755102041e-05, "loss": 1.951, "step": 31500 }, { "epoch": 0.67, "learning_rate": 9.559346938775511e-05, "loss": 1.9566, "step": 31600 }, { "epoch": 0.67, "learning_rate": 9.557306122448981e-05, "loss": 1.9542, "step": 31700 }, { "epoch": 0.67, "learning_rate": 9.555265306122449e-05, "loss": 1.9523, "step": 31800 }, { "epoch": 0.68, "learning_rate": 9.553224489795919e-05, "loss": 1.95, "step": 31900 }, { "epoch": 0.68, "learning_rate": 9.551183673469389e-05, "loss": 1.9511, "step": 32000 }, { "epoch": 0.68, "learning_rate": 9.549142857142857e-05, "loss": 1.9521, "step": 32100 }, { "epoch": 0.68, "learning_rate": 9.547102040816327e-05, "loss": 1.9352, "step": 32200 }, { "epoch": 0.68, "learning_rate": 9.545061224489796e-05, "loss": 1.9526, "step": 32300 }, { "epoch": 0.69, "learning_rate": 9.543020408163266e-05, "loss": 1.949, "step": 32400 }, { "epoch": 0.69, "learning_rate": 9.540979591836735e-05, "loss": 1.9462, "step": 32500 }, { "epoch": 0.69, "learning_rate": 9.538938775510204e-05, "loss": 1.9307, "step": 32600 }, { "epoch": 0.69, "learning_rate": 9.536897959183674e-05, "loss": 1.9331, "step": 32700 }, { "epoch": 0.69, "learning_rate": 9.534877551020409e-05, "loss": 1.9348, "step": 32800 }, { "epoch": 0.7, "learning_rate": 9.532836734693879e-05, "loss": 1.9337, "step": 32900 }, { "epoch": 0.7, "learning_rate": 9.530795918367347e-05, "loss": 1.9364, "step": 33000 }, { "epoch": 0.7, "learning_rate": 9.528755102040817e-05, "loss": 1.9354, "step": 33100 }, { "epoch": 0.7, "learning_rate": 9.526714285714287e-05, "loss": 1.9338, "step": 33200 }, { "epoch": 0.7, "learning_rate": 9.524673469387756e-05, "loss": 1.9324, "step": 33300 }, { "epoch": 0.71, "learning_rate": 9.522632653061225e-05, "loss": 1.9245, "step": 33400 }, { "epoch": 0.71, "learning_rate": 9.520591836734695e-05, "loss": 1.9257, "step": 33500 }, { "epoch": 0.71, "learning_rate": 9.518551020408164e-05, "loss": 1.9342, "step": 33600 }, { "epoch": 0.71, "learning_rate": 9.516510204081634e-05, "loss": 1.9234, "step": 33700 }, { "epoch": 0.72, "learning_rate": 9.514469387755103e-05, "loss": 1.9315, "step": 33800 }, { "epoch": 0.72, "learning_rate": 9.512428571428572e-05, "loss": 1.9247, "step": 33900 }, { "epoch": 0.72, "learning_rate": 9.510387755102042e-05, "loss": 1.9185, "step": 34000 }, { "epoch": 0.72, "learning_rate": 9.50834693877551e-05, "loss": 1.9257, "step": 34100 }, { "epoch": 0.72, "learning_rate": 9.50630612244898e-05, "loss": 1.9159, "step": 34200 }, { "epoch": 0.73, "learning_rate": 9.50426530612245e-05, "loss": 1.9157, "step": 34300 }, { "epoch": 0.73, "learning_rate": 9.502224489795918e-05, "loss": 1.9157, "step": 34400 }, { "epoch": 0.73, "learning_rate": 9.500183673469388e-05, "loss": 1.9231, "step": 34500 }, { "epoch": 0.73, "learning_rate": 9.498142857142858e-05, "loss": 1.9168, "step": 34600 }, { "epoch": 0.73, "learning_rate": 9.496102040816327e-05, "loss": 1.9025, "step": 34700 }, { "epoch": 0.74, "learning_rate": 9.494061224489796e-05, "loss": 1.915, "step": 34800 }, { "epoch": 0.74, "learning_rate": 9.492020408163266e-05, "loss": 1.9145, "step": 34900 }, { "epoch": 0.74, "learning_rate": 9.489979591836735e-05, "loss": 1.9116, "step": 35000 }, { "epoch": 0.74, "learning_rate": 9.487938775510205e-05, "loss": 1.9127, "step": 35100 }, { "epoch": 0.74, "learning_rate": 9.485897959183674e-05, "loss": 1.9149, "step": 35200 }, { "epoch": 0.75, "learning_rate": 9.483877551020408e-05, "loss": 1.8986, "step": 35300 }, { "epoch": 0.75, "learning_rate": 9.481836734693877e-05, "loss": 1.9075, "step": 35400 }, { "epoch": 0.75, "learning_rate": 9.479795918367348e-05, "loss": 1.9033, "step": 35500 }, { "epoch": 0.75, "learning_rate": 9.477755102040818e-05, "loss": 1.907, "step": 35600 }, { "epoch": 0.76, "learning_rate": 9.475714285714286e-05, "loss": 1.8974, "step": 35700 }, { "epoch": 0.76, "learning_rate": 9.473673469387756e-05, "loss": 1.9046, "step": 35800 }, { "epoch": 0.76, "learning_rate": 9.471632653061225e-05, "loss": 1.902, "step": 35900 }, { "epoch": 0.76, "learning_rate": 9.469591836734695e-05, "loss": 1.9043, "step": 36000 }, { "epoch": 0.76, "learning_rate": 9.467551020408164e-05, "loss": 1.9067, "step": 36100 }, { "epoch": 0.77, "learning_rate": 9.465510204081633e-05, "loss": 1.8967, "step": 36200 }, { "epoch": 0.77, "learning_rate": 9.463469387755103e-05, "loss": 1.8988, "step": 36300 }, { "epoch": 0.77, "learning_rate": 9.461428571428573e-05, "loss": 1.8956, "step": 36400 }, { "epoch": 0.77, "learning_rate": 9.459387755102041e-05, "loss": 1.9023, "step": 36500 }, { "epoch": 0.77, "learning_rate": 9.457346938775511e-05, "loss": 1.8922, "step": 36600 }, { "epoch": 0.78, "learning_rate": 9.455306122448981e-05, "loss": 1.8957, "step": 36700 }, { "epoch": 0.78, "learning_rate": 9.453265306122449e-05, "loss": 1.8954, "step": 36800 }, { "epoch": 0.78, "learning_rate": 9.451224489795919e-05, "loss": 1.8881, "step": 36900 }, { "epoch": 0.78, "learning_rate": 9.449183673469389e-05, "loss": 1.8904, "step": 37000 }, { "epoch": 0.79, "learning_rate": 9.447142857142857e-05, "loss": 1.8925, "step": 37100 }, { "epoch": 0.79, "learning_rate": 9.445102040816327e-05, "loss": 1.8846, "step": 37200 }, { "epoch": 0.79, "learning_rate": 9.443061224489797e-05, "loss": 1.8902, "step": 37300 }, { "epoch": 0.79, "learning_rate": 9.441020408163266e-05, "loss": 1.891, "step": 37400 }, { "epoch": 0.79, "learning_rate": 9.438979591836735e-05, "loss": 1.8818, "step": 37500 }, { "epoch": 0.8, "learning_rate": 9.436938775510205e-05, "loss": 1.8877, "step": 37600 }, { "epoch": 0.8, "learning_rate": 9.434897959183674e-05, "loss": 1.8846, "step": 37700 }, { "epoch": 0.8, "learning_rate": 9.432857142857143e-05, "loss": 1.8811, "step": 37800 }, { "epoch": 0.8, "learning_rate": 9.430816326530612e-05, "loss": 1.8765, "step": 37900 }, { "epoch": 0.8, "learning_rate": 9.428775510204082e-05, "loss": 1.8753, "step": 38000 }, { "epoch": 0.81, "learning_rate": 9.426734693877552e-05, "loss": 1.8803, "step": 38100 }, { "epoch": 0.81, "learning_rate": 9.424714285714287e-05, "loss": 1.869, "step": 38200 }, { "epoch": 0.81, "learning_rate": 9.422673469387756e-05, "loss": 1.879, "step": 38300 }, { "epoch": 0.81, "learning_rate": 9.420632653061225e-05, "loss": 1.8771, "step": 38400 }, { "epoch": 0.81, "learning_rate": 9.418591836734695e-05, "loss": 1.874, "step": 38500 }, { "epoch": 0.82, "learning_rate": 9.416551020408164e-05, "loss": 1.8774, "step": 38600 }, { "epoch": 0.82, "learning_rate": 9.414510204081634e-05, "loss": 1.8701, "step": 38700 }, { "epoch": 0.82, "learning_rate": 9.412469387755103e-05, "loss": 1.8816, "step": 38800 }, { "epoch": 0.82, "learning_rate": 9.410428571428572e-05, "loss": 1.8751, "step": 38900 }, { "epoch": 0.83, "learning_rate": 9.408387755102042e-05, "loss": 1.8697, "step": 39000 }, { "epoch": 0.83, "learning_rate": 9.406346938775512e-05, "loss": 1.8759, "step": 39100 }, { "epoch": 0.83, "learning_rate": 9.40430612244898e-05, "loss": 1.868, "step": 39200 }, { "epoch": 0.83, "learning_rate": 9.40226530612245e-05, "loss": 1.877, "step": 39300 }, { "epoch": 0.83, "learning_rate": 9.40022448979592e-05, "loss": 1.8579, "step": 39400 }, { "epoch": 0.84, "learning_rate": 9.398183673469388e-05, "loss": 1.864, "step": 39500 }, { "epoch": 0.84, "learning_rate": 9.396142857142858e-05, "loss": 1.8668, "step": 39600 }, { "epoch": 0.84, "learning_rate": 9.394102040816328e-05, "loss": 1.8649, "step": 39700 }, { "epoch": 0.84, "learning_rate": 9.392061224489796e-05, "loss": 1.8577, "step": 39800 }, { "epoch": 0.84, "learning_rate": 9.390020408163266e-05, "loss": 1.8644, "step": 39900 }, { "epoch": 0.85, "learning_rate": 9.387979591836735e-05, "loss": 1.8623, "step": 40000 }, { "epoch": 0.85, "learning_rate": 9.385938775510204e-05, "loss": 1.8702, "step": 40100 }, { "epoch": 0.85, "learning_rate": 9.383918367346939e-05, "loss": 1.8701, "step": 40200 }, { "epoch": 0.85, "learning_rate": 9.381877551020408e-05, "loss": 1.8628, "step": 40300 }, { "epoch": 0.86, "learning_rate": 9.379836734693878e-05, "loss": 1.8651, "step": 40400 }, { "epoch": 0.86, "learning_rate": 9.377795918367347e-05, "loss": 1.8549, "step": 40500 }, { "epoch": 0.86, "learning_rate": 9.375755102040817e-05, "loss": 1.8583, "step": 40600 }, { "epoch": 0.86, "learning_rate": 9.373714285714285e-05, "loss": 1.8585, "step": 40700 }, { "epoch": 0.86, "learning_rate": 9.371673469387755e-05, "loss": 1.8534, "step": 40800 }, { "epoch": 0.87, "learning_rate": 9.369632653061225e-05, "loss": 1.8506, "step": 40900 }, { "epoch": 0.87, "learning_rate": 9.367591836734695e-05, "loss": 1.8517, "step": 41000 }, { "epoch": 0.87, "learning_rate": 9.365551020408164e-05, "loss": 1.8433, "step": 41100 }, { "epoch": 0.87, "learning_rate": 9.363510204081634e-05, "loss": 1.8522, "step": 41200 }, { "epoch": 0.87, "learning_rate": 9.361469387755103e-05, "loss": 1.8476, "step": 41300 }, { "epoch": 0.88, "learning_rate": 9.359428571428573e-05, "loss": 1.8474, "step": 41400 }, { "epoch": 0.88, "learning_rate": 9.357387755102042e-05, "loss": 1.8538, "step": 41500 }, { "epoch": 0.88, "learning_rate": 9.355346938775511e-05, "loss": 1.8529, "step": 41600 }, { "epoch": 0.88, "learning_rate": 9.35330612244898e-05, "loss": 1.8456, "step": 41700 }, { "epoch": 0.88, "learning_rate": 9.351265306122449e-05, "loss": 1.8524, "step": 41800 }, { "epoch": 0.89, "learning_rate": 9.349224489795919e-05, "loss": 1.8396, "step": 41900 }, { "epoch": 0.89, "learning_rate": 9.347183673469389e-05, "loss": 1.8365, "step": 42000 }, { "epoch": 0.89, "learning_rate": 9.345142857142857e-05, "loss": 1.8428, "step": 42100 }, { "epoch": 0.89, "learning_rate": 9.343102040816327e-05, "loss": 1.8381, "step": 42200 }, { "epoch": 0.9, "learning_rate": 9.341061224489797e-05, "loss": 1.8452, "step": 42300 }, { "epoch": 0.9, "learning_rate": 9.339020408163265e-05, "loss": 1.8318, "step": 42400 }, { "epoch": 0.9, "learning_rate": 9.336979591836735e-05, "loss": 1.8449, "step": 42500 }, { "epoch": 0.9, "learning_rate": 9.334938775510205e-05, "loss": 1.8372, "step": 42600 }, { "epoch": 0.9, "learning_rate": 9.332897959183674e-05, "loss": 1.8401, "step": 42700 }, { "epoch": 0.91, "learning_rate": 9.330857142857143e-05, "loss": 1.8383, "step": 42800 }, { "epoch": 0.91, "learning_rate": 9.328816326530613e-05, "loss": 1.8455, "step": 42900 }, { "epoch": 0.91, "learning_rate": 9.326775510204082e-05, "loss": 1.8365, "step": 43000 }, { "epoch": 0.91, "learning_rate": 9.324734693877552e-05, "loss": 1.8363, "step": 43100 }, { "epoch": 0.91, "learning_rate": 9.322693877551021e-05, "loss": 1.8314, "step": 43200 }, { "epoch": 0.92, "learning_rate": 9.32065306122449e-05, "loss": 1.8402, "step": 43300 }, { "epoch": 0.92, "learning_rate": 9.31861224489796e-05, "loss": 1.8266, "step": 43400 }, { "epoch": 0.92, "learning_rate": 9.31657142857143e-05, "loss": 1.8325, "step": 43500 }, { "epoch": 0.92, "learning_rate": 9.314530612244898e-05, "loss": 1.8309, "step": 43600 }, { "epoch": 0.92, "learning_rate": 9.312489795918368e-05, "loss": 1.8327, "step": 43700 }, { "epoch": 0.93, "learning_rate": 9.310448979591836e-05, "loss": 1.831, "step": 43800 }, { "epoch": 0.93, "learning_rate": 9.308408163265306e-05, "loss": 1.8328, "step": 43900 }, { "epoch": 0.93, "learning_rate": 9.306367346938776e-05, "loss": 1.8282, "step": 44000 }, { "epoch": 0.93, "learning_rate": 9.304326530612245e-05, "loss": 1.8296, "step": 44100 }, { "epoch": 0.94, "learning_rate": 9.302285714285714e-05, "loss": 1.8363, "step": 44200 }, { "epoch": 0.94, "learning_rate": 9.30026530612245e-05, "loss": 1.8332, "step": 44300 }, { "epoch": 0.94, "learning_rate": 9.29822448979592e-05, "loss": 1.8283, "step": 44400 }, { "epoch": 0.94, "learning_rate": 9.296183673469388e-05, "loss": 1.8268, "step": 44500 }, { "epoch": 0.94, "learning_rate": 9.294142857142858e-05, "loss": 1.8202, "step": 44600 }, { "epoch": 0.95, "learning_rate": 9.292102040816328e-05, "loss": 1.8201, "step": 44700 }, { "epoch": 0.95, "learning_rate": 9.290061224489796e-05, "loss": 1.8212, "step": 44800 }, { "epoch": 0.95, "learning_rate": 9.28804081632653e-05, "loss": 1.8187, "step": 44900 }, { "epoch": 0.95, "learning_rate": 9.286e-05, "loss": 1.8206, "step": 45000 }, { "epoch": 0.95, "learning_rate": 9.28395918367347e-05, "loss": 1.8195, "step": 45100 }, { "epoch": 0.96, "learning_rate": 9.281918367346939e-05, "loss": 1.8204, "step": 45200 }, { "epoch": 0.96, "learning_rate": 9.279877551020408e-05, "loss": 1.8205, "step": 45300 }, { "epoch": 0.96, "learning_rate": 9.277836734693878e-05, "loss": 1.8046, "step": 45400 }, { "epoch": 0.96, "learning_rate": 9.275795918367347e-05, "loss": 1.8107, "step": 45500 }, { "epoch": 0.97, "learning_rate": 9.273755102040817e-05, "loss": 1.8048, "step": 45600 }, { "epoch": 0.97, "learning_rate": 9.271714285714286e-05, "loss": 1.8062, "step": 45700 }, { "epoch": 0.97, "learning_rate": 9.269673469387755e-05, "loss": 1.8215, "step": 45800 }, { "epoch": 0.97, "learning_rate": 9.267632653061225e-05, "loss": 1.8174, "step": 45900 }, { "epoch": 0.97, "learning_rate": 9.265591836734694e-05, "loss": 1.8117, "step": 46000 }, { "epoch": 0.98, "learning_rate": 9.263551020408163e-05, "loss": 1.814, "step": 46100 }, { "epoch": 0.98, "learning_rate": 9.261510204081633e-05, "loss": 1.8195, "step": 46200 }, { "epoch": 0.98, "learning_rate": 9.259469387755103e-05, "loss": 1.8141, "step": 46300 }, { "epoch": 0.98, "learning_rate": 9.257428571428571e-05, "loss": 1.8168, "step": 46400 }, { "epoch": 0.98, "learning_rate": 9.255387755102042e-05, "loss": 1.8141, "step": 46500 }, { "epoch": 0.99, "learning_rate": 9.253346938775511e-05, "loss": 1.8012, "step": 46600 }, { "epoch": 0.99, "learning_rate": 9.25130612244898e-05, "loss": 1.8045, "step": 46700 }, { "epoch": 0.99, "learning_rate": 9.24926530612245e-05, "loss": 1.802, "step": 46800 }, { "epoch": 0.99, "learning_rate": 9.247224489795919e-05, "loss": 1.7992, "step": 46900 }, { "epoch": 0.99, "learning_rate": 9.245183673469389e-05, "loss": 1.802, "step": 47000 }, { "epoch": 1.0, "learning_rate": 9.243142857142859e-05, "loss": 1.8078, "step": 47100 }, { "epoch": 1.0, "learning_rate": 9.241122448979593e-05, "loss": 1.8033, "step": 47200 }, { "epoch": 1.0, "learning_rate": 9.239081632653061e-05, "loss": 1.8092, "step": 47300 }, { "epoch": 1.0, "learning_rate": 9.237040816326531e-05, "loss": 1.8075, "step": 47400 }, { "epoch": 1.01, "learning_rate": 9.235000000000001e-05, "loss": 1.8011, "step": 47500 }, { "epoch": 1.01, "learning_rate": 9.23295918367347e-05, "loss": 1.7966, "step": 47600 }, { "epoch": 1.01, "learning_rate": 9.230918367346939e-05, "loss": 1.7932, "step": 47700 }, { "epoch": 1.01, "learning_rate": 9.228877551020409e-05, "loss": 1.7997, "step": 47800 }, { "epoch": 1.01, "learning_rate": 9.226836734693877e-05, "loss": 1.7804, "step": 47900 }, { "epoch": 1.02, "learning_rate": 9.224795918367347e-05, "loss": 1.7995, "step": 48000 }, { "epoch": 1.02, "learning_rate": 9.222755102040817e-05, "loss": 1.7975, "step": 48100 }, { "epoch": 1.02, "learning_rate": 9.220714285714286e-05, "loss": 1.7941, "step": 48200 }, { "epoch": 1.02, "learning_rate": 9.218673469387755e-05, "loss": 1.7945, "step": 48300 }, { "epoch": 1.02, "learning_rate": 9.216632653061224e-05, "loss": 1.8069, "step": 48400 }, { "epoch": 1.03, "learning_rate": 9.214591836734694e-05, "loss": 1.7861, "step": 48500 }, { "epoch": 1.03, "learning_rate": 9.212551020408164e-05, "loss": 1.7893, "step": 48600 }, { "epoch": 1.03, "learning_rate": 9.210510204081632e-05, "loss": 1.7968, "step": 48700 }, { "epoch": 1.03, "learning_rate": 9.208469387755102e-05, "loss": 1.7837, "step": 48800 }, { "epoch": 1.03, "learning_rate": 9.206428571428572e-05, "loss": 1.7927, "step": 48900 }, { "epoch": 1.04, "learning_rate": 9.20438775510204e-05, "loss": 1.7835, "step": 49000 }, { "epoch": 1.04, "learning_rate": 9.20234693877551e-05, "loss": 1.786, "step": 49100 }, { "epoch": 1.04, "learning_rate": 9.20030612244898e-05, "loss": 1.7881, "step": 49200 }, { "epoch": 1.04, "learning_rate": 9.198285714285715e-05, "loss": 1.7879, "step": 49300 }, { "epoch": 1.05, "learning_rate": 9.196244897959184e-05, "loss": 1.7805, "step": 49400 }, { "epoch": 1.05, "learning_rate": 9.194204081632654e-05, "loss": 1.7913, "step": 49500 }, { "epoch": 1.05, "learning_rate": 9.192163265306124e-05, "loss": 1.7811, "step": 49600 }, { "epoch": 1.05, "learning_rate": 9.190122448979592e-05, "loss": 1.7742, "step": 49700 }, { "epoch": 1.05, "learning_rate": 9.188081632653062e-05, "loss": 1.7848, "step": 49800 }, { "epoch": 1.06, "learning_rate": 9.186040816326532e-05, "loss": 1.7778, "step": 49900 }, { "epoch": 1.06, "learning_rate": 9.184020408163266e-05, "loss": 1.7794, "step": 50000 }, { "epoch": 1.06, "learning_rate": 9.181979591836734e-05, "loss": 1.781, "step": 50100 }, { "epoch": 1.06, "learning_rate": 9.179938775510204e-05, "loss": 1.7792, "step": 50200 }, { "epoch": 1.06, "learning_rate": 9.177897959183674e-05, "loss": 1.7769, "step": 50300 }, { "epoch": 1.07, "learning_rate": 9.175857142857144e-05, "loss": 1.7735, "step": 50400 }, { "epoch": 1.07, "learning_rate": 9.173816326530614e-05, "loss": 1.7859, "step": 50500 }, { "epoch": 1.07, "learning_rate": 9.171775510204082e-05, "loss": 1.7767, "step": 50600 }, { "epoch": 1.07, "learning_rate": 9.169734693877552e-05, "loss": 1.7792, "step": 50700 }, { "epoch": 1.08, "learning_rate": 9.167693877551022e-05, "loss": 1.7791, "step": 50800 }, { "epoch": 1.08, "learning_rate": 9.16565306122449e-05, "loss": 1.7757, "step": 50900 }, { "epoch": 1.08, "learning_rate": 9.16361224489796e-05, "loss": 1.7723, "step": 51000 }, { "epoch": 1.08, "learning_rate": 9.16157142857143e-05, "loss": 1.7866, "step": 51100 }, { "epoch": 1.08, "learning_rate": 9.159530612244898e-05, "loss": 1.775, "step": 51200 }, { "epoch": 1.09, "learning_rate": 9.157489795918368e-05, "loss": 1.7649, "step": 51300 }, { "epoch": 1.09, "learning_rate": 9.155448979591838e-05, "loss": 1.7735, "step": 51400 }, { "epoch": 1.09, "learning_rate": 9.153408163265307e-05, "loss": 1.7694, "step": 51500 }, { "epoch": 1.09, "learning_rate": 9.151367346938776e-05, "loss": 1.7719, "step": 51600 }, { "epoch": 1.09, "learning_rate": 9.149326530612246e-05, "loss": 1.7653, "step": 51700 }, { "epoch": 1.1, "learning_rate": 9.147285714285715e-05, "loss": 1.7661, "step": 51800 }, { "epoch": 1.1, "learning_rate": 9.145244897959185e-05, "loss": 1.7754, "step": 51900 }, { "epoch": 1.1, "learning_rate": 9.143204081632653e-05, "loss": 1.7763, "step": 52000 }, { "epoch": 1.1, "learning_rate": 9.141163265306123e-05, "loss": 1.7632, "step": 52100 }, { "epoch": 1.1, "learning_rate": 9.139122448979593e-05, "loss": 1.7677, "step": 52200 }, { "epoch": 1.11, "learning_rate": 9.137081632653061e-05, "loss": 1.7678, "step": 52300 }, { "epoch": 1.11, "learning_rate": 9.135040816326531e-05, "loss": 1.7653, "step": 52400 }, { "epoch": 1.11, "learning_rate": 9.133000000000001e-05, "loss": 1.7606, "step": 52500 }, { "epoch": 1.11, "learning_rate": 9.130959183673469e-05, "loss": 1.763, "step": 52600 }, { "epoch": 1.12, "learning_rate": 9.128918367346939e-05, "loss": 1.7578, "step": 52700 }, { "epoch": 1.12, "learning_rate": 9.126877551020409e-05, "loss": 1.7688, "step": 52800 }, { "epoch": 1.12, "learning_rate": 9.124836734693877e-05, "loss": 1.7617, "step": 52900 }, { "epoch": 1.12, "learning_rate": 9.122795918367347e-05, "loss": 1.7671, "step": 53000 }, { "epoch": 1.12, "learning_rate": 9.120755102040817e-05, "loss": 1.7587, "step": 53100 }, { "epoch": 1.13, "learning_rate": 9.118714285714286e-05, "loss": 1.7603, "step": 53200 }, { "epoch": 1.13, "learning_rate": 9.116673469387755e-05, "loss": 1.7619, "step": 53300 }, { "epoch": 1.13, "learning_rate": 9.114632653061225e-05, "loss": 1.7661, "step": 53400 }, { "epoch": 1.13, "learning_rate": 9.112591836734694e-05, "loss": 1.7594, "step": 53500 }, { "epoch": 1.13, "learning_rate": 9.110551020408164e-05, "loss": 1.7596, "step": 53600 }, { "epoch": 1.14, "learning_rate": 9.108510204081633e-05, "loss": 1.7566, "step": 53700 }, { "epoch": 1.14, "learning_rate": 9.106469387755102e-05, "loss": 1.7634, "step": 53800 }, { "epoch": 1.14, "learning_rate": 9.104428571428572e-05, "loss": 1.7567, "step": 53900 }, { "epoch": 1.14, "learning_rate": 9.10238775510204e-05, "loss": 1.7582, "step": 54000 }, { "epoch": 1.14, "learning_rate": 9.10034693877551e-05, "loss": 1.7571, "step": 54100 }, { "epoch": 1.15, "learning_rate": 9.09830612244898e-05, "loss": 1.7602, "step": 54200 }, { "epoch": 1.15, "learning_rate": 9.096265306122448e-05, "loss": 1.7547, "step": 54300 }, { "epoch": 1.15, "learning_rate": 9.094224489795918e-05, "loss": 1.756, "step": 54400 }, { "epoch": 1.15, "learning_rate": 9.092204081632654e-05, "loss": 1.7572, "step": 54500 }, { "epoch": 1.16, "learning_rate": 9.090163265306123e-05, "loss": 1.7537, "step": 54600 }, { "epoch": 1.16, "learning_rate": 9.088122448979592e-05, "loss": 1.7488, "step": 54700 }, { "epoch": 1.16, "learning_rate": 9.086081632653062e-05, "loss": 1.749, "step": 54800 }, { "epoch": 1.16, "learning_rate": 9.084040816326532e-05, "loss": 1.7595, "step": 54900 }, { "epoch": 1.16, "learning_rate": 9.082e-05, "loss": 1.748, "step": 55000 }, { "epoch": 1.17, "learning_rate": 9.07995918367347e-05, "loss": 1.7583, "step": 55100 }, { "epoch": 1.17, "learning_rate": 9.07791836734694e-05, "loss": 1.7517, "step": 55200 }, { "epoch": 1.17, "learning_rate": 9.075877551020408e-05, "loss": 1.7597, "step": 55300 }, { "epoch": 1.17, "learning_rate": 9.073836734693878e-05, "loss": 1.7466, "step": 55400 }, { "epoch": 1.17, "learning_rate": 9.071795918367348e-05, "loss": 1.7591, "step": 55500 }, { "epoch": 1.18, "learning_rate": 9.069755102040816e-05, "loss": 1.7553, "step": 55600 }, { "epoch": 1.18, "learning_rate": 9.067714285714286e-05, "loss": 1.7522, "step": 55700 }, { "epoch": 1.18, "learning_rate": 9.065673469387755e-05, "loss": 1.7545, "step": 55800 }, { "epoch": 1.18, "learning_rate": 9.063632653061225e-05, "loss": 1.7465, "step": 55900 }, { "epoch": 1.19, "learning_rate": 9.061591836734694e-05, "loss": 1.7485, "step": 56000 }, { "epoch": 1.19, "learning_rate": 9.059551020408163e-05, "loss": 1.7433, "step": 56100 }, { "epoch": 1.19, "learning_rate": 9.057510204081633e-05, "loss": 1.7459, "step": 56200 }, { "epoch": 1.19, "learning_rate": 9.055469387755103e-05, "loss": 1.7517, "step": 56300 }, { "epoch": 1.19, "learning_rate": 9.053428571428571e-05, "loss": 1.7461, "step": 56400 }, { "epoch": 1.2, "learning_rate": 9.051387755102041e-05, "loss": 1.7453, "step": 56500 }, { "epoch": 1.2, "learning_rate": 9.049346938775511e-05, "loss": 1.7475, "step": 56600 }, { "epoch": 1.2, "learning_rate": 9.047326530612246e-05, "loss": 1.7362, "step": 56700 }, { "epoch": 1.2, "learning_rate": 9.045285714285715e-05, "loss": 1.7442, "step": 56800 }, { "epoch": 1.2, "learning_rate": 9.043244897959184e-05, "loss": 1.7404, "step": 56900 }, { "epoch": 1.21, "learning_rate": 9.041204081632654e-05, "loss": 1.7508, "step": 57000 }, { "epoch": 1.21, "learning_rate": 9.039163265306123e-05, "loss": 1.74, "step": 57100 }, { "epoch": 1.21, "learning_rate": 9.037122448979593e-05, "loss": 1.7411, "step": 57200 }, { "epoch": 1.21, "learning_rate": 9.035081632653062e-05, "loss": 1.7488, "step": 57300 }, { "epoch": 1.21, "learning_rate": 9.033040816326531e-05, "loss": 1.7416, "step": 57400 }, { "epoch": 1.22, "learning_rate": 9.031000000000001e-05, "loss": 1.7335, "step": 57500 }, { "epoch": 1.22, "learning_rate": 9.028959183673469e-05, "loss": 1.7294, "step": 57600 }, { "epoch": 1.22, "learning_rate": 9.026918367346939e-05, "loss": 1.7356, "step": 57700 }, { "epoch": 1.22, "learning_rate": 9.024877551020409e-05, "loss": 1.737, "step": 57800 }, { "epoch": 1.23, "learning_rate": 9.022836734693877e-05, "loss": 1.7334, "step": 57900 }, { "epoch": 1.23, "learning_rate": 9.020795918367347e-05, "loss": 1.739, "step": 58000 }, { "epoch": 1.23, "learning_rate": 9.018755102040817e-05, "loss": 1.7376, "step": 58100 }, { "epoch": 1.23, "learning_rate": 9.016714285714286e-05, "loss": 1.7282, "step": 58200 }, { "epoch": 1.23, "learning_rate": 9.014673469387755e-05, "loss": 1.7373, "step": 58300 }, { "epoch": 1.24, "learning_rate": 9.012632653061225e-05, "loss": 1.7267, "step": 58400 }, { "epoch": 1.24, "learning_rate": 9.010591836734694e-05, "loss": 1.7359, "step": 58500 }, { "epoch": 1.24, "learning_rate": 9.008551020408164e-05, "loss": 1.7355, "step": 58600 }, { "epoch": 1.24, "learning_rate": 9.006510204081633e-05, "loss": 1.7232, "step": 58700 }, { "epoch": 1.24, "learning_rate": 9.004469387755102e-05, "loss": 1.7437, "step": 58800 }, { "epoch": 1.25, "learning_rate": 9.002428571428572e-05, "loss": 1.7298, "step": 58900 }, { "epoch": 1.25, "learning_rate": 9.000387755102042e-05, "loss": 1.741, "step": 59000 }, { "epoch": 1.25, "learning_rate": 8.998367346938777e-05, "loss": 1.7277, "step": 59100 }, { "epoch": 1.25, "learning_rate": 8.996326530612245e-05, "loss": 1.7175, "step": 59200 }, { "epoch": 1.26, "learning_rate": 8.994285714285715e-05, "loss": 1.7265, "step": 59300 }, { "epoch": 1.26, "learning_rate": 8.992244897959185e-05, "loss": 1.7356, "step": 59400 }, { "epoch": 1.26, "learning_rate": 8.990204081632654e-05, "loss": 1.7379, "step": 59500 }, { "epoch": 1.26, "learning_rate": 8.988163265306123e-05, "loss": 1.7247, "step": 59600 }, { "epoch": 1.26, "learning_rate": 8.986122448979592e-05, "loss": 1.7357, "step": 59700 }, { "epoch": 1.27, "learning_rate": 8.984081632653062e-05, "loss": 1.7294, "step": 59800 }, { "epoch": 1.27, "learning_rate": 8.982040816326532e-05, "loss": 1.7194, "step": 59900 }, { "epoch": 1.27, "learning_rate": 8.98e-05, "loss": 1.732, "step": 60000 }, { "epoch": 1.27, "learning_rate": 8.97795918367347e-05, "loss": 1.7268, "step": 60100 }, { "epoch": 1.27, "learning_rate": 8.97591836734694e-05, "loss": 1.7266, "step": 60200 }, { "epoch": 1.28, "learning_rate": 8.973877551020408e-05, "loss": 1.7285, "step": 60300 }, { "epoch": 1.28, "learning_rate": 8.971836734693878e-05, "loss": 1.7191, "step": 60400 }, { "epoch": 1.28, "learning_rate": 8.969795918367348e-05, "loss": 1.7208, "step": 60500 }, { "epoch": 1.28, "learning_rate": 8.967755102040816e-05, "loss": 1.7213, "step": 60600 }, { "epoch": 1.28, "learning_rate": 8.965714285714286e-05, "loss": 1.7199, "step": 60700 }, { "epoch": 1.29, "learning_rate": 8.963673469387756e-05, "loss": 1.7276, "step": 60800 }, { "epoch": 1.29, "learning_rate": 8.961632653061225e-05, "loss": 1.725, "step": 60900 }, { "epoch": 1.29, "learning_rate": 8.959591836734694e-05, "loss": 1.7227, "step": 61000 }, { "epoch": 1.29, "learning_rate": 8.957551020408164e-05, "loss": 1.7272, "step": 61100 }, { "epoch": 1.3, "learning_rate": 8.955510204081633e-05, "loss": 1.715, "step": 61200 }, { "epoch": 1.3, "learning_rate": 8.953489795918367e-05, "loss": 1.7319, "step": 61300 }, { "epoch": 1.3, "learning_rate": 8.951448979591838e-05, "loss": 1.7271, "step": 61400 }, { "epoch": 1.3, "learning_rate": 8.949408163265306e-05, "loss": 1.7232, "step": 61500 }, { "epoch": 1.3, "learning_rate": 8.947367346938776e-05, "loss": 1.7178, "step": 61600 }, { "epoch": 1.31, "learning_rate": 8.945326530612246e-05, "loss": 1.7232, "step": 61700 }, { "epoch": 1.31, "learning_rate": 8.943285714285715e-05, "loss": 1.7197, "step": 61800 }, { "epoch": 1.31, "learning_rate": 8.941244897959184e-05, "loss": 1.7137, "step": 61900 }, { "epoch": 1.31, "learning_rate": 8.939204081632654e-05, "loss": 1.7153, "step": 62000 }, { "epoch": 1.31, "learning_rate": 8.937183673469388e-05, "loss": 1.716, "step": 62100 }, { "epoch": 1.32, "learning_rate": 8.935142857142857e-05, "loss": 1.7187, "step": 62200 }, { "epoch": 1.32, "learning_rate": 8.933102040816327e-05, "loss": 1.7137, "step": 62300 }, { "epoch": 1.32, "learning_rate": 8.931061224489797e-05, "loss": 1.7138, "step": 62400 }, { "epoch": 1.32, "learning_rate": 8.929020408163265e-05, "loss": 1.7162, "step": 62500 }, { "epoch": 1.32, "learning_rate": 8.926979591836735e-05, "loss": 1.7143, "step": 62600 }, { "epoch": 1.33, "learning_rate": 8.924938775510205e-05, "loss": 1.7056, "step": 62700 }, { "epoch": 1.33, "learning_rate": 8.922897959183673e-05, "loss": 1.7168, "step": 62800 }, { "epoch": 1.33, "learning_rate": 8.920857142857143e-05, "loss": 1.7115, "step": 62900 }, { "epoch": 1.33, "learning_rate": 8.918816326530613e-05, "loss": 1.7186, "step": 63000 }, { "epoch": 1.34, "learning_rate": 8.916775510204081e-05, "loss": 1.7073, "step": 63100 }, { "epoch": 1.34, "learning_rate": 8.914734693877551e-05, "loss": 1.706, "step": 63200 }, { "epoch": 1.34, "learning_rate": 8.912693877551021e-05, "loss": 1.7135, "step": 63300 }, { "epoch": 1.34, "learning_rate": 8.91065306122449e-05, "loss": 1.7051, "step": 63400 }, { "epoch": 1.34, "learning_rate": 8.908612244897959e-05, "loss": 1.702, "step": 63500 }, { "epoch": 1.35, "learning_rate": 8.906571428571429e-05, "loss": 1.7151, "step": 63600 }, { "epoch": 1.35, "learning_rate": 8.904530612244898e-05, "loss": 1.706, "step": 63700 }, { "epoch": 1.35, "learning_rate": 8.902489795918367e-05, "loss": 1.7042, "step": 63800 }, { "epoch": 1.35, "learning_rate": 8.900448979591837e-05, "loss": 1.7106, "step": 63900 }, { "epoch": 1.35, "learning_rate": 8.898408163265306e-05, "loss": 1.7157, "step": 64000 }, { "epoch": 1.36, "learning_rate": 8.896367346938776e-05, "loss": 1.707, "step": 64100 }, { "epoch": 1.36, "learning_rate": 8.894326530612245e-05, "loss": 1.7065, "step": 64200 }, { "epoch": 1.36, "learning_rate": 8.892285714285715e-05, "loss": 1.7028, "step": 64300 }, { "epoch": 1.36, "learning_rate": 8.890244897959185e-05, "loss": 1.7071, "step": 64400 }, { "epoch": 1.37, "learning_rate": 8.888204081632654e-05, "loss": 1.7122, "step": 64500 }, { "epoch": 1.37, "learning_rate": 8.886163265306123e-05, "loss": 1.7008, "step": 64600 }, { "epoch": 1.37, "learning_rate": 8.884122448979593e-05, "loss": 1.6919, "step": 64700 }, { "epoch": 1.37, "learning_rate": 8.882081632653062e-05, "loss": 1.6971, "step": 64800 }, { "epoch": 1.37, "learning_rate": 8.880061224489796e-05, "loss": 1.6916, "step": 64900 }, { "epoch": 1.38, "learning_rate": 8.878020408163266e-05, "loss": 1.7018, "step": 65000 }, { "epoch": 1.38, "learning_rate": 8.875979591836735e-05, "loss": 1.6992, "step": 65100 }, { "epoch": 1.38, "learning_rate": 8.873938775510204e-05, "loss": 1.7048, "step": 65200 }, { "epoch": 1.38, "learning_rate": 8.871897959183674e-05, "loss": 1.698, "step": 65300 }, { "epoch": 1.38, "learning_rate": 8.869857142857144e-05, "loss": 1.7036, "step": 65400 }, { "epoch": 1.39, "learning_rate": 8.867816326530612e-05, "loss": 1.7019, "step": 65500 }, { "epoch": 1.39, "learning_rate": 8.865775510204082e-05, "loss": 1.7049, "step": 65600 }, { "epoch": 1.39, "learning_rate": 8.863734693877552e-05, "loss": 1.6903, "step": 65700 }, { "epoch": 1.39, "learning_rate": 8.86169387755102e-05, "loss": 1.7013, "step": 65800 }, { "epoch": 1.39, "learning_rate": 8.85965306122449e-05, "loss": 1.6985, "step": 65900 }, { "epoch": 1.4, "learning_rate": 8.85761224489796e-05, "loss": 1.6899, "step": 66000 }, { "epoch": 1.4, "learning_rate": 8.855571428571428e-05, "loss": 1.7036, "step": 66100 }, { "epoch": 1.4, "learning_rate": 8.853530612244898e-05, "loss": 1.6986, "step": 66200 }, { "epoch": 1.4, "learning_rate": 8.851489795918367e-05, "loss": 1.6894, "step": 66300 }, { "epoch": 1.41, "learning_rate": 8.849448979591837e-05, "loss": 1.6953, "step": 66400 }, { "epoch": 1.41, "learning_rate": 8.847408163265306e-05, "loss": 1.6989, "step": 66500 }, { "epoch": 1.41, "learning_rate": 8.845367346938775e-05, "loss": 1.6929, "step": 66600 }, { "epoch": 1.41, "learning_rate": 8.843326530612245e-05, "loss": 1.6943, "step": 66700 }, { "epoch": 1.41, "learning_rate": 8.841285714285715e-05, "loss": 1.6957, "step": 66800 }, { "epoch": 1.42, "learning_rate": 8.839244897959184e-05, "loss": 1.6895, "step": 66900 }, { "epoch": 1.42, "learning_rate": 8.837204081632654e-05, "loss": 1.6964, "step": 67000 }, { "epoch": 1.42, "learning_rate": 8.835163265306123e-05, "loss": 1.6901, "step": 67100 }, { "epoch": 1.42, "learning_rate": 8.833122448979593e-05, "loss": 1.7021, "step": 67200 }, { "epoch": 1.42, "learning_rate": 8.831081632653062e-05, "loss": 1.6999, "step": 67300 }, { "epoch": 1.43, "learning_rate": 8.829040816326531e-05, "loss": 1.695, "step": 67400 }, { "epoch": 1.43, "learning_rate": 8.827000000000001e-05, "loss": 1.6981, "step": 67500 }, { "epoch": 1.43, "learning_rate": 8.82495918367347e-05, "loss": 1.6905, "step": 67600 }, { "epoch": 1.43, "learning_rate": 8.822918367346939e-05, "loss": 1.6954, "step": 67700 }, { "epoch": 1.43, "learning_rate": 8.820877551020409e-05, "loss": 1.6976, "step": 67800 }, { "epoch": 1.44, "learning_rate": 8.818836734693879e-05, "loss": 1.69, "step": 67900 }, { "epoch": 1.44, "learning_rate": 8.816795918367347e-05, "loss": 1.6851, "step": 68000 }, { "epoch": 1.44, "learning_rate": 8.814755102040817e-05, "loss": 1.6923, "step": 68100 }, { "epoch": 1.44, "learning_rate": 8.812714285714287e-05, "loss": 1.6876, "step": 68200 }, { "epoch": 1.45, "learning_rate": 8.810673469387755e-05, "loss": 1.6924, "step": 68300 }, { "epoch": 1.45, "learning_rate": 8.808632653061225e-05, "loss": 1.6842, "step": 68400 }, { "epoch": 1.45, "learning_rate": 8.806591836734695e-05, "loss": 1.6818, "step": 68500 }, { "epoch": 1.45, "learning_rate": 8.804551020408163e-05, "loss": 1.6891, "step": 68600 }, { "epoch": 1.45, "learning_rate": 8.802510204081633e-05, "loss": 1.6815, "step": 68700 }, { "epoch": 1.46, "learning_rate": 8.800469387755103e-05, "loss": 1.6905, "step": 68800 }, { "epoch": 1.46, "learning_rate": 8.798428571428572e-05, "loss": 1.6864, "step": 68900 }, { "epoch": 1.46, "learning_rate": 8.796408163265306e-05, "loss": 1.6832, "step": 69000 }, { "epoch": 1.46, "learning_rate": 8.794367346938776e-05, "loss": 1.6885, "step": 69100 }, { "epoch": 1.46, "learning_rate": 8.792326530612245e-05, "loss": 1.6906, "step": 69200 }, { "epoch": 1.47, "learning_rate": 8.790285714285714e-05, "loss": 1.6818, "step": 69300 }, { "epoch": 1.47, "learning_rate": 8.788265306122449e-05, "loss": 1.6789, "step": 69400 }, { "epoch": 1.47, "learning_rate": 8.786224489795919e-05, "loss": 1.6823, "step": 69500 }, { "epoch": 1.47, "learning_rate": 8.784183673469389e-05, "loss": 1.683, "step": 69600 }, { "epoch": 1.48, "learning_rate": 8.782142857142857e-05, "loss": 1.6849, "step": 69700 }, { "epoch": 1.48, "learning_rate": 8.780102040816327e-05, "loss": 1.6844, "step": 69800 }, { "epoch": 1.48, "learning_rate": 8.778061224489796e-05, "loss": 1.6798, "step": 69900 }, { "epoch": 1.48, "learning_rate": 8.776040816326531e-05, "loss": 1.6715, "step": 70000 }, { "epoch": 1.48, "learning_rate": 8.774e-05, "loss": 1.6838, "step": 70100 }, { "epoch": 1.49, "learning_rate": 8.77195918367347e-05, "loss": 1.6847, "step": 70200 }, { "epoch": 1.49, "learning_rate": 8.76991836734694e-05, "loss": 1.6865, "step": 70300 }, { "epoch": 1.49, "learning_rate": 8.767877551020408e-05, "loss": 1.6788, "step": 70400 }, { "epoch": 1.49, "learning_rate": 8.765836734693878e-05, "loss": 1.6836, "step": 70500 }, { "epoch": 1.49, "learning_rate": 8.763795918367346e-05, "loss": 1.6776, "step": 70600 }, { "epoch": 1.5, "learning_rate": 8.761755102040817e-05, "loss": 1.6734, "step": 70700 }, { "epoch": 1.5, "learning_rate": 8.759714285714287e-05, "loss": 1.6777, "step": 70800 }, { "epoch": 1.5, "learning_rate": 8.757673469387756e-05, "loss": 1.6768, "step": 70900 }, { "epoch": 1.5, "learning_rate": 8.755632653061225e-05, "loss": 1.6786, "step": 71000 }, { "epoch": 1.5, "learning_rate": 8.753591836734695e-05, "loss": 1.6835, "step": 71100 }, { "epoch": 1.51, "learning_rate": 8.751551020408164e-05, "loss": 1.6744, "step": 71200 }, { "epoch": 1.51, "learning_rate": 8.749510204081634e-05, "loss": 1.6714, "step": 71300 }, { "epoch": 1.51, "learning_rate": 8.747469387755103e-05, "loss": 1.6782, "step": 71400 }, { "epoch": 1.51, "learning_rate": 8.745428571428572e-05, "loss": 1.6745, "step": 71500 }, { "epoch": 1.52, "learning_rate": 8.743387755102042e-05, "loss": 1.6676, "step": 71600 }, { "epoch": 1.52, "learning_rate": 8.74134693877551e-05, "loss": 1.6725, "step": 71700 }, { "epoch": 1.52, "learning_rate": 8.73930612244898e-05, "loss": 1.6674, "step": 71800 }, { "epoch": 1.52, "learning_rate": 8.73726530612245e-05, "loss": 1.676, "step": 71900 }, { "epoch": 1.52, "learning_rate": 8.735224489795918e-05, "loss": 1.6717, "step": 72000 }, { "epoch": 1.53, "learning_rate": 8.733183673469388e-05, "loss": 1.6677, "step": 72100 }, { "epoch": 1.53, "learning_rate": 8.731142857142858e-05, "loss": 1.6684, "step": 72200 }, { "epoch": 1.53, "learning_rate": 8.729102040816327e-05, "loss": 1.6703, "step": 72300 }, { "epoch": 1.53, "learning_rate": 8.727061224489796e-05, "loss": 1.6717, "step": 72400 }, { "epoch": 1.53, "learning_rate": 8.725020408163266e-05, "loss": 1.672, "step": 72500 }, { "epoch": 1.54, "learning_rate": 8.722979591836735e-05, "loss": 1.6617, "step": 72600 }, { "epoch": 1.54, "learning_rate": 8.720938775510205e-05, "loss": 1.673, "step": 72700 }, { "epoch": 1.54, "learning_rate": 8.718897959183674e-05, "loss": 1.6649, "step": 72800 }, { "epoch": 1.54, "learning_rate": 8.716857142857143e-05, "loss": 1.6754, "step": 72900 }, { "epoch": 1.54, "learning_rate": 8.714816326530613e-05, "loss": 1.6693, "step": 73000 }, { "epoch": 1.55, "learning_rate": 8.712775510204083e-05, "loss": 1.6612, "step": 73100 }, { "epoch": 1.55, "learning_rate": 8.710734693877551e-05, "loss": 1.6712, "step": 73200 }, { "epoch": 1.55, "learning_rate": 8.708693877551021e-05, "loss": 1.6641, "step": 73300 }, { "epoch": 1.55, "learning_rate": 8.706653061224491e-05, "loss": 1.664, "step": 73400 }, { "epoch": 1.56, "learning_rate": 8.704612244897959e-05, "loss": 1.6667, "step": 73500 }, { "epoch": 1.56, "learning_rate": 8.702571428571429e-05, "loss": 1.6604, "step": 73600 }, { "epoch": 1.56, "learning_rate": 8.700530612244898e-05, "loss": 1.6719, "step": 73700 }, { "epoch": 1.56, "learning_rate": 8.698489795918367e-05, "loss": 1.6651, "step": 73800 }, { "epoch": 1.56, "learning_rate": 8.696448979591837e-05, "loss": 1.6675, "step": 73900 }, { "epoch": 1.57, "learning_rate": 8.694408163265306e-05, "loss": 1.6611, "step": 74000 }, { "epoch": 1.57, "learning_rate": 8.692367346938776e-05, "loss": 1.6659, "step": 74100 }, { "epoch": 1.57, "learning_rate": 8.690326530612245e-05, "loss": 1.6709, "step": 74200 }, { "epoch": 1.57, "learning_rate": 8.688285714285714e-05, "loss": 1.6553, "step": 74300 }, { "epoch": 1.57, "learning_rate": 8.686244897959184e-05, "loss": 1.6654, "step": 74400 }, { "epoch": 1.58, "learning_rate": 8.684204081632653e-05, "loss": 1.6549, "step": 74500 }, { "epoch": 1.58, "learning_rate": 8.682183673469389e-05, "loss": 1.6685, "step": 74600 }, { "epoch": 1.58, "learning_rate": 8.680142857142857e-05, "loss": 1.6552, "step": 74700 }, { "epoch": 1.58, "learning_rate": 8.678102040816327e-05, "loss": 1.6568, "step": 74800 }, { "epoch": 1.59, "learning_rate": 8.676061224489797e-05, "loss": 1.6621, "step": 74900 }, { "epoch": 1.59, "learning_rate": 8.674020408163266e-05, "loss": 1.665, "step": 75000 }, { "epoch": 1.59, "learning_rate": 8.671979591836735e-05, "loss": 1.6602, "step": 75100 }, { "epoch": 1.59, "learning_rate": 8.669938775510205e-05, "loss": 1.6588, "step": 75200 }, { "epoch": 1.59, "learning_rate": 8.667897959183674e-05, "loss": 1.659, "step": 75300 }, { "epoch": 1.6, "learning_rate": 8.665857142857144e-05, "loss": 1.6661, "step": 75400 }, { "epoch": 1.6, "learning_rate": 8.663816326530612e-05, "loss": 1.6632, "step": 75500 }, { "epoch": 1.6, "learning_rate": 8.661775510204082e-05, "loss": 1.6646, "step": 75600 }, { "epoch": 1.6, "learning_rate": 8.659734693877552e-05, "loss": 1.6618, "step": 75700 }, { "epoch": 1.6, "learning_rate": 8.65769387755102e-05, "loss": 1.6599, "step": 75800 }, { "epoch": 1.61, "learning_rate": 8.65565306122449e-05, "loss": 1.6611, "step": 75900 }, { "epoch": 1.61, "learning_rate": 8.65361224489796e-05, "loss": 1.6468, "step": 76000 }, { "epoch": 1.61, "learning_rate": 8.651571428571428e-05, "loss": 1.6629, "step": 76100 }, { "epoch": 1.61, "learning_rate": 8.649530612244898e-05, "loss": 1.6565, "step": 76200 }, { "epoch": 1.61, "learning_rate": 8.647489795918368e-05, "loss": 1.6618, "step": 76300 }, { "epoch": 1.62, "learning_rate": 8.645448979591836e-05, "loss": 1.6577, "step": 76400 }, { "epoch": 1.62, "learning_rate": 8.643408163265306e-05, "loss": 1.6609, "step": 76500 }, { "epoch": 1.62, "learning_rate": 8.641367346938776e-05, "loss": 1.656, "step": 76600 }, { "epoch": 1.62, "learning_rate": 8.639326530612245e-05, "loss": 1.6584, "step": 76700 }, { "epoch": 1.63, "learning_rate": 8.637285714285714e-05, "loss": 1.6525, "step": 76800 }, { "epoch": 1.63, "learning_rate": 8.63526530612245e-05, "loss": 1.6497, "step": 76900 }, { "epoch": 1.63, "learning_rate": 8.63322448979592e-05, "loss": 1.6528, "step": 77000 }, { "epoch": 1.63, "learning_rate": 8.631183673469388e-05, "loss": 1.6534, "step": 77100 }, { "epoch": 1.63, "learning_rate": 8.629142857142858e-05, "loss": 1.6549, "step": 77200 }, { "epoch": 1.64, "learning_rate": 8.627102040816328e-05, "loss": 1.6509, "step": 77300 }, { "epoch": 1.64, "learning_rate": 8.625061224489796e-05, "loss": 1.6506, "step": 77400 }, { "epoch": 1.64, "learning_rate": 8.623020408163266e-05, "loss": 1.6498, "step": 77500 }, { "epoch": 1.64, "learning_rate": 8.620979591836735e-05, "loss": 1.6564, "step": 77600 }, { "epoch": 1.64, "learning_rate": 8.618938775510205e-05, "loss": 1.6501, "step": 77700 }, { "epoch": 1.65, "learning_rate": 8.616897959183674e-05, "loss": 1.6487, "step": 77800 }, { "epoch": 1.65, "learning_rate": 8.614857142857143e-05, "loss": 1.6444, "step": 77900 }, { "epoch": 1.65, "learning_rate": 8.612816326530613e-05, "loss": 1.6475, "step": 78000 }, { "epoch": 1.65, "learning_rate": 8.610775510204083e-05, "loss": 1.6496, "step": 78100 }, { "epoch": 1.66, "learning_rate": 8.608734693877551e-05, "loss": 1.6542, "step": 78200 }, { "epoch": 1.66, "learning_rate": 8.606693877551021e-05, "loss": 1.6604, "step": 78300 }, { "epoch": 1.66, "learning_rate": 8.60465306122449e-05, "loss": 1.6454, "step": 78400 }, { "epoch": 1.66, "learning_rate": 8.602612244897959e-05, "loss": 1.6475, "step": 78500 }, { "epoch": 1.66, "learning_rate": 8.600571428571429e-05, "loss": 1.6516, "step": 78600 }, { "epoch": 1.67, "learning_rate": 8.598530612244899e-05, "loss": 1.6451, "step": 78700 }, { "epoch": 1.67, "learning_rate": 8.596489795918367e-05, "loss": 1.6511, "step": 78800 }, { "epoch": 1.67, "learning_rate": 8.594448979591837e-05, "loss": 1.6444, "step": 78900 }, { "epoch": 1.67, "learning_rate": 8.592408163265307e-05, "loss": 1.6485, "step": 79000 }, { "epoch": 1.67, "learning_rate": 8.590367346938775e-05, "loss": 1.6492, "step": 79100 }, { "epoch": 1.68, "learning_rate": 8.588326530612245e-05, "loss": 1.6386, "step": 79200 }, { "epoch": 1.68, "learning_rate": 8.586285714285715e-05, "loss": 1.6482, "step": 79300 }, { "epoch": 1.68, "learning_rate": 8.584244897959184e-05, "loss": 1.6493, "step": 79400 }, { "epoch": 1.68, "learning_rate": 8.582204081632653e-05, "loss": 1.6512, "step": 79500 }, { "epoch": 1.68, "learning_rate": 8.580163265306122e-05, "loss": 1.6423, "step": 79600 }, { "epoch": 1.69, "learning_rate": 8.578122448979592e-05, "loss": 1.637, "step": 79700 }, { "epoch": 1.69, "learning_rate": 8.576081632653062e-05, "loss": 1.6464, "step": 79800 }, { "epoch": 1.69, "learning_rate": 8.57404081632653e-05, "loss": 1.6383, "step": 79900 }, { "epoch": 1.69, "learning_rate": 8.572e-05, "loss": 1.6486, "step": 80000 }, { "epoch": 1.7, "learning_rate": 8.56995918367347e-05, "loss": 1.6384, "step": 80100 }, { "epoch": 1.7, "learning_rate": 8.567918367346938e-05, "loss": 1.6405, "step": 80200 }, { "epoch": 1.7, "learning_rate": 8.565897959183674e-05, "loss": 1.6375, "step": 80300 }, { "epoch": 1.7, "learning_rate": 8.563857142857144e-05, "loss": 1.6406, "step": 80400 }, { "epoch": 1.7, "learning_rate": 8.561816326530613e-05, "loss": 1.6478, "step": 80500 }, { "epoch": 1.71, "learning_rate": 8.559775510204082e-05, "loss": 1.6418, "step": 80600 }, { "epoch": 1.71, "learning_rate": 8.557734693877552e-05, "loss": 1.6416, "step": 80700 }, { "epoch": 1.71, "learning_rate": 8.555693877551021e-05, "loss": 1.6395, "step": 80800 }, { "epoch": 1.71, "learning_rate": 8.55365306122449e-05, "loss": 1.6435, "step": 80900 }, { "epoch": 1.71, "learning_rate": 8.55161224489796e-05, "loss": 1.6446, "step": 81000 }, { "epoch": 1.72, "learning_rate": 8.54957142857143e-05, "loss": 1.6399, "step": 81100 }, { "epoch": 1.72, "learning_rate": 8.547530612244898e-05, "loss": 1.6338, "step": 81200 }, { "epoch": 1.72, "learning_rate": 8.545489795918368e-05, "loss": 1.6412, "step": 81300 }, { "epoch": 1.72, "learning_rate": 8.543448979591836e-05, "loss": 1.6326, "step": 81400 }, { "epoch": 1.72, "learning_rate": 8.541408163265306e-05, "loss": 1.6488, "step": 81500 }, { "epoch": 1.73, "learning_rate": 8.539367346938776e-05, "loss": 1.6392, "step": 81600 }, { "epoch": 1.73, "learning_rate": 8.537326530612245e-05, "loss": 1.6355, "step": 81700 }, { "epoch": 1.73, "learning_rate": 8.535285714285714e-05, "loss": 1.6371, "step": 81800 }, { "epoch": 1.73, "learning_rate": 8.533244897959184e-05, "loss": 1.6325, "step": 81900 }, { "epoch": 1.74, "learning_rate": 8.531204081632653e-05, "loss": 1.634, "step": 82000 }, { "epoch": 1.74, "learning_rate": 8.529163265306123e-05, "loss": 1.6442, "step": 82100 }, { "epoch": 1.74, "learning_rate": 8.527122448979592e-05, "loss": 1.6302, "step": 82200 }, { "epoch": 1.74, "learning_rate": 8.525081632653061e-05, "loss": 1.6329, "step": 82300 }, { "epoch": 1.74, "learning_rate": 8.523040816326531e-05, "loss": 1.6356, "step": 82400 }, { "epoch": 1.75, "learning_rate": 8.521e-05, "loss": 1.6328, "step": 82500 }, { "epoch": 1.75, "learning_rate": 8.518959183673469e-05, "loss": 1.6346, "step": 82600 }, { "epoch": 1.75, "learning_rate": 8.516918367346939e-05, "loss": 1.6301, "step": 82700 }, { "epoch": 1.75, "learning_rate": 8.514877551020409e-05, "loss": 1.6325, "step": 82800 }, { "epoch": 1.75, "learning_rate": 8.512836734693877e-05, "loss": 1.6276, "step": 82900 }, { "epoch": 1.76, "learning_rate": 8.510795918367347e-05, "loss": 1.638, "step": 83000 }, { "epoch": 1.76, "learning_rate": 8.508755102040817e-05, "loss": 1.6347, "step": 83100 }, { "epoch": 1.76, "learning_rate": 8.506734693877551e-05, "loss": 1.6288, "step": 83200 }, { "epoch": 1.76, "learning_rate": 8.504693877551021e-05, "loss": 1.6267, "step": 83300 }, { "epoch": 1.77, "learning_rate": 8.50265306122449e-05, "loss": 1.6386, "step": 83400 }, { "epoch": 1.77, "learning_rate": 8.500612244897959e-05, "loss": 1.634, "step": 83500 }, { "epoch": 1.77, "learning_rate": 8.498591836734695e-05, "loss": 1.6339, "step": 83600 }, { "epoch": 1.77, "learning_rate": 8.496551020408163e-05, "loss": 1.6281, "step": 83700 }, { "epoch": 1.77, "learning_rate": 8.494510204081633e-05, "loss": 1.6272, "step": 83800 }, { "epoch": 1.78, "learning_rate": 8.492469387755103e-05, "loss": 1.6306, "step": 83900 }, { "epoch": 1.78, "learning_rate": 8.490428571428571e-05, "loss": 1.6375, "step": 84000 }, { "epoch": 1.78, "learning_rate": 8.488387755102041e-05, "loss": 1.6298, "step": 84100 }, { "epoch": 1.78, "learning_rate": 8.48634693877551e-05, "loss": 1.6336, "step": 84200 }, { "epoch": 1.78, "learning_rate": 8.48430612244898e-05, "loss": 1.6233, "step": 84300 }, { "epoch": 1.79, "learning_rate": 8.482265306122449e-05, "loss": 1.6354, "step": 84400 }, { "epoch": 1.79, "learning_rate": 8.480224489795919e-05, "loss": 1.6274, "step": 84500 }, { "epoch": 1.79, "learning_rate": 8.478183673469389e-05, "loss": 1.6248, "step": 84600 }, { "epoch": 1.79, "learning_rate": 8.476142857142859e-05, "loss": 1.6284, "step": 84700 }, { "epoch": 1.79, "learning_rate": 8.474102040816327e-05, "loss": 1.6327, "step": 84800 }, { "epoch": 1.8, "learning_rate": 8.472061224489797e-05, "loss": 1.6178, "step": 84900 }, { "epoch": 1.8, "learning_rate": 8.470020408163265e-05, "loss": 1.6245, "step": 85000 }, { "epoch": 1.8, "learning_rate": 8.467979591836735e-05, "loss": 1.6274, "step": 85100 }, { "epoch": 1.8, "learning_rate": 8.465938775510205e-05, "loss": 1.6248, "step": 85200 }, { "epoch": 1.81, "learning_rate": 8.463897959183674e-05, "loss": 1.6219, "step": 85300 }, { "epoch": 1.81, "learning_rate": 8.461857142857143e-05, "loss": 1.623, "step": 85400 }, { "epoch": 1.81, "learning_rate": 8.459816326530613e-05, "loss": 1.6258, "step": 85500 }, { "epoch": 1.81, "learning_rate": 8.457775510204082e-05, "loss": 1.6235, "step": 85600 }, { "epoch": 1.81, "learning_rate": 8.455734693877552e-05, "loss": 1.6183, "step": 85700 }, { "epoch": 1.82, "learning_rate": 8.453693877551021e-05, "loss": 1.6229, "step": 85800 }, { "epoch": 1.82, "learning_rate": 8.45165306122449e-05, "loss": 1.6289, "step": 85900 }, { "epoch": 1.82, "learning_rate": 8.44961224489796e-05, "loss": 1.6321, "step": 86000 }, { "epoch": 1.82, "learning_rate": 8.44757142857143e-05, "loss": 1.625, "step": 86100 }, { "epoch": 1.82, "learning_rate": 8.445530612244898e-05, "loss": 1.6288, "step": 86200 }, { "epoch": 1.83, "learning_rate": 8.443489795918368e-05, "loss": 1.6262, "step": 86300 }, { "epoch": 1.83, "learning_rate": 8.441448979591838e-05, "loss": 1.6222, "step": 86400 }, { "epoch": 1.83, "learning_rate": 8.439408163265306e-05, "loss": 1.6253, "step": 86500 }, { "epoch": 1.83, "learning_rate": 8.437367346938776e-05, "loss": 1.6219, "step": 86600 }, { "epoch": 1.83, "learning_rate": 8.435326530612246e-05, "loss": 1.6215, "step": 86700 }, { "epoch": 1.84, "learning_rate": 8.433285714285714e-05, "loss": 1.6131, "step": 86800 }, { "epoch": 1.84, "learning_rate": 8.431244897959184e-05, "loss": 1.6179, "step": 86900 }, { "epoch": 1.84, "learning_rate": 8.429204081632654e-05, "loss": 1.6131, "step": 87000 }, { "epoch": 1.84, "learning_rate": 8.427163265306123e-05, "loss": 1.6247, "step": 87100 }, { "epoch": 1.85, "learning_rate": 8.425122448979592e-05, "loss": 1.6259, "step": 87200 }, { "epoch": 1.85, "learning_rate": 8.423081632653061e-05, "loss": 1.6216, "step": 87300 }, { "epoch": 1.85, "learning_rate": 8.421040816326531e-05, "loss": 1.6218, "step": 87400 }, { "epoch": 1.85, "learning_rate": 8.419e-05, "loss": 1.61, "step": 87500 }, { "epoch": 1.85, "learning_rate": 8.416959183673469e-05, "loss": 1.6201, "step": 87600 }, { "epoch": 1.86, "learning_rate": 8.414918367346939e-05, "loss": 1.6161, "step": 87700 }, { "epoch": 1.86, "learning_rate": 8.412877551020409e-05, "loss": 1.617, "step": 87800 }, { "epoch": 1.86, "learning_rate": 8.410836734693877e-05, "loss": 1.622, "step": 87900 }, { "epoch": 1.86, "learning_rate": 8.408795918367347e-05, "loss": 1.6149, "step": 88000 }, { "epoch": 1.86, "learning_rate": 8.406755102040817e-05, "loss": 1.6155, "step": 88100 }, { "epoch": 1.87, "learning_rate": 8.404714285714285e-05, "loss": 1.6139, "step": 88200 }, { "epoch": 1.87, "learning_rate": 8.402673469387755e-05, "loss": 1.6086, "step": 88300 }, { "epoch": 1.87, "learning_rate": 8.400632653061225e-05, "loss": 1.6116, "step": 88400 }, { "epoch": 1.87, "learning_rate": 8.39861224489796e-05, "loss": 1.621, "step": 88500 }, { "epoch": 1.88, "learning_rate": 8.396571428571429e-05, "loss": 1.6108, "step": 88600 }, { "epoch": 1.88, "learning_rate": 8.394530612244899e-05, "loss": 1.6223, "step": 88700 }, { "epoch": 1.88, "learning_rate": 8.392489795918369e-05, "loss": 1.6185, "step": 88800 }, { "epoch": 1.88, "learning_rate": 8.390448979591837e-05, "loss": 1.6239, "step": 88900 }, { "epoch": 1.88, "learning_rate": 8.388408163265307e-05, "loss": 1.605, "step": 89000 }, { "epoch": 1.89, "learning_rate": 8.386367346938775e-05, "loss": 1.6105, "step": 89100 }, { "epoch": 1.89, "learning_rate": 8.384326530612245e-05, "loss": 1.6151, "step": 89200 }, { "epoch": 1.89, "learning_rate": 8.382285714285715e-05, "loss": 1.6145, "step": 89300 }, { "epoch": 1.89, "learning_rate": 8.380244897959184e-05, "loss": 1.623, "step": 89400 }, { "epoch": 1.89, "learning_rate": 8.378204081632653e-05, "loss": 1.6176, "step": 89500 }, { "epoch": 1.9, "learning_rate": 8.376163265306123e-05, "loss": 1.6083, "step": 89600 }, { "epoch": 1.9, "learning_rate": 8.374122448979592e-05, "loss": 1.6112, "step": 89700 }, { "epoch": 1.9, "learning_rate": 8.372081632653062e-05, "loss": 1.613, "step": 89800 }, { "epoch": 1.9, "learning_rate": 8.370040816326531e-05, "loss": 1.6176, "step": 89900 }, { "epoch": 1.9, "learning_rate": 8.368e-05, "loss": 1.6185, "step": 90000 }, { "epoch": 1.91, "learning_rate": 8.36595918367347e-05, "loss": 1.6205, "step": 90100 }, { "epoch": 1.91, "learning_rate": 8.36391836734694e-05, "loss": 1.617, "step": 90200 }, { "epoch": 1.91, "learning_rate": 8.361877551020408e-05, "loss": 1.6221, "step": 90300 }, { "epoch": 1.91, "learning_rate": 8.359836734693878e-05, "loss": 1.6177, "step": 90400 }, { "epoch": 1.92, "learning_rate": 8.357816326530613e-05, "loss": 1.607, "step": 90500 }, { "epoch": 1.92, "learning_rate": 8.355775510204083e-05, "loss": 1.6132, "step": 90600 }, { "epoch": 1.92, "learning_rate": 8.353734693877552e-05, "loss": 1.6007, "step": 90700 }, { "epoch": 1.92, "learning_rate": 8.351693877551021e-05, "loss": 1.6057, "step": 90800 }, { "epoch": 1.92, "learning_rate": 8.34965306122449e-05, "loss": 1.6116, "step": 90900 }, { "epoch": 1.93, "learning_rate": 8.34761224489796e-05, "loss": 1.6121, "step": 91000 }, { "epoch": 1.93, "learning_rate": 8.34557142857143e-05, "loss": 1.6077, "step": 91100 }, { "epoch": 1.93, "learning_rate": 8.343530612244898e-05, "loss": 1.616, "step": 91200 }, { "epoch": 1.93, "learning_rate": 8.341489795918368e-05, "loss": 1.6066, "step": 91300 }, { "epoch": 1.93, "learning_rate": 8.339448979591838e-05, "loss": 1.6093, "step": 91400 }, { "epoch": 1.94, "learning_rate": 8.337408163265306e-05, "loss": 1.6054, "step": 91500 }, { "epoch": 1.94, "learning_rate": 8.335367346938776e-05, "loss": 1.6099, "step": 91600 }, { "epoch": 1.94, "learning_rate": 8.333326530612246e-05, "loss": 1.6069, "step": 91700 }, { "epoch": 1.94, "learning_rate": 8.331285714285714e-05, "loss": 1.6152, "step": 91800 }, { "epoch": 1.94, "learning_rate": 8.329244897959184e-05, "loss": 1.6062, "step": 91900 }, { "epoch": 1.95, "learning_rate": 8.327204081632654e-05, "loss": 1.6017, "step": 92000 }, { "epoch": 1.95, "learning_rate": 8.325163265306123e-05, "loss": 1.6047, "step": 92100 }, { "epoch": 1.95, "learning_rate": 8.323122448979592e-05, "loss": 1.6063, "step": 92200 }, { "epoch": 1.95, "learning_rate": 8.321081632653062e-05, "loss": 1.6036, "step": 92300 }, { "epoch": 1.96, "learning_rate": 8.31904081632653e-05, "loss": 1.6064, "step": 92400 }, { "epoch": 1.96, "learning_rate": 8.317e-05, "loss": 1.6035, "step": 92500 }, { "epoch": 1.96, "learning_rate": 8.31495918367347e-05, "loss": 1.6031, "step": 92600 }, { "epoch": 1.96, "learning_rate": 8.312938775510204e-05, "loss": 1.6052, "step": 92700 }, { "epoch": 1.96, "learning_rate": 8.310897959183674e-05, "loss": 1.6037, "step": 92800 }, { "epoch": 1.97, "learning_rate": 8.308857142857144e-05, "loss": 1.6044, "step": 92900 }, { "epoch": 1.97, "learning_rate": 8.306816326530613e-05, "loss": 1.6047, "step": 93000 }, { "epoch": 1.97, "learning_rate": 8.304775510204082e-05, "loss": 1.601, "step": 93100 }, { "epoch": 1.97, "learning_rate": 8.302734693877552e-05, "loss": 1.5984, "step": 93200 }, { "epoch": 1.97, "learning_rate": 8.300693877551021e-05, "loss": 1.6022, "step": 93300 }, { "epoch": 1.98, "learning_rate": 8.29865306122449e-05, "loss": 1.5999, "step": 93400 }, { "epoch": 1.98, "learning_rate": 8.29661224489796e-05, "loss": 1.6109, "step": 93500 }, { "epoch": 1.98, "learning_rate": 8.294571428571429e-05, "loss": 1.604, "step": 93600 }, { "epoch": 1.98, "learning_rate": 8.292551020408163e-05, "loss": 1.6048, "step": 93700 }, { "epoch": 1.99, "learning_rate": 8.290510204081633e-05, "loss": 1.6008, "step": 93800 }, { "epoch": 1.99, "learning_rate": 8.288469387755103e-05, "loss": 1.6005, "step": 93900 }, { "epoch": 1.99, "learning_rate": 8.286428571428571e-05, "loss": 1.5987, "step": 94000 }, { "epoch": 1.99, "learning_rate": 8.284387755102041e-05, "loss": 1.6, "step": 94100 }, { "epoch": 1.99, "learning_rate": 8.282346938775511e-05, "loss": 1.602, "step": 94200 }, { "epoch": 2.0, "learning_rate": 8.280306122448979e-05, "loss": 1.6111, "step": 94300 }, { "epoch": 2.0, "learning_rate": 8.278265306122449e-05, "loss": 1.599, "step": 94400 }, { "epoch": 2.0, "learning_rate": 8.276224489795919e-05, "loss": 1.606, "step": 94500 }, { "epoch": 2.0, "learning_rate": 8.274183673469387e-05, "loss": 1.5968, "step": 94600 }, { "epoch": 2.0, "learning_rate": 8.272142857142857e-05, "loss": 1.5868, "step": 94700 }, { "epoch": 2.01, "learning_rate": 8.270102040816327e-05, "loss": 1.6044, "step": 94800 }, { "epoch": 2.01, "learning_rate": 8.268061224489796e-05, "loss": 1.594, "step": 94900 }, { "epoch": 2.01, "learning_rate": 8.266020408163265e-05, "loss": 1.5909, "step": 95000 }, { "epoch": 2.01, "learning_rate": 8.263979591836735e-05, "loss": 1.5955, "step": 95100 }, { "epoch": 2.01, "learning_rate": 8.261938775510204e-05, "loss": 1.588, "step": 95200 }, { "epoch": 2.02, "learning_rate": 8.259897959183674e-05, "loss": 1.5873, "step": 95300 }, { "epoch": 2.02, "learning_rate": 8.257857142857143e-05, "loss": 1.5995, "step": 95400 }, { "epoch": 2.02, "learning_rate": 8.255816326530612e-05, "loss": 1.5928, "step": 95500 }, { "epoch": 2.02, "learning_rate": 8.253775510204083e-05, "loss": 1.5925, "step": 95600 }, { "epoch": 2.03, "learning_rate": 8.251734693877552e-05, "loss": 1.5995, "step": 95700 }, { "epoch": 2.03, "learning_rate": 8.249693877551021e-05, "loss": 1.5906, "step": 95800 }, { "epoch": 2.03, "learning_rate": 8.247653061224491e-05, "loss": 1.5933, "step": 95900 }, { "epoch": 2.03, "learning_rate": 8.24561224489796e-05, "loss": 1.5945, "step": 96000 }, { "epoch": 2.03, "learning_rate": 8.24357142857143e-05, "loss": 1.5872, "step": 96100 }, { "epoch": 2.04, "learning_rate": 8.2415306122449e-05, "loss": 1.5889, "step": 96200 }, { "epoch": 2.04, "learning_rate": 8.239489795918368e-05, "loss": 1.5898, "step": 96300 }, { "epoch": 2.04, "learning_rate": 8.237448979591838e-05, "loss": 1.585, "step": 96400 }, { "epoch": 2.04, "learning_rate": 8.235408163265306e-05, "loss": 1.5857, "step": 96500 }, { "epoch": 2.04, "learning_rate": 8.233367346938776e-05, "loss": 1.5885, "step": 96600 }, { "epoch": 2.05, "learning_rate": 8.231326530612246e-05, "loss": 1.5938, "step": 96700 }, { "epoch": 2.05, "learning_rate": 8.229285714285714e-05, "loss": 1.5889, "step": 96800 }, { "epoch": 2.05, "learning_rate": 8.227244897959184e-05, "loss": 1.5863, "step": 96900 }, { "epoch": 2.05, "learning_rate": 8.225204081632654e-05, "loss": 1.5901, "step": 97000 }, { "epoch": 2.06, "learning_rate": 8.223163265306122e-05, "loss": 1.587, "step": 97100 }, { "epoch": 2.06, "learning_rate": 8.221122448979592e-05, "loss": 1.5944, "step": 97200 }, { "epoch": 2.06, "learning_rate": 8.219081632653062e-05, "loss": 1.5867, "step": 97300 }, { "epoch": 2.06, "learning_rate": 8.21704081632653e-05, "loss": 1.5812, "step": 97400 }, { "epoch": 2.06, "learning_rate": 8.215e-05, "loss": 1.5864, "step": 97500 }, { "epoch": 2.07, "learning_rate": 8.21295918367347e-05, "loss": 1.5928, "step": 97600 }, { "epoch": 2.07, "learning_rate": 8.210938775510204e-05, "loss": 1.5907, "step": 97700 }, { "epoch": 2.07, "learning_rate": 8.208897959183673e-05, "loss": 1.5866, "step": 97800 }, { "epoch": 2.07, "learning_rate": 8.206857142857143e-05, "loss": 1.6052, "step": 97900 }, { "epoch": 2.07, "learning_rate": 8.204816326530613e-05, "loss": 1.5815, "step": 98000 }, { "epoch": 2.08, "learning_rate": 8.202775510204081e-05, "loss": 1.5801, "step": 98100 }, { "epoch": 2.08, "learning_rate": 8.200734693877551e-05, "loss": 1.5841, "step": 98200 }, { "epoch": 2.08, "learning_rate": 8.198693877551021e-05, "loss": 1.5855, "step": 98300 }, { "epoch": 2.08, "learning_rate": 8.19665306122449e-05, "loss": 1.584, "step": 98400 }, { "epoch": 2.08, "learning_rate": 8.19461224489796e-05, "loss": 1.5811, "step": 98500 }, { "epoch": 2.09, "learning_rate": 8.192571428571429e-05, "loss": 1.5792, "step": 98600 }, { "epoch": 2.09, "learning_rate": 8.190530612244899e-05, "loss": 1.5816, "step": 98700 }, { "epoch": 2.09, "learning_rate": 8.188489795918369e-05, "loss": 1.5791, "step": 98800 }, { "epoch": 2.09, "learning_rate": 8.186448979591837e-05, "loss": 1.5806, "step": 98900 }, { "epoch": 2.1, "learning_rate": 8.184408163265307e-05, "loss": 1.5861, "step": 99000 }, { "epoch": 2.1, "learning_rate": 8.182367346938777e-05, "loss": 1.5813, "step": 99100 }, { "epoch": 2.1, "learning_rate": 8.180326530612245e-05, "loss": 1.5888, "step": 99200 }, { "epoch": 2.1, "learning_rate": 8.178285714285715e-05, "loss": 1.5832, "step": 99300 }, { "epoch": 2.1, "learning_rate": 8.176244897959185e-05, "loss": 1.5824, "step": 99400 }, { "epoch": 2.11, "learning_rate": 8.174204081632653e-05, "loss": 1.5756, "step": 99500 }, { "epoch": 2.11, "learning_rate": 8.172163265306123e-05, "loss": 1.5854, "step": 99600 }, { "epoch": 2.11, "learning_rate": 8.170122448979593e-05, "loss": 1.5929, "step": 99700 }, { "epoch": 2.11, "learning_rate": 8.168081632653061e-05, "loss": 1.574, "step": 99800 }, { "epoch": 2.11, "learning_rate": 8.166040816326531e-05, "loss": 1.5755, "step": 99900 }, { "epoch": 2.12, "learning_rate": 8.164000000000001e-05, "loss": 1.5811, "step": 100000 }, { "epoch": 2.12, "learning_rate": 8.16195918367347e-05, "loss": 1.5787, "step": 100100 }, { "epoch": 2.12, "learning_rate": 8.159938775510204e-05, "loss": 1.5844, "step": 100200 }, { "epoch": 2.12, "learning_rate": 8.157897959183674e-05, "loss": 1.5802, "step": 100300 }, { "epoch": 2.12, "learning_rate": 8.155857142857143e-05, "loss": 1.5799, "step": 100400 }, { "epoch": 2.13, "learning_rate": 8.153816326530612e-05, "loss": 1.5914, "step": 100500 }, { "epoch": 2.13, "learning_rate": 8.151775510204082e-05, "loss": 1.5856, "step": 100600 }, { "epoch": 2.13, "learning_rate": 8.149734693877552e-05, "loss": 1.5786, "step": 100700 }, { "epoch": 2.13, "learning_rate": 8.14769387755102e-05, "loss": 1.5799, "step": 100800 }, { "epoch": 2.14, "learning_rate": 8.14565306122449e-05, "loss": 1.5756, "step": 100900 }, { "epoch": 2.14, "learning_rate": 8.14361224489796e-05, "loss": 1.5801, "step": 101000 }, { "epoch": 2.14, "learning_rate": 8.14157142857143e-05, "loss": 1.5814, "step": 101100 }, { "epoch": 2.14, "learning_rate": 8.1395306122449e-05, "loss": 1.5797, "step": 101200 }, { "epoch": 2.14, "learning_rate": 8.137489795918368e-05, "loss": 1.58, "step": 101300 }, { "epoch": 2.15, "learning_rate": 8.135448979591838e-05, "loss": 1.5733, "step": 101400 }, { "epoch": 2.15, "learning_rate": 8.133408163265307e-05, "loss": 1.5771, "step": 101500 }, { "epoch": 2.15, "learning_rate": 8.131367346938776e-05, "loss": 1.5803, "step": 101600 }, { "epoch": 2.15, "learning_rate": 8.129326530612246e-05, "loss": 1.5738, "step": 101700 }, { "epoch": 2.15, "learning_rate": 8.127285714285716e-05, "loss": 1.5729, "step": 101800 }, { "epoch": 2.16, "learning_rate": 8.125244897959184e-05, "loss": 1.5772, "step": 101900 }, { "epoch": 2.16, "learning_rate": 8.123204081632654e-05, "loss": 1.5771, "step": 102000 }, { "epoch": 2.16, "learning_rate": 8.121163265306124e-05, "loss": 1.5776, "step": 102100 }, { "epoch": 2.16, "learning_rate": 8.119122448979592e-05, "loss": 1.583, "step": 102200 }, { "epoch": 2.17, "learning_rate": 8.117081632653062e-05, "loss": 1.5745, "step": 102300 }, { "epoch": 2.17, "learning_rate": 8.11504081632653e-05, "loss": 1.5749, "step": 102400 }, { "epoch": 2.17, "learning_rate": 8.113e-05, "loss": 1.5867, "step": 102500 }, { "epoch": 2.17, "learning_rate": 8.11095918367347e-05, "loss": 1.5795, "step": 102600 }, { "epoch": 2.17, "learning_rate": 8.108938775510204e-05, "loss": 1.5763, "step": 102700 }, { "epoch": 2.18, "learning_rate": 8.106897959183674e-05, "loss": 1.5683, "step": 102800 }, { "epoch": 2.18, "learning_rate": 8.104857142857143e-05, "loss": 1.5766, "step": 102900 }, { "epoch": 2.18, "learning_rate": 8.102816326530612e-05, "loss": 1.5716, "step": 103000 }, { "epoch": 2.18, "learning_rate": 8.100775510204082e-05, "loss": 1.581, "step": 103100 }, { "epoch": 2.18, "learning_rate": 8.098734693877551e-05, "loss": 1.578, "step": 103200 }, { "epoch": 2.19, "learning_rate": 8.09669387755102e-05, "loss": 1.5702, "step": 103300 }, { "epoch": 2.19, "learning_rate": 8.094653061224489e-05, "loss": 1.572, "step": 103400 }, { "epoch": 2.19, "learning_rate": 8.092612244897959e-05, "loss": 1.5754, "step": 103500 }, { "epoch": 2.19, "learning_rate": 8.090571428571429e-05, "loss": 1.5761, "step": 103600 }, { "epoch": 2.19, "learning_rate": 8.088530612244897e-05, "loss": 1.5736, "step": 103700 }, { "epoch": 2.2, "learning_rate": 8.086489795918367e-05, "loss": 1.5668, "step": 103800 }, { "epoch": 2.2, "learning_rate": 8.084448979591838e-05, "loss": 1.5761, "step": 103900 }, { "epoch": 2.2, "learning_rate": 8.082408163265307e-05, "loss": 1.5777, "step": 104000 }, { "epoch": 2.2, "learning_rate": 8.080367346938777e-05, "loss": 1.5655, "step": 104100 }, { "epoch": 2.21, "learning_rate": 8.078326530612245e-05, "loss": 1.579, "step": 104200 }, { "epoch": 2.21, "learning_rate": 8.076285714285715e-05, "loss": 1.5827, "step": 104300 }, { "epoch": 2.21, "learning_rate": 8.074244897959185e-05, "loss": 1.5745, "step": 104400 }, { "epoch": 2.21, "learning_rate": 8.072204081632653e-05, "loss": 1.5625, "step": 104500 }, { "epoch": 2.21, "learning_rate": 8.070163265306123e-05, "loss": 1.5732, "step": 104600 }, { "epoch": 2.22, "learning_rate": 8.068122448979593e-05, "loss": 1.5714, "step": 104700 }, { "epoch": 2.22, "learning_rate": 8.066081632653061e-05, "loss": 1.5755, "step": 104800 }, { "epoch": 2.22, "learning_rate": 8.064040816326531e-05, "loss": 1.5785, "step": 104900 }, { "epoch": 2.22, "learning_rate": 8.062000000000001e-05, "loss": 1.5689, "step": 105000 }, { "epoch": 2.22, "learning_rate": 8.05995918367347e-05, "loss": 1.563, "step": 105100 }, { "epoch": 2.23, "learning_rate": 8.05791836734694e-05, "loss": 1.5673, "step": 105200 }, { "epoch": 2.23, "learning_rate": 8.055877551020409e-05, "loss": 1.5768, "step": 105300 }, { "epoch": 2.23, "learning_rate": 8.053836734693878e-05, "loss": 1.57, "step": 105400 }, { "epoch": 2.23, "learning_rate": 8.051795918367348e-05, "loss": 1.568, "step": 105500 }, { "epoch": 2.23, "learning_rate": 8.049755102040817e-05, "loss": 1.567, "step": 105600 }, { "epoch": 2.24, "learning_rate": 8.047714285714286e-05, "loss": 1.5693, "step": 105700 }, { "epoch": 2.24, "learning_rate": 8.045673469387756e-05, "loss": 1.5817, "step": 105800 }, { "epoch": 2.24, "learning_rate": 8.043632653061226e-05, "loss": 1.5695, "step": 105900 }, { "epoch": 2.24, "learning_rate": 8.041591836734694e-05, "loss": 1.5707, "step": 106000 }, { "epoch": 2.25, "learning_rate": 8.039551020408164e-05, "loss": 1.5695, "step": 106100 }, { "epoch": 2.25, "learning_rate": 8.037510204081634e-05, "loss": 1.5738, "step": 106200 }, { "epoch": 2.25, "learning_rate": 8.035469387755102e-05, "loss": 1.5671, "step": 106300 }, { "epoch": 2.25, "learning_rate": 8.033428571428572e-05, "loss": 1.568, "step": 106400 }, { "epoch": 2.25, "learning_rate": 8.03138775510204e-05, "loss": 1.5706, "step": 106500 }, { "epoch": 2.26, "learning_rate": 8.02934693877551e-05, "loss": 1.5664, "step": 106600 }, { "epoch": 2.26, "learning_rate": 8.02730612244898e-05, "loss": 1.5629, "step": 106700 }, { "epoch": 2.26, "learning_rate": 8.025265306122449e-05, "loss": 1.5689, "step": 106800 }, { "epoch": 2.26, "learning_rate": 8.023244897959184e-05, "loss": 1.564, "step": 106900 }, { "epoch": 2.26, "learning_rate": 8.021204081632654e-05, "loss": 1.5708, "step": 107000 }, { "epoch": 2.27, "learning_rate": 8.019163265306124e-05, "loss": 1.5595, "step": 107100 }, { "epoch": 2.27, "learning_rate": 8.017122448979592e-05, "loss": 1.5546, "step": 107200 }, { "epoch": 2.27, "learning_rate": 8.015081632653062e-05, "loss": 1.5598, "step": 107300 }, { "epoch": 2.27, "learning_rate": 8.013040816326532e-05, "loss": 1.5639, "step": 107400 }, { "epoch": 2.28, "learning_rate": 8.011e-05, "loss": 1.558, "step": 107500 }, { "epoch": 2.28, "learning_rate": 8.00895918367347e-05, "loss": 1.5644, "step": 107600 }, { "epoch": 2.28, "learning_rate": 8.00691836734694e-05, "loss": 1.5677, "step": 107700 }, { "epoch": 2.28, "learning_rate": 8.004877551020409e-05, "loss": 1.5615, "step": 107800 }, { "epoch": 2.28, "learning_rate": 8.002836734693878e-05, "loss": 1.5615, "step": 107900 }, { "epoch": 2.29, "learning_rate": 8.000795918367348e-05, "loss": 1.5653, "step": 108000 }, { "epoch": 2.29, "learning_rate": 7.998755102040817e-05, "loss": 1.5624, "step": 108100 }, { "epoch": 2.29, "learning_rate": 7.996714285714287e-05, "loss": 1.5565, "step": 108200 }, { "epoch": 2.29, "learning_rate": 7.994673469387755e-05, "loss": 1.5667, "step": 108300 }, { "epoch": 2.29, "learning_rate": 7.99265306122449e-05, "loss": 1.5662, "step": 108400 }, { "epoch": 2.3, "learning_rate": 7.990612244897959e-05, "loss": 1.5653, "step": 108500 }, { "epoch": 2.3, "learning_rate": 7.988571428571429e-05, "loss": 1.5635, "step": 108600 }, { "epoch": 2.3, "learning_rate": 7.986530612244899e-05, "loss": 1.5638, "step": 108700 }, { "epoch": 2.3, "learning_rate": 7.984489795918367e-05, "loss": 1.5647, "step": 108800 }, { "epoch": 2.3, "learning_rate": 7.982448979591837e-05, "loss": 1.5653, "step": 108900 }, { "epoch": 2.31, "learning_rate": 7.980408163265305e-05, "loss": 1.5694, "step": 109000 }, { "epoch": 2.31, "learning_rate": 7.978367346938775e-05, "loss": 1.5644, "step": 109100 }, { "epoch": 2.31, "learning_rate": 7.976326530612245e-05, "loss": 1.5638, "step": 109200 }, { "epoch": 2.31, "learning_rate": 7.974285714285714e-05, "loss": 1.5624, "step": 109300 }, { "epoch": 2.32, "learning_rate": 7.972244897959183e-05, "loss": 1.5654, "step": 109400 }, { "epoch": 2.32, "learning_rate": 7.970204081632655e-05, "loss": 1.5575, "step": 109500 }, { "epoch": 2.32, "learning_rate": 7.968163265306123e-05, "loss": 1.5626, "step": 109600 }, { "epoch": 2.32, "learning_rate": 7.966122448979593e-05, "loss": 1.5577, "step": 109700 }, { "epoch": 2.32, "learning_rate": 7.964081632653063e-05, "loss": 1.5663, "step": 109800 }, { "epoch": 2.33, "learning_rate": 7.962040816326531e-05, "loss": 1.5587, "step": 109900 }, { "epoch": 2.33, "learning_rate": 7.960000000000001e-05, "loss": 1.5641, "step": 110000 }, { "epoch": 2.33, "learning_rate": 7.95795918367347e-05, "loss": 1.5583, "step": 110100 }, { "epoch": 2.33, "learning_rate": 7.95591836734694e-05, "loss": 1.5579, "step": 110200 }, { "epoch": 2.33, "learning_rate": 7.953877551020409e-05, "loss": 1.5577, "step": 110300 }, { "epoch": 2.34, "learning_rate": 7.951836734693878e-05, "loss": 1.5526, "step": 110400 }, { "epoch": 2.34, "learning_rate": 7.949795918367348e-05, "loss": 1.5622, "step": 110500 }, { "epoch": 2.34, "learning_rate": 7.947755102040817e-05, "loss": 1.5611, "step": 110600 }, { "epoch": 2.34, "learning_rate": 7.945734693877551e-05, "loss": 1.5596, "step": 110700 }, { "epoch": 2.34, "learning_rate": 7.94369387755102e-05, "loss": 1.5589, "step": 110800 }, { "epoch": 2.35, "learning_rate": 7.94165306122449e-05, "loss": 1.5554, "step": 110900 }, { "epoch": 2.35, "learning_rate": 7.93961224489796e-05, "loss": 1.559, "step": 111000 }, { "epoch": 2.35, "learning_rate": 7.937571428571428e-05, "loss": 1.5522, "step": 111100 }, { "epoch": 2.35, "learning_rate": 7.935530612244898e-05, "loss": 1.559, "step": 111200 }, { "epoch": 2.36, "learning_rate": 7.933489795918368e-05, "loss": 1.5542, "step": 111300 }, { "epoch": 2.36, "learning_rate": 7.931448979591836e-05, "loss": 1.5536, "step": 111400 }, { "epoch": 2.36, "learning_rate": 7.929408163265306e-05, "loss": 1.5498, "step": 111500 }, { "epoch": 2.36, "learning_rate": 7.927367346938776e-05, "loss": 1.5556, "step": 111600 }, { "epoch": 2.36, "learning_rate": 7.925326530612244e-05, "loss": 1.5617, "step": 111700 }, { "epoch": 2.37, "learning_rate": 7.923285714285714e-05, "loss": 1.5517, "step": 111800 }, { "epoch": 2.37, "learning_rate": 7.921244897959184e-05, "loss": 1.5526, "step": 111900 }, { "epoch": 2.37, "learning_rate": 7.919204081632653e-05, "loss": 1.5576, "step": 112000 }, { "epoch": 2.37, "learning_rate": 7.917163265306122e-05, "loss": 1.5517, "step": 112100 }, { "epoch": 2.37, "learning_rate": 7.915122448979592e-05, "loss": 1.55, "step": 112200 }, { "epoch": 2.38, "learning_rate": 7.913081632653062e-05, "loss": 1.551, "step": 112300 }, { "epoch": 2.38, "learning_rate": 7.911040816326532e-05, "loss": 1.5561, "step": 112400 }, { "epoch": 2.38, "learning_rate": 7.909e-05, "loss": 1.5479, "step": 112500 }, { "epoch": 2.38, "learning_rate": 7.90695918367347e-05, "loss": 1.5498, "step": 112600 }, { "epoch": 2.39, "learning_rate": 7.90491836734694e-05, "loss": 1.5467, "step": 112700 }, { "epoch": 2.39, "learning_rate": 7.902877551020408e-05, "loss": 1.5532, "step": 112800 }, { "epoch": 2.39, "learning_rate": 7.900836734693878e-05, "loss": 1.5613, "step": 112900 }, { "epoch": 2.39, "learning_rate": 7.898795918367348e-05, "loss": 1.5587, "step": 113000 }, { "epoch": 2.39, "learning_rate": 7.896755102040817e-05, "loss": 1.5509, "step": 113100 }, { "epoch": 2.4, "learning_rate": 7.894714285714286e-05, "loss": 1.551, "step": 113200 }, { "epoch": 2.4, "learning_rate": 7.89269387755102e-05, "loss": 1.5522, "step": 113300 }, { "epoch": 2.4, "learning_rate": 7.89065306122449e-05, "loss": 1.5469, "step": 113400 }, { "epoch": 2.4, "learning_rate": 7.888612244897959e-05, "loss": 1.552, "step": 113500 }, { "epoch": 2.4, "learning_rate": 7.886571428571429e-05, "loss": 1.5568, "step": 113600 }, { "epoch": 2.41, "learning_rate": 7.884530612244899e-05, "loss": 1.552, "step": 113700 }, { "epoch": 2.41, "learning_rate": 7.882489795918367e-05, "loss": 1.543, "step": 113800 }, { "epoch": 2.41, "learning_rate": 7.880448979591837e-05, "loss": 1.5518, "step": 113900 }, { "epoch": 2.41, "learning_rate": 7.878408163265307e-05, "loss": 1.5574, "step": 114000 }, { "epoch": 2.41, "learning_rate": 7.876367346938775e-05, "loss": 1.5493, "step": 114100 }, { "epoch": 2.42, "learning_rate": 7.874326530612245e-05, "loss": 1.5509, "step": 114200 }, { "epoch": 2.42, "learning_rate": 7.872285714285715e-05, "loss": 1.5438, "step": 114300 }, { "epoch": 2.42, "learning_rate": 7.870244897959183e-05, "loss": 1.5488, "step": 114400 }, { "epoch": 2.42, "learning_rate": 7.868204081632653e-05, "loss": 1.5539, "step": 114500 }, { "epoch": 2.43, "learning_rate": 7.866163265306123e-05, "loss": 1.5434, "step": 114600 }, { "epoch": 2.43, "learning_rate": 7.864122448979591e-05, "loss": 1.5465, "step": 114700 }, { "epoch": 2.43, "learning_rate": 7.862081632653061e-05, "loss": 1.5467, "step": 114800 }, { "epoch": 2.43, "learning_rate": 7.86004081632653e-05, "loss": 1.5436, "step": 114900 }, { "epoch": 2.43, "learning_rate": 7.858000000000001e-05, "loss": 1.5447, "step": 115000 }, { "epoch": 2.44, "learning_rate": 7.855959183673471e-05, "loss": 1.5457, "step": 115100 }, { "epoch": 2.44, "learning_rate": 7.853918367346939e-05, "loss": 1.547, "step": 115200 }, { "epoch": 2.44, "learning_rate": 7.851877551020409e-05, "loss": 1.5493, "step": 115300 }, { "epoch": 2.44, "learning_rate": 7.849857142857143e-05, "loss": 1.5352, "step": 115400 }, { "epoch": 2.44, "learning_rate": 7.847816326530613e-05, "loss": 1.5444, "step": 115500 }, { "epoch": 2.45, "learning_rate": 7.845775510204082e-05, "loss": 1.5563, "step": 115600 }, { "epoch": 2.45, "learning_rate": 7.843734693877551e-05, "loss": 1.5493, "step": 115700 }, { "epoch": 2.45, "learning_rate": 7.841693877551021e-05, "loss": 1.5445, "step": 115800 }, { "epoch": 2.45, "learning_rate": 7.83965306122449e-05, "loss": 1.548, "step": 115900 }, { "epoch": 2.46, "learning_rate": 7.83761224489796e-05, "loss": 1.5448, "step": 116000 }, { "epoch": 2.46, "learning_rate": 7.83557142857143e-05, "loss": 1.5489, "step": 116100 }, { "epoch": 2.46, "learning_rate": 7.833530612244898e-05, "loss": 1.5479, "step": 116200 }, { "epoch": 2.46, "learning_rate": 7.831489795918368e-05, "loss": 1.5432, "step": 116300 }, { "epoch": 2.46, "learning_rate": 7.829448979591838e-05, "loss": 1.5413, "step": 116400 }, { "epoch": 2.47, "learning_rate": 7.827408163265306e-05, "loss": 1.5402, "step": 116500 }, { "epoch": 2.47, "learning_rate": 7.825367346938776e-05, "loss": 1.5461, "step": 116600 }, { "epoch": 2.47, "learning_rate": 7.823326530612244e-05, "loss": 1.5456, "step": 116700 }, { "epoch": 2.47, "learning_rate": 7.821285714285714e-05, "loss": 1.5503, "step": 116800 }, { "epoch": 2.47, "learning_rate": 7.819244897959184e-05, "loss": 1.5402, "step": 116900 }, { "epoch": 2.48, "learning_rate": 7.817204081632652e-05, "loss": 1.5477, "step": 117000 }, { "epoch": 2.48, "learning_rate": 7.815163265306122e-05, "loss": 1.5453, "step": 117100 }, { "epoch": 2.48, "learning_rate": 7.813122448979592e-05, "loss": 1.5508, "step": 117200 }, { "epoch": 2.48, "learning_rate": 7.81108163265306e-05, "loss": 1.5441, "step": 117300 }, { "epoch": 2.48, "learning_rate": 7.80904081632653e-05, "loss": 1.5413, "step": 117400 }, { "epoch": 2.49, "learning_rate": 7.807020408163266e-05, "loss": 1.547, "step": 117500 }, { "epoch": 2.49, "learning_rate": 7.804979591836736e-05, "loss": 1.5493, "step": 117600 }, { "epoch": 2.49, "learning_rate": 7.802938775510204e-05, "loss": 1.5385, "step": 117700 }, { "epoch": 2.49, "learning_rate": 7.800897959183674e-05, "loss": 1.5419, "step": 117800 }, { "epoch": 2.5, "learning_rate": 7.798857142857144e-05, "loss": 1.5409, "step": 117900 }, { "epoch": 2.5, "learning_rate": 7.796816326530612e-05, "loss": 1.5382, "step": 118000 }, { "epoch": 2.5, "learning_rate": 7.794775510204082e-05, "loss": 1.5337, "step": 118100 }, { "epoch": 2.5, "learning_rate": 7.792734693877552e-05, "loss": 1.5345, "step": 118200 }, { "epoch": 2.5, "learning_rate": 7.79069387755102e-05, "loss": 1.5412, "step": 118300 }, { "epoch": 2.51, "learning_rate": 7.78865306122449e-05, "loss": 1.5362, "step": 118400 }, { "epoch": 2.51, "learning_rate": 7.786612244897959e-05, "loss": 1.5429, "step": 118500 }, { "epoch": 2.51, "learning_rate": 7.784571428571429e-05, "loss": 1.5394, "step": 118600 }, { "epoch": 2.51, "learning_rate": 7.782530612244899e-05, "loss": 1.5422, "step": 118700 }, { "epoch": 2.51, "learning_rate": 7.780489795918367e-05, "loss": 1.5367, "step": 118800 }, { "epoch": 2.52, "learning_rate": 7.778448979591837e-05, "loss": 1.54, "step": 118900 }, { "epoch": 2.52, "learning_rate": 7.776408163265307e-05, "loss": 1.539, "step": 119000 }, { "epoch": 2.52, "learning_rate": 7.774367346938775e-05, "loss": 1.5392, "step": 119100 }, { "epoch": 2.52, "learning_rate": 7.772326530612245e-05, "loss": 1.5401, "step": 119200 }, { "epoch": 2.52, "learning_rate": 7.770285714285715e-05, "loss": 1.5364, "step": 119300 }, { "epoch": 2.53, "learning_rate": 7.768244897959183e-05, "loss": 1.5404, "step": 119400 }, { "epoch": 2.53, "learning_rate": 7.766204081632653e-05, "loss": 1.5374, "step": 119500 }, { "epoch": 2.53, "learning_rate": 7.764163265306123e-05, "loss": 1.5386, "step": 119600 }, { "epoch": 2.53, "learning_rate": 7.762122448979591e-05, "loss": 1.5436, "step": 119700 }, { "epoch": 2.54, "learning_rate": 7.760081632653061e-05, "loss": 1.5483, "step": 119800 }, { "epoch": 2.54, "learning_rate": 7.758040816326531e-05, "loss": 1.5424, "step": 119900 }, { "epoch": 2.54, "learning_rate": 7.756e-05, "loss": 1.5456, "step": 120000 }, { "epoch": 2.54, "learning_rate": 7.75395918367347e-05, "loss": 1.5483, "step": 120100 }, { "epoch": 2.54, "learning_rate": 7.751918367346939e-05, "loss": 1.5346, "step": 120200 }, { "epoch": 2.55, "learning_rate": 7.749877551020408e-05, "loss": 1.5387, "step": 120300 }, { "epoch": 2.55, "learning_rate": 7.747836734693878e-05, "loss": 1.5327, "step": 120400 }, { "epoch": 2.55, "learning_rate": 7.745795918367347e-05, "loss": 1.5299, "step": 120500 }, { "epoch": 2.55, "learning_rate": 7.743755102040817e-05, "loss": 1.5365, "step": 120600 }, { "epoch": 2.55, "learning_rate": 7.741714285714287e-05, "loss": 1.5332, "step": 120700 }, { "epoch": 2.56, "learning_rate": 7.739673469387756e-05, "loss": 1.5309, "step": 120800 }, { "epoch": 2.56, "learning_rate": 7.737632653061225e-05, "loss": 1.5325, "step": 120900 }, { "epoch": 2.56, "learning_rate": 7.735591836734695e-05, "loss": 1.5408, "step": 121000 }, { "epoch": 2.56, "learning_rate": 7.733551020408164e-05, "loss": 1.5385, "step": 121100 }, { "epoch": 2.57, "learning_rate": 7.731510204081634e-05, "loss": 1.5373, "step": 121200 }, { "epoch": 2.57, "learning_rate": 7.729489795918368e-05, "loss": 1.5339, "step": 121300 }, { "epoch": 2.57, "learning_rate": 7.727448979591837e-05, "loss": 1.5329, "step": 121400 }, { "epoch": 2.57, "learning_rate": 7.725408163265306e-05, "loss": 1.5338, "step": 121500 }, { "epoch": 2.57, "learning_rate": 7.723367346938776e-05, "loss": 1.5435, "step": 121600 }, { "epoch": 2.58, "learning_rate": 7.721326530612246e-05, "loss": 1.5353, "step": 121700 }, { "epoch": 2.58, "learning_rate": 7.719285714285714e-05, "loss": 1.5441, "step": 121800 }, { "epoch": 2.58, "learning_rate": 7.717244897959184e-05, "loss": 1.5382, "step": 121900 }, { "epoch": 2.58, "learning_rate": 7.715204081632654e-05, "loss": 1.5281, "step": 122000 }, { "epoch": 2.58, "learning_rate": 7.713163265306122e-05, "loss": 1.5395, "step": 122100 }, { "epoch": 2.59, "learning_rate": 7.711122448979592e-05, "loss": 1.5414, "step": 122200 }, { "epoch": 2.59, "learning_rate": 7.709081632653062e-05, "loss": 1.5403, "step": 122300 }, { "epoch": 2.59, "learning_rate": 7.70704081632653e-05, "loss": 1.5377, "step": 122400 }, { "epoch": 2.59, "learning_rate": 7.705e-05, "loss": 1.5357, "step": 122500 }, { "epoch": 2.59, "learning_rate": 7.702959183673469e-05, "loss": 1.5267, "step": 122600 }, { "epoch": 2.6, "learning_rate": 7.700918367346939e-05, "loss": 1.5313, "step": 122700 }, { "epoch": 2.6, "learning_rate": 7.698877551020408e-05, "loss": 1.5313, "step": 122800 }, { "epoch": 2.6, "learning_rate": 7.696836734693877e-05, "loss": 1.5416, "step": 122900 }, { "epoch": 2.6, "learning_rate": 7.694795918367347e-05, "loss": 1.5324, "step": 123000 }, { "epoch": 2.61, "learning_rate": 7.692755102040817e-05, "loss": 1.5337, "step": 123100 }, { "epoch": 2.61, "learning_rate": 7.690714285714285e-05, "loss": 1.5251, "step": 123200 }, { "epoch": 2.61, "learning_rate": 7.688673469387756e-05, "loss": 1.5319, "step": 123300 }, { "epoch": 2.61, "learning_rate": 7.686632653061225e-05, "loss": 1.5345, "step": 123400 }, { "epoch": 2.61, "learning_rate": 7.684591836734695e-05, "loss": 1.5258, "step": 123500 }, { "epoch": 2.62, "learning_rate": 7.682551020408164e-05, "loss": 1.5312, "step": 123600 }, { "epoch": 2.62, "learning_rate": 7.680530612244898e-05, "loss": 1.5261, "step": 123700 }, { "epoch": 2.62, "learning_rate": 7.678489795918368e-05, "loss": 1.5297, "step": 123800 }, { "epoch": 2.62, "learning_rate": 7.676469387755102e-05, "loss": 1.5295, "step": 123900 }, { "epoch": 2.62, "learning_rate": 7.674428571428571e-05, "loss": 1.5279, "step": 124000 }, { "epoch": 2.63, "learning_rate": 7.672387755102041e-05, "loss": 1.531, "step": 124100 }, { "epoch": 2.63, "learning_rate": 7.67034693877551e-05, "loss": 1.5365, "step": 124200 }, { "epoch": 2.63, "learning_rate": 7.66830612244898e-05, "loss": 1.5293, "step": 124300 }, { "epoch": 2.63, "learning_rate": 7.66626530612245e-05, "loss": 1.533, "step": 124400 }, { "epoch": 2.63, "learning_rate": 7.664224489795919e-05, "loss": 1.5199, "step": 124500 }, { "epoch": 2.64, "learning_rate": 7.662183673469389e-05, "loss": 1.5206, "step": 124600 }, { "epoch": 2.64, "learning_rate": 7.660142857142858e-05, "loss": 1.5226, "step": 124700 }, { "epoch": 2.64, "learning_rate": 7.658102040816327e-05, "loss": 1.5292, "step": 124800 }, { "epoch": 2.64, "learning_rate": 7.656061224489797e-05, "loss": 1.5311, "step": 124900 }, { "epoch": 2.65, "learning_rate": 7.654020408163267e-05, "loss": 1.5315, "step": 125000 }, { "epoch": 2.65, "learning_rate": 7.651979591836735e-05, "loss": 1.5295, "step": 125100 }, { "epoch": 2.65, "learning_rate": 7.649938775510205e-05, "loss": 1.5281, "step": 125200 }, { "epoch": 2.65, "learning_rate": 7.647897959183675e-05, "loss": 1.5298, "step": 125300 }, { "epoch": 2.65, "learning_rate": 7.645857142857143e-05, "loss": 1.5273, "step": 125400 }, { "epoch": 2.66, "learning_rate": 7.643816326530613e-05, "loss": 1.5355, "step": 125500 }, { "epoch": 2.66, "learning_rate": 7.641775510204083e-05, "loss": 1.5248, "step": 125600 }, { "epoch": 2.66, "learning_rate": 7.639734693877551e-05, "loss": 1.5235, "step": 125700 }, { "epoch": 2.66, "learning_rate": 7.637693877551021e-05, "loss": 1.5206, "step": 125800 }, { "epoch": 2.66, "learning_rate": 7.635653061224491e-05, "loss": 1.5353, "step": 125900 }, { "epoch": 2.67, "learning_rate": 7.63361224489796e-05, "loss": 1.5237, "step": 126000 }, { "epoch": 2.67, "learning_rate": 7.631571428571429e-05, "loss": 1.5284, "step": 126100 }, { "epoch": 2.67, "learning_rate": 7.629530612244898e-05, "loss": 1.5201, "step": 126200 }, { "epoch": 2.67, "learning_rate": 7.627489795918368e-05, "loss": 1.5184, "step": 126300 }, { "epoch": 2.68, "learning_rate": 7.625448979591837e-05, "loss": 1.5288, "step": 126400 }, { "epoch": 2.68, "learning_rate": 7.623408163265306e-05, "loss": 1.5185, "step": 126500 }, { "epoch": 2.68, "learning_rate": 7.621367346938776e-05, "loss": 1.5197, "step": 126600 }, { "epoch": 2.68, "learning_rate": 7.619326530612246e-05, "loss": 1.5183, "step": 126700 }, { "epoch": 2.68, "learning_rate": 7.617285714285714e-05, "loss": 1.526, "step": 126800 }, { "epoch": 2.69, "learning_rate": 7.615244897959184e-05, "loss": 1.5263, "step": 126900 }, { "epoch": 2.69, "learning_rate": 7.613204081632654e-05, "loss": 1.5355, "step": 127000 }, { "epoch": 2.69, "learning_rate": 7.611163265306122e-05, "loss": 1.5153, "step": 127100 }, { "epoch": 2.69, "learning_rate": 7.609122448979592e-05, "loss": 1.5238, "step": 127200 }, { "epoch": 2.69, "learning_rate": 7.607081632653062e-05, "loss": 1.5249, "step": 127300 }, { "epoch": 2.7, "learning_rate": 7.60504081632653e-05, "loss": 1.5221, "step": 127400 }, { "epoch": 2.7, "learning_rate": 7.603e-05, "loss": 1.5308, "step": 127500 }, { "epoch": 2.7, "learning_rate": 7.60095918367347e-05, "loss": 1.5278, "step": 127600 }, { "epoch": 2.7, "learning_rate": 7.598918367346939e-05, "loss": 1.5243, "step": 127700 }, { "epoch": 2.7, "learning_rate": 7.596877551020408e-05, "loss": 1.5192, "step": 127800 }, { "epoch": 2.71, "learning_rate": 7.594836734693878e-05, "loss": 1.5234, "step": 127900 }, { "epoch": 2.71, "learning_rate": 7.592795918367347e-05, "loss": 1.5179, "step": 128000 }, { "epoch": 2.71, "learning_rate": 7.590755102040817e-05, "loss": 1.5199, "step": 128100 }, { "epoch": 2.71, "learning_rate": 7.588714285714285e-05, "loss": 1.5272, "step": 128200 }, { "epoch": 2.72, "learning_rate": 7.586673469387755e-05, "loss": 1.5181, "step": 128300 }, { "epoch": 2.72, "learning_rate": 7.584632653061225e-05, "loss": 1.5181, "step": 128400 }, { "epoch": 2.72, "learning_rate": 7.582591836734693e-05, "loss": 1.5264, "step": 128500 }, { "epoch": 2.72, "learning_rate": 7.580591836734694e-05, "loss": 1.5234, "step": 128600 }, { "epoch": 2.72, "learning_rate": 7.578551020408163e-05, "loss": 1.5209, "step": 128700 }, { "epoch": 2.73, "learning_rate": 7.576510204081633e-05, "loss": 1.5243, "step": 128800 }, { "epoch": 2.73, "learning_rate": 7.574469387755102e-05, "loss": 1.5177, "step": 128900 }, { "epoch": 2.73, "learning_rate": 7.572428571428571e-05, "loss": 1.5149, "step": 129000 }, { "epoch": 2.73, "learning_rate": 7.57038775510204e-05, "loss": 1.5152, "step": 129100 }, { "epoch": 2.73, "learning_rate": 7.56834693877551e-05, "loss": 1.5168, "step": 129200 }, { "epoch": 2.74, "learning_rate": 7.566306122448979e-05, "loss": 1.5249, "step": 129300 }, { "epoch": 2.74, "learning_rate": 7.564265306122449e-05, "loss": 1.5187, "step": 129400 }, { "epoch": 2.74, "learning_rate": 7.562224489795919e-05, "loss": 1.5214, "step": 129500 }, { "epoch": 2.74, "learning_rate": 7.560183673469387e-05, "loss": 1.5098, "step": 129600 }, { "epoch": 2.74, "learning_rate": 7.558142857142857e-05, "loss": 1.5203, "step": 129700 }, { "epoch": 2.75, "learning_rate": 7.556102040816327e-05, "loss": 1.5166, "step": 129800 }, { "epoch": 2.75, "learning_rate": 7.554061224489797e-05, "loss": 1.5162, "step": 129900 }, { "epoch": 2.75, "learning_rate": 7.552020408163266e-05, "loss": 1.5211, "step": 130000 }, { "epoch": 2.75, "learning_rate": 7.549979591836735e-05, "loss": 1.516, "step": 130100 }, { "epoch": 2.76, "learning_rate": 7.547938775510205e-05, "loss": 1.5189, "step": 130200 }, { "epoch": 2.76, "learning_rate": 7.545897959183675e-05, "loss": 1.5171, "step": 130300 }, { "epoch": 2.76, "learning_rate": 7.543857142857143e-05, "loss": 1.5168, "step": 130400 }, { "epoch": 2.76, "learning_rate": 7.541816326530613e-05, "loss": 1.5272, "step": 130500 }, { "epoch": 2.76, "learning_rate": 7.539775510204083e-05, "loss": 1.5202, "step": 130600 }, { "epoch": 2.77, "learning_rate": 7.537734693877551e-05, "loss": 1.5196, "step": 130700 }, { "epoch": 2.77, "learning_rate": 7.535693877551021e-05, "loss": 1.5122, "step": 130800 }, { "epoch": 2.77, "learning_rate": 7.533653061224491e-05, "loss": 1.5158, "step": 130900 }, { "epoch": 2.77, "learning_rate": 7.53161224489796e-05, "loss": 1.5127, "step": 131000 }, { "epoch": 2.77, "learning_rate": 7.529571428571429e-05, "loss": 1.5193, "step": 131100 }, { "epoch": 2.78, "learning_rate": 7.527530612244899e-05, "loss": 1.5264, "step": 131200 }, { "epoch": 2.78, "learning_rate": 7.525489795918368e-05, "loss": 1.5092, "step": 131300 }, { "epoch": 2.78, "learning_rate": 7.523448979591837e-05, "loss": 1.5148, "step": 131400 }, { "epoch": 2.78, "learning_rate": 7.521408163265307e-05, "loss": 1.5172, "step": 131500 }, { "epoch": 2.79, "learning_rate": 7.519367346938776e-05, "loss": 1.5146, "step": 131600 }, { "epoch": 2.79, "learning_rate": 7.517326530612246e-05, "loss": 1.5272, "step": 131700 }, { "epoch": 2.79, "learning_rate": 7.515285714285714e-05, "loss": 1.5156, "step": 131800 }, { "epoch": 2.79, "learning_rate": 7.513244897959184e-05, "loss": 1.5165, "step": 131900 }, { "epoch": 2.79, "learning_rate": 7.511204081632654e-05, "loss": 1.5151, "step": 132000 }, { "epoch": 2.8, "learning_rate": 7.509163265306122e-05, "loss": 1.5146, "step": 132100 }, { "epoch": 2.8, "learning_rate": 7.507122448979592e-05, "loss": 1.5261, "step": 132200 }, { "epoch": 2.8, "learning_rate": 7.505081632653062e-05, "loss": 1.5172, "step": 132300 }, { "epoch": 2.8, "learning_rate": 7.50304081632653e-05, "loss": 1.5118, "step": 132400 }, { "epoch": 2.8, "learning_rate": 7.501e-05, "loss": 1.5136, "step": 132500 }, { "epoch": 2.81, "learning_rate": 7.49895918367347e-05, "loss": 1.5107, "step": 132600 }, { "epoch": 2.81, "learning_rate": 7.496918367346938e-05, "loss": 1.5172, "step": 132700 }, { "epoch": 2.81, "learning_rate": 7.494877551020408e-05, "loss": 1.52, "step": 132800 }, { "epoch": 2.81, "learning_rate": 7.492836734693878e-05, "loss": 1.5162, "step": 132900 }, { "epoch": 2.81, "learning_rate": 7.490795918367347e-05, "loss": 1.5069, "step": 133000 }, { "epoch": 2.82, "learning_rate": 7.488755102040816e-05, "loss": 1.5107, "step": 133100 }, { "epoch": 2.82, "learning_rate": 7.486714285714286e-05, "loss": 1.5024, "step": 133200 }, { "epoch": 2.82, "learning_rate": 7.484673469387755e-05, "loss": 1.5077, "step": 133300 }, { "epoch": 2.82, "learning_rate": 7.482632653061225e-05, "loss": 1.5104, "step": 133400 }, { "epoch": 2.83, "learning_rate": 7.480591836734694e-05, "loss": 1.5059, "step": 133500 }, { "epoch": 2.83, "learning_rate": 7.478551020408163e-05, "loss": 1.5116, "step": 133600 }, { "epoch": 2.83, "learning_rate": 7.476510204081633e-05, "loss": 1.5048, "step": 133700 }, { "epoch": 2.83, "learning_rate": 7.474469387755103e-05, "loss": 1.51, "step": 133800 }, { "epoch": 2.83, "learning_rate": 7.472428571428571e-05, "loss": 1.5127, "step": 133900 }, { "epoch": 2.84, "learning_rate": 7.470387755102041e-05, "loss": 1.5077, "step": 134000 }, { "epoch": 2.84, "learning_rate": 7.46834693877551e-05, "loss": 1.51, "step": 134100 }, { "epoch": 2.84, "learning_rate": 7.466326530612245e-05, "loss": 1.5082, "step": 134200 }, { "epoch": 2.84, "learning_rate": 7.464285714285715e-05, "loss": 1.5151, "step": 134300 }, { "epoch": 2.84, "learning_rate": 7.462244897959185e-05, "loss": 1.5116, "step": 134400 }, { "epoch": 2.85, "learning_rate": 7.460204081632653e-05, "loss": 1.507, "step": 134500 }, { "epoch": 2.85, "learning_rate": 7.458163265306123e-05, "loss": 1.5088, "step": 134600 }, { "epoch": 2.85, "learning_rate": 7.456122448979593e-05, "loss": 1.5147, "step": 134700 }, { "epoch": 2.85, "learning_rate": 7.454081632653061e-05, "loss": 1.5039, "step": 134800 }, { "epoch": 2.86, "learning_rate": 7.452040816326531e-05, "loss": 1.5101, "step": 134900 }, { "epoch": 2.86, "learning_rate": 7.450000000000001e-05, "loss": 1.5042, "step": 135000 }, { "epoch": 2.86, "learning_rate": 7.447959183673469e-05, "loss": 1.5117, "step": 135100 }, { "epoch": 2.86, "learning_rate": 7.445918367346939e-05, "loss": 1.5149, "step": 135200 }, { "epoch": 2.86, "learning_rate": 7.443877551020409e-05, "loss": 1.506, "step": 135300 }, { "epoch": 2.87, "learning_rate": 7.441836734693877e-05, "loss": 1.5112, "step": 135400 }, { "epoch": 2.87, "learning_rate": 7.439795918367347e-05, "loss": 1.5203, "step": 135500 }, { "epoch": 2.87, "learning_rate": 7.437755102040817e-05, "loss": 1.5137, "step": 135600 }, { "epoch": 2.87, "learning_rate": 7.435714285714286e-05, "loss": 1.5105, "step": 135700 }, { "epoch": 2.87, "learning_rate": 7.433673469387755e-05, "loss": 1.5049, "step": 135800 }, { "epoch": 2.88, "learning_rate": 7.431632653061224e-05, "loss": 1.5072, "step": 135900 }, { "epoch": 2.88, "learning_rate": 7.429591836734694e-05, "loss": 1.5087, "step": 136000 }, { "epoch": 2.88, "learning_rate": 7.427551020408164e-05, "loss": 1.5089, "step": 136100 }, { "epoch": 2.88, "learning_rate": 7.425530612244899e-05, "loss": 1.506, "step": 136200 }, { "epoch": 2.88, "learning_rate": 7.423489795918368e-05, "loss": 1.5045, "step": 136300 }, { "epoch": 2.89, "learning_rate": 7.421448979591837e-05, "loss": 1.5111, "step": 136400 }, { "epoch": 2.89, "learning_rate": 7.419408163265307e-05, "loss": 1.509, "step": 136500 }, { "epoch": 2.89, "learning_rate": 7.417367346938776e-05, "loss": 1.5069, "step": 136600 }, { "epoch": 2.89, "learning_rate": 7.415326530612246e-05, "loss": 1.513, "step": 136700 }, { "epoch": 2.9, "learning_rate": 7.413285714285715e-05, "loss": 1.5051, "step": 136800 }, { "epoch": 2.9, "learning_rate": 7.411244897959184e-05, "loss": 1.4956, "step": 136900 }, { "epoch": 2.9, "learning_rate": 7.409204081632654e-05, "loss": 1.5124, "step": 137000 }, { "epoch": 2.9, "learning_rate": 7.407163265306124e-05, "loss": 1.5031, "step": 137100 }, { "epoch": 2.9, "learning_rate": 7.405122448979592e-05, "loss": 1.506, "step": 137200 }, { "epoch": 2.91, "learning_rate": 7.403081632653062e-05, "loss": 1.5137, "step": 137300 }, { "epoch": 2.91, "learning_rate": 7.401040816326532e-05, "loss": 1.5057, "step": 137400 }, { "epoch": 2.91, "learning_rate": 7.399e-05, "loss": 1.5096, "step": 137500 }, { "epoch": 2.91, "learning_rate": 7.39695918367347e-05, "loss": 1.5013, "step": 137600 }, { "epoch": 2.91, "learning_rate": 7.394918367346938e-05, "loss": 1.4997, "step": 137700 }, { "epoch": 2.92, "learning_rate": 7.392877551020408e-05, "loss": 1.5044, "step": 137800 }, { "epoch": 2.92, "learning_rate": 7.390836734693878e-05, "loss": 1.4998, "step": 137900 }, { "epoch": 2.92, "learning_rate": 7.388795918367347e-05, "loss": 1.4997, "step": 138000 }, { "epoch": 2.92, "learning_rate": 7.386755102040816e-05, "loss": 1.5011, "step": 138100 }, { "epoch": 2.92, "learning_rate": 7.384714285714286e-05, "loss": 1.5059, "step": 138200 }, { "epoch": 2.93, "learning_rate": 7.382673469387755e-05, "loss": 1.4967, "step": 138300 }, { "epoch": 2.93, "learning_rate": 7.380632653061225e-05, "loss": 1.5058, "step": 138400 }, { "epoch": 2.93, "learning_rate": 7.378591836734694e-05, "loss": 1.507, "step": 138500 }, { "epoch": 2.93, "learning_rate": 7.376551020408163e-05, "loss": 1.5055, "step": 138600 }, { "epoch": 2.94, "learning_rate": 7.374530612244898e-05, "loss": 1.4997, "step": 138700 }, { "epoch": 2.94, "learning_rate": 7.372489795918368e-05, "loss": 1.5018, "step": 138800 }, { "epoch": 2.94, "learning_rate": 7.370448979591838e-05, "loss": 1.5025, "step": 138900 }, { "epoch": 2.94, "learning_rate": 7.368408163265307e-05, "loss": 1.4973, "step": 139000 }, { "epoch": 2.94, "learning_rate": 7.366367346938776e-05, "loss": 1.5026, "step": 139100 }, { "epoch": 2.95, "learning_rate": 7.364326530612246e-05, "loss": 1.4982, "step": 139200 }, { "epoch": 2.95, "learning_rate": 7.362285714285715e-05, "loss": 1.5012, "step": 139300 }, { "epoch": 2.95, "learning_rate": 7.360244897959184e-05, "loss": 1.496, "step": 139400 }, { "epoch": 2.95, "learning_rate": 7.358204081632653e-05, "loss": 1.4993, "step": 139500 }, { "epoch": 2.95, "learning_rate": 7.356163265306123e-05, "loss": 1.5052, "step": 139600 }, { "epoch": 2.96, "learning_rate": 7.354122448979593e-05, "loss": 1.4959, "step": 139700 }, { "epoch": 2.96, "learning_rate": 7.352081632653061e-05, "loss": 1.5008, "step": 139800 }, { "epoch": 2.96, "learning_rate": 7.350040816326531e-05, "loss": 1.5012, "step": 139900 }, { "epoch": 2.96, "learning_rate": 7.348000000000001e-05, "loss": 1.4916, "step": 140000 }, { "epoch": 2.97, "learning_rate": 7.345959183673469e-05, "loss": 1.5001, "step": 140100 }, { "epoch": 2.97, "learning_rate": 7.343918367346939e-05, "loss": 1.4972, "step": 140200 }, { "epoch": 2.97, "learning_rate": 7.341877551020409e-05, "loss": 1.508, "step": 140300 }, { "epoch": 2.97, "learning_rate": 7.339836734693877e-05, "loss": 1.5028, "step": 140400 }, { "epoch": 2.97, "learning_rate": 7.337795918367347e-05, "loss": 1.4981, "step": 140500 }, { "epoch": 2.98, "learning_rate": 7.335755102040817e-05, "loss": 1.5012, "step": 140600 }, { "epoch": 2.98, "learning_rate": 7.333714285714286e-05, "loss": 1.5039, "step": 140700 }, { "epoch": 2.98, "learning_rate": 7.331673469387755e-05, "loss": 1.4998, "step": 140800 }, { "epoch": 2.98, "learning_rate": 7.329632653061225e-05, "loss": 1.5055, "step": 140900 }, { "epoch": 2.98, "learning_rate": 7.327591836734694e-05, "loss": 1.5009, "step": 141000 }, { "epoch": 2.99, "learning_rate": 7.325551020408164e-05, "loss": 1.4966, "step": 141100 }, { "epoch": 2.99, "learning_rate": 7.323510204081633e-05, "loss": 1.4976, "step": 141200 }, { "epoch": 2.99, "learning_rate": 7.321469387755102e-05, "loss": 1.5013, "step": 141300 }, { "epoch": 2.99, "learning_rate": 7.319428571428572e-05, "loss": 1.5025, "step": 141400 }, { "epoch": 2.99, "learning_rate": 7.317387755102042e-05, "loss": 1.4963, "step": 141500 }, { "epoch": 3.0, "learning_rate": 7.31534693877551e-05, "loss": 1.5062, "step": 141600 }, { "epoch": 3.0, "learning_rate": 7.31330612244898e-05, "loss": 1.496, "step": 141700 }, { "epoch": 3.0, "learning_rate": 7.311265306122448e-05, "loss": 1.5063, "step": 141800 }, { "epoch": 3.0, "learning_rate": 7.309224489795918e-05, "loss": 1.493, "step": 141900 }, { "epoch": 3.01, "learning_rate": 7.307183673469388e-05, "loss": 1.4966, "step": 142000 }, { "epoch": 3.01, "learning_rate": 7.305142857142857e-05, "loss": 1.4851, "step": 142100 }, { "epoch": 3.01, "learning_rate": 7.303102040816326e-05, "loss": 1.4939, "step": 142200 }, { "epoch": 3.01, "learning_rate": 7.301061224489796e-05, "loss": 1.4906, "step": 142300 }, { "epoch": 3.01, "learning_rate": 7.299020408163265e-05, "loss": 1.483, "step": 142400 }, { "epoch": 3.02, "learning_rate": 7.296979591836735e-05, "loss": 1.4873, "step": 142500 }, { "epoch": 3.02, "learning_rate": 7.294938775510204e-05, "loss": 1.4995, "step": 142600 }, { "epoch": 3.02, "learning_rate": 7.29291836734694e-05, "loss": 1.4982, "step": 142700 }, { "epoch": 3.02, "learning_rate": 7.290877551020408e-05, "loss": 1.4879, "step": 142800 }, { "epoch": 3.02, "learning_rate": 7.288836734693878e-05, "loss": 1.4877, "step": 142900 }, { "epoch": 3.03, "learning_rate": 7.286816326530612e-05, "loss": 1.4944, "step": 143000 }, { "epoch": 3.03, "learning_rate": 7.284775510204082e-05, "loss": 1.4964, "step": 143100 }, { "epoch": 3.03, "learning_rate": 7.28273469387755e-05, "loss": 1.4956, "step": 143200 }, { "epoch": 3.03, "learning_rate": 7.28069387755102e-05, "loss": 1.4902, "step": 143300 }, { "epoch": 3.03, "learning_rate": 7.27865306122449e-05, "loss": 1.5054, "step": 143400 }, { "epoch": 3.04, "learning_rate": 7.276612244897959e-05, "loss": 1.4937, "step": 143500 }, { "epoch": 3.04, "learning_rate": 7.274571428571428e-05, "loss": 1.4873, "step": 143600 }, { "epoch": 3.04, "learning_rate": 7.272530612244898e-05, "loss": 1.4904, "step": 143700 }, { "epoch": 3.04, "learning_rate": 7.270489795918368e-05, "loss": 1.4901, "step": 143800 }, { "epoch": 3.05, "learning_rate": 7.268448979591838e-05, "loss": 1.4897, "step": 143900 }, { "epoch": 3.05, "learning_rate": 7.266408163265306e-05, "loss": 1.4886, "step": 144000 }, { "epoch": 3.05, "learning_rate": 7.264367346938776e-05, "loss": 1.4874, "step": 144100 }, { "epoch": 3.05, "learning_rate": 7.262326530612246e-05, "loss": 1.4873, "step": 144200 }, { "epoch": 3.05, "learning_rate": 7.260285714285715e-05, "loss": 1.4831, "step": 144300 }, { "epoch": 3.06, "learning_rate": 7.258244897959184e-05, "loss": 1.4915, "step": 144400 }, { "epoch": 3.06, "learning_rate": 7.256204081632654e-05, "loss": 1.4896, "step": 144500 }, { "epoch": 3.06, "learning_rate": 7.254163265306123e-05, "loss": 1.494, "step": 144600 }, { "epoch": 3.06, "learning_rate": 7.252122448979593e-05, "loss": 1.4907, "step": 144700 }, { "epoch": 3.06, "learning_rate": 7.250081632653062e-05, "loss": 1.4886, "step": 144800 }, { "epoch": 3.07, "learning_rate": 7.248040816326531e-05, "loss": 1.4901, "step": 144900 }, { "epoch": 3.07, "learning_rate": 7.246000000000001e-05, "loss": 1.4895, "step": 145000 }, { "epoch": 3.07, "learning_rate": 7.24395918367347e-05, "loss": 1.4902, "step": 145100 }, { "epoch": 3.07, "learning_rate": 7.241918367346939e-05, "loss": 1.4891, "step": 145200 }, { "epoch": 3.08, "learning_rate": 7.239877551020409e-05, "loss": 1.4909, "step": 145300 }, { "epoch": 3.08, "learning_rate": 7.237836734693877e-05, "loss": 1.4858, "step": 145400 }, { "epoch": 3.08, "learning_rate": 7.235795918367347e-05, "loss": 1.4814, "step": 145500 }, { "epoch": 3.08, "learning_rate": 7.233755102040817e-05, "loss": 1.4896, "step": 145600 }, { "epoch": 3.08, "learning_rate": 7.231714285714286e-05, "loss": 1.4909, "step": 145700 }, { "epoch": 3.09, "learning_rate": 7.229673469387755e-05, "loss": 1.4817, "step": 145800 }, { "epoch": 3.09, "learning_rate": 7.227632653061225e-05, "loss": 1.4996, "step": 145900 }, { "epoch": 3.09, "learning_rate": 7.225612244897959e-05, "loss": 1.4846, "step": 146000 }, { "epoch": 3.09, "learning_rate": 7.223571428571428e-05, "loss": 1.4848, "step": 146100 }, { "epoch": 3.09, "learning_rate": 7.221530612244898e-05, "loss": 1.4927, "step": 146200 }, { "epoch": 3.1, "learning_rate": 7.219489795918367e-05, "loss": 1.4827, "step": 146300 }, { "epoch": 3.1, "learning_rate": 7.217448979591837e-05, "loss": 1.4828, "step": 146400 }, { "epoch": 3.1, "learning_rate": 7.215408163265307e-05, "loss": 1.4836, "step": 146500 }, { "epoch": 3.1, "learning_rate": 7.213367346938777e-05, "loss": 1.492, "step": 146600 }, { "epoch": 3.1, "learning_rate": 7.211326530612245e-05, "loss": 1.486, "step": 146700 }, { "epoch": 3.11, "learning_rate": 7.209285714285715e-05, "loss": 1.4876, "step": 146800 }, { "epoch": 3.11, "learning_rate": 7.207244897959185e-05, "loss": 1.4951, "step": 146900 }, { "epoch": 3.11, "learning_rate": 7.205204081632654e-05, "loss": 1.4902, "step": 147000 }, { "epoch": 3.11, "learning_rate": 7.203163265306123e-05, "loss": 1.4879, "step": 147100 }, { "epoch": 3.12, "learning_rate": 7.201122448979592e-05, "loss": 1.4919, "step": 147200 }, { "epoch": 3.12, "learning_rate": 7.199081632653062e-05, "loss": 1.4878, "step": 147300 }, { "epoch": 3.12, "learning_rate": 7.197040816326532e-05, "loss": 1.4864, "step": 147400 }, { "epoch": 3.12, "learning_rate": 7.195e-05, "loss": 1.491, "step": 147500 }, { "epoch": 3.12, "learning_rate": 7.19295918367347e-05, "loss": 1.487, "step": 147600 }, { "epoch": 3.13, "learning_rate": 7.19091836734694e-05, "loss": 1.4847, "step": 147700 }, { "epoch": 3.13, "learning_rate": 7.188877551020408e-05, "loss": 1.4858, "step": 147800 }, { "epoch": 3.13, "learning_rate": 7.186836734693878e-05, "loss": 1.4811, "step": 147900 }, { "epoch": 3.13, "learning_rate": 7.184795918367348e-05, "loss": 1.4852, "step": 148000 }, { "epoch": 3.13, "learning_rate": 7.182755102040816e-05, "loss": 1.4834, "step": 148100 }, { "epoch": 3.14, "learning_rate": 7.180714285714286e-05, "loss": 1.4882, "step": 148200 }, { "epoch": 3.14, "learning_rate": 7.178673469387756e-05, "loss": 1.4876, "step": 148300 }, { "epoch": 3.14, "learning_rate": 7.176632653061225e-05, "loss": 1.4891, "step": 148400 }, { "epoch": 3.14, "learning_rate": 7.174612244897959e-05, "loss": 1.4888, "step": 148500 }, { "epoch": 3.14, "learning_rate": 7.172571428571428e-05, "loss": 1.4906, "step": 148600 }, { "epoch": 3.15, "learning_rate": 7.170530612244898e-05, "loss": 1.4777, "step": 148700 }, { "epoch": 3.15, "learning_rate": 7.168489795918367e-05, "loss": 1.4833, "step": 148800 }, { "epoch": 3.15, "learning_rate": 7.166448979591837e-05, "loss": 1.4839, "step": 148900 }, { "epoch": 3.15, "learning_rate": 7.164408163265306e-05, "loss": 1.497, "step": 149000 }, { "epoch": 3.16, "learning_rate": 7.162367346938775e-05, "loss": 1.485, "step": 149100 }, { "epoch": 3.16, "learning_rate": 7.160326530612246e-05, "loss": 1.4841, "step": 149200 }, { "epoch": 3.16, "learning_rate": 7.158285714285715e-05, "loss": 1.4745, "step": 149300 }, { "epoch": 3.16, "learning_rate": 7.156244897959184e-05, "loss": 1.4864, "step": 149400 }, { "epoch": 3.16, "learning_rate": 7.154204081632654e-05, "loss": 1.4819, "step": 149500 }, { "epoch": 3.17, "learning_rate": 7.152163265306123e-05, "loss": 1.4867, "step": 149600 }, { "epoch": 3.17, "learning_rate": 7.150122448979593e-05, "loss": 1.4801, "step": 149700 }, { "epoch": 3.17, "learning_rate": 7.148081632653062e-05, "loss": 1.4701, "step": 149800 }, { "epoch": 3.17, "learning_rate": 7.146040816326531e-05, "loss": 1.4851, "step": 149900 }, { "epoch": 3.17, "learning_rate": 7.144000000000001e-05, "loss": 1.4769, "step": 150000 }, { "epoch": 3.18, "learning_rate": 7.14195918367347e-05, "loss": 1.4796, "step": 150100 }, { "epoch": 3.18, "learning_rate": 7.139918367346939e-05, "loss": 1.4736, "step": 150200 }, { "epoch": 3.18, "learning_rate": 7.137877551020409e-05, "loss": 1.4836, "step": 150300 }, { "epoch": 3.18, "learning_rate": 7.135836734693879e-05, "loss": 1.4787, "step": 150400 }, { "epoch": 3.19, "learning_rate": 7.133795918367347e-05, "loss": 1.4834, "step": 150500 }, { "epoch": 3.19, "learning_rate": 7.131755102040817e-05, "loss": 1.4826, "step": 150600 }, { "epoch": 3.19, "learning_rate": 7.129714285714287e-05, "loss": 1.4824, "step": 150700 }, { "epoch": 3.19, "learning_rate": 7.127673469387755e-05, "loss": 1.4822, "step": 150800 }, { "epoch": 3.19, "learning_rate": 7.125632653061225e-05, "loss": 1.4784, "step": 150900 }, { "epoch": 3.2, "learning_rate": 7.123591836734694e-05, "loss": 1.4739, "step": 151000 }, { "epoch": 3.2, "learning_rate": 7.121551020408163e-05, "loss": 1.4851, "step": 151100 }, { "epoch": 3.2, "learning_rate": 7.119510204081633e-05, "loss": 1.475, "step": 151200 }, { "epoch": 3.2, "learning_rate": 7.117469387755102e-05, "loss": 1.4827, "step": 151300 }, { "epoch": 3.2, "learning_rate": 7.115428571428572e-05, "loss": 1.4764, "step": 151400 }, { "epoch": 3.21, "learning_rate": 7.113387755102041e-05, "loss": 1.4717, "step": 151500 }, { "epoch": 3.21, "learning_rate": 7.11134693877551e-05, "loss": 1.4854, "step": 151600 }, { "epoch": 3.21, "learning_rate": 7.109326530612245e-05, "loss": 1.4696, "step": 151700 }, { "epoch": 3.21, "learning_rate": 7.107285714285714e-05, "loss": 1.4815, "step": 151800 }, { "epoch": 3.21, "learning_rate": 7.105244897959184e-05, "loss": 1.4826, "step": 151900 }, { "epoch": 3.22, "learning_rate": 7.103204081632654e-05, "loss": 1.4785, "step": 152000 }, { "epoch": 3.22, "learning_rate": 7.101163265306123e-05, "loss": 1.4766, "step": 152100 }, { "epoch": 3.22, "learning_rate": 7.099122448979593e-05, "loss": 1.4779, "step": 152200 }, { "epoch": 3.22, "learning_rate": 7.097081632653062e-05, "loss": 1.4775, "step": 152300 }, { "epoch": 3.23, "learning_rate": 7.095040816326532e-05, "loss": 1.4779, "step": 152400 }, { "epoch": 3.23, "learning_rate": 7.093000000000001e-05, "loss": 1.4756, "step": 152500 }, { "epoch": 3.23, "learning_rate": 7.09095918367347e-05, "loss": 1.482, "step": 152600 }, { "epoch": 3.23, "learning_rate": 7.08891836734694e-05, "loss": 1.4791, "step": 152700 }, { "epoch": 3.23, "learning_rate": 7.086877551020408e-05, "loss": 1.4696, "step": 152800 }, { "epoch": 3.24, "learning_rate": 7.084836734693878e-05, "loss": 1.4822, "step": 152900 }, { "epoch": 3.24, "learning_rate": 7.082795918367348e-05, "loss": 1.4795, "step": 153000 }, { "epoch": 3.24, "learning_rate": 7.080755102040816e-05, "loss": 1.4753, "step": 153100 }, { "epoch": 3.24, "learning_rate": 7.078714285714286e-05, "loss": 1.4844, "step": 153200 }, { "epoch": 3.24, "learning_rate": 7.076673469387756e-05, "loss": 1.4719, "step": 153300 }, { "epoch": 3.25, "learning_rate": 7.074632653061224e-05, "loss": 1.4787, "step": 153400 }, { "epoch": 3.25, "learning_rate": 7.072591836734694e-05, "loss": 1.4747, "step": 153500 }, { "epoch": 3.25, "learning_rate": 7.070551020408164e-05, "loss": 1.477, "step": 153600 }, { "epoch": 3.25, "learning_rate": 7.068510204081633e-05, "loss": 1.4793, "step": 153700 }, { "epoch": 3.26, "learning_rate": 7.066469387755102e-05, "loss": 1.4716, "step": 153800 }, { "epoch": 3.26, "learning_rate": 7.064448979591837e-05, "loss": 1.4776, "step": 153900 }, { "epoch": 3.26, "learning_rate": 7.062408163265306e-05, "loss": 1.4762, "step": 154000 }, { "epoch": 3.26, "learning_rate": 7.060367346938775e-05, "loss": 1.4777, "step": 154100 }, { "epoch": 3.26, "learning_rate": 7.058326530612245e-05, "loss": 1.4844, "step": 154200 }, { "epoch": 3.27, "learning_rate": 7.056285714285715e-05, "loss": 1.4796, "step": 154300 }, { "epoch": 3.27, "learning_rate": 7.054244897959183e-05, "loss": 1.4695, "step": 154400 }, { "epoch": 3.27, "learning_rate": 7.052204081632653e-05, "loss": 1.4783, "step": 154500 }, { "epoch": 3.27, "learning_rate": 7.050163265306123e-05, "loss": 1.4821, "step": 154600 }, { "epoch": 3.27, "learning_rate": 7.048122448979593e-05, "loss": 1.4741, "step": 154700 }, { "epoch": 3.28, "learning_rate": 7.046081632653062e-05, "loss": 1.4705, "step": 154800 }, { "epoch": 3.28, "learning_rate": 7.044040816326531e-05, "loss": 1.4731, "step": 154900 }, { "epoch": 3.28, "learning_rate": 7.042000000000001e-05, "loss": 1.4795, "step": 155000 }, { "epoch": 3.28, "learning_rate": 7.03995918367347e-05, "loss": 1.4709, "step": 155100 }, { "epoch": 3.28, "learning_rate": 7.037918367346939e-05, "loss": 1.4758, "step": 155200 }, { "epoch": 3.29, "learning_rate": 7.035877551020409e-05, "loss": 1.4721, "step": 155300 }, { "epoch": 3.29, "learning_rate": 7.033836734693879e-05, "loss": 1.4812, "step": 155400 }, { "epoch": 3.29, "learning_rate": 7.031795918367347e-05, "loss": 1.4775, "step": 155500 }, { "epoch": 3.29, "learning_rate": 7.029755102040817e-05, "loss": 1.4835, "step": 155600 }, { "epoch": 3.3, "learning_rate": 7.027714285714287e-05, "loss": 1.4796, "step": 155700 }, { "epoch": 3.3, "learning_rate": 7.025673469387755e-05, "loss": 1.4798, "step": 155800 }, { "epoch": 3.3, "learning_rate": 7.023632653061225e-05, "loss": 1.4718, "step": 155900 }, { "epoch": 3.3, "learning_rate": 7.021591836734695e-05, "loss": 1.4766, "step": 156000 }, { "epoch": 3.3, "learning_rate": 7.019571428571429e-05, "loss": 1.4761, "step": 156100 }, { "epoch": 3.31, "learning_rate": 7.017530612244898e-05, "loss": 1.4791, "step": 156200 }, { "epoch": 3.31, "learning_rate": 7.015489795918367e-05, "loss": 1.463, "step": 156300 }, { "epoch": 3.31, "learning_rate": 7.013448979591837e-05, "loss": 1.4671, "step": 156400 }, { "epoch": 3.31, "learning_rate": 7.011408163265306e-05, "loss": 1.4708, "step": 156500 }, { "epoch": 3.31, "learning_rate": 7.009367346938776e-05, "loss": 1.4685, "step": 156600 }, { "epoch": 3.32, "learning_rate": 7.007326530612245e-05, "loss": 1.4649, "step": 156700 }, { "epoch": 3.32, "learning_rate": 7.005285714285714e-05, "loss": 1.4684, "step": 156800 }, { "epoch": 3.32, "learning_rate": 7.003244897959184e-05, "loss": 1.4714, "step": 156900 }, { "epoch": 3.32, "learning_rate": 7.001204081632654e-05, "loss": 1.4737, "step": 157000 }, { "epoch": 3.32, "learning_rate": 6.999163265306122e-05, "loss": 1.4659, "step": 157100 }, { "epoch": 3.33, "learning_rate": 6.997122448979592e-05, "loss": 1.4762, "step": 157200 }, { "epoch": 3.33, "learning_rate": 6.995081632653062e-05, "loss": 1.4687, "step": 157300 }, { "epoch": 3.33, "learning_rate": 6.99304081632653e-05, "loss": 1.473, "step": 157400 }, { "epoch": 3.33, "learning_rate": 6.991000000000001e-05, "loss": 1.4657, "step": 157500 }, { "epoch": 3.34, "learning_rate": 6.98895918367347e-05, "loss": 1.4718, "step": 157600 }, { "epoch": 3.34, "learning_rate": 6.98691836734694e-05, "loss": 1.4785, "step": 157700 }, { "epoch": 3.34, "learning_rate": 6.98487755102041e-05, "loss": 1.4707, "step": 157800 }, { "epoch": 3.34, "learning_rate": 6.982836734693878e-05, "loss": 1.4636, "step": 157900 }, { "epoch": 3.34, "learning_rate": 6.980795918367348e-05, "loss": 1.4707, "step": 158000 }, { "epoch": 3.35, "learning_rate": 6.978755102040818e-05, "loss": 1.4648, "step": 158100 }, { "epoch": 3.35, "learning_rate": 6.976714285714286e-05, "loss": 1.4699, "step": 158200 }, { "epoch": 3.35, "learning_rate": 6.974673469387756e-05, "loss": 1.4668, "step": 158300 }, { "epoch": 3.35, "learning_rate": 6.972632653061226e-05, "loss": 1.4699, "step": 158400 }, { "epoch": 3.35, "learning_rate": 6.970591836734694e-05, "loss": 1.4691, "step": 158500 }, { "epoch": 3.36, "learning_rate": 6.968571428571428e-05, "loss": 1.4771, "step": 158600 }, { "epoch": 3.36, "learning_rate": 6.966530612244898e-05, "loss": 1.4666, "step": 158700 }, { "epoch": 3.36, "learning_rate": 6.964489795918368e-05, "loss": 1.4655, "step": 158800 }, { "epoch": 3.36, "learning_rate": 6.962448979591837e-05, "loss": 1.4689, "step": 158900 }, { "epoch": 3.37, "learning_rate": 6.960408163265306e-05, "loss": 1.4633, "step": 159000 }, { "epoch": 3.37, "learning_rate": 6.958367346938776e-05, "loss": 1.4655, "step": 159100 }, { "epoch": 3.37, "learning_rate": 6.956326530612245e-05, "loss": 1.4622, "step": 159200 }, { "epoch": 3.37, "learning_rate": 6.954285714285714e-05, "loss": 1.4644, "step": 159300 }, { "epoch": 3.37, "learning_rate": 6.952244897959184e-05, "loss": 1.4699, "step": 159400 }, { "epoch": 3.38, "learning_rate": 6.950204081632653e-05, "loss": 1.4622, "step": 159500 }, { "epoch": 3.38, "learning_rate": 6.948163265306123e-05, "loss": 1.4687, "step": 159600 }, { "epoch": 3.38, "learning_rate": 6.946122448979591e-05, "loss": 1.4754, "step": 159700 }, { "epoch": 3.38, "learning_rate": 6.944081632653061e-05, "loss": 1.4585, "step": 159800 }, { "epoch": 3.38, "learning_rate": 6.942040816326531e-05, "loss": 1.4764, "step": 159900 }, { "epoch": 3.39, "learning_rate": 6.939999999999999e-05, "loss": 1.477, "step": 160000 }, { "epoch": 3.39, "learning_rate": 6.937959183673469e-05, "loss": 1.4675, "step": 160100 }, { "epoch": 3.39, "learning_rate": 6.935918367346939e-05, "loss": 1.4614, "step": 160200 }, { "epoch": 3.39, "learning_rate": 6.933877551020409e-05, "loss": 1.4692, "step": 160300 }, { "epoch": 3.39, "learning_rate": 6.931836734693879e-05, "loss": 1.4738, "step": 160400 }, { "epoch": 3.4, "learning_rate": 6.929795918367347e-05, "loss": 1.4686, "step": 160500 }, { "epoch": 3.4, "learning_rate": 6.927755102040817e-05, "loss": 1.4662, "step": 160600 }, { "epoch": 3.4, "learning_rate": 6.925714285714287e-05, "loss": 1.4678, "step": 160700 }, { "epoch": 3.4, "learning_rate": 6.923673469387755e-05, "loss": 1.4637, "step": 160800 }, { "epoch": 3.41, "learning_rate": 6.921632653061225e-05, "loss": 1.4693, "step": 160900 }, { "epoch": 3.41, "learning_rate": 6.919591836734695e-05, "loss": 1.4652, "step": 161000 }, { "epoch": 3.41, "learning_rate": 6.917551020408163e-05, "loss": 1.4715, "step": 161100 }, { "epoch": 3.41, "learning_rate": 6.915510204081633e-05, "loss": 1.4652, "step": 161200 }, { "epoch": 3.41, "learning_rate": 6.913469387755103e-05, "loss": 1.4633, "step": 161300 }, { "epoch": 3.42, "learning_rate": 6.911428571428572e-05, "loss": 1.4604, "step": 161400 }, { "epoch": 3.42, "learning_rate": 6.909387755102041e-05, "loss": 1.4643, "step": 161500 }, { "epoch": 3.42, "learning_rate": 6.907346938775511e-05, "loss": 1.4675, "step": 161600 }, { "epoch": 3.42, "learning_rate": 6.90530612244898e-05, "loss": 1.4678, "step": 161700 }, { "epoch": 3.42, "learning_rate": 6.90326530612245e-05, "loss": 1.4709, "step": 161800 }, { "epoch": 3.43, "learning_rate": 6.90122448979592e-05, "loss": 1.4652, "step": 161900 }, { "epoch": 3.43, "learning_rate": 6.899183673469388e-05, "loss": 1.4665, "step": 162000 }, { "epoch": 3.43, "learning_rate": 6.897142857142858e-05, "loss": 1.4581, "step": 162100 }, { "epoch": 3.43, "learning_rate": 6.895102040816328e-05, "loss": 1.4595, "step": 162200 }, { "epoch": 3.43, "learning_rate": 6.893061224489796e-05, "loss": 1.4611, "step": 162300 }, { "epoch": 3.44, "learning_rate": 6.891020408163266e-05, "loss": 1.4668, "step": 162400 }, { "epoch": 3.44, "learning_rate": 6.888979591836736e-05, "loss": 1.4709, "step": 162500 }, { "epoch": 3.44, "learning_rate": 6.886938775510204e-05, "loss": 1.4499, "step": 162600 }, { "epoch": 3.44, "learning_rate": 6.884918367346938e-05, "loss": 1.4748, "step": 162700 }, { "epoch": 3.45, "learning_rate": 6.882877551020408e-05, "loss": 1.4707, "step": 162800 }, { "epoch": 3.45, "learning_rate": 6.880836734693878e-05, "loss": 1.47, "step": 162900 }, { "epoch": 3.45, "learning_rate": 6.878795918367346e-05, "loss": 1.4619, "step": 163000 }, { "epoch": 3.45, "learning_rate": 6.876755102040818e-05, "loss": 1.4681, "step": 163100 }, { "epoch": 3.45, "learning_rate": 6.874714285714286e-05, "loss": 1.474, "step": 163200 }, { "epoch": 3.46, "learning_rate": 6.872673469387756e-05, "loss": 1.4628, "step": 163300 }, { "epoch": 3.46, "learning_rate": 6.870632653061226e-05, "loss": 1.4585, "step": 163400 }, { "epoch": 3.46, "learning_rate": 6.868591836734694e-05, "loss": 1.4648, "step": 163500 }, { "epoch": 3.46, "learning_rate": 6.866551020408164e-05, "loss": 1.4648, "step": 163600 }, { "epoch": 3.46, "learning_rate": 6.864530612244898e-05, "loss": 1.4679, "step": 163700 }, { "epoch": 3.47, "learning_rate": 6.862489795918368e-05, "loss": 1.4626, "step": 163800 }, { "epoch": 3.47, "learning_rate": 6.860448979591836e-05, "loss": 1.4632, "step": 163900 }, { "epoch": 3.47, "learning_rate": 6.858408163265306e-05, "loss": 1.4665, "step": 164000 }, { "epoch": 3.47, "learning_rate": 6.856367346938776e-05, "loss": 1.4637, "step": 164100 }, { "epoch": 3.48, "learning_rate": 6.854326530612245e-05, "loss": 1.4647, "step": 164200 }, { "epoch": 3.48, "learning_rate": 6.852285714285714e-05, "loss": 1.4545, "step": 164300 }, { "epoch": 3.48, "learning_rate": 6.850244897959184e-05, "loss": 1.468, "step": 164400 }, { "epoch": 3.48, "learning_rate": 6.848204081632653e-05, "loss": 1.4599, "step": 164500 }, { "epoch": 3.48, "learning_rate": 6.846163265306123e-05, "loss": 1.4655, "step": 164600 }, { "epoch": 3.49, "learning_rate": 6.844122448979592e-05, "loss": 1.46, "step": 164700 }, { "epoch": 3.49, "learning_rate": 6.842081632653061e-05, "loss": 1.4621, "step": 164800 }, { "epoch": 3.49, "learning_rate": 6.840040816326531e-05, "loss": 1.4609, "step": 164900 }, { "epoch": 3.49, "learning_rate": 6.838e-05, "loss": 1.4595, "step": 165000 }, { "epoch": 3.49, "learning_rate": 6.835959183673469e-05, "loss": 1.4637, "step": 165100 }, { "epoch": 3.5, "learning_rate": 6.833918367346939e-05, "loss": 1.4643, "step": 165200 }, { "epoch": 3.5, "learning_rate": 6.831877551020407e-05, "loss": 1.4607, "step": 165300 }, { "epoch": 3.5, "learning_rate": 6.829836734693877e-05, "loss": 1.4653, "step": 165400 }, { "epoch": 3.5, "learning_rate": 6.827795918367347e-05, "loss": 1.4641, "step": 165500 }, { "epoch": 3.5, "learning_rate": 6.825755102040816e-05, "loss": 1.4606, "step": 165600 }, { "epoch": 3.51, "learning_rate": 6.823714285714285e-05, "loss": 1.4565, "step": 165700 }, { "epoch": 3.51, "learning_rate": 6.821673469387757e-05, "loss": 1.4664, "step": 165800 }, { "epoch": 3.51, "learning_rate": 6.819632653061225e-05, "loss": 1.4644, "step": 165900 }, { "epoch": 3.51, "learning_rate": 6.817591836734695e-05, "loss": 1.464, "step": 166000 }, { "epoch": 3.52, "learning_rate": 6.815551020408165e-05, "loss": 1.4656, "step": 166100 }, { "epoch": 3.52, "learning_rate": 6.813510204081633e-05, "loss": 1.4568, "step": 166200 }, { "epoch": 3.52, "learning_rate": 6.811469387755103e-05, "loss": 1.4682, "step": 166300 }, { "epoch": 3.52, "learning_rate": 6.809428571428572e-05, "loss": 1.462, "step": 166400 }, { "epoch": 3.52, "learning_rate": 6.807387755102041e-05, "loss": 1.466, "step": 166500 }, { "epoch": 3.53, "learning_rate": 6.805346938775511e-05, "loss": 1.4559, "step": 166600 }, { "epoch": 3.53, "learning_rate": 6.80330612244898e-05, "loss": 1.4558, "step": 166700 }, { "epoch": 3.53, "learning_rate": 6.80126530612245e-05, "loss": 1.4545, "step": 166800 }, { "epoch": 3.53, "learning_rate": 6.79922448979592e-05, "loss": 1.4587, "step": 166900 }, { "epoch": 3.53, "learning_rate": 6.797183673469388e-05, "loss": 1.4619, "step": 167000 }, { "epoch": 3.54, "learning_rate": 6.795142857142858e-05, "loss": 1.4557, "step": 167100 }, { "epoch": 3.54, "learning_rate": 6.793102040816328e-05, "loss": 1.4665, "step": 167200 }, { "epoch": 3.54, "learning_rate": 6.791061224489796e-05, "loss": 1.466, "step": 167300 }, { "epoch": 3.54, "learning_rate": 6.789020408163266e-05, "loss": 1.4621, "step": 167400 }, { "epoch": 3.54, "learning_rate": 6.786979591836736e-05, "loss": 1.4557, "step": 167500 }, { "epoch": 3.55, "learning_rate": 6.784938775510204e-05, "loss": 1.4589, "step": 167600 }, { "epoch": 3.55, "learning_rate": 6.782897959183674e-05, "loss": 1.4491, "step": 167700 }, { "epoch": 3.55, "learning_rate": 6.780857142857144e-05, "loss": 1.4534, "step": 167800 }, { "epoch": 3.55, "learning_rate": 6.778836734693878e-05, "loss": 1.4545, "step": 167900 }, { "epoch": 3.56, "learning_rate": 6.776795918367346e-05, "loss": 1.4624, "step": 168000 }, { "epoch": 3.56, "learning_rate": 6.774755102040816e-05, "loss": 1.4559, "step": 168100 }, { "epoch": 3.56, "learning_rate": 6.772714285714286e-05, "loss": 1.459, "step": 168200 }, { "epoch": 3.56, "learning_rate": 6.770673469387755e-05, "loss": 1.4538, "step": 168300 }, { "epoch": 3.56, "learning_rate": 6.768632653061224e-05, "loss": 1.4577, "step": 168400 }, { "epoch": 3.57, "learning_rate": 6.766591836734694e-05, "loss": 1.4527, "step": 168500 }, { "epoch": 3.57, "learning_rate": 6.764551020408164e-05, "loss": 1.4545, "step": 168600 }, { "epoch": 3.57, "learning_rate": 6.762510204081634e-05, "loss": 1.4587, "step": 168700 }, { "epoch": 3.57, "learning_rate": 6.760469387755102e-05, "loss": 1.4558, "step": 168800 }, { "epoch": 3.57, "learning_rate": 6.758428571428572e-05, "loss": 1.4493, "step": 168900 }, { "epoch": 3.58, "learning_rate": 6.756408163265306e-05, "loss": 1.4554, "step": 169000 }, { "epoch": 3.58, "learning_rate": 6.754367346938776e-05, "loss": 1.4521, "step": 169100 }, { "epoch": 3.58, "learning_rate": 6.752326530612245e-05, "loss": 1.4568, "step": 169200 }, { "epoch": 3.58, "learning_rate": 6.750285714285714e-05, "loss": 1.4459, "step": 169300 }, { "epoch": 3.59, "learning_rate": 6.748244897959184e-05, "loss": 1.4529, "step": 169400 }, { "epoch": 3.59, "learning_rate": 6.746204081632653e-05, "loss": 1.4647, "step": 169500 }, { "epoch": 3.59, "learning_rate": 6.744163265306123e-05, "loss": 1.4514, "step": 169600 }, { "epoch": 3.59, "learning_rate": 6.742122448979592e-05, "loss": 1.4546, "step": 169700 }, { "epoch": 3.59, "learning_rate": 6.740081632653061e-05, "loss": 1.4522, "step": 169800 }, { "epoch": 3.6, "learning_rate": 6.738040816326531e-05, "loss": 1.4545, "step": 169900 }, { "epoch": 3.6, "learning_rate": 6.736e-05, "loss": 1.4591, "step": 170000 }, { "epoch": 3.6, "learning_rate": 6.733959183673469e-05, "loss": 1.4543, "step": 170100 }, { "epoch": 3.6, "learning_rate": 6.731918367346939e-05, "loss": 1.451, "step": 170200 }, { "epoch": 3.6, "learning_rate": 6.729877551020409e-05, "loss": 1.4572, "step": 170300 }, { "epoch": 3.61, "learning_rate": 6.727836734693877e-05, "loss": 1.4536, "step": 170400 }, { "epoch": 3.61, "learning_rate": 6.725795918367347e-05, "loss": 1.4485, "step": 170500 }, { "epoch": 3.61, "learning_rate": 6.723755102040817e-05, "loss": 1.4536, "step": 170600 }, { "epoch": 3.61, "learning_rate": 6.721714285714285e-05, "loss": 1.4581, "step": 170700 }, { "epoch": 3.61, "learning_rate": 6.719673469387755e-05, "loss": 1.4488, "step": 170800 }, { "epoch": 3.62, "learning_rate": 6.717632653061225e-05, "loss": 1.4555, "step": 170900 }, { "epoch": 3.62, "learning_rate": 6.715612244897959e-05, "loss": 1.4499, "step": 171000 }, { "epoch": 3.62, "learning_rate": 6.713571428571429e-05, "loss": 1.4531, "step": 171100 }, { "epoch": 3.62, "learning_rate": 6.711530612244899e-05, "loss": 1.4526, "step": 171200 }, { "epoch": 3.63, "learning_rate": 6.709489795918367e-05, "loss": 1.4559, "step": 171300 }, { "epoch": 3.63, "learning_rate": 6.707448979591837e-05, "loss": 1.4425, "step": 171400 }, { "epoch": 3.63, "learning_rate": 6.705408163265307e-05, "loss": 1.4522, "step": 171500 }, { "epoch": 3.63, "learning_rate": 6.703367346938775e-05, "loss": 1.4468, "step": 171600 }, { "epoch": 3.63, "learning_rate": 6.701326530612245e-05, "loss": 1.4513, "step": 171700 }, { "epoch": 3.64, "learning_rate": 6.699285714285715e-05, "loss": 1.4513, "step": 171800 }, { "epoch": 3.64, "learning_rate": 6.697244897959184e-05, "loss": 1.4582, "step": 171900 }, { "epoch": 3.64, "learning_rate": 6.695204081632653e-05, "loss": 1.4562, "step": 172000 }, { "epoch": 3.64, "learning_rate": 6.693163265306123e-05, "loss": 1.4456, "step": 172100 }, { "epoch": 3.64, "learning_rate": 6.691122448979592e-05, "loss": 1.4475, "step": 172200 }, { "epoch": 3.65, "learning_rate": 6.689081632653062e-05, "loss": 1.4458, "step": 172300 }, { "epoch": 3.65, "learning_rate": 6.687040816326531e-05, "loss": 1.444, "step": 172400 }, { "epoch": 3.65, "learning_rate": 6.685e-05, "loss": 1.4507, "step": 172500 }, { "epoch": 3.65, "learning_rate": 6.68295918367347e-05, "loss": 1.451, "step": 172600 }, { "epoch": 3.66, "learning_rate": 6.68091836734694e-05, "loss": 1.4528, "step": 172700 }, { "epoch": 3.66, "learning_rate": 6.678877551020408e-05, "loss": 1.4532, "step": 172800 }, { "epoch": 3.66, "learning_rate": 6.676836734693878e-05, "loss": 1.4425, "step": 172900 }, { "epoch": 3.66, "learning_rate": 6.674795918367346e-05, "loss": 1.4472, "step": 173000 }, { "epoch": 3.66, "learning_rate": 6.672755102040816e-05, "loss": 1.4475, "step": 173100 }, { "epoch": 3.67, "learning_rate": 6.670714285714286e-05, "loss": 1.4531, "step": 173200 }, { "epoch": 3.67, "learning_rate": 6.668673469387754e-05, "loss": 1.4475, "step": 173300 }, { "epoch": 3.67, "learning_rate": 6.666632653061224e-05, "loss": 1.4532, "step": 173400 }, { "epoch": 3.67, "learning_rate": 6.66461224489796e-05, "loss": 1.461, "step": 173500 }, { "epoch": 3.67, "learning_rate": 6.66257142857143e-05, "loss": 1.4396, "step": 173600 }, { "epoch": 3.68, "learning_rate": 6.660530612244898e-05, "loss": 1.4489, "step": 173700 }, { "epoch": 3.68, "learning_rate": 6.658489795918368e-05, "loss": 1.4457, "step": 173800 }, { "epoch": 3.68, "learning_rate": 6.656448979591838e-05, "loss": 1.4496, "step": 173900 }, { "epoch": 3.68, "learning_rate": 6.654408163265306e-05, "loss": 1.4456, "step": 174000 }, { "epoch": 3.68, "learning_rate": 6.652367346938776e-05, "loss": 1.4436, "step": 174100 }, { "epoch": 3.69, "learning_rate": 6.650326530612246e-05, "loss": 1.4414, "step": 174200 }, { "epoch": 3.69, "learning_rate": 6.648285714285714e-05, "loss": 1.451, "step": 174300 }, { "epoch": 3.69, "learning_rate": 6.646244897959184e-05, "loss": 1.4498, "step": 174400 }, { "epoch": 3.69, "learning_rate": 6.644204081632654e-05, "loss": 1.4499, "step": 174500 }, { "epoch": 3.7, "learning_rate": 6.642163265306123e-05, "loss": 1.4449, "step": 174600 }, { "epoch": 3.7, "learning_rate": 6.640122448979592e-05, "loss": 1.4533, "step": 174700 }, { "epoch": 3.7, "learning_rate": 6.638081632653061e-05, "loss": 1.4513, "step": 174800 }, { "epoch": 3.7, "learning_rate": 6.636040816326531e-05, "loss": 1.4592, "step": 174900 }, { "epoch": 3.7, "learning_rate": 6.634e-05, "loss": 1.4425, "step": 175000 }, { "epoch": 3.71, "learning_rate": 6.631959183673469e-05, "loss": 1.4557, "step": 175100 }, { "epoch": 3.71, "learning_rate": 6.629918367346939e-05, "loss": 1.4598, "step": 175200 }, { "epoch": 3.71, "learning_rate": 6.627877551020409e-05, "loss": 1.4538, "step": 175300 }, { "epoch": 3.71, "learning_rate": 6.625836734693877e-05, "loss": 1.4389, "step": 175400 }, { "epoch": 3.71, "learning_rate": 6.623795918367347e-05, "loss": 1.4462, "step": 175500 }, { "epoch": 3.72, "learning_rate": 6.621755102040817e-05, "loss": 1.446, "step": 175600 }, { "epoch": 3.72, "learning_rate": 6.619714285714285e-05, "loss": 1.4461, "step": 175700 }, { "epoch": 3.72, "learning_rate": 6.617673469387755e-05, "loss": 1.4489, "step": 175800 }, { "epoch": 3.72, "learning_rate": 6.615632653061225e-05, "loss": 1.4474, "step": 175900 }, { "epoch": 3.72, "learning_rate": 6.613591836734693e-05, "loss": 1.444, "step": 176000 }, { "epoch": 3.73, "learning_rate": 6.611551020408163e-05, "loss": 1.4533, "step": 176100 }, { "epoch": 3.73, "learning_rate": 6.609530612244899e-05, "loss": 1.4454, "step": 176200 }, { "epoch": 3.73, "learning_rate": 6.607489795918369e-05, "loss": 1.4361, "step": 176300 }, { "epoch": 3.73, "learning_rate": 6.605448979591837e-05, "loss": 1.4436, "step": 176400 }, { "epoch": 3.74, "learning_rate": 6.603428571428571e-05, "loss": 1.4406, "step": 176500 }, { "epoch": 3.74, "learning_rate": 6.601387755102041e-05, "loss": 1.4432, "step": 176600 }, { "epoch": 3.74, "learning_rate": 6.599346938775511e-05, "loss": 1.4469, "step": 176700 }, { "epoch": 3.74, "learning_rate": 6.597306122448979e-05, "loss": 1.4446, "step": 176800 }, { "epoch": 3.74, "learning_rate": 6.595265306122449e-05, "loss": 1.442, "step": 176900 }, { "epoch": 3.75, "learning_rate": 6.593224489795919e-05, "loss": 1.4429, "step": 177000 }, { "epoch": 3.75, "learning_rate": 6.591183673469387e-05, "loss": 1.4458, "step": 177100 }, { "epoch": 3.75, "learning_rate": 6.589142857142857e-05, "loss": 1.4481, "step": 177200 }, { "epoch": 3.75, "learning_rate": 6.587102040816326e-05, "loss": 1.4408, "step": 177300 }, { "epoch": 3.75, "learning_rate": 6.585061224489796e-05, "loss": 1.4464, "step": 177400 }, { "epoch": 3.76, "learning_rate": 6.583020408163265e-05, "loss": 1.4403, "step": 177500 }, { "epoch": 3.76, "learning_rate": 6.580979591836734e-05, "loss": 1.4466, "step": 177600 }, { "epoch": 3.76, "learning_rate": 6.578938775510204e-05, "loss": 1.4415, "step": 177700 }, { "epoch": 3.76, "learning_rate": 6.576897959183674e-05, "loss": 1.4466, "step": 177800 }, { "epoch": 3.77, "learning_rate": 6.574857142857143e-05, "loss": 1.4457, "step": 177900 }, { "epoch": 3.77, "learning_rate": 6.572816326530613e-05, "loss": 1.4421, "step": 178000 }, { "epoch": 3.77, "learning_rate": 6.570775510204083e-05, "loss": 1.4445, "step": 178100 }, { "epoch": 3.77, "learning_rate": 6.568734693877552e-05, "loss": 1.4483, "step": 178200 }, { "epoch": 3.77, "learning_rate": 6.566693877551021e-05, "loss": 1.4496, "step": 178300 }, { "epoch": 3.78, "learning_rate": 6.56465306122449e-05, "loss": 1.4471, "step": 178400 }, { "epoch": 3.78, "learning_rate": 6.56261224489796e-05, "loss": 1.4483, "step": 178500 }, { "epoch": 3.78, "learning_rate": 6.56057142857143e-05, "loss": 1.4459, "step": 178600 }, { "epoch": 3.78, "learning_rate": 6.558530612244898e-05, "loss": 1.4456, "step": 178700 }, { "epoch": 3.78, "learning_rate": 6.556489795918368e-05, "loss": 1.4442, "step": 178800 }, { "epoch": 3.79, "learning_rate": 6.554448979591838e-05, "loss": 1.4451, "step": 178900 }, { "epoch": 3.79, "learning_rate": 6.552408163265306e-05, "loss": 1.4377, "step": 179000 }, { "epoch": 3.79, "learning_rate": 6.550367346938776e-05, "loss": 1.4498, "step": 179100 }, { "epoch": 3.79, "learning_rate": 6.548326530612246e-05, "loss": 1.439, "step": 179200 }, { "epoch": 3.79, "learning_rate": 6.546285714285714e-05, "loss": 1.4383, "step": 179300 }, { "epoch": 3.8, "learning_rate": 6.544244897959184e-05, "loss": 1.4344, "step": 179400 }, { "epoch": 3.8, "learning_rate": 6.542204081632654e-05, "loss": 1.4415, "step": 179500 }, { "epoch": 3.8, "learning_rate": 6.540163265306122e-05, "loss": 1.443, "step": 179600 }, { "epoch": 3.8, "learning_rate": 6.538122448979592e-05, "loss": 1.4432, "step": 179700 }, { "epoch": 3.81, "learning_rate": 6.536081632653062e-05, "loss": 1.4444, "step": 179800 }, { "epoch": 3.81, "learning_rate": 6.53404081632653e-05, "loss": 1.4433, "step": 179900 }, { "epoch": 3.81, "learning_rate": 6.532e-05, "loss": 1.4456, "step": 180000 }, { "epoch": 3.81, "learning_rate": 6.52995918367347e-05, "loss": 1.4504, "step": 180100 }, { "epoch": 3.81, "learning_rate": 6.527918367346939e-05, "loss": 1.4386, "step": 180200 }, { "epoch": 3.82, "learning_rate": 6.525877551020409e-05, "loss": 1.4451, "step": 180300 }, { "epoch": 3.82, "learning_rate": 6.523836734693878e-05, "loss": 1.4381, "step": 180400 }, { "epoch": 3.82, "learning_rate": 6.521795918367347e-05, "loss": 1.4386, "step": 180500 }, { "epoch": 3.82, "learning_rate": 6.519755102040817e-05, "loss": 1.4445, "step": 180600 }, { "epoch": 3.82, "learning_rate": 6.517734693877552e-05, "loss": 1.4447, "step": 180700 }, { "epoch": 3.83, "learning_rate": 6.515693877551021e-05, "loss": 1.4458, "step": 180800 }, { "epoch": 3.83, "learning_rate": 6.51365306122449e-05, "loss": 1.4426, "step": 180900 }, { "epoch": 3.83, "learning_rate": 6.51161224489796e-05, "loss": 1.4437, "step": 181000 }, { "epoch": 3.83, "learning_rate": 6.509571428571429e-05, "loss": 1.436, "step": 181100 }, { "epoch": 3.83, "learning_rate": 6.507530612244899e-05, "loss": 1.4431, "step": 181200 }, { "epoch": 3.84, "learning_rate": 6.505489795918369e-05, "loss": 1.4411, "step": 181300 }, { "epoch": 3.84, "learning_rate": 6.503448979591837e-05, "loss": 1.4425, "step": 181400 }, { "epoch": 3.84, "learning_rate": 6.501408163265307e-05, "loss": 1.442, "step": 181500 }, { "epoch": 3.84, "learning_rate": 6.499367346938777e-05, "loss": 1.4388, "step": 181600 }, { "epoch": 3.85, "learning_rate": 6.497326530612245e-05, "loss": 1.4404, "step": 181700 }, { "epoch": 3.85, "learning_rate": 6.495285714285715e-05, "loss": 1.4486, "step": 181800 }, { "epoch": 3.85, "learning_rate": 6.493244897959185e-05, "loss": 1.4376, "step": 181900 }, { "epoch": 3.85, "learning_rate": 6.491204081632653e-05, "loss": 1.4356, "step": 182000 }, { "epoch": 3.85, "learning_rate": 6.489163265306123e-05, "loss": 1.4375, "step": 182100 }, { "epoch": 3.86, "learning_rate": 6.487122448979593e-05, "loss": 1.4396, "step": 182200 }, { "epoch": 3.86, "learning_rate": 6.485081632653061e-05, "loss": 1.4366, "step": 182300 }, { "epoch": 3.86, "learning_rate": 6.483040816326531e-05, "loss": 1.44, "step": 182400 }, { "epoch": 3.86, "learning_rate": 6.481e-05, "loss": 1.4448, "step": 182500 }, { "epoch": 3.86, "learning_rate": 6.47895918367347e-05, "loss": 1.4372, "step": 182600 }, { "epoch": 3.87, "learning_rate": 6.47691836734694e-05, "loss": 1.4427, "step": 182700 }, { "epoch": 3.87, "learning_rate": 6.474877551020408e-05, "loss": 1.4434, "step": 182800 }, { "epoch": 3.87, "learning_rate": 6.472836734693878e-05, "loss": 1.4332, "step": 182900 }, { "epoch": 3.87, "learning_rate": 6.470795918367348e-05, "loss": 1.4402, "step": 183000 }, { "epoch": 3.88, "learning_rate": 6.468755102040816e-05, "loss": 1.4364, "step": 183100 }, { "epoch": 3.88, "learning_rate": 6.466714285714286e-05, "loss": 1.4406, "step": 183200 }, { "epoch": 3.88, "learning_rate": 6.464673469387756e-05, "loss": 1.4308, "step": 183300 }, { "epoch": 3.88, "learning_rate": 6.462632653061224e-05, "loss": 1.4375, "step": 183400 }, { "epoch": 3.88, "learning_rate": 6.460591836734694e-05, "loss": 1.4368, "step": 183500 }, { "epoch": 3.89, "learning_rate": 6.458551020408164e-05, "loss": 1.4438, "step": 183600 }, { "epoch": 3.89, "learning_rate": 6.456510204081632e-05, "loss": 1.4479, "step": 183700 }, { "epoch": 3.89, "learning_rate": 6.454469387755102e-05, "loss": 1.4405, "step": 183800 }, { "epoch": 3.89, "learning_rate": 6.452428571428572e-05, "loss": 1.4366, "step": 183900 }, { "epoch": 3.89, "learning_rate": 6.45038775510204e-05, "loss": 1.4372, "step": 184000 }, { "epoch": 3.9, "learning_rate": 6.44834693877551e-05, "loss": 1.4367, "step": 184100 }, { "epoch": 3.9, "learning_rate": 6.446326530612246e-05, "loss": 1.4404, "step": 184200 }, { "epoch": 3.9, "learning_rate": 6.444285714285714e-05, "loss": 1.4326, "step": 184300 }, { "epoch": 3.9, "learning_rate": 6.442244897959184e-05, "loss": 1.4479, "step": 184400 }, { "epoch": 3.9, "learning_rate": 6.440204081632654e-05, "loss": 1.4353, "step": 184500 }, { "epoch": 3.91, "learning_rate": 6.438163265306122e-05, "loss": 1.4333, "step": 184600 }, { "epoch": 3.91, "learning_rate": 6.436122448979592e-05, "loss": 1.4371, "step": 184700 }, { "epoch": 3.91, "learning_rate": 6.434081632653062e-05, "loss": 1.4392, "step": 184800 }, { "epoch": 3.91, "learning_rate": 6.43204081632653e-05, "loss": 1.4349, "step": 184900 }, { "epoch": 3.92, "learning_rate": 6.43e-05, "loss": 1.4387, "step": 185000 }, { "epoch": 3.92, "learning_rate": 6.42795918367347e-05, "loss": 1.4401, "step": 185100 }, { "epoch": 3.92, "learning_rate": 6.425918367346939e-05, "loss": 1.4369, "step": 185200 }, { "epoch": 3.92, "learning_rate": 6.423877551020409e-05, "loss": 1.4377, "step": 185300 }, { "epoch": 3.92, "learning_rate": 6.421836734693878e-05, "loss": 1.4337, "step": 185400 }, { "epoch": 3.93, "learning_rate": 6.419795918367347e-05, "loss": 1.4405, "step": 185500 }, { "epoch": 3.93, "learning_rate": 6.417755102040817e-05, "loss": 1.4305, "step": 185600 }, { "epoch": 3.93, "learning_rate": 6.415714285714287e-05, "loss": 1.4364, "step": 185700 }, { "epoch": 3.93, "learning_rate": 6.413673469387755e-05, "loss": 1.4351, "step": 185800 }, { "epoch": 3.93, "learning_rate": 6.411632653061225e-05, "loss": 1.4293, "step": 185900 }, { "epoch": 3.94, "learning_rate": 6.409591836734695e-05, "loss": 1.4342, "step": 186000 }, { "epoch": 3.94, "learning_rate": 6.407551020408163e-05, "loss": 1.4455, "step": 186100 }, { "epoch": 3.94, "learning_rate": 6.405530612244899e-05, "loss": 1.4377, "step": 186200 }, { "epoch": 3.94, "learning_rate": 6.403489795918368e-05, "loss": 1.4333, "step": 186300 }, { "epoch": 3.94, "learning_rate": 6.401448979591837e-05, "loss": 1.4367, "step": 186400 }, { "epoch": 3.95, "learning_rate": 6.399408163265307e-05, "loss": 1.434, "step": 186500 }, { "epoch": 3.95, "learning_rate": 6.397367346938777e-05, "loss": 1.4362, "step": 186600 }, { "epoch": 3.95, "learning_rate": 6.395326530612245e-05, "loss": 1.4346, "step": 186700 }, { "epoch": 3.95, "learning_rate": 6.393285714285715e-05, "loss": 1.4307, "step": 186800 }, { "epoch": 3.96, "learning_rate": 6.391244897959185e-05, "loss": 1.4335, "step": 186900 }, { "epoch": 3.96, "learning_rate": 6.389204081632653e-05, "loss": 1.4326, "step": 187000 }, { "epoch": 3.96, "learning_rate": 6.387163265306123e-05, "loss": 1.4343, "step": 187100 }, { "epoch": 3.96, "learning_rate": 6.385122448979593e-05, "loss": 1.4377, "step": 187200 }, { "epoch": 3.96, "learning_rate": 6.383081632653061e-05, "loss": 1.4354, "step": 187300 }, { "epoch": 3.97, "learning_rate": 6.381040816326531e-05, "loss": 1.4357, "step": 187400 }, { "epoch": 3.97, "learning_rate": 6.379000000000001e-05, "loss": 1.4269, "step": 187500 }, { "epoch": 3.97, "learning_rate": 6.37695918367347e-05, "loss": 1.4449, "step": 187600 }, { "epoch": 3.97, "learning_rate": 6.37491836734694e-05, "loss": 1.4258, "step": 187700 }, { "epoch": 3.97, "learning_rate": 6.372877551020409e-05, "loss": 1.4349, "step": 187800 }, { "epoch": 3.98, "learning_rate": 6.370836734693878e-05, "loss": 1.4298, "step": 187900 }, { "epoch": 3.98, "learning_rate": 6.368795918367348e-05, "loss": 1.4251, "step": 188000 }, { "epoch": 3.98, "learning_rate": 6.366755102040816e-05, "loss": 1.4376, "step": 188100 }, { "epoch": 3.98, "learning_rate": 6.364714285714286e-05, "loss": 1.4268, "step": 188200 }, { "epoch": 3.99, "learning_rate": 6.362673469387756e-05, "loss": 1.4285, "step": 188300 }, { "epoch": 3.99, "learning_rate": 6.360632653061224e-05, "loss": 1.4305, "step": 188400 }, { "epoch": 3.99, "learning_rate": 6.358591836734694e-05, "loss": 1.4288, "step": 188500 }, { "epoch": 3.99, "learning_rate": 6.356551020408164e-05, "loss": 1.4387, "step": 188600 }, { "epoch": 3.99, "learning_rate": 6.354510204081632e-05, "loss": 1.4235, "step": 188700 }, { "epoch": 4.0, "learning_rate": 6.352469387755102e-05, "loss": 1.4333, "step": 188800 }, { "epoch": 4.0, "learning_rate": 6.350448979591836e-05, "loss": 1.4287, "step": 188900 }, { "epoch": 4.0, "learning_rate": 6.348408163265307e-05, "loss": 1.4286, "step": 189000 }, { "epoch": 4.0, "learning_rate": 6.346367346938776e-05, "loss": 1.4227, "step": 189100 }, { "epoch": 4.0, "learning_rate": 6.344326530612246e-05, "loss": 1.4155, "step": 189200 }, { "epoch": 4.01, "learning_rate": 6.342285714285716e-05, "loss": 1.4221, "step": 189300 }, { "epoch": 4.01, "learning_rate": 6.340244897959184e-05, "loss": 1.4314, "step": 189400 }, { "epoch": 4.01, "learning_rate": 6.338204081632654e-05, "loss": 1.4283, "step": 189500 }, { "epoch": 4.01, "learning_rate": 6.336163265306124e-05, "loss": 1.4354, "step": 189600 }, { "epoch": 4.01, "learning_rate": 6.334122448979592e-05, "loss": 1.4281, "step": 189700 }, { "epoch": 4.02, "learning_rate": 6.332081632653062e-05, "loss": 1.4304, "step": 189800 }, { "epoch": 4.02, "learning_rate": 6.33004081632653e-05, "loss": 1.4311, "step": 189900 }, { "epoch": 4.02, "learning_rate": 6.328e-05, "loss": 1.4265, "step": 190000 }, { "epoch": 4.02, "learning_rate": 6.32595918367347e-05, "loss": 1.4252, "step": 190100 }, { "epoch": 4.03, "learning_rate": 6.323918367346939e-05, "loss": 1.4302, "step": 190200 }, { "epoch": 4.03, "learning_rate": 6.321877551020409e-05, "loss": 1.4273, "step": 190300 }, { "epoch": 4.03, "learning_rate": 6.319836734693878e-05, "loss": 1.431, "step": 190400 }, { "epoch": 4.03, "learning_rate": 6.317795918367347e-05, "loss": 1.4249, "step": 190500 }, { "epoch": 4.03, "learning_rate": 6.315755102040817e-05, "loss": 1.4218, "step": 190600 }, { "epoch": 4.04, "learning_rate": 6.313714285714287e-05, "loss": 1.423, "step": 190700 }, { "epoch": 4.04, "learning_rate": 6.311673469387755e-05, "loss": 1.4276, "step": 190800 }, { "epoch": 4.04, "learning_rate": 6.309632653061225e-05, "loss": 1.4213, "step": 190900 }, { "epoch": 4.04, "learning_rate": 6.307591836734695e-05, "loss": 1.4147, "step": 191000 }, { "epoch": 4.04, "learning_rate": 6.305551020408163e-05, "loss": 1.4253, "step": 191100 }, { "epoch": 4.05, "learning_rate": 6.303510204081633e-05, "loss": 1.425, "step": 191200 }, { "epoch": 4.05, "learning_rate": 6.301469387755103e-05, "loss": 1.4224, "step": 191300 }, { "epoch": 4.05, "learning_rate": 6.299428571428571e-05, "loss": 1.4329, "step": 191400 }, { "epoch": 4.05, "learning_rate": 6.297387755102041e-05, "loss": 1.4255, "step": 191500 }, { "epoch": 4.06, "learning_rate": 6.295346938775511e-05, "loss": 1.4216, "step": 191600 }, { "epoch": 4.06, "learning_rate": 6.29330612244898e-05, "loss": 1.4159, "step": 191700 }, { "epoch": 4.06, "learning_rate": 6.291265306122449e-05, "loss": 1.4251, "step": 191800 }, { "epoch": 4.06, "learning_rate": 6.289244897959185e-05, "loss": 1.4245, "step": 191900 }, { "epoch": 4.06, "learning_rate": 6.287204081632653e-05, "loss": 1.4248, "step": 192000 }, { "epoch": 4.07, "learning_rate": 6.285163265306123e-05, "loss": 1.4188, "step": 192100 }, { "epoch": 4.07, "learning_rate": 6.283122448979593e-05, "loss": 1.4197, "step": 192200 }, { "epoch": 4.07, "learning_rate": 6.281081632653061e-05, "loss": 1.4295, "step": 192300 }, { "epoch": 4.07, "learning_rate": 6.279040816326531e-05, "loss": 1.426, "step": 192400 }, { "epoch": 4.07, "learning_rate": 6.277000000000001e-05, "loss": 1.4273, "step": 192500 }, { "epoch": 4.08, "learning_rate": 6.27495918367347e-05, "loss": 1.4299, "step": 192600 }, { "epoch": 4.08, "learning_rate": 6.27291836734694e-05, "loss": 1.4154, "step": 192700 }, { "epoch": 4.08, "learning_rate": 6.270877551020409e-05, "loss": 1.4249, "step": 192800 }, { "epoch": 4.08, "learning_rate": 6.268836734693878e-05, "loss": 1.4183, "step": 192900 }, { "epoch": 4.08, "learning_rate": 6.266795918367348e-05, "loss": 1.4252, "step": 193000 }, { "epoch": 4.09, "learning_rate": 6.264755102040817e-05, "loss": 1.4217, "step": 193100 }, { "epoch": 4.09, "learning_rate": 6.262734693877551e-05, "loss": 1.4244, "step": 193200 }, { "epoch": 4.09, "learning_rate": 6.26069387755102e-05, "loss": 1.4244, "step": 193300 }, { "epoch": 4.09, "learning_rate": 6.25865306122449e-05, "loss": 1.4235, "step": 193400 }, { "epoch": 4.1, "learning_rate": 6.25661224489796e-05, "loss": 1.4225, "step": 193500 }, { "epoch": 4.1, "learning_rate": 6.254571428571428e-05, "loss": 1.4244, "step": 193600 }, { "epoch": 4.1, "learning_rate": 6.252530612244898e-05, "loss": 1.4259, "step": 193700 }, { "epoch": 4.1, "learning_rate": 6.250489795918368e-05, "loss": 1.4301, "step": 193800 }, { "epoch": 4.1, "learning_rate": 6.248448979591836e-05, "loss": 1.4201, "step": 193900 }, { "epoch": 4.11, "learning_rate": 6.246408163265306e-05, "loss": 1.4182, "step": 194000 }, { "epoch": 4.11, "learning_rate": 6.244367346938776e-05, "loss": 1.4292, "step": 194100 }, { "epoch": 4.11, "learning_rate": 6.242326530612244e-05, "loss": 1.4224, "step": 194200 }, { "epoch": 4.11, "learning_rate": 6.240285714285714e-05, "loss": 1.4178, "step": 194300 }, { "epoch": 4.11, "learning_rate": 6.238244897959184e-05, "loss": 1.4167, "step": 194400 }, { "epoch": 4.12, "learning_rate": 6.236204081632654e-05, "loss": 1.4292, "step": 194500 }, { "epoch": 4.12, "learning_rate": 6.234163265306124e-05, "loss": 1.4121, "step": 194600 }, { "epoch": 4.12, "learning_rate": 6.232122448979592e-05, "loss": 1.4211, "step": 194700 }, { "epoch": 4.12, "learning_rate": 6.230081632653062e-05, "loss": 1.4181, "step": 194800 }, { "epoch": 4.12, "learning_rate": 6.228040816326532e-05, "loss": 1.422, "step": 194900 }, { "epoch": 4.13, "learning_rate": 6.226e-05, "loss": 1.4266, "step": 195000 }, { "epoch": 4.13, "learning_rate": 6.22395918367347e-05, "loss": 1.4206, "step": 195100 }, { "epoch": 4.13, "learning_rate": 6.22191836734694e-05, "loss": 1.4197, "step": 195200 }, { "epoch": 4.13, "learning_rate": 6.219877551020409e-05, "loss": 1.4274, "step": 195300 }, { "epoch": 4.14, "learning_rate": 6.217836734693878e-05, "loss": 1.4238, "step": 195400 }, { "epoch": 4.14, "learning_rate": 6.215795918367348e-05, "loss": 1.4234, "step": 195500 }, { "epoch": 4.14, "learning_rate": 6.213755102040817e-05, "loss": 1.4275, "step": 195600 }, { "epoch": 4.14, "learning_rate": 6.211714285714287e-05, "loss": 1.4313, "step": 195700 }, { "epoch": 4.14, "learning_rate": 6.209673469387755e-05, "loss": 1.4172, "step": 195800 }, { "epoch": 4.15, "learning_rate": 6.207632653061225e-05, "loss": 1.4196, "step": 195900 }, { "epoch": 4.15, "learning_rate": 6.205591836734695e-05, "loss": 1.4224, "step": 196000 }, { "epoch": 4.15, "learning_rate": 6.203551020408163e-05, "loss": 1.424, "step": 196100 }, { "epoch": 4.15, "learning_rate": 6.201510204081633e-05, "loss": 1.4144, "step": 196200 }, { "epoch": 4.15, "learning_rate": 6.199469387755103e-05, "loss": 1.4276, "step": 196300 }, { "epoch": 4.16, "learning_rate": 6.197428571428571e-05, "loss": 1.427, "step": 196400 }, { "epoch": 4.16, "learning_rate": 6.195387755102041e-05, "loss": 1.4167, "step": 196500 }, { "epoch": 4.16, "learning_rate": 6.193346938775511e-05, "loss": 1.4221, "step": 196600 }, { "epoch": 4.16, "learning_rate": 6.19130612244898e-05, "loss": 1.4281, "step": 196700 }, { "epoch": 4.17, "learning_rate": 6.189265306122449e-05, "loss": 1.4188, "step": 196800 }, { "epoch": 4.17, "learning_rate": 6.187224489795919e-05, "loss": 1.4206, "step": 196900 }, { "epoch": 4.17, "learning_rate": 6.185183673469388e-05, "loss": 1.4125, "step": 197000 }, { "epoch": 4.17, "learning_rate": 6.183142857142857e-05, "loss": 1.4118, "step": 197100 }, { "epoch": 4.17, "learning_rate": 6.181122448979592e-05, "loss": 1.4289, "step": 197200 }, { "epoch": 4.18, "learning_rate": 6.179081632653063e-05, "loss": 1.4164, "step": 197300 }, { "epoch": 4.18, "learning_rate": 6.177040816326531e-05, "loss": 1.4257, "step": 197400 }, { "epoch": 4.18, "learning_rate": 6.175000000000001e-05, "loss": 1.4117, "step": 197500 }, { "epoch": 4.18, "learning_rate": 6.17295918367347e-05, "loss": 1.4185, "step": 197600 }, { "epoch": 4.18, "learning_rate": 6.17091836734694e-05, "loss": 1.4199, "step": 197700 }, { "epoch": 4.19, "learning_rate": 6.168877551020409e-05, "loss": 1.4126, "step": 197800 }, { "epoch": 4.19, "learning_rate": 6.166836734693878e-05, "loss": 1.4199, "step": 197900 }, { "epoch": 4.19, "learning_rate": 6.164795918367347e-05, "loss": 1.4156, "step": 198000 }, { "epoch": 4.19, "learning_rate": 6.162755102040817e-05, "loss": 1.4157, "step": 198100 }, { "epoch": 4.19, "learning_rate": 6.160714285714286e-05, "loss": 1.421, "step": 198200 }, { "epoch": 4.2, "learning_rate": 6.158673469387756e-05, "loss": 1.4257, "step": 198300 }, { "epoch": 4.2, "learning_rate": 6.156632653061225e-05, "loss": 1.4168, "step": 198400 }, { "epoch": 4.2, "learning_rate": 6.154591836734694e-05, "loss": 1.4145, "step": 198500 }, { "epoch": 4.2, "learning_rate": 6.152551020408164e-05, "loss": 1.419, "step": 198600 }, { "epoch": 4.21, "learning_rate": 6.150510204081634e-05, "loss": 1.4159, "step": 198700 }, { "epoch": 4.21, "learning_rate": 6.148469387755102e-05, "loss": 1.4189, "step": 198800 }, { "epoch": 4.21, "learning_rate": 6.146428571428572e-05, "loss": 1.4137, "step": 198900 }, { "epoch": 4.21, "learning_rate": 6.144387755102042e-05, "loss": 1.4187, "step": 199000 }, { "epoch": 4.21, "learning_rate": 6.14234693877551e-05, "loss": 1.4223, "step": 199100 }, { "epoch": 4.22, "learning_rate": 6.14030612244898e-05, "loss": 1.4292, "step": 199200 }, { "epoch": 4.22, "learning_rate": 6.13826530612245e-05, "loss": 1.4133, "step": 199300 }, { "epoch": 4.22, "learning_rate": 6.136224489795918e-05, "loss": 1.4122, "step": 199400 }, { "epoch": 4.22, "learning_rate": 6.134183673469388e-05, "loss": 1.4174, "step": 199500 }, { "epoch": 4.22, "learning_rate": 6.132142857142858e-05, "loss": 1.4114, "step": 199600 }, { "epoch": 4.23, "learning_rate": 6.130102040816327e-05, "loss": 1.4132, "step": 199700 }, { "epoch": 4.23, "learning_rate": 6.128061224489796e-05, "loss": 1.4167, "step": 199800 }, { "epoch": 4.23, "learning_rate": 6.126020408163265e-05, "loss": 1.4112, "step": 199900 }, { "epoch": 4.23, "learning_rate": 6.123979591836735e-05, "loss": 1.4204, "step": 200000 }, { "epoch": 4.23, "learning_rate": 6.121938775510205e-05, "loss": 1.4199, "step": 200100 }, { "epoch": 4.24, "learning_rate": 6.119897959183673e-05, "loss": 1.4143, "step": 200200 }, { "epoch": 4.24, "learning_rate": 6.117857142857143e-05, "loss": 1.4089, "step": 200300 }, { "epoch": 4.24, "learning_rate": 6.115816326530613e-05, "loss": 1.4135, "step": 200400 }, { "epoch": 4.24, "learning_rate": 6.113775510204081e-05, "loss": 1.4137, "step": 200500 }, { "epoch": 4.25, "learning_rate": 6.111734693877551e-05, "loss": 1.4073, "step": 200600 }, { "epoch": 4.25, "learning_rate": 6.109693877551021e-05, "loss": 1.4108, "step": 200700 }, { "epoch": 4.25, "learning_rate": 6.10765306122449e-05, "loss": 1.4244, "step": 200800 }, { "epoch": 4.25, "learning_rate": 6.105612244897959e-05, "loss": 1.4208, "step": 200900 }, { "epoch": 4.25, "learning_rate": 6.103571428571429e-05, "loss": 1.4136, "step": 201000 }, { "epoch": 4.26, "learning_rate": 6.101530612244898e-05, "loss": 1.4216, "step": 201100 }, { "epoch": 4.26, "learning_rate": 6.099489795918367e-05, "loss": 1.4109, "step": 201200 }, { "epoch": 4.26, "learning_rate": 6.0974489795918365e-05, "loss": 1.4162, "step": 201300 }, { "epoch": 4.26, "learning_rate": 6.0954081632653056e-05, "loss": 1.4099, "step": 201400 }, { "epoch": 4.26, "learning_rate": 6.0933673469387755e-05, "loss": 1.4188, "step": 201500 }, { "epoch": 4.27, "learning_rate": 6.0913265306122446e-05, "loss": 1.4116, "step": 201600 }, { "epoch": 4.27, "learning_rate": 6.089285714285714e-05, "loss": 1.4163, "step": 201700 }, { "epoch": 4.27, "learning_rate": 6.087244897959184e-05, "loss": 1.4141, "step": 201800 }, { "epoch": 4.27, "learning_rate": 6.085204081632654e-05, "loss": 1.4135, "step": 201900 }, { "epoch": 4.28, "learning_rate": 6.083163265306123e-05, "loss": 1.4166, "step": 202000 }, { "epoch": 4.28, "learning_rate": 6.0811224489795925e-05, "loss": 1.4144, "step": 202100 }, { "epoch": 4.28, "learning_rate": 6.079081632653062e-05, "loss": 1.4151, "step": 202200 }, { "epoch": 4.28, "learning_rate": 6.0770408163265315e-05, "loss": 1.4212, "step": 202300 }, { "epoch": 4.28, "learning_rate": 6.0750000000000006e-05, "loss": 1.408, "step": 202400 }, { "epoch": 4.29, "learning_rate": 6.072979591836735e-05, "loss": 1.4142, "step": 202500 }, { "epoch": 4.29, "learning_rate": 6.0709387755102045e-05, "loss": 1.4018, "step": 202600 }, { "epoch": 4.29, "learning_rate": 6.068897959183674e-05, "loss": 1.4106, "step": 202700 }, { "epoch": 4.29, "learning_rate": 6.066857142857143e-05, "loss": 1.4162, "step": 202800 }, { "epoch": 4.29, "learning_rate": 6.064816326530613e-05, "loss": 1.4122, "step": 202900 }, { "epoch": 4.3, "learning_rate": 6.062775510204082e-05, "loss": 1.4079, "step": 203000 }, { "epoch": 4.3, "learning_rate": 6.060734693877551e-05, "loss": 1.4124, "step": 203100 }, { "epoch": 4.3, "learning_rate": 6.05869387755102e-05, "loss": 1.4136, "step": 203200 }, { "epoch": 4.3, "learning_rate": 6.05665306122449e-05, "loss": 1.4119, "step": 203300 }, { "epoch": 4.3, "learning_rate": 6.0546326530612254e-05, "loss": 1.415, "step": 203400 }, { "epoch": 4.31, "learning_rate": 6.0525918367346946e-05, "loss": 1.4218, "step": 203500 }, { "epoch": 4.31, "learning_rate": 6.050551020408164e-05, "loss": 1.4143, "step": 203600 }, { "epoch": 4.31, "learning_rate": 6.0485102040816336e-05, "loss": 1.4201, "step": 203700 }, { "epoch": 4.31, "learning_rate": 6.046469387755103e-05, "loss": 1.4185, "step": 203800 }, { "epoch": 4.32, "learning_rate": 6.044428571428572e-05, "loss": 1.4195, "step": 203900 }, { "epoch": 4.32, "learning_rate": 6.042387755102041e-05, "loss": 1.4096, "step": 204000 }, { "epoch": 4.32, "learning_rate": 6.040346938775511e-05, "loss": 1.4074, "step": 204100 }, { "epoch": 4.32, "learning_rate": 6.03830612244898e-05, "loss": 1.4086, "step": 204200 }, { "epoch": 4.32, "learning_rate": 6.036265306122449e-05, "loss": 1.4111, "step": 204300 }, { "epoch": 4.33, "learning_rate": 6.034224489795919e-05, "loss": 1.405, "step": 204400 }, { "epoch": 4.33, "learning_rate": 6.032183673469388e-05, "loss": 1.4101, "step": 204500 }, { "epoch": 4.33, "learning_rate": 6.0301428571428574e-05, "loss": 1.4148, "step": 204600 }, { "epoch": 4.33, "learning_rate": 6.028102040816327e-05, "loss": 1.4137, "step": 204700 }, { "epoch": 4.33, "learning_rate": 6.0260612244897964e-05, "loss": 1.4115, "step": 204800 }, { "epoch": 4.34, "learning_rate": 6.0240204081632655e-05, "loss": 1.4087, "step": 204900 }, { "epoch": 4.34, "learning_rate": 6.021979591836735e-05, "loss": 1.4085, "step": 205000 }, { "epoch": 4.34, "learning_rate": 6.0199387755102045e-05, "loss": 1.4082, "step": 205100 }, { "epoch": 4.34, "learning_rate": 6.017897959183674e-05, "loss": 1.4124, "step": 205200 }, { "epoch": 4.34, "learning_rate": 6.015857142857143e-05, "loss": 1.4086, "step": 205300 }, { "epoch": 4.35, "learning_rate": 6.013816326530613e-05, "loss": 1.4181, "step": 205400 }, { "epoch": 4.35, "learning_rate": 6.011775510204082e-05, "loss": 1.4173, "step": 205500 }, { "epoch": 4.35, "learning_rate": 6.009734693877551e-05, "loss": 1.4258, "step": 205600 }, { "epoch": 4.35, "learning_rate": 6.007693877551021e-05, "loss": 1.4115, "step": 205700 }, { "epoch": 4.36, "learning_rate": 6.00565306122449e-05, "loss": 1.4147, "step": 205800 }, { "epoch": 4.36, "learning_rate": 6.003612244897959e-05, "loss": 1.4112, "step": 205900 }, { "epoch": 4.36, "learning_rate": 6.001571428571428e-05, "loss": 1.4077, "step": 206000 }, { "epoch": 4.36, "learning_rate": 5.999530612244898e-05, "loss": 1.4079, "step": 206100 }, { "epoch": 4.36, "learning_rate": 5.997489795918367e-05, "loss": 1.4088, "step": 206200 }, { "epoch": 4.37, "learning_rate": 5.9954489795918364e-05, "loss": 1.4112, "step": 206300 }, { "epoch": 4.37, "learning_rate": 5.993408163265306e-05, "loss": 1.4124, "step": 206400 }, { "epoch": 4.37, "learning_rate": 5.9913673469387754e-05, "loss": 1.4074, "step": 206500 }, { "epoch": 4.37, "learning_rate": 5.9893265306122446e-05, "loss": 1.413, "step": 206600 }, { "epoch": 4.37, "learning_rate": 5.9872857142857144e-05, "loss": 1.4145, "step": 206700 }, { "epoch": 4.38, "learning_rate": 5.9852448979591836e-05, "loss": 1.4045, "step": 206800 }, { "epoch": 4.38, "learning_rate": 5.983204081632653e-05, "loss": 1.4172, "step": 206900 }, { "epoch": 4.38, "learning_rate": 5.9811632653061226e-05, "loss": 1.4078, "step": 207000 }, { "epoch": 4.38, "learning_rate": 5.979122448979592e-05, "loss": 1.4117, "step": 207100 }, { "epoch": 4.39, "learning_rate": 5.977081632653061e-05, "loss": 1.4072, "step": 207200 }, { "epoch": 4.39, "learning_rate": 5.97504081632653e-05, "loss": 1.4051, "step": 207300 }, { "epoch": 4.39, "learning_rate": 5.9730000000000006e-05, "loss": 1.4093, "step": 207400 }, { "epoch": 4.39, "learning_rate": 5.9709591836734704e-05, "loss": 1.4099, "step": 207500 }, { "epoch": 4.39, "learning_rate": 5.9689183673469396e-05, "loss": 1.4056, "step": 207600 }, { "epoch": 4.4, "learning_rate": 5.966877551020409e-05, "loss": 1.408, "step": 207700 }, { "epoch": 4.4, "learning_rate": 5.9648367346938786e-05, "loss": 1.4046, "step": 207800 }, { "epoch": 4.4, "learning_rate": 5.962795918367348e-05, "loss": 1.4112, "step": 207900 }, { "epoch": 4.4, "learning_rate": 5.960755102040817e-05, "loss": 1.4077, "step": 208000 }, { "epoch": 4.4, "learning_rate": 5.958714285714286e-05, "loss": 1.412, "step": 208100 }, { "epoch": 4.41, "learning_rate": 5.956673469387756e-05, "loss": 1.408, "step": 208200 }, { "epoch": 4.41, "learning_rate": 5.954632653061225e-05, "loss": 1.4075, "step": 208300 }, { "epoch": 4.41, "learning_rate": 5.952591836734694e-05, "loss": 1.404, "step": 208400 }, { "epoch": 4.41, "learning_rate": 5.950551020408164e-05, "loss": 1.4147, "step": 208500 }, { "epoch": 4.41, "learning_rate": 5.948530612244898e-05, "loss": 1.4089, "step": 208600 }, { "epoch": 4.42, "learning_rate": 5.946489795918367e-05, "loss": 1.417, "step": 208700 }, { "epoch": 4.42, "learning_rate": 5.944448979591837e-05, "loss": 1.4023, "step": 208800 }, { "epoch": 4.42, "learning_rate": 5.942408163265306e-05, "loss": 1.4093, "step": 208900 }, { "epoch": 4.42, "learning_rate": 5.9403673469387754e-05, "loss": 1.4139, "step": 209000 }, { "epoch": 4.43, "learning_rate": 5.9383265306122446e-05, "loss": 1.4026, "step": 209100 }, { "epoch": 4.43, "learning_rate": 5.9362857142857144e-05, "loss": 1.4187, "step": 209200 }, { "epoch": 4.43, "learning_rate": 5.9342448979591836e-05, "loss": 1.4034, "step": 209300 }, { "epoch": 4.43, "learning_rate": 5.932204081632653e-05, "loss": 1.4013, "step": 209400 }, { "epoch": 4.43, "learning_rate": 5.930183673469388e-05, "loss": 1.3987, "step": 209500 }, { "epoch": 4.44, "learning_rate": 5.928142857142857e-05, "loss": 1.4042, "step": 209600 }, { "epoch": 4.44, "learning_rate": 5.926102040816327e-05, "loss": 1.4077, "step": 209700 }, { "epoch": 4.44, "learning_rate": 5.924061224489796e-05, "loss": 1.4009, "step": 209800 }, { "epoch": 4.44, "learning_rate": 5.9220204081632655e-05, "loss": 1.4147, "step": 209900 }, { "epoch": 4.44, "learning_rate": 5.919979591836735e-05, "loss": 1.4082, "step": 210000 }, { "epoch": 4.45, "learning_rate": 5.9179387755102045e-05, "loss": 1.4001, "step": 210100 }, { "epoch": 4.45, "learning_rate": 5.9158979591836736e-05, "loss": 1.4195, "step": 210200 }, { "epoch": 4.45, "learning_rate": 5.9138571428571435e-05, "loss": 1.4051, "step": 210300 }, { "epoch": 4.45, "learning_rate": 5.9118163265306126e-05, "loss": 1.409, "step": 210400 }, { "epoch": 4.46, "learning_rate": 5.909775510204082e-05, "loss": 1.4046, "step": 210500 }, { "epoch": 4.46, "learning_rate": 5.9077346938775516e-05, "loss": 1.4012, "step": 210600 }, { "epoch": 4.46, "learning_rate": 5.905693877551021e-05, "loss": 1.4025, "step": 210700 }, { "epoch": 4.46, "learning_rate": 5.90365306122449e-05, "loss": 1.408, "step": 210800 }, { "epoch": 4.46, "learning_rate": 5.901612244897959e-05, "loss": 1.4045, "step": 210900 }, { "epoch": 4.47, "learning_rate": 5.899571428571429e-05, "loss": 1.4053, "step": 211000 }, { "epoch": 4.47, "learning_rate": 5.897530612244898e-05, "loss": 1.4055, "step": 211100 }, { "epoch": 4.47, "learning_rate": 5.895489795918367e-05, "loss": 1.3999, "step": 211200 }, { "epoch": 4.47, "learning_rate": 5.893448979591837e-05, "loss": 1.4076, "step": 211300 }, { "epoch": 4.47, "learning_rate": 5.891408163265306e-05, "loss": 1.3992, "step": 211400 }, { "epoch": 4.48, "learning_rate": 5.8893673469387754e-05, "loss": 1.4015, "step": 211500 }, { "epoch": 4.48, "learning_rate": 5.887326530612245e-05, "loss": 1.404, "step": 211600 }, { "epoch": 4.48, "learning_rate": 5.8852857142857144e-05, "loss": 1.4057, "step": 211700 }, { "epoch": 4.48, "learning_rate": 5.8832448979591836e-05, "loss": 1.4067, "step": 211800 }, { "epoch": 4.48, "learning_rate": 5.881204081632653e-05, "loss": 1.4045, "step": 211900 }, { "epoch": 4.49, "learning_rate": 5.8791632653061226e-05, "loss": 1.4099, "step": 212000 }, { "epoch": 4.49, "learning_rate": 5.877122448979592e-05, "loss": 1.405, "step": 212100 }, { "epoch": 4.49, "learning_rate": 5.875081632653061e-05, "loss": 1.4077, "step": 212200 }, { "epoch": 4.49, "learning_rate": 5.873040816326531e-05, "loss": 1.4061, "step": 212300 }, { "epoch": 4.5, "learning_rate": 5.871e-05, "loss": 1.4098, "step": 212400 }, { "epoch": 4.5, "learning_rate": 5.868959183673469e-05, "loss": 1.4043, "step": 212500 }, { "epoch": 4.5, "learning_rate": 5.866918367346939e-05, "loss": 1.3997, "step": 212600 }, { "epoch": 4.5, "learning_rate": 5.864877551020408e-05, "loss": 1.4013, "step": 212700 }, { "epoch": 4.5, "learning_rate": 5.862836734693877e-05, "loss": 1.401, "step": 212800 }, { "epoch": 4.51, "learning_rate": 5.860795918367348e-05, "loss": 1.4035, "step": 212900 }, { "epoch": 4.51, "learning_rate": 5.858755102040817e-05, "loss": 1.4077, "step": 213000 }, { "epoch": 4.51, "learning_rate": 5.856714285714287e-05, "loss": 1.411, "step": 213100 }, { "epoch": 4.51, "learning_rate": 5.854673469387756e-05, "loss": 1.3997, "step": 213200 }, { "epoch": 4.51, "learning_rate": 5.852632653061225e-05, "loss": 1.4058, "step": 213300 }, { "epoch": 4.52, "learning_rate": 5.850591836734695e-05, "loss": 1.4071, "step": 213400 }, { "epoch": 4.52, "learning_rate": 5.848551020408164e-05, "loss": 1.4006, "step": 213500 }, { "epoch": 4.52, "learning_rate": 5.846510204081633e-05, "loss": 1.4025, "step": 213600 }, { "epoch": 4.52, "learning_rate": 5.844469387755103e-05, "loss": 1.4034, "step": 213700 }, { "epoch": 4.52, "learning_rate": 5.842428571428572e-05, "loss": 1.3961, "step": 213800 }, { "epoch": 4.53, "learning_rate": 5.840408163265306e-05, "loss": 1.3938, "step": 213900 }, { "epoch": 4.53, "learning_rate": 5.8383673469387754e-05, "loss": 1.404, "step": 214000 }, { "epoch": 4.53, "learning_rate": 5.836326530612245e-05, "loss": 1.4045, "step": 214100 }, { "epoch": 4.53, "learning_rate": 5.8342857142857144e-05, "loss": 1.4049, "step": 214200 }, { "epoch": 4.54, "learning_rate": 5.8322448979591835e-05, "loss": 1.4032, "step": 214300 }, { "epoch": 4.54, "learning_rate": 5.8302040816326534e-05, "loss": 1.4043, "step": 214400 }, { "epoch": 4.54, "learning_rate": 5.8281632653061225e-05, "loss": 1.4072, "step": 214500 }, { "epoch": 4.54, "learning_rate": 5.826122448979592e-05, "loss": 1.3978, "step": 214600 }, { "epoch": 4.54, "learning_rate": 5.824081632653061e-05, "loss": 1.4076, "step": 214700 }, { "epoch": 4.55, "learning_rate": 5.822040816326531e-05, "loss": 1.392, "step": 214800 }, { "epoch": 4.55, "learning_rate": 5.82e-05, "loss": 1.3962, "step": 214900 }, { "epoch": 4.55, "learning_rate": 5.817959183673469e-05, "loss": 1.3993, "step": 215000 }, { "epoch": 4.55, "learning_rate": 5.815918367346939e-05, "loss": 1.3982, "step": 215100 }, { "epoch": 4.55, "learning_rate": 5.813877551020408e-05, "loss": 1.3922, "step": 215200 }, { "epoch": 4.56, "learning_rate": 5.811836734693877e-05, "loss": 1.397, "step": 215300 }, { "epoch": 4.56, "learning_rate": 5.809795918367347e-05, "loss": 1.4086, "step": 215400 }, { "epoch": 4.56, "learning_rate": 5.807755102040816e-05, "loss": 1.4039, "step": 215500 }, { "epoch": 4.56, "learning_rate": 5.805714285714285e-05, "loss": 1.3971, "step": 215600 }, { "epoch": 4.57, "learning_rate": 5.803673469387756e-05, "loss": 1.3937, "step": 215700 }, { "epoch": 4.57, "learning_rate": 5.801632653061225e-05, "loss": 1.3922, "step": 215800 }, { "epoch": 4.57, "learning_rate": 5.799591836734695e-05, "loss": 1.3989, "step": 215900 }, { "epoch": 4.57, "learning_rate": 5.797551020408164e-05, "loss": 1.4012, "step": 216000 }, { "epoch": 4.57, "learning_rate": 5.795510204081633e-05, "loss": 1.4098, "step": 216100 }, { "epoch": 4.58, "learning_rate": 5.793469387755103e-05, "loss": 1.4027, "step": 216200 }, { "epoch": 4.58, "learning_rate": 5.791428571428572e-05, "loss": 1.3988, "step": 216300 }, { "epoch": 4.58, "learning_rate": 5.789387755102041e-05, "loss": 1.4035, "step": 216400 }, { "epoch": 4.58, "learning_rate": 5.787346938775511e-05, "loss": 1.3945, "step": 216500 }, { "epoch": 4.58, "learning_rate": 5.78530612244898e-05, "loss": 1.3958, "step": 216600 }, { "epoch": 4.59, "learning_rate": 5.7832653061224494e-05, "loss": 1.4012, "step": 216700 }, { "epoch": 4.59, "learning_rate": 5.7812244897959186e-05, "loss": 1.3956, "step": 216800 }, { "epoch": 4.59, "learning_rate": 5.7791836734693884e-05, "loss": 1.397, "step": 216900 }, { "epoch": 4.59, "learning_rate": 5.7771428571428576e-05, "loss": 1.4022, "step": 217000 }, { "epoch": 4.59, "learning_rate": 5.775102040816327e-05, "loss": 1.3978, "step": 217100 }, { "epoch": 4.6, "learning_rate": 5.7730612244897966e-05, "loss": 1.3999, "step": 217200 }, { "epoch": 4.6, "learning_rate": 5.771020408163266e-05, "loss": 1.4066, "step": 217300 }, { "epoch": 4.6, "learning_rate": 5.769e-05, "loss": 1.3896, "step": 217400 }, { "epoch": 4.6, "learning_rate": 5.7669591836734697e-05, "loss": 1.4034, "step": 217500 }, { "epoch": 4.61, "learning_rate": 5.764918367346939e-05, "loss": 1.3997, "step": 217600 }, { "epoch": 4.61, "learning_rate": 5.762877551020408e-05, "loss": 1.3975, "step": 217700 }, { "epoch": 4.61, "learning_rate": 5.760836734693877e-05, "loss": 1.3996, "step": 217800 }, { "epoch": 4.61, "learning_rate": 5.758795918367347e-05, "loss": 1.3945, "step": 217900 }, { "epoch": 4.61, "learning_rate": 5.756755102040816e-05, "loss": 1.3963, "step": 218000 }, { "epoch": 4.62, "learning_rate": 5.754714285714285e-05, "loss": 1.3992, "step": 218100 }, { "epoch": 4.62, "learning_rate": 5.752673469387755e-05, "loss": 1.4032, "step": 218200 }, { "epoch": 4.62, "learning_rate": 5.750632653061224e-05, "loss": 1.3951, "step": 218300 }, { "epoch": 4.62, "learning_rate": 5.748591836734695e-05, "loss": 1.4036, "step": 218400 }, { "epoch": 4.62, "learning_rate": 5.746551020408164e-05, "loss": 1.3971, "step": 218500 }, { "epoch": 4.63, "learning_rate": 5.744510204081633e-05, "loss": 1.4007, "step": 218600 }, { "epoch": 4.63, "learning_rate": 5.742469387755103e-05, "loss": 1.3972, "step": 218700 }, { "epoch": 4.63, "learning_rate": 5.740428571428572e-05, "loss": 1.4015, "step": 218800 }, { "epoch": 4.63, "learning_rate": 5.738387755102041e-05, "loss": 1.4013, "step": 218900 }, { "epoch": 4.63, "learning_rate": 5.736346938775511e-05, "loss": 1.3963, "step": 219000 }, { "epoch": 4.64, "learning_rate": 5.73430612244898e-05, "loss": 1.3968, "step": 219100 }, { "epoch": 4.64, "learning_rate": 5.7322653061224494e-05, "loss": 1.39, "step": 219200 }, { "epoch": 4.64, "learning_rate": 5.730244897959184e-05, "loss": 1.3921, "step": 219300 }, { "epoch": 4.64, "learning_rate": 5.728204081632653e-05, "loss": 1.3942, "step": 219400 }, { "epoch": 4.65, "learning_rate": 5.7261632653061225e-05, "loss": 1.3987, "step": 219500 }, { "epoch": 4.65, "learning_rate": 5.7241224489795916e-05, "loss": 1.3988, "step": 219600 }, { "epoch": 4.65, "learning_rate": 5.7220816326530615e-05, "loss": 1.3955, "step": 219700 }, { "epoch": 4.65, "learning_rate": 5.7200408163265306e-05, "loss": 1.3946, "step": 219800 }, { "epoch": 4.65, "learning_rate": 5.718e-05, "loss": 1.4045, "step": 219900 }, { "epoch": 4.66, "learning_rate": 5.7159591836734696e-05, "loss": 1.3956, "step": 220000 }, { "epoch": 4.66, "learning_rate": 5.713918367346939e-05, "loss": 1.3978, "step": 220100 }, { "epoch": 4.66, "learning_rate": 5.711877551020408e-05, "loss": 1.3982, "step": 220200 }, { "epoch": 4.66, "learning_rate": 5.709836734693878e-05, "loss": 1.3949, "step": 220300 }, { "epoch": 4.66, "learning_rate": 5.707795918367347e-05, "loss": 1.3947, "step": 220400 }, { "epoch": 4.67, "learning_rate": 5.705755102040816e-05, "loss": 1.4004, "step": 220500 }, { "epoch": 4.67, "learning_rate": 5.703714285714285e-05, "loss": 1.4058, "step": 220600 }, { "epoch": 4.67, "learning_rate": 5.701673469387755e-05, "loss": 1.4008, "step": 220700 }, { "epoch": 4.67, "learning_rate": 5.699632653061224e-05, "loss": 1.3931, "step": 220800 }, { "epoch": 4.68, "learning_rate": 5.6975918367346934e-05, "loss": 1.3989, "step": 220900 }, { "epoch": 4.68, "learning_rate": 5.695551020408163e-05, "loss": 1.3918, "step": 221000 }, { "epoch": 4.68, "learning_rate": 5.6935102040816324e-05, "loss": 1.3937, "step": 221100 }, { "epoch": 4.68, "learning_rate": 5.691469387755103e-05, "loss": 1.3974, "step": 221200 }, { "epoch": 4.68, "learning_rate": 5.689428571428572e-05, "loss": 1.3933, "step": 221300 }, { "epoch": 4.69, "learning_rate": 5.687387755102041e-05, "loss": 1.3934, "step": 221400 }, { "epoch": 4.69, "learning_rate": 5.685367346938776e-05, "loss": 1.3922, "step": 221500 }, { "epoch": 4.69, "learning_rate": 5.683326530612245e-05, "loss": 1.3911, "step": 221600 }, { "epoch": 4.69, "learning_rate": 5.681285714285714e-05, "loss": 1.3908, "step": 221700 }, { "epoch": 4.69, "learning_rate": 5.679244897959184e-05, "loss": 1.3965, "step": 221800 }, { "epoch": 4.7, "learning_rate": 5.677204081632653e-05, "loss": 1.4021, "step": 221900 }, { "epoch": 4.7, "learning_rate": 5.6751632653061225e-05, "loss": 1.3955, "step": 222000 }, { "epoch": 4.7, "learning_rate": 5.673122448979592e-05, "loss": 1.3934, "step": 222100 }, { "epoch": 4.7, "learning_rate": 5.6710816326530615e-05, "loss": 1.3968, "step": 222200 }, { "epoch": 4.7, "learning_rate": 5.6690408163265306e-05, "loss": 1.3932, "step": 222300 }, { "epoch": 4.71, "learning_rate": 5.667e-05, "loss": 1.3976, "step": 222400 }, { "epoch": 4.71, "learning_rate": 5.6649591836734696e-05, "loss": 1.3959, "step": 222500 }, { "epoch": 4.71, "learning_rate": 5.662918367346939e-05, "loss": 1.392, "step": 222600 }, { "epoch": 4.71, "learning_rate": 5.660877551020408e-05, "loss": 1.3955, "step": 222700 }, { "epoch": 4.72, "learning_rate": 5.658836734693878e-05, "loss": 1.3874, "step": 222800 }, { "epoch": 4.72, "learning_rate": 5.656795918367347e-05, "loss": 1.3968, "step": 222900 }, { "epoch": 4.72, "learning_rate": 5.654755102040816e-05, "loss": 1.3904, "step": 223000 }, { "epoch": 4.72, "learning_rate": 5.652714285714286e-05, "loss": 1.3912, "step": 223100 }, { "epoch": 4.72, "learning_rate": 5.650673469387755e-05, "loss": 1.3996, "step": 223200 }, { "epoch": 4.73, "learning_rate": 5.648632653061224e-05, "loss": 1.3897, "step": 223300 }, { "epoch": 4.73, "learning_rate": 5.6465918367346934e-05, "loss": 1.3922, "step": 223400 }, { "epoch": 4.73, "learning_rate": 5.644551020408163e-05, "loss": 1.4015, "step": 223500 }, { "epoch": 4.73, "learning_rate": 5.6425102040816324e-05, "loss": 1.3882, "step": 223600 }, { "epoch": 4.73, "learning_rate": 5.6404693877551016e-05, "loss": 1.3984, "step": 223700 }, { "epoch": 4.74, "learning_rate": 5.6384285714285714e-05, "loss": 1.3913, "step": 223800 }, { "epoch": 4.74, "learning_rate": 5.6363877551020405e-05, "loss": 1.3874, "step": 223900 }, { "epoch": 4.74, "learning_rate": 5.634346938775511e-05, "loss": 1.393, "step": 224000 }, { "epoch": 4.74, "learning_rate": 5.63230612244898e-05, "loss": 1.3916, "step": 224100 }, { "epoch": 4.74, "learning_rate": 5.63026530612245e-05, "loss": 1.391, "step": 224200 }, { "epoch": 4.75, "learning_rate": 5.628224489795919e-05, "loss": 1.3869, "step": 224300 }, { "epoch": 4.75, "learning_rate": 5.6261836734693884e-05, "loss": 1.3942, "step": 224400 }, { "epoch": 4.75, "learning_rate": 5.6241428571428575e-05, "loss": 1.3863, "step": 224500 }, { "epoch": 4.75, "learning_rate": 5.6221020408163274e-05, "loss": 1.3916, "step": 224600 }, { "epoch": 4.76, "learning_rate": 5.6200612244897965e-05, "loss": 1.3891, "step": 224700 }, { "epoch": 4.76, "learning_rate": 5.618020408163266e-05, "loss": 1.3901, "step": 224800 }, { "epoch": 4.76, "learning_rate": 5.6159795918367355e-05, "loss": 1.3934, "step": 224900 }, { "epoch": 4.76, "learning_rate": 5.613938775510205e-05, "loss": 1.3956, "step": 225000 }, { "epoch": 4.76, "learning_rate": 5.611897959183674e-05, "loss": 1.3877, "step": 225100 }, { "epoch": 4.77, "learning_rate": 5.609857142857144e-05, "loss": 1.3958, "step": 225200 }, { "epoch": 4.77, "learning_rate": 5.607816326530613e-05, "loss": 1.3907, "step": 225300 }, { "epoch": 4.77, "learning_rate": 5.605775510204082e-05, "loss": 1.3949, "step": 225400 }, { "epoch": 4.77, "learning_rate": 5.603734693877551e-05, "loss": 1.3919, "step": 225500 }, { "epoch": 4.77, "learning_rate": 5.601693877551021e-05, "loss": 1.3983, "step": 225600 }, { "epoch": 4.78, "learning_rate": 5.59965306122449e-05, "loss": 1.3869, "step": 225700 }, { "epoch": 4.78, "learning_rate": 5.597612244897959e-05, "loss": 1.3941, "step": 225800 }, { "epoch": 4.78, "learning_rate": 5.595571428571429e-05, "loss": 1.3953, "step": 225900 }, { "epoch": 4.78, "learning_rate": 5.593551020408163e-05, "loss": 1.3968, "step": 226000 }, { "epoch": 4.79, "learning_rate": 5.5915102040816324e-05, "loss": 1.3905, "step": 226100 }, { "epoch": 4.79, "learning_rate": 5.589469387755102e-05, "loss": 1.3922, "step": 226200 }, { "epoch": 4.79, "learning_rate": 5.5874285714285714e-05, "loss": 1.389, "step": 226300 }, { "epoch": 4.79, "learning_rate": 5.5853877551020405e-05, "loss": 1.3823, "step": 226400 }, { "epoch": 4.79, "learning_rate": 5.58334693877551e-05, "loss": 1.3936, "step": 226500 }, { "epoch": 4.8, "learning_rate": 5.5813061224489795e-05, "loss": 1.3956, "step": 226600 }, { "epoch": 4.8, "learning_rate": 5.57926530612245e-05, "loss": 1.3901, "step": 226700 }, { "epoch": 4.8, "learning_rate": 5.577224489795919e-05, "loss": 1.3857, "step": 226800 }, { "epoch": 4.8, "learning_rate": 5.5751836734693884e-05, "loss": 1.3895, "step": 226900 }, { "epoch": 4.8, "learning_rate": 5.573142857142858e-05, "loss": 1.3905, "step": 227000 }, { "epoch": 4.81, "learning_rate": 5.5711020408163273e-05, "loss": 1.3901, "step": 227100 }, { "epoch": 4.81, "learning_rate": 5.5690612244897965e-05, "loss": 1.3913, "step": 227200 }, { "epoch": 4.81, "learning_rate": 5.567020408163266e-05, "loss": 1.3875, "step": 227300 }, { "epoch": 4.81, "learning_rate": 5.5649795918367355e-05, "loss": 1.382, "step": 227400 }, { "epoch": 4.81, "learning_rate": 5.5629387755102047e-05, "loss": 1.3959, "step": 227500 }, { "epoch": 4.82, "learning_rate": 5.560897959183674e-05, "loss": 1.386, "step": 227600 }, { "epoch": 4.82, "learning_rate": 5.5588571428571437e-05, "loss": 1.3884, "step": 227700 }, { "epoch": 4.82, "learning_rate": 5.556816326530613e-05, "loss": 1.3885, "step": 227800 }, { "epoch": 4.82, "learning_rate": 5.554775510204082e-05, "loss": 1.3945, "step": 227900 }, { "epoch": 4.83, "learning_rate": 5.552734693877552e-05, "loss": 1.3819, "step": 228000 }, { "epoch": 4.83, "learning_rate": 5.550693877551021e-05, "loss": 1.3916, "step": 228100 }, { "epoch": 4.83, "learning_rate": 5.54865306122449e-05, "loss": 1.3913, "step": 228200 }, { "epoch": 4.83, "learning_rate": 5.546612244897959e-05, "loss": 1.3866, "step": 228300 }, { "epoch": 4.83, "learning_rate": 5.544591836734694e-05, "loss": 1.3878, "step": 228400 }, { "epoch": 4.84, "learning_rate": 5.542551020408163e-05, "loss": 1.3955, "step": 228500 }, { "epoch": 4.84, "learning_rate": 5.5405102040816324e-05, "loss": 1.3854, "step": 228600 }, { "epoch": 4.84, "learning_rate": 5.538469387755102e-05, "loss": 1.3889, "step": 228700 }, { "epoch": 4.84, "learning_rate": 5.5364285714285713e-05, "loss": 1.3907, "step": 228800 }, { "epoch": 4.84, "learning_rate": 5.5343877551020405e-05, "loss": 1.3943, "step": 228900 }, { "epoch": 4.85, "learning_rate": 5.5323469387755103e-05, "loss": 1.3867, "step": 229000 }, { "epoch": 4.85, "learning_rate": 5.5303061224489795e-05, "loss": 1.3864, "step": 229100 }, { "epoch": 4.85, "learning_rate": 5.5282653061224487e-05, "loss": 1.3888, "step": 229200 }, { "epoch": 4.85, "learning_rate": 5.526224489795918e-05, "loss": 1.3842, "step": 229300 }, { "epoch": 4.86, "learning_rate": 5.5241836734693877e-05, "loss": 1.3886, "step": 229400 }, { "epoch": 4.86, "learning_rate": 5.522142857142858e-05, "loss": 1.3955, "step": 229500 }, { "epoch": 4.86, "learning_rate": 5.520122448979592e-05, "loss": 1.3783, "step": 229600 }, { "epoch": 4.86, "learning_rate": 5.5180816326530614e-05, "loss": 1.3809, "step": 229700 }, { "epoch": 4.86, "learning_rate": 5.516040816326531e-05, "loss": 1.386, "step": 229800 }, { "epoch": 4.87, "learning_rate": 5.5140000000000004e-05, "loss": 1.3852, "step": 229900 }, { "epoch": 4.87, "learning_rate": 5.5119591836734696e-05, "loss": 1.3826, "step": 230000 }, { "epoch": 4.87, "learning_rate": 5.509918367346939e-05, "loss": 1.3838, "step": 230100 }, { "epoch": 4.87, "learning_rate": 5.5078775510204086e-05, "loss": 1.3899, "step": 230200 }, { "epoch": 4.87, "learning_rate": 5.505836734693878e-05, "loss": 1.3862, "step": 230300 }, { "epoch": 4.88, "learning_rate": 5.503795918367347e-05, "loss": 1.388, "step": 230400 }, { "epoch": 4.88, "learning_rate": 5.501755102040817e-05, "loss": 1.3864, "step": 230500 }, { "epoch": 4.88, "learning_rate": 5.499714285714286e-05, "loss": 1.3791, "step": 230600 }, { "epoch": 4.88, "learning_rate": 5.497673469387755e-05, "loss": 1.3915, "step": 230700 }, { "epoch": 4.88, "learning_rate": 5.495632653061225e-05, "loss": 1.3864, "step": 230800 }, { "epoch": 4.89, "learning_rate": 5.493591836734694e-05, "loss": 1.3748, "step": 230900 }, { "epoch": 4.89, "learning_rate": 5.491551020408163e-05, "loss": 1.3906, "step": 231000 }, { "epoch": 4.89, "learning_rate": 5.489510204081632e-05, "loss": 1.3836, "step": 231100 }, { "epoch": 4.89, "learning_rate": 5.487469387755102e-05, "loss": 1.3837, "step": 231200 }, { "epoch": 4.9, "learning_rate": 5.485428571428571e-05, "loss": 1.3869, "step": 231300 }, { "epoch": 4.9, "learning_rate": 5.4833877551020405e-05, "loss": 1.3804, "step": 231400 }, { "epoch": 4.9, "learning_rate": 5.48134693877551e-05, "loss": 1.3866, "step": 231500 }, { "epoch": 4.9, "learning_rate": 5.4793061224489795e-05, "loss": 1.3783, "step": 231600 }, { "epoch": 4.9, "learning_rate": 5.4772653061224486e-05, "loss": 1.3925, "step": 231700 }, { "epoch": 4.91, "learning_rate": 5.4752244897959185e-05, "loss": 1.3832, "step": 231800 }, { "epoch": 4.91, "learning_rate": 5.4731836734693876e-05, "loss": 1.387, "step": 231900 }, { "epoch": 4.91, "learning_rate": 5.471163265306123e-05, "loss": 1.3928, "step": 232000 }, { "epoch": 4.91, "learning_rate": 5.469122448979592e-05, "loss": 1.3731, "step": 232100 }, { "epoch": 4.91, "learning_rate": 5.4670816326530614e-05, "loss": 1.3833, "step": 232200 }, { "epoch": 4.92, "learning_rate": 5.465040816326531e-05, "loss": 1.3912, "step": 232300 }, { "epoch": 4.92, "learning_rate": 5.4630000000000004e-05, "loss": 1.3806, "step": 232400 }, { "epoch": 4.92, "learning_rate": 5.4609591836734695e-05, "loss": 1.3874, "step": 232500 }, { "epoch": 4.92, "learning_rate": 5.4589183673469394e-05, "loss": 1.3715, "step": 232600 }, { "epoch": 4.92, "learning_rate": 5.4568775510204085e-05, "loss": 1.394, "step": 232700 }, { "epoch": 4.93, "learning_rate": 5.454836734693878e-05, "loss": 1.3899, "step": 232800 }, { "epoch": 4.93, "learning_rate": 5.452795918367347e-05, "loss": 1.388, "step": 232900 }, { "epoch": 4.93, "learning_rate": 5.450755102040817e-05, "loss": 1.3854, "step": 233000 }, { "epoch": 4.93, "learning_rate": 5.448714285714286e-05, "loss": 1.3906, "step": 233100 }, { "epoch": 4.94, "learning_rate": 5.446673469387755e-05, "loss": 1.3852, "step": 233200 }, { "epoch": 4.94, "learning_rate": 5.444632653061225e-05, "loss": 1.3783, "step": 233300 }, { "epoch": 4.94, "learning_rate": 5.442591836734694e-05, "loss": 1.3822, "step": 233400 }, { "epoch": 4.94, "learning_rate": 5.440551020408163e-05, "loss": 1.3801, "step": 233500 }, { "epoch": 4.94, "learning_rate": 5.438510204081633e-05, "loss": 1.3828, "step": 233600 }, { "epoch": 4.95, "learning_rate": 5.436469387755102e-05, "loss": 1.3793, "step": 233700 }, { "epoch": 4.95, "learning_rate": 5.434428571428571e-05, "loss": 1.3888, "step": 233800 }, { "epoch": 4.95, "learning_rate": 5.4323877551020405e-05, "loss": 1.3893, "step": 233900 }, { "epoch": 4.95, "learning_rate": 5.43034693877551e-05, "loss": 1.3863, "step": 234000 }, { "epoch": 4.95, "learning_rate": 5.4283061224489795e-05, "loss": 1.3819, "step": 234100 }, { "epoch": 4.96, "learning_rate": 5.4262653061224486e-05, "loss": 1.3797, "step": 234200 }, { "epoch": 4.96, "learning_rate": 5.4242244897959185e-05, "loss": 1.3859, "step": 234300 }, { "epoch": 4.96, "learning_rate": 5.4221836734693876e-05, "loss": 1.3799, "step": 234400 }, { "epoch": 4.96, "learning_rate": 5.420142857142857e-05, "loss": 1.3792, "step": 234500 }, { "epoch": 4.97, "learning_rate": 5.4181020408163266e-05, "loss": 1.3811, "step": 234600 }, { "epoch": 4.97, "learning_rate": 5.416061224489796e-05, "loss": 1.3802, "step": 234700 }, { "epoch": 4.97, "learning_rate": 5.414020408163265e-05, "loss": 1.3867, "step": 234800 }, { "epoch": 4.97, "learning_rate": 5.411979591836734e-05, "loss": 1.3771, "step": 234900 }, { "epoch": 4.97, "learning_rate": 5.4099387755102046e-05, "loss": 1.3859, "step": 235000 }, { "epoch": 4.98, "learning_rate": 5.4078979591836744e-05, "loss": 1.3873, "step": 235100 }, { "epoch": 4.98, "learning_rate": 5.4058571428571436e-05, "loss": 1.3838, "step": 235200 }, { "epoch": 4.98, "learning_rate": 5.403816326530613e-05, "loss": 1.3731, "step": 235300 }, { "epoch": 4.98, "learning_rate": 5.4017755102040826e-05, "loss": 1.3794, "step": 235400 }, { "epoch": 4.98, "learning_rate": 5.399734693877552e-05, "loss": 1.3826, "step": 235500 }, { "epoch": 4.99, "learning_rate": 5.397693877551021e-05, "loss": 1.3785, "step": 235600 }, { "epoch": 4.99, "learning_rate": 5.395653061224491e-05, "loss": 1.3761, "step": 235700 }, { "epoch": 4.99, "learning_rate": 5.39361224489796e-05, "loss": 1.3798, "step": 235800 }, { "epoch": 4.99, "learning_rate": 5.391571428571429e-05, "loss": 1.3788, "step": 235900 }, { "epoch": 4.99, "learning_rate": 5.389530612244898e-05, "loss": 1.3782, "step": 236000 }, { "epoch": 5.0, "learning_rate": 5.387489795918368e-05, "loss": 1.3784, "step": 236100 }, { "epoch": 5.0, "learning_rate": 5.385448979591837e-05, "loss": 1.379, "step": 236200 }, { "epoch": 5.0, "learning_rate": 5.3834081632653064e-05, "loss": 1.3728, "step": 236300 }, { "epoch": 5.0, "learning_rate": 5.381367346938776e-05, "loss": 1.3765, "step": 236400 }, { "epoch": 5.01, "learning_rate": 5.3793265306122454e-05, "loss": 1.3776, "step": 236500 }, { "epoch": 5.01, "learning_rate": 5.3773061224489794e-05, "loss": 1.3735, "step": 236600 }, { "epoch": 5.01, "learning_rate": 5.375265306122449e-05, "loss": 1.3798, "step": 236700 }, { "epoch": 5.01, "learning_rate": 5.3732244897959184e-05, "loss": 1.3693, "step": 236800 }, { "epoch": 5.01, "learning_rate": 5.3711836734693876e-05, "loss": 1.3738, "step": 236900 }, { "epoch": 5.02, "learning_rate": 5.369142857142857e-05, "loss": 1.3739, "step": 237000 }, { "epoch": 5.02, "learning_rate": 5.3671020408163266e-05, "loss": 1.3664, "step": 237100 }, { "epoch": 5.02, "learning_rate": 5.365061224489796e-05, "loss": 1.375, "step": 237200 }, { "epoch": 5.02, "learning_rate": 5.363020408163265e-05, "loss": 1.3725, "step": 237300 }, { "epoch": 5.02, "learning_rate": 5.360979591836735e-05, "loss": 1.3726, "step": 237400 }, { "epoch": 5.03, "learning_rate": 5.358938775510204e-05, "loss": 1.3735, "step": 237500 }, { "epoch": 5.03, "learning_rate": 5.356897959183673e-05, "loss": 1.3666, "step": 237600 }, { "epoch": 5.03, "learning_rate": 5.354857142857143e-05, "loss": 1.3782, "step": 237700 }, { "epoch": 5.03, "learning_rate": 5.352816326530613e-05, "loss": 1.3752, "step": 237800 }, { "epoch": 5.03, "learning_rate": 5.3507755102040826e-05, "loss": 1.3752, "step": 237900 }, { "epoch": 5.04, "learning_rate": 5.348734693877552e-05, "loss": 1.3802, "step": 238000 }, { "epoch": 5.04, "learning_rate": 5.346693877551021e-05, "loss": 1.3747, "step": 238100 }, { "epoch": 5.04, "learning_rate": 5.344653061224491e-05, "loss": 1.3736, "step": 238200 }, { "epoch": 5.04, "learning_rate": 5.34261224489796e-05, "loss": 1.3809, "step": 238300 }, { "epoch": 5.05, "learning_rate": 5.340571428571429e-05, "loss": 1.3719, "step": 238400 }, { "epoch": 5.05, "learning_rate": 5.338530612244899e-05, "loss": 1.384, "step": 238500 }, { "epoch": 5.05, "learning_rate": 5.336489795918368e-05, "loss": 1.3688, "step": 238600 }, { "epoch": 5.05, "learning_rate": 5.334448979591837e-05, "loss": 1.3824, "step": 238700 }, { "epoch": 5.05, "learning_rate": 5.3324081632653063e-05, "loss": 1.3654, "step": 238800 }, { "epoch": 5.06, "learning_rate": 5.330367346938776e-05, "loss": 1.3766, "step": 238900 }, { "epoch": 5.06, "learning_rate": 5.3283265306122453e-05, "loss": 1.3762, "step": 239000 }, { "epoch": 5.06, "learning_rate": 5.3263061224489794e-05, "loss": 1.3794, "step": 239100 }, { "epoch": 5.06, "learning_rate": 5.324265306122449e-05, "loss": 1.3763, "step": 239200 }, { "epoch": 5.06, "learning_rate": 5.3222244897959184e-05, "loss": 1.3725, "step": 239300 }, { "epoch": 5.07, "learning_rate": 5.3201836734693876e-05, "loss": 1.3763, "step": 239400 }, { "epoch": 5.07, "learning_rate": 5.3181428571428574e-05, "loss": 1.3667, "step": 239500 }, { "epoch": 5.07, "learning_rate": 5.3161020408163266e-05, "loss": 1.3746, "step": 239600 }, { "epoch": 5.07, "learning_rate": 5.314061224489796e-05, "loss": 1.3727, "step": 239700 }, { "epoch": 5.08, "learning_rate": 5.312020408163265e-05, "loss": 1.3799, "step": 239800 }, { "epoch": 5.08, "learning_rate": 5.309979591836735e-05, "loss": 1.3714, "step": 239900 }, { "epoch": 5.08, "learning_rate": 5.307938775510204e-05, "loss": 1.3683, "step": 240000 }, { "epoch": 5.08, "learning_rate": 5.305897959183673e-05, "loss": 1.3774, "step": 240100 }, { "epoch": 5.08, "learning_rate": 5.303857142857143e-05, "loss": 1.369, "step": 240200 }, { "epoch": 5.09, "learning_rate": 5.301816326530612e-05, "loss": 1.3806, "step": 240300 }, { "epoch": 5.09, "learning_rate": 5.299775510204081e-05, "loss": 1.3725, "step": 240400 }, { "epoch": 5.09, "learning_rate": 5.297734693877551e-05, "loss": 1.3733, "step": 240500 }, { "epoch": 5.09, "learning_rate": 5.295693877551021e-05, "loss": 1.383, "step": 240600 }, { "epoch": 5.09, "learning_rate": 5.293653061224491e-05, "loss": 1.3655, "step": 240700 }, { "epoch": 5.1, "learning_rate": 5.29161224489796e-05, "loss": 1.3752, "step": 240800 }, { "epoch": 5.1, "learning_rate": 5.289571428571429e-05, "loss": 1.3796, "step": 240900 }, { "epoch": 5.1, "learning_rate": 5.287530612244899e-05, "loss": 1.3732, "step": 241000 }, { "epoch": 5.1, "learning_rate": 5.285489795918368e-05, "loss": 1.3668, "step": 241100 }, { "epoch": 5.1, "learning_rate": 5.283448979591837e-05, "loss": 1.3679, "step": 241200 }, { "epoch": 5.11, "learning_rate": 5.281408163265307e-05, "loss": 1.376, "step": 241300 }, { "epoch": 5.11, "learning_rate": 5.279367346938776e-05, "loss": 1.3758, "step": 241400 }, { "epoch": 5.11, "learning_rate": 5.277326530612245e-05, "loss": 1.3716, "step": 241500 }, { "epoch": 5.11, "learning_rate": 5.275285714285715e-05, "loss": 1.3811, "step": 241600 }, { "epoch": 5.12, "learning_rate": 5.273244897959184e-05, "loss": 1.3784, "step": 241700 }, { "epoch": 5.12, "learning_rate": 5.2712040816326535e-05, "loss": 1.3682, "step": 241800 }, { "epoch": 5.12, "learning_rate": 5.2691632653061226e-05, "loss": 1.3745, "step": 241900 }, { "epoch": 5.12, "learning_rate": 5.2671224489795925e-05, "loss": 1.37, "step": 242000 }, { "epoch": 5.12, "learning_rate": 5.2650816326530616e-05, "loss": 1.3733, "step": 242100 }, { "epoch": 5.13, "learning_rate": 5.263040816326531e-05, "loss": 1.3629, "step": 242200 }, { "epoch": 5.13, "learning_rate": 5.2610000000000006e-05, "loss": 1.3735, "step": 242300 }, { "epoch": 5.13, "learning_rate": 5.258979591836735e-05, "loss": 1.368, "step": 242400 }, { "epoch": 5.13, "learning_rate": 5.256938775510204e-05, "loss": 1.3758, "step": 242500 }, { "epoch": 5.13, "learning_rate": 5.254897959183673e-05, "loss": 1.3735, "step": 242600 }, { "epoch": 5.14, "learning_rate": 5.252857142857143e-05, "loss": 1.3688, "step": 242700 }, { "epoch": 5.14, "learning_rate": 5.250816326530612e-05, "loss": 1.3777, "step": 242800 }, { "epoch": 5.14, "learning_rate": 5.248775510204081e-05, "loss": 1.3649, "step": 242900 }, { "epoch": 5.14, "learning_rate": 5.246734693877551e-05, "loss": 1.3765, "step": 243000 }, { "epoch": 5.14, "learning_rate": 5.24469387755102e-05, "loss": 1.3756, "step": 243100 }, { "epoch": 5.15, "learning_rate": 5.242653061224489e-05, "loss": 1.3668, "step": 243200 }, { "epoch": 5.15, "learning_rate": 5.24061224489796e-05, "loss": 1.37, "step": 243300 }, { "epoch": 5.15, "learning_rate": 5.23857142857143e-05, "loss": 1.3694, "step": 243400 }, { "epoch": 5.15, "learning_rate": 5.236530612244899e-05, "loss": 1.3737, "step": 243500 }, { "epoch": 5.16, "learning_rate": 5.234489795918368e-05, "loss": 1.3688, "step": 243600 }, { "epoch": 5.16, "learning_rate": 5.232448979591837e-05, "loss": 1.3714, "step": 243700 }, { "epoch": 5.16, "learning_rate": 5.230408163265307e-05, "loss": 1.3711, "step": 243800 }, { "epoch": 5.16, "learning_rate": 5.228367346938776e-05, "loss": 1.3686, "step": 243900 }, { "epoch": 5.16, "learning_rate": 5.226326530612245e-05, "loss": 1.3589, "step": 244000 }, { "epoch": 5.17, "learning_rate": 5.224285714285715e-05, "loss": 1.3818, "step": 244100 }, { "epoch": 5.17, "learning_rate": 5.222244897959184e-05, "loss": 1.3743, "step": 244200 }, { "epoch": 5.17, "learning_rate": 5.2202040816326535e-05, "loss": 1.3707, "step": 244300 }, { "epoch": 5.17, "learning_rate": 5.2181836734693875e-05, "loss": 1.3706, "step": 244400 }, { "epoch": 5.17, "learning_rate": 5.2161428571428574e-05, "loss": 1.3684, "step": 244500 }, { "epoch": 5.18, "learning_rate": 5.2141020408163265e-05, "loss": 1.3695, "step": 244600 }, { "epoch": 5.18, "learning_rate": 5.212061224489796e-05, "loss": 1.3676, "step": 244700 }, { "epoch": 5.18, "learning_rate": 5.2100204081632655e-05, "loss": 1.3762, "step": 244800 }, { "epoch": 5.18, "learning_rate": 5.207979591836735e-05, "loss": 1.3768, "step": 244900 }, { "epoch": 5.19, "learning_rate": 5.205938775510204e-05, "loss": 1.3673, "step": 245000 }, { "epoch": 5.19, "learning_rate": 5.203897959183674e-05, "loss": 1.3667, "step": 245100 }, { "epoch": 5.19, "learning_rate": 5.201857142857143e-05, "loss": 1.3749, "step": 245200 }, { "epoch": 5.19, "learning_rate": 5.199816326530612e-05, "loss": 1.3715, "step": 245300 }, { "epoch": 5.19, "learning_rate": 5.197775510204081e-05, "loss": 1.3654, "step": 245400 }, { "epoch": 5.2, "learning_rate": 5.195734693877551e-05, "loss": 1.372, "step": 245500 }, { "epoch": 5.2, "learning_rate": 5.19369387755102e-05, "loss": 1.3669, "step": 245600 }, { "epoch": 5.2, "learning_rate": 5.191653061224489e-05, "loss": 1.3771, "step": 245700 }, { "epoch": 5.2, "learning_rate": 5.189612244897959e-05, "loss": 1.3663, "step": 245800 }, { "epoch": 5.2, "learning_rate": 5.187571428571428e-05, "loss": 1.371, "step": 245900 }, { "epoch": 5.21, "learning_rate": 5.1855306122448975e-05, "loss": 1.3738, "step": 246000 }, { "epoch": 5.21, "learning_rate": 5.183489795918368e-05, "loss": 1.3744, "step": 246100 }, { "epoch": 5.21, "learning_rate": 5.181448979591838e-05, "loss": 1.3747, "step": 246200 }, { "epoch": 5.21, "learning_rate": 5.179428571428572e-05, "loss": 1.3614, "step": 246300 }, { "epoch": 5.21, "learning_rate": 5.177387755102041e-05, "loss": 1.3677, "step": 246400 }, { "epoch": 5.22, "learning_rate": 5.17534693877551e-05, "loss": 1.3644, "step": 246500 }, { "epoch": 5.22, "learning_rate": 5.17330612244898e-05, "loss": 1.3696, "step": 246600 }, { "epoch": 5.22, "learning_rate": 5.171265306122449e-05, "loss": 1.3718, "step": 246700 }, { "epoch": 5.22, "learning_rate": 5.1692244897959184e-05, "loss": 1.3666, "step": 246800 }, { "epoch": 5.23, "learning_rate": 5.167183673469388e-05, "loss": 1.3634, "step": 246900 }, { "epoch": 5.23, "learning_rate": 5.1651428571428574e-05, "loss": 1.3669, "step": 247000 }, { "epoch": 5.23, "learning_rate": 5.1631020408163265e-05, "loss": 1.3611, "step": 247100 }, { "epoch": 5.23, "learning_rate": 5.1610612244897963e-05, "loss": 1.3688, "step": 247200 }, { "epoch": 5.23, "learning_rate": 5.1590204081632655e-05, "loss": 1.3583, "step": 247300 }, { "epoch": 5.24, "learning_rate": 5.156979591836735e-05, "loss": 1.3703, "step": 247400 }, { "epoch": 5.24, "learning_rate": 5.154938775510204e-05, "loss": 1.374, "step": 247500 }, { "epoch": 5.24, "learning_rate": 5.1528979591836737e-05, "loss": 1.3702, "step": 247600 }, { "epoch": 5.24, "learning_rate": 5.150857142857143e-05, "loss": 1.3673, "step": 247700 }, { "epoch": 5.24, "learning_rate": 5.148816326530612e-05, "loss": 1.375, "step": 247800 }, { "epoch": 5.25, "learning_rate": 5.146775510204082e-05, "loss": 1.3668, "step": 247900 }, { "epoch": 5.25, "learning_rate": 5.144734693877551e-05, "loss": 1.3645, "step": 248000 }, { "epoch": 5.25, "learning_rate": 5.14269387755102e-05, "loss": 1.37, "step": 248100 }, { "epoch": 5.25, "learning_rate": 5.14065306122449e-05, "loss": 1.3684, "step": 248200 }, { "epoch": 5.26, "learning_rate": 5.138612244897959e-05, "loss": 1.3699, "step": 248300 }, { "epoch": 5.26, "learning_rate": 5.136571428571428e-05, "loss": 1.3612, "step": 248400 }, { "epoch": 5.26, "learning_rate": 5.1345306122448974e-05, "loss": 1.3729, "step": 248500 }, { "epoch": 5.26, "learning_rate": 5.132489795918367e-05, "loss": 1.3631, "step": 248600 }, { "epoch": 5.26, "learning_rate": 5.1304489795918364e-05, "loss": 1.3682, "step": 248700 }, { "epoch": 5.27, "learning_rate": 5.1284081632653056e-05, "loss": 1.3639, "step": 248800 }, { "epoch": 5.27, "learning_rate": 5.126367346938776e-05, "loss": 1.3699, "step": 248900 }, { "epoch": 5.27, "learning_rate": 5.124326530612246e-05, "loss": 1.3675, "step": 249000 }, { "epoch": 5.27, "learning_rate": 5.122285714285715e-05, "loss": 1.3771, "step": 249100 }, { "epoch": 5.27, "learning_rate": 5.120244897959184e-05, "loss": 1.3675, "step": 249200 }, { "epoch": 5.28, "learning_rate": 5.1182040816326534e-05, "loss": 1.3714, "step": 249300 }, { "epoch": 5.28, "learning_rate": 5.116163265306123e-05, "loss": 1.3658, "step": 249400 }, { "epoch": 5.28, "learning_rate": 5.1141224489795924e-05, "loss": 1.3634, "step": 249500 }, { "epoch": 5.28, "learning_rate": 5.1120816326530616e-05, "loss": 1.3612, "step": 249600 }, { "epoch": 5.28, "learning_rate": 5.1100408163265314e-05, "loss": 1.3638, "step": 249700 }, { "epoch": 5.29, "learning_rate": 5.1080000000000006e-05, "loss": 1.369, "step": 249800 }, { "epoch": 5.29, "learning_rate": 5.10595918367347e-05, "loss": 1.3629, "step": 249900 }, { "epoch": 5.29, "learning_rate": 5.1039183673469396e-05, "loss": 1.3725, "step": 250000 }, { "epoch": 5.29, "learning_rate": 5.101877551020409e-05, "loss": 1.3663, "step": 250100 }, { "epoch": 5.3, "learning_rate": 5.099836734693878e-05, "loss": 1.3668, "step": 250200 }, { "epoch": 5.3, "learning_rate": 5.097795918367348e-05, "loss": 1.371, "step": 250300 }, { "epoch": 5.3, "learning_rate": 5.095755102040817e-05, "loss": 1.3677, "step": 250400 }, { "epoch": 5.3, "learning_rate": 5.093714285714286e-05, "loss": 1.3704, "step": 250500 }, { "epoch": 5.3, "learning_rate": 5.091673469387755e-05, "loss": 1.3606, "step": 250600 }, { "epoch": 5.31, "learning_rate": 5.089632653061225e-05, "loss": 1.3665, "step": 250700 }, { "epoch": 5.31, "learning_rate": 5.087591836734694e-05, "loss": 1.3671, "step": 250800 }, { "epoch": 5.31, "learning_rate": 5.085551020408163e-05, "loss": 1.3599, "step": 250900 }, { "epoch": 5.31, "learning_rate": 5.083510204081633e-05, "loss": 1.3704, "step": 251000 }, { "epoch": 5.31, "learning_rate": 5.081469387755102e-05, "loss": 1.3655, "step": 251100 }, { "epoch": 5.32, "learning_rate": 5.0794285714285715e-05, "loss": 1.3656, "step": 251200 }, { "epoch": 5.32, "learning_rate": 5.077387755102041e-05, "loss": 1.3651, "step": 251300 }, { "epoch": 5.32, "learning_rate": 5.0753469387755105e-05, "loss": 1.3607, "step": 251400 }, { "epoch": 5.32, "learning_rate": 5.0733265306122446e-05, "loss": 1.3639, "step": 251500 }, { "epoch": 5.32, "learning_rate": 5.071285714285715e-05, "loss": 1.3607, "step": 251600 }, { "epoch": 5.33, "learning_rate": 5.069244897959184e-05, "loss": 1.3622, "step": 251700 }, { "epoch": 5.33, "learning_rate": 5.067224489795919e-05, "loss": 1.3674, "step": 251800 }, { "epoch": 5.33, "learning_rate": 5.065183673469388e-05, "loss": 1.3547, "step": 251900 }, { "epoch": 5.33, "learning_rate": 5.063142857142857e-05, "loss": 1.3675, "step": 252000 }, { "epoch": 5.34, "learning_rate": 5.0611020408163265e-05, "loss": 1.3667, "step": 252100 }, { "epoch": 5.34, "learning_rate": 5.059061224489796e-05, "loss": 1.366, "step": 252200 }, { "epoch": 5.34, "learning_rate": 5.0570204081632655e-05, "loss": 1.3609, "step": 252300 }, { "epoch": 5.34, "learning_rate": 5.0549795918367346e-05, "loss": 1.3662, "step": 252400 }, { "epoch": 5.34, "learning_rate": 5.0529387755102045e-05, "loss": 1.3656, "step": 252500 }, { "epoch": 5.35, "learning_rate": 5.0508979591836736e-05, "loss": 1.3602, "step": 252600 }, { "epoch": 5.35, "learning_rate": 5.048857142857143e-05, "loss": 1.3609, "step": 252700 }, { "epoch": 5.35, "learning_rate": 5.0468163265306126e-05, "loss": 1.37, "step": 252800 }, { "epoch": 5.35, "learning_rate": 5.044775510204082e-05, "loss": 1.3692, "step": 252900 }, { "epoch": 5.35, "learning_rate": 5.042734693877551e-05, "loss": 1.3616, "step": 253000 }, { "epoch": 5.36, "learning_rate": 5.04069387755102e-05, "loss": 1.3564, "step": 253100 }, { "epoch": 5.36, "learning_rate": 5.03865306122449e-05, "loss": 1.3671, "step": 253200 }, { "epoch": 5.36, "learning_rate": 5.036612244897959e-05, "loss": 1.3626, "step": 253300 }, { "epoch": 5.36, "learning_rate": 5.034571428571428e-05, "loss": 1.3637, "step": 253400 }, { "epoch": 5.37, "learning_rate": 5.032530612244898e-05, "loss": 1.3721, "step": 253500 }, { "epoch": 5.37, "learning_rate": 5.030489795918367e-05, "loss": 1.3671, "step": 253600 }, { "epoch": 5.37, "learning_rate": 5.0284489795918364e-05, "loss": 1.3607, "step": 253700 }, { "epoch": 5.37, "learning_rate": 5.026408163265306e-05, "loss": 1.3707, "step": 253800 }, { "epoch": 5.37, "learning_rate": 5.0243673469387754e-05, "loss": 1.3642, "step": 253900 }, { "epoch": 5.38, "learning_rate": 5.0223265306122445e-05, "loss": 1.3632, "step": 254000 }, { "epoch": 5.38, "learning_rate": 5.020285714285714e-05, "loss": 1.357, "step": 254100 }, { "epoch": 5.38, "learning_rate": 5.0182448979591835e-05, "loss": 1.3646, "step": 254200 }, { "epoch": 5.38, "learning_rate": 5.016204081632653e-05, "loss": 1.3602, "step": 254300 }, { "epoch": 5.38, "learning_rate": 5.014163265306123e-05, "loss": 1.3586, "step": 254400 }, { "epoch": 5.39, "learning_rate": 5.0121224489795924e-05, "loss": 1.3614, "step": 254500 }, { "epoch": 5.39, "learning_rate": 5.010081632653062e-05, "loss": 1.3678, "step": 254600 }, { "epoch": 5.39, "learning_rate": 5.0080408163265314e-05, "loss": 1.354, "step": 254700 }, { "epoch": 5.39, "learning_rate": 5.0060000000000005e-05, "loss": 1.3695, "step": 254800 }, { "epoch": 5.39, "learning_rate": 5.0039591836734704e-05, "loss": 1.3717, "step": 254900 }, { "epoch": 5.4, "learning_rate": 5.0019183673469395e-05, "loss": 1.3631, "step": 255000 }, { "epoch": 5.4, "learning_rate": 4.999877551020408e-05, "loss": 1.354, "step": 255100 }, { "epoch": 5.4, "learning_rate": 4.997836734693878e-05, "loss": 1.3627, "step": 255200 }, { "epoch": 5.4, "learning_rate": 4.995795918367347e-05, "loss": 1.3661, "step": 255300 }, { "epoch": 5.41, "learning_rate": 4.993755102040816e-05, "loss": 1.3643, "step": 255400 }, { "epoch": 5.41, "learning_rate": 4.991714285714286e-05, "loss": 1.3659, "step": 255500 }, { "epoch": 5.41, "learning_rate": 4.989673469387756e-05, "loss": 1.3579, "step": 255600 }, { "epoch": 5.41, "learning_rate": 4.987632653061225e-05, "loss": 1.3626, "step": 255700 }, { "epoch": 5.41, "learning_rate": 4.985591836734694e-05, "loss": 1.3663, "step": 255800 }, { "epoch": 5.42, "learning_rate": 4.983551020408164e-05, "loss": 1.3669, "step": 255900 }, { "epoch": 5.42, "learning_rate": 4.981510204081633e-05, "loss": 1.3658, "step": 256000 }, { "epoch": 5.42, "learning_rate": 4.979469387755102e-05, "loss": 1.3646, "step": 256100 }, { "epoch": 5.42, "learning_rate": 4.9774285714285714e-05, "loss": 1.3545, "step": 256200 }, { "epoch": 5.42, "learning_rate": 4.975387755102041e-05, "loss": 1.3558, "step": 256300 }, { "epoch": 5.43, "learning_rate": 4.9733469387755104e-05, "loss": 1.3715, "step": 256400 }, { "epoch": 5.43, "learning_rate": 4.9713061224489796e-05, "loss": 1.3543, "step": 256500 }, { "epoch": 5.43, "learning_rate": 4.9692653061224494e-05, "loss": 1.3574, "step": 256600 }, { "epoch": 5.43, "learning_rate": 4.9672244897959186e-05, "loss": 1.366, "step": 256700 }, { "epoch": 5.43, "learning_rate": 4.965183673469388e-05, "loss": 1.3644, "step": 256800 }, { "epoch": 5.44, "learning_rate": 4.9631428571428576e-05, "loss": 1.3538, "step": 256900 }, { "epoch": 5.44, "learning_rate": 4.961102040816327e-05, "loss": 1.3584, "step": 257000 }, { "epoch": 5.44, "learning_rate": 4.959061224489796e-05, "loss": 1.364, "step": 257100 }, { "epoch": 5.44, "learning_rate": 4.957020408163266e-05, "loss": 1.3648, "step": 257200 }, { "epoch": 5.45, "learning_rate": 4.954979591836735e-05, "loss": 1.3677, "step": 257300 }, { "epoch": 5.45, "learning_rate": 4.952938775510204e-05, "loss": 1.3627, "step": 257400 }, { "epoch": 5.45, "learning_rate": 4.950897959183673e-05, "loss": 1.3604, "step": 257500 }, { "epoch": 5.45, "learning_rate": 4.948857142857143e-05, "loss": 1.3644, "step": 257600 }, { "epoch": 5.45, "learning_rate": 4.946816326530612e-05, "loss": 1.3597, "step": 257700 }, { "epoch": 5.46, "learning_rate": 4.944775510204082e-05, "loss": 1.3584, "step": 257800 }, { "epoch": 5.46, "learning_rate": 4.942734693877551e-05, "loss": 1.3579, "step": 257900 }, { "epoch": 5.46, "learning_rate": 4.940693877551021e-05, "loss": 1.3517, "step": 258000 }, { "epoch": 5.46, "learning_rate": 4.93865306122449e-05, "loss": 1.3607, "step": 258100 }, { "epoch": 5.46, "learning_rate": 4.9366122448979594e-05, "loss": 1.361, "step": 258200 }, { "epoch": 5.47, "learning_rate": 4.934591836734694e-05, "loss": 1.3581, "step": 258300 }, { "epoch": 5.47, "learning_rate": 4.932571428571429e-05, "loss": 1.364, "step": 258400 }, { "epoch": 5.47, "learning_rate": 4.930530612244898e-05, "loss": 1.3659, "step": 258500 }, { "epoch": 5.47, "learning_rate": 4.928489795918367e-05, "loss": 1.3594, "step": 258600 }, { "epoch": 5.48, "learning_rate": 4.926448979591837e-05, "loss": 1.3557, "step": 258700 }, { "epoch": 5.48, "learning_rate": 4.924408163265307e-05, "loss": 1.3619, "step": 258800 }, { "epoch": 5.48, "learning_rate": 4.922367346938776e-05, "loss": 1.3588, "step": 258900 }, { "epoch": 5.48, "learning_rate": 4.920326530612245e-05, "loss": 1.3564, "step": 259000 }, { "epoch": 5.48, "learning_rate": 4.918285714285715e-05, "loss": 1.3562, "step": 259100 }, { "epoch": 5.49, "learning_rate": 4.916244897959184e-05, "loss": 1.3595, "step": 259200 }, { "epoch": 5.49, "learning_rate": 4.914204081632653e-05, "loss": 1.3622, "step": 259300 }, { "epoch": 5.49, "learning_rate": 4.9121632653061225e-05, "loss": 1.3632, "step": 259400 }, { "epoch": 5.49, "learning_rate": 4.910142857142858e-05, "loss": 1.3518, "step": 259500 }, { "epoch": 5.49, "learning_rate": 4.908102040816327e-05, "loss": 1.3604, "step": 259600 }, { "epoch": 5.5, "learning_rate": 4.906061224489796e-05, "loss": 1.3563, "step": 259700 }, { "epoch": 5.5, "learning_rate": 4.9040204081632654e-05, "loss": 1.3605, "step": 259800 }, { "epoch": 5.5, "learning_rate": 4.901979591836735e-05, "loss": 1.3585, "step": 259900 }, { "epoch": 5.5, "learning_rate": 4.8999387755102044e-05, "loss": 1.3552, "step": 260000 }, { "epoch": 5.5, "learning_rate": 4.8978979591836736e-05, "loss": 1.3663, "step": 260100 }, { "epoch": 5.51, "learning_rate": 4.8958571428571434e-05, "loss": 1.3552, "step": 260200 }, { "epoch": 5.51, "learning_rate": 4.8938163265306126e-05, "loss": 1.3525, "step": 260300 }, { "epoch": 5.51, "learning_rate": 4.891775510204082e-05, "loss": 1.3558, "step": 260400 }, { "epoch": 5.51, "learning_rate": 4.8897346938775515e-05, "loss": 1.3521, "step": 260500 }, { "epoch": 5.52, "learning_rate": 4.887693877551021e-05, "loss": 1.3548, "step": 260600 }, { "epoch": 5.52, "learning_rate": 4.88565306122449e-05, "loss": 1.3492, "step": 260700 }, { "epoch": 5.52, "learning_rate": 4.883612244897959e-05, "loss": 1.3522, "step": 260800 }, { "epoch": 5.52, "learning_rate": 4.881571428571429e-05, "loss": 1.3587, "step": 260900 }, { "epoch": 5.52, "learning_rate": 4.879530612244898e-05, "loss": 1.356, "step": 261000 }, { "epoch": 5.53, "learning_rate": 4.877489795918367e-05, "loss": 1.3501, "step": 261100 }, { "epoch": 5.53, "learning_rate": 4.875448979591837e-05, "loss": 1.361, "step": 261200 }, { "epoch": 5.53, "learning_rate": 4.873408163265306e-05, "loss": 1.3567, "step": 261300 }, { "epoch": 5.53, "learning_rate": 4.871367346938775e-05, "loss": 1.3509, "step": 261400 }, { "epoch": 5.53, "learning_rate": 4.869326530612245e-05, "loss": 1.356, "step": 261500 }, { "epoch": 5.54, "learning_rate": 4.867285714285715e-05, "loss": 1.3551, "step": 261600 }, { "epoch": 5.54, "learning_rate": 4.865244897959184e-05, "loss": 1.3602, "step": 261700 }, { "epoch": 5.54, "learning_rate": 4.863204081632653e-05, "loss": 1.3582, "step": 261800 }, { "epoch": 5.54, "learning_rate": 4.861163265306123e-05, "loss": 1.3511, "step": 261900 }, { "epoch": 5.54, "learning_rate": 4.859122448979592e-05, "loss": 1.3636, "step": 262000 }, { "epoch": 5.55, "learning_rate": 4.857102040816327e-05, "loss": 1.3549, "step": 262100 }, { "epoch": 5.55, "learning_rate": 4.855061224489796e-05, "loss": 1.3544, "step": 262200 }, { "epoch": 5.55, "learning_rate": 4.853020408163266e-05, "loss": 1.3574, "step": 262300 }, { "epoch": 5.55, "learning_rate": 4.850979591836735e-05, "loss": 1.3552, "step": 262400 }, { "epoch": 5.56, "learning_rate": 4.8489387755102044e-05, "loss": 1.358, "step": 262500 }, { "epoch": 5.56, "learning_rate": 4.8468979591836735e-05, "loss": 1.3516, "step": 262600 }, { "epoch": 5.56, "learning_rate": 4.8448571428571434e-05, "loss": 1.3533, "step": 262700 }, { "epoch": 5.56, "learning_rate": 4.8428163265306125e-05, "loss": 1.3445, "step": 262800 }, { "epoch": 5.56, "learning_rate": 4.840775510204082e-05, "loss": 1.357, "step": 262900 }, { "epoch": 5.57, "learning_rate": 4.8387346938775515e-05, "loss": 1.3589, "step": 263000 }, { "epoch": 5.57, "learning_rate": 4.836693877551021e-05, "loss": 1.3528, "step": 263100 }, { "epoch": 5.57, "learning_rate": 4.83465306122449e-05, "loss": 1.3559, "step": 263200 }, { "epoch": 5.57, "learning_rate": 4.83261224489796e-05, "loss": 1.3541, "step": 263300 }, { "epoch": 5.57, "learning_rate": 4.830571428571429e-05, "loss": 1.3598, "step": 263400 }, { "epoch": 5.58, "learning_rate": 4.828530612244898e-05, "loss": 1.3551, "step": 263500 }, { "epoch": 5.58, "learning_rate": 4.826489795918367e-05, "loss": 1.3555, "step": 263600 }, { "epoch": 5.58, "learning_rate": 4.824448979591837e-05, "loss": 1.356, "step": 263700 }, { "epoch": 5.58, "learning_rate": 4.822408163265306e-05, "loss": 1.3579, "step": 263800 }, { "epoch": 5.59, "learning_rate": 4.820367346938775e-05, "loss": 1.349, "step": 263900 }, { "epoch": 5.59, "learning_rate": 4.818326530612245e-05, "loss": 1.3577, "step": 264000 }, { "epoch": 5.59, "learning_rate": 4.816285714285714e-05, "loss": 1.3547, "step": 264100 }, { "epoch": 5.59, "learning_rate": 4.8142448979591835e-05, "loss": 1.3527, "step": 264200 }, { "epoch": 5.59, "learning_rate": 4.812204081632653e-05, "loss": 1.3547, "step": 264300 }, { "epoch": 5.6, "learning_rate": 4.810163265306123e-05, "loss": 1.355, "step": 264400 }, { "epoch": 5.6, "learning_rate": 4.808122448979592e-05, "loss": 1.3486, "step": 264500 }, { "epoch": 5.6, "learning_rate": 4.8060816326530614e-05, "loss": 1.3456, "step": 264600 }, { "epoch": 5.6, "learning_rate": 4.804040816326531e-05, "loss": 1.3508, "step": 264700 }, { "epoch": 5.6, "learning_rate": 4.8020000000000004e-05, "loss": 1.357, "step": 264800 }, { "epoch": 5.61, "learning_rate": 4.7999591836734696e-05, "loss": 1.3491, "step": 264900 }, { "epoch": 5.61, "learning_rate": 4.7979183673469394e-05, "loss": 1.3544, "step": 265000 }, { "epoch": 5.61, "learning_rate": 4.7958775510204086e-05, "loss": 1.3528, "step": 265100 }, { "epoch": 5.61, "learning_rate": 4.793836734693878e-05, "loss": 1.3547, "step": 265200 }, { "epoch": 5.61, "learning_rate": 4.791795918367347e-05, "loss": 1.3523, "step": 265300 }, { "epoch": 5.62, "learning_rate": 4.789755102040817e-05, "loss": 1.3545, "step": 265400 }, { "epoch": 5.62, "learning_rate": 4.787714285714286e-05, "loss": 1.3518, "step": 265500 }, { "epoch": 5.62, "learning_rate": 4.785673469387755e-05, "loss": 1.3518, "step": 265600 }, { "epoch": 5.62, "learning_rate": 4.783632653061225e-05, "loss": 1.3509, "step": 265700 }, { "epoch": 5.63, "learning_rate": 4.781591836734694e-05, "loss": 1.3615, "step": 265800 }, { "epoch": 5.63, "learning_rate": 4.779551020408163e-05, "loss": 1.3546, "step": 265900 }, { "epoch": 5.63, "learning_rate": 4.777510204081633e-05, "loss": 1.3509, "step": 266000 }, { "epoch": 5.63, "learning_rate": 4.775469387755102e-05, "loss": 1.3513, "step": 266100 }, { "epoch": 5.63, "learning_rate": 4.7734285714285714e-05, "loss": 1.3494, "step": 266200 }, { "epoch": 5.64, "learning_rate": 4.7713877551020405e-05, "loss": 1.349, "step": 266300 }, { "epoch": 5.64, "learning_rate": 4.769367346938776e-05, "loss": 1.3497, "step": 266400 }, { "epoch": 5.64, "learning_rate": 4.767326530612245e-05, "loss": 1.3567, "step": 266500 }, { "epoch": 5.64, "learning_rate": 4.765285714285714e-05, "loss": 1.3459, "step": 266600 }, { "epoch": 5.64, "learning_rate": 4.7632448979591834e-05, "loss": 1.3518, "step": 266700 }, { "epoch": 5.65, "learning_rate": 4.761204081632653e-05, "loss": 1.3449, "step": 266800 }, { "epoch": 5.65, "learning_rate": 4.7591632653061224e-05, "loss": 1.3547, "step": 266900 }, { "epoch": 5.65, "learning_rate": 4.7571224489795916e-05, "loss": 1.3518, "step": 267000 }, { "epoch": 5.65, "learning_rate": 4.7550816326530614e-05, "loss": 1.355, "step": 267100 }, { "epoch": 5.66, "learning_rate": 4.753040816326531e-05, "loss": 1.3594, "step": 267200 }, { "epoch": 5.66, "learning_rate": 4.7510000000000004e-05, "loss": 1.3578, "step": 267300 }, { "epoch": 5.66, "learning_rate": 4.7489795918367345e-05, "loss": 1.3415, "step": 267400 }, { "epoch": 5.66, "learning_rate": 4.746938775510204e-05, "loss": 1.3566, "step": 267500 }, { "epoch": 5.66, "learning_rate": 4.744897959183674e-05, "loss": 1.3475, "step": 267600 }, { "epoch": 5.67, "learning_rate": 4.742857142857143e-05, "loss": 1.3459, "step": 267700 }, { "epoch": 5.67, "learning_rate": 4.7408163265306125e-05, "loss": 1.3535, "step": 267800 }, { "epoch": 5.67, "learning_rate": 4.738775510204082e-05, "loss": 1.3524, "step": 267900 }, { "epoch": 5.67, "learning_rate": 4.7367346938775515e-05, "loss": 1.3577, "step": 268000 }, { "epoch": 5.67, "learning_rate": 4.7346938775510206e-05, "loss": 1.3492, "step": 268100 }, { "epoch": 5.68, "learning_rate": 4.7326530612244905e-05, "loss": 1.3512, "step": 268200 }, { "epoch": 5.68, "learning_rate": 4.7306122448979596e-05, "loss": 1.3496, "step": 268300 }, { "epoch": 5.68, "learning_rate": 4.728571428571429e-05, "loss": 1.3584, "step": 268400 }, { "epoch": 5.68, "learning_rate": 4.726530612244898e-05, "loss": 1.3619, "step": 268500 }, { "epoch": 5.68, "learning_rate": 4.724489795918368e-05, "loss": 1.3485, "step": 268600 }, { "epoch": 5.69, "learning_rate": 4.722448979591837e-05, "loss": 1.3514, "step": 268700 }, { "epoch": 5.69, "learning_rate": 4.720408163265306e-05, "loss": 1.3496, "step": 268800 }, { "epoch": 5.69, "learning_rate": 4.718367346938776e-05, "loss": 1.3482, "step": 268900 }, { "epoch": 5.69, "learning_rate": 4.716326530612245e-05, "loss": 1.3519, "step": 269000 }, { "epoch": 5.7, "learning_rate": 4.714285714285714e-05, "loss": 1.3434, "step": 269100 }, { "epoch": 5.7, "learning_rate": 4.712244897959184e-05, "loss": 1.3528, "step": 269200 }, { "epoch": 5.7, "learning_rate": 4.710204081632653e-05, "loss": 1.3501, "step": 269300 }, { "epoch": 5.7, "learning_rate": 4.7081632653061224e-05, "loss": 1.3531, "step": 269400 }, { "epoch": 5.7, "learning_rate": 4.7061224489795916e-05, "loss": 1.3506, "step": 269500 }, { "epoch": 5.71, "learning_rate": 4.7040816326530614e-05, "loss": 1.3509, "step": 269600 }, { "epoch": 5.71, "learning_rate": 4.7020408163265306e-05, "loss": 1.3494, "step": 269700 }, { "epoch": 5.71, "learning_rate": 4.7e-05, "loss": 1.3502, "step": 269800 }, { "epoch": 5.71, "learning_rate": 4.6979591836734696e-05, "loss": 1.3478, "step": 269900 }, { "epoch": 5.71, "learning_rate": 4.6959183673469394e-05, "loss": 1.3474, "step": 270000 }, { "epoch": 5.72, "learning_rate": 4.6938775510204086e-05, "loss": 1.3509, "step": 270100 }, { "epoch": 5.72, "learning_rate": 4.691836734693878e-05, "loss": 1.3512, "step": 270200 }, { "epoch": 5.72, "learning_rate": 4.6897959183673475e-05, "loss": 1.3464, "step": 270300 }, { "epoch": 5.72, "learning_rate": 4.687755102040817e-05, "loss": 1.3499, "step": 270400 }, { "epoch": 5.72, "learning_rate": 4.6857346938775515e-05, "loss": 1.3504, "step": 270500 }, { "epoch": 5.73, "learning_rate": 4.6836938775510206e-05, "loss": 1.3491, "step": 270600 }, { "epoch": 5.73, "learning_rate": 4.6816530612244905e-05, "loss": 1.3474, "step": 270700 }, { "epoch": 5.73, "learning_rate": 4.6796122448979596e-05, "loss": 1.3493, "step": 270800 }, { "epoch": 5.73, "learning_rate": 4.677571428571429e-05, "loss": 1.3453, "step": 270900 }, { "epoch": 5.74, "learning_rate": 4.6755306122448986e-05, "loss": 1.3475, "step": 271000 }, { "epoch": 5.74, "learning_rate": 4.673489795918368e-05, "loss": 1.3389, "step": 271100 }, { "epoch": 5.74, "learning_rate": 4.671448979591837e-05, "loss": 1.3562, "step": 271200 }, { "epoch": 5.74, "learning_rate": 4.669408163265306e-05, "loss": 1.347, "step": 271300 }, { "epoch": 5.74, "learning_rate": 4.667367346938776e-05, "loss": 1.3447, "step": 271400 }, { "epoch": 5.75, "learning_rate": 4.665326530612245e-05, "loss": 1.3423, "step": 271500 }, { "epoch": 5.75, "learning_rate": 4.663285714285714e-05, "loss": 1.3465, "step": 271600 }, { "epoch": 5.75, "learning_rate": 4.661244897959184e-05, "loss": 1.3532, "step": 271700 }, { "epoch": 5.75, "learning_rate": 4.659204081632653e-05, "loss": 1.3523, "step": 271800 }, { "epoch": 5.75, "learning_rate": 4.6571632653061224e-05, "loss": 1.3499, "step": 271900 }, { "epoch": 5.76, "learning_rate": 4.655122448979592e-05, "loss": 1.3379, "step": 272000 }, { "epoch": 5.76, "learning_rate": 4.6530816326530614e-05, "loss": 1.3507, "step": 272100 }, { "epoch": 5.76, "learning_rate": 4.6510408163265305e-05, "loss": 1.3504, "step": 272200 }, { "epoch": 5.76, "learning_rate": 4.649e-05, "loss": 1.3476, "step": 272300 }, { "epoch": 5.77, "learning_rate": 4.6469591836734695e-05, "loss": 1.3441, "step": 272400 }, { "epoch": 5.77, "learning_rate": 4.644918367346939e-05, "loss": 1.348, "step": 272500 }, { "epoch": 5.77, "learning_rate": 4.642877551020408e-05, "loss": 1.3441, "step": 272600 }, { "epoch": 5.77, "learning_rate": 4.640836734693878e-05, "loss": 1.3603, "step": 272700 }, { "epoch": 5.77, "learning_rate": 4.6387959183673475e-05, "loss": 1.3454, "step": 272800 }, { "epoch": 5.78, "learning_rate": 4.636755102040817e-05, "loss": 1.3501, "step": 272900 }, { "epoch": 5.78, "learning_rate": 4.634714285714286e-05, "loss": 1.3571, "step": 273000 }, { "epoch": 5.78, "learning_rate": 4.632673469387756e-05, "loss": 1.3456, "step": 273100 }, { "epoch": 5.78, "learning_rate": 4.630632653061225e-05, "loss": 1.3444, "step": 273200 }, { "epoch": 5.78, "learning_rate": 4.628591836734694e-05, "loss": 1.3509, "step": 273300 }, { "epoch": 5.79, "learning_rate": 4.626551020408164e-05, "loss": 1.353, "step": 273400 }, { "epoch": 5.79, "learning_rate": 4.624510204081633e-05, "loss": 1.3496, "step": 273500 }, { "epoch": 5.79, "learning_rate": 4.622469387755102e-05, "loss": 1.3433, "step": 273600 }, { "epoch": 5.79, "learning_rate": 4.620428571428572e-05, "loss": 1.3516, "step": 273700 }, { "epoch": 5.79, "learning_rate": 4.618387755102041e-05, "loss": 1.3459, "step": 273800 }, { "epoch": 5.8, "learning_rate": 4.61634693877551e-05, "loss": 1.3496, "step": 273900 }, { "epoch": 5.8, "learning_rate": 4.6143061224489795e-05, "loss": 1.3548, "step": 274000 }, { "epoch": 5.8, "learning_rate": 4.612265306122449e-05, "loss": 1.3448, "step": 274100 }, { "epoch": 5.8, "learning_rate": 4.6102244897959185e-05, "loss": 1.3405, "step": 274200 }, { "epoch": 5.81, "learning_rate": 4.6081836734693876e-05, "loss": 1.3427, "step": 274300 }, { "epoch": 5.81, "learning_rate": 4.6061428571428575e-05, "loss": 1.3477, "step": 274400 }, { "epoch": 5.81, "learning_rate": 4.6041020408163266e-05, "loss": 1.3471, "step": 274500 }, { "epoch": 5.81, "learning_rate": 4.6020816326530614e-05, "loss": 1.3434, "step": 274600 }, { "epoch": 5.81, "learning_rate": 4.6000408163265305e-05, "loss": 1.3432, "step": 274700 }, { "epoch": 5.82, "learning_rate": 4.5980000000000004e-05, "loss": 1.3433, "step": 274800 }, { "epoch": 5.82, "learning_rate": 4.5959591836734695e-05, "loss": 1.347, "step": 274900 }, { "epoch": 5.82, "learning_rate": 4.593918367346939e-05, "loss": 1.3484, "step": 275000 }, { "epoch": 5.82, "learning_rate": 4.591877551020408e-05, "loss": 1.3401, "step": 275100 }, { "epoch": 5.82, "learning_rate": 4.589836734693878e-05, "loss": 1.3389, "step": 275200 }, { "epoch": 5.83, "learning_rate": 4.587795918367347e-05, "loss": 1.3432, "step": 275300 }, { "epoch": 5.83, "learning_rate": 4.585755102040817e-05, "loss": 1.3445, "step": 275400 }, { "epoch": 5.83, "learning_rate": 4.5837142857142865e-05, "loss": 1.353, "step": 275500 }, { "epoch": 5.83, "learning_rate": 4.581673469387756e-05, "loss": 1.3429, "step": 275600 }, { "epoch": 5.83, "learning_rate": 4.579632653061225e-05, "loss": 1.3466, "step": 275700 }, { "epoch": 5.84, "learning_rate": 4.577591836734694e-05, "loss": 1.3392, "step": 275800 }, { "epoch": 5.84, "learning_rate": 4.575551020408164e-05, "loss": 1.3471, "step": 275900 }, { "epoch": 5.84, "learning_rate": 4.573510204081633e-05, "loss": 1.3481, "step": 276000 }, { "epoch": 5.84, "learning_rate": 4.571469387755102e-05, "loss": 1.3497, "step": 276100 }, { "epoch": 5.85, "learning_rate": 4.569428571428572e-05, "loss": 1.3433, "step": 276200 }, { "epoch": 5.85, "learning_rate": 4.567387755102041e-05, "loss": 1.3518, "step": 276300 }, { "epoch": 5.85, "learning_rate": 4.56534693877551e-05, "loss": 1.3487, "step": 276400 }, { "epoch": 5.85, "learning_rate": 4.56330612244898e-05, "loss": 1.3424, "step": 276500 }, { "epoch": 5.85, "learning_rate": 4.561265306122449e-05, "loss": 1.348, "step": 276600 }, { "epoch": 5.86, "learning_rate": 4.5592244897959184e-05, "loss": 1.3455, "step": 276700 }, { "epoch": 5.86, "learning_rate": 4.5571836734693876e-05, "loss": 1.35, "step": 276800 }, { "epoch": 5.86, "learning_rate": 4.5551428571428574e-05, "loss": 1.344, "step": 276900 }, { "epoch": 5.86, "learning_rate": 4.5531020408163266e-05, "loss": 1.3352, "step": 277000 }, { "epoch": 5.86, "learning_rate": 4.5510816326530613e-05, "loss": 1.3472, "step": 277100 }, { "epoch": 5.87, "learning_rate": 4.5490408163265305e-05, "loss": 1.3417, "step": 277200 }, { "epoch": 5.87, "learning_rate": 4.5470000000000003e-05, "loss": 1.343, "step": 277300 }, { "epoch": 5.87, "learning_rate": 4.5449591836734695e-05, "loss": 1.3394, "step": 277400 }, { "epoch": 5.87, "learning_rate": 4.5429183673469387e-05, "loss": 1.3352, "step": 277500 }, { "epoch": 5.88, "learning_rate": 4.5408775510204085e-05, "loss": 1.3409, "step": 277600 }, { "epoch": 5.88, "learning_rate": 4.5388367346938777e-05, "loss": 1.342, "step": 277700 }, { "epoch": 5.88, "learning_rate": 4.536795918367347e-05, "loss": 1.3487, "step": 277800 }, { "epoch": 5.88, "learning_rate": 4.5347551020408166e-05, "loss": 1.3347, "step": 277900 }, { "epoch": 5.88, "learning_rate": 4.532714285714286e-05, "loss": 1.3452, "step": 278000 }, { "epoch": 5.89, "learning_rate": 4.530673469387755e-05, "loss": 1.3467, "step": 278100 }, { "epoch": 5.89, "learning_rate": 4.528632653061225e-05, "loss": 1.3372, "step": 278200 }, { "epoch": 5.89, "learning_rate": 4.5265918367346946e-05, "loss": 1.345, "step": 278300 }, { "epoch": 5.89, "learning_rate": 4.524551020408164e-05, "loss": 1.3479, "step": 278400 }, { "epoch": 5.89, "learning_rate": 4.522510204081633e-05, "loss": 1.3459, "step": 278500 }, { "epoch": 5.9, "learning_rate": 4.520469387755102e-05, "loss": 1.3469, "step": 278600 }, { "epoch": 5.9, "learning_rate": 4.518428571428572e-05, "loss": 1.3439, "step": 278700 }, { "epoch": 5.9, "learning_rate": 4.516387755102041e-05, "loss": 1.3485, "step": 278800 }, { "epoch": 5.9, "learning_rate": 4.51434693877551e-05, "loss": 1.3465, "step": 278900 }, { "epoch": 5.9, "learning_rate": 4.51230612244898e-05, "loss": 1.3453, "step": 279000 }, { "epoch": 5.91, "learning_rate": 4.510265306122449e-05, "loss": 1.3403, "step": 279100 }, { "epoch": 5.91, "learning_rate": 4.5082244897959184e-05, "loss": 1.3401, "step": 279200 }, { "epoch": 5.91, "learning_rate": 4.506183673469388e-05, "loss": 1.342, "step": 279300 }, { "epoch": 5.91, "learning_rate": 4.5041428571428574e-05, "loss": 1.35, "step": 279400 }, { "epoch": 5.92, "learning_rate": 4.5021020408163266e-05, "loss": 1.3474, "step": 279500 }, { "epoch": 5.92, "learning_rate": 4.500061224489796e-05, "loss": 1.3487, "step": 279600 }, { "epoch": 5.92, "learning_rate": 4.4980204081632656e-05, "loss": 1.3438, "step": 279700 }, { "epoch": 5.92, "learning_rate": 4.495979591836735e-05, "loss": 1.3435, "step": 279800 }, { "epoch": 5.92, "learning_rate": 4.493938775510204e-05, "loss": 1.3462, "step": 279900 }, { "epoch": 5.93, "learning_rate": 4.491897959183674e-05, "loss": 1.3392, "step": 280000 }, { "epoch": 5.93, "learning_rate": 4.489857142857143e-05, "loss": 1.3455, "step": 280100 }, { "epoch": 5.93, "learning_rate": 4.487816326530612e-05, "loss": 1.3355, "step": 280200 }, { "epoch": 5.93, "learning_rate": 4.485775510204082e-05, "loss": 1.3454, "step": 280300 }, { "epoch": 5.93, "learning_rate": 4.483734693877551e-05, "loss": 1.3421, "step": 280400 }, { "epoch": 5.94, "learning_rate": 4.481693877551021e-05, "loss": 1.3418, "step": 280500 }, { "epoch": 5.94, "learning_rate": 4.47965306122449e-05, "loss": 1.3387, "step": 280600 }, { "epoch": 5.94, "learning_rate": 4.477632653061225e-05, "loss": 1.3361, "step": 280700 }, { "epoch": 5.94, "learning_rate": 4.475591836734694e-05, "loss": 1.3422, "step": 280800 }, { "epoch": 5.94, "learning_rate": 4.473551020408163e-05, "loss": 1.337, "step": 280900 }, { "epoch": 5.95, "learning_rate": 4.471510204081633e-05, "loss": 1.3355, "step": 281000 }, { "epoch": 5.95, "learning_rate": 4.469469387755103e-05, "loss": 1.3407, "step": 281100 }, { "epoch": 5.95, "learning_rate": 4.467428571428572e-05, "loss": 1.3399, "step": 281200 }, { "epoch": 5.95, "learning_rate": 4.465387755102041e-05, "loss": 1.3435, "step": 281300 }, { "epoch": 5.96, "learning_rate": 4.46334693877551e-05, "loss": 1.3507, "step": 281400 }, { "epoch": 5.96, "learning_rate": 4.46130612244898e-05, "loss": 1.344, "step": 281500 }, { "epoch": 5.96, "learning_rate": 4.459265306122449e-05, "loss": 1.3464, "step": 281600 }, { "epoch": 5.96, "learning_rate": 4.4572244897959184e-05, "loss": 1.345, "step": 281700 }, { "epoch": 5.96, "learning_rate": 4.455183673469388e-05, "loss": 1.3423, "step": 281800 }, { "epoch": 5.97, "learning_rate": 4.4531428571428574e-05, "loss": 1.3356, "step": 281900 }, { "epoch": 5.97, "learning_rate": 4.4511020408163265e-05, "loss": 1.3349, "step": 282000 }, { "epoch": 5.97, "learning_rate": 4.4490612244897964e-05, "loss": 1.3366, "step": 282100 }, { "epoch": 5.97, "learning_rate": 4.4470204081632655e-05, "loss": 1.3421, "step": 282200 }, { "epoch": 5.97, "learning_rate": 4.444979591836735e-05, "loss": 1.332, "step": 282300 }, { "epoch": 5.98, "learning_rate": 4.4429387755102045e-05, "loss": 1.3393, "step": 282400 }, { "epoch": 5.98, "learning_rate": 4.440897959183674e-05, "loss": 1.3377, "step": 282500 }, { "epoch": 5.98, "learning_rate": 4.438857142857143e-05, "loss": 1.3301, "step": 282600 }, { "epoch": 5.98, "learning_rate": 4.436816326530612e-05, "loss": 1.342, "step": 282700 }, { "epoch": 5.99, "learning_rate": 4.434775510204082e-05, "loss": 1.3413, "step": 282800 }, { "epoch": 5.99, "learning_rate": 4.432734693877551e-05, "loss": 1.3391, "step": 282900 }, { "epoch": 5.99, "learning_rate": 4.43069387755102e-05, "loss": 1.3439, "step": 283000 }, { "epoch": 5.99, "learning_rate": 4.42865306122449e-05, "loss": 1.3376, "step": 283100 }, { "epoch": 5.99, "learning_rate": 4.42661224489796e-05, "loss": 1.3525, "step": 283200 }, { "epoch": 6.0, "learning_rate": 4.424571428571429e-05, "loss": 1.3381, "step": 283300 }, { "epoch": 6.0, "learning_rate": 4.422530612244898e-05, "loss": 1.34, "step": 283400 }, { "epoch": 6.0, "learning_rate": 4.420489795918368e-05, "loss": 1.3415, "step": 283500 }, { "epoch": 6.0, "learning_rate": 4.418448979591837e-05, "loss": 1.3354, "step": 283600 }, { "epoch": 6.0, "learning_rate": 4.416408163265306e-05, "loss": 1.3298, "step": 283700 }, { "epoch": 6.01, "learning_rate": 4.414367346938776e-05, "loss": 1.339, "step": 283800 }, { "epoch": 6.01, "learning_rate": 4.412326530612245e-05, "loss": 1.3375, "step": 283900 }, { "epoch": 6.01, "learning_rate": 4.4102857142857145e-05, "loss": 1.3392, "step": 284000 }, { "epoch": 6.01, "learning_rate": 4.4082448979591836e-05, "loss": 1.3396, "step": 284100 }, { "epoch": 6.01, "learning_rate": 4.4062040816326535e-05, "loss": 1.3316, "step": 284200 }, { "epoch": 6.02, "learning_rate": 4.404183673469388e-05, "loss": 1.3385, "step": 284300 }, { "epoch": 6.02, "learning_rate": 4.4021428571428574e-05, "loss": 1.343, "step": 284400 }, { "epoch": 6.02, "learning_rate": 4.4001020408163265e-05, "loss": 1.3366, "step": 284500 }, { "epoch": 6.02, "learning_rate": 4.3980612244897964e-05, "loss": 1.3309, "step": 284600 }, { "epoch": 6.03, "learning_rate": 4.3960204081632655e-05, "loss": 1.3334, "step": 284700 }, { "epoch": 6.03, "learning_rate": 4.393979591836735e-05, "loss": 1.3319, "step": 284800 }, { "epoch": 6.03, "learning_rate": 4.3919387755102045e-05, "loss": 1.3326, "step": 284900 }, { "epoch": 6.03, "learning_rate": 4.389897959183674e-05, "loss": 1.3369, "step": 285000 }, { "epoch": 6.03, "learning_rate": 4.387857142857143e-05, "loss": 1.3434, "step": 285100 }, { "epoch": 6.04, "learning_rate": 4.385816326530613e-05, "loss": 1.3365, "step": 285200 }, { "epoch": 6.04, "learning_rate": 4.383775510204082e-05, "loss": 1.3283, "step": 285300 }, { "epoch": 6.04, "learning_rate": 4.381734693877551e-05, "loss": 1.3421, "step": 285400 }, { "epoch": 6.04, "learning_rate": 4.37969387755102e-05, "loss": 1.3341, "step": 285500 }, { "epoch": 6.04, "learning_rate": 4.37765306122449e-05, "loss": 1.3389, "step": 285600 }, { "epoch": 6.05, "learning_rate": 4.375612244897959e-05, "loss": 1.3407, "step": 285700 }, { "epoch": 6.05, "learning_rate": 4.373571428571428e-05, "loss": 1.3303, "step": 285800 }, { "epoch": 6.05, "learning_rate": 4.371530612244898e-05, "loss": 1.3381, "step": 285900 }, { "epoch": 6.05, "learning_rate": 4.369489795918368e-05, "loss": 1.3354, "step": 286000 }, { "epoch": 6.06, "learning_rate": 4.367448979591837e-05, "loss": 1.3276, "step": 286100 }, { "epoch": 6.06, "learning_rate": 4.365408163265306e-05, "loss": 1.3286, "step": 286200 }, { "epoch": 6.06, "learning_rate": 4.363367346938776e-05, "loss": 1.3329, "step": 286300 }, { "epoch": 6.06, "learning_rate": 4.361326530612245e-05, "loss": 1.3352, "step": 286400 }, { "epoch": 6.06, "learning_rate": 4.3592857142857144e-05, "loss": 1.342, "step": 286500 }, { "epoch": 6.07, "learning_rate": 4.357244897959184e-05, "loss": 1.3327, "step": 286600 }, { "epoch": 6.07, "learning_rate": 4.355224489795919e-05, "loss": 1.3269, "step": 286700 }, { "epoch": 6.07, "learning_rate": 4.353183673469388e-05, "loss": 1.333, "step": 286800 }, { "epoch": 6.07, "learning_rate": 4.3511428571428574e-05, "loss": 1.3328, "step": 286900 }, { "epoch": 6.07, "learning_rate": 4.349102040816327e-05, "loss": 1.3344, "step": 287000 }, { "epoch": 6.08, "learning_rate": 4.3470612244897963e-05, "loss": 1.3306, "step": 287100 }, { "epoch": 6.08, "learning_rate": 4.3450204081632655e-05, "loss": 1.3337, "step": 287200 }, { "epoch": 6.08, "learning_rate": 4.342979591836735e-05, "loss": 1.3351, "step": 287300 }, { "epoch": 6.08, "learning_rate": 4.3409387755102045e-05, "loss": 1.3411, "step": 287400 }, { "epoch": 6.08, "learning_rate": 4.3388979591836737e-05, "loss": 1.3384, "step": 287500 }, { "epoch": 6.09, "learning_rate": 4.336857142857143e-05, "loss": 1.342, "step": 287600 }, { "epoch": 6.09, "learning_rate": 4.3348163265306127e-05, "loss": 1.3367, "step": 287700 }, { "epoch": 6.09, "learning_rate": 4.332775510204082e-05, "loss": 1.3284, "step": 287800 }, { "epoch": 6.09, "learning_rate": 4.330734693877551e-05, "loss": 1.341, "step": 287900 }, { "epoch": 6.1, "learning_rate": 4.328693877551021e-05, "loss": 1.3353, "step": 288000 }, { "epoch": 6.1, "learning_rate": 4.32665306122449e-05, "loss": 1.3286, "step": 288100 }, { "epoch": 6.1, "learning_rate": 4.324612244897959e-05, "loss": 1.3329, "step": 288200 }, { "epoch": 6.1, "learning_rate": 4.322571428571428e-05, "loss": 1.3288, "step": 288300 }, { "epoch": 6.1, "learning_rate": 4.320530612244898e-05, "loss": 1.3316, "step": 288400 }, { "epoch": 6.11, "learning_rate": 4.318489795918367e-05, "loss": 1.3349, "step": 288500 }, { "epoch": 6.11, "learning_rate": 4.3164489795918364e-05, "loss": 1.3362, "step": 288600 }, { "epoch": 6.11, "learning_rate": 4.314408163265306e-05, "loss": 1.3388, "step": 288700 }, { "epoch": 6.11, "learning_rate": 4.312367346938776e-05, "loss": 1.3298, "step": 288800 }, { "epoch": 6.11, "learning_rate": 4.310326530612245e-05, "loss": 1.3323, "step": 288900 }, { "epoch": 6.12, "learning_rate": 4.3082857142857144e-05, "loss": 1.3256, "step": 289000 }, { "epoch": 6.12, "learning_rate": 4.306244897959184e-05, "loss": 1.3281, "step": 289100 }, { "epoch": 6.12, "learning_rate": 4.3042040816326534e-05, "loss": 1.3363, "step": 289200 }, { "epoch": 6.12, "learning_rate": 4.3021632653061226e-05, "loss": 1.3302, "step": 289300 }, { "epoch": 6.12, "learning_rate": 4.3001224489795924e-05, "loss": 1.3341, "step": 289400 }, { "epoch": 6.13, "learning_rate": 4.2980816326530616e-05, "loss": 1.329, "step": 289500 }, { "epoch": 6.13, "learning_rate": 4.296040816326531e-05, "loss": 1.3314, "step": 289600 }, { "epoch": 6.13, "learning_rate": 4.2940000000000006e-05, "loss": 1.3385, "step": 289700 }, { "epoch": 6.13, "learning_rate": 4.29195918367347e-05, "loss": 1.3387, "step": 289800 }, { "epoch": 6.14, "learning_rate": 4.289918367346939e-05, "loss": 1.3306, "step": 289900 }, { "epoch": 6.14, "learning_rate": 4.287877551020408e-05, "loss": 1.3336, "step": 290000 }, { "epoch": 6.14, "learning_rate": 4.285836734693878e-05, "loss": 1.336, "step": 290100 }, { "epoch": 6.14, "learning_rate": 4.283795918367347e-05, "loss": 1.3374, "step": 290200 }, { "epoch": 6.14, "learning_rate": 4.281755102040816e-05, "loss": 1.3331, "step": 290300 }, { "epoch": 6.15, "learning_rate": 4.279714285714286e-05, "loss": 1.3254, "step": 290400 }, { "epoch": 6.15, "learning_rate": 4.277673469387755e-05, "loss": 1.3313, "step": 290500 }, { "epoch": 6.15, "learning_rate": 4.2756326530612243e-05, "loss": 1.3255, "step": 290600 }, { "epoch": 6.15, "learning_rate": 4.273591836734694e-05, "loss": 1.3337, "step": 290700 }, { "epoch": 6.15, "learning_rate": 4.271571428571429e-05, "loss": 1.3215, "step": 290800 }, { "epoch": 6.16, "learning_rate": 4.269530612244898e-05, "loss": 1.3266, "step": 290900 }, { "epoch": 6.16, "learning_rate": 4.267489795918367e-05, "loss": 1.3328, "step": 291000 }, { "epoch": 6.16, "learning_rate": 4.265448979591837e-05, "loss": 1.3328, "step": 291100 }, { "epoch": 6.16, "learning_rate": 4.263408163265306e-05, "loss": 1.3267, "step": 291200 }, { "epoch": 6.17, "learning_rate": 4.2613673469387754e-05, "loss": 1.3329, "step": 291300 }, { "epoch": 6.17, "learning_rate": 4.2593265306122446e-05, "loss": 1.3281, "step": 291400 }, { "epoch": 6.17, "learning_rate": 4.257285714285715e-05, "loss": 1.3345, "step": 291500 }, { "epoch": 6.17, "learning_rate": 4.255244897959184e-05, "loss": 1.3294, "step": 291600 }, { "epoch": 6.17, "learning_rate": 4.253224489795918e-05, "loss": 1.3314, "step": 291700 }, { "epoch": 6.18, "learning_rate": 4.2511836734693875e-05, "loss": 1.3254, "step": 291800 }, { "epoch": 6.18, "learning_rate": 4.249142857142857e-05, "loss": 1.3333, "step": 291900 }, { "epoch": 6.18, "learning_rate": 4.247102040816327e-05, "loss": 1.3368, "step": 292000 }, { "epoch": 6.18, "learning_rate": 4.245061224489796e-05, "loss": 1.3384, "step": 292100 }, { "epoch": 6.18, "learning_rate": 4.2430204081632655e-05, "loss": 1.3312, "step": 292200 }, { "epoch": 6.19, "learning_rate": 4.240979591836735e-05, "loss": 1.336, "step": 292300 }, { "epoch": 6.19, "learning_rate": 4.2389387755102045e-05, "loss": 1.3343, "step": 292400 }, { "epoch": 6.19, "learning_rate": 4.2368979591836736e-05, "loss": 1.3281, "step": 292500 }, { "epoch": 6.19, "learning_rate": 4.2348571428571435e-05, "loss": 1.3312, "step": 292600 }, { "epoch": 6.19, "learning_rate": 4.2328163265306126e-05, "loss": 1.3395, "step": 292700 }, { "epoch": 6.2, "learning_rate": 4.230775510204082e-05, "loss": 1.3291, "step": 292800 }, { "epoch": 6.2, "learning_rate": 4.2287346938775516e-05, "loss": 1.3359, "step": 292900 }, { "epoch": 6.2, "learning_rate": 4.226693877551021e-05, "loss": 1.3228, "step": 293000 }, { "epoch": 6.2, "learning_rate": 4.22465306122449e-05, "loss": 1.3324, "step": 293100 }, { "epoch": 6.21, "learning_rate": 4.222612244897959e-05, "loss": 1.3299, "step": 293200 }, { "epoch": 6.21, "learning_rate": 4.220571428571429e-05, "loss": 1.3326, "step": 293300 }, { "epoch": 6.21, "learning_rate": 4.218530612244898e-05, "loss": 1.3344, "step": 293400 }, { "epoch": 6.21, "learning_rate": 4.216489795918367e-05, "loss": 1.324, "step": 293500 }, { "epoch": 6.21, "learning_rate": 4.214448979591837e-05, "loss": 1.324, "step": 293600 }, { "epoch": 6.22, "learning_rate": 4.212408163265306e-05, "loss": 1.3296, "step": 293700 }, { "epoch": 6.22, "learning_rate": 4.2103673469387754e-05, "loss": 1.3252, "step": 293800 }, { "epoch": 6.22, "learning_rate": 4.208326530612245e-05, "loss": 1.3327, "step": 293900 }, { "epoch": 6.22, "learning_rate": 4.2062857142857144e-05, "loss": 1.3246, "step": 294000 }, { "epoch": 6.22, "learning_rate": 4.204265306122449e-05, "loss": 1.3382, "step": 294100 }, { "epoch": 6.23, "learning_rate": 4.202224489795918e-05, "loss": 1.3306, "step": 294200 }, { "epoch": 6.23, "learning_rate": 4.2001836734693875e-05, "loss": 1.3293, "step": 294300 }, { "epoch": 6.23, "learning_rate": 4.198163265306123e-05, "loss": 1.3291, "step": 294400 }, { "epoch": 6.23, "learning_rate": 4.196122448979592e-05, "loss": 1.3355, "step": 294500 }, { "epoch": 6.23, "learning_rate": 4.194081632653061e-05, "loss": 1.3385, "step": 294600 }, { "epoch": 6.24, "learning_rate": 4.1920408163265304e-05, "loss": 1.3289, "step": 294700 }, { "epoch": 6.24, "learning_rate": 4.19e-05, "loss": 1.3301, "step": 294800 }, { "epoch": 6.24, "learning_rate": 4.1879591836734694e-05, "loss": 1.3195, "step": 294900 }, { "epoch": 6.24, "learning_rate": 4.1859183673469385e-05, "loss": 1.3366, "step": 295000 }, { "epoch": 6.25, "learning_rate": 4.1838775510204084e-05, "loss": 1.3306, "step": 295100 }, { "epoch": 6.25, "learning_rate": 4.1818367346938775e-05, "loss": 1.3331, "step": 295200 }, { "epoch": 6.25, "learning_rate": 4.1797959183673473e-05, "loss": 1.3288, "step": 295300 }, { "epoch": 6.25, "learning_rate": 4.1777551020408165e-05, "loss": 1.3346, "step": 295400 }, { "epoch": 6.25, "learning_rate": 4.1757142857142863e-05, "loss": 1.3198, "step": 295500 }, { "epoch": 6.26, "learning_rate": 4.1736734693877555e-05, "loss": 1.324, "step": 295600 }, { "epoch": 6.26, "learning_rate": 4.1716326530612247e-05, "loss": 1.328, "step": 295700 }, { "epoch": 6.26, "learning_rate": 4.1695918367346945e-05, "loss": 1.3288, "step": 295800 }, { "epoch": 6.26, "learning_rate": 4.1675510204081637e-05, "loss": 1.3293, "step": 295900 }, { "epoch": 6.26, "learning_rate": 4.165510204081633e-05, "loss": 1.3365, "step": 296000 }, { "epoch": 6.27, "learning_rate": 4.1634693877551026e-05, "loss": 1.3325, "step": 296100 }, { "epoch": 6.27, "learning_rate": 4.161428571428572e-05, "loss": 1.3273, "step": 296200 }, { "epoch": 6.27, "learning_rate": 4.159387755102041e-05, "loss": 1.3268, "step": 296300 }, { "epoch": 6.27, "learning_rate": 4.15734693877551e-05, "loss": 1.3318, "step": 296400 }, { "epoch": 6.28, "learning_rate": 4.15530612244898e-05, "loss": 1.3242, "step": 296500 }, { "epoch": 6.28, "learning_rate": 4.153265306122449e-05, "loss": 1.3341, "step": 296600 }, { "epoch": 6.28, "learning_rate": 4.151224489795918e-05, "loss": 1.3291, "step": 296700 }, { "epoch": 6.28, "learning_rate": 4.149204081632653e-05, "loss": 1.3285, "step": 296800 }, { "epoch": 6.28, "learning_rate": 4.147163265306123e-05, "loss": 1.3295, "step": 296900 }, { "epoch": 6.29, "learning_rate": 4.145122448979592e-05, "loss": 1.3319, "step": 297000 }, { "epoch": 6.29, "learning_rate": 4.143081632653061e-05, "loss": 1.332, "step": 297100 }, { "epoch": 6.29, "learning_rate": 4.141040816326531e-05, "loss": 1.3232, "step": 297200 }, { "epoch": 6.29, "learning_rate": 4.139e-05, "loss": 1.326, "step": 297300 }, { "epoch": 6.29, "learning_rate": 4.1369591836734693e-05, "loss": 1.3215, "step": 297400 }, { "epoch": 6.3, "learning_rate": 4.1349183673469385e-05, "loss": 1.3292, "step": 297500 }, { "epoch": 6.3, "learning_rate": 4.132877551020408e-05, "loss": 1.3246, "step": 297600 }, { "epoch": 6.3, "learning_rate": 4.1308367346938775e-05, "loss": 1.3297, "step": 297700 }, { "epoch": 6.3, "learning_rate": 4.1287959183673467e-05, "loss": 1.3299, "step": 297800 }, { "epoch": 6.3, "learning_rate": 4.1267551020408165e-05, "loss": 1.3294, "step": 297900 }, { "epoch": 6.31, "learning_rate": 4.1247142857142856e-05, "loss": 1.324, "step": 298000 }, { "epoch": 6.31, "learning_rate": 4.1226734693877555e-05, "loss": 1.3301, "step": 298100 }, { "epoch": 6.31, "learning_rate": 4.1206326530612246e-05, "loss": 1.335, "step": 298200 }, { "epoch": 6.31, "learning_rate": 4.1185918367346945e-05, "loss": 1.3306, "step": 298300 }, { "epoch": 6.32, "learning_rate": 4.1165510204081636e-05, "loss": 1.3263, "step": 298400 }, { "epoch": 6.32, "learning_rate": 4.114510204081633e-05, "loss": 1.3282, "step": 298500 }, { "epoch": 6.32, "learning_rate": 4.1124693877551026e-05, "loss": 1.3322, "step": 298600 }, { "epoch": 6.32, "learning_rate": 4.110428571428572e-05, "loss": 1.3278, "step": 298700 }, { "epoch": 6.32, "learning_rate": 4.108387755102041e-05, "loss": 1.3255, "step": 298800 }, { "epoch": 6.33, "learning_rate": 4.106346938775511e-05, "loss": 1.3229, "step": 298900 }, { "epoch": 6.33, "learning_rate": 4.10430612244898e-05, "loss": 1.3337, "step": 299000 }, { "epoch": 6.33, "learning_rate": 4.102265306122449e-05, "loss": 1.3301, "step": 299100 }, { "epoch": 6.33, "learning_rate": 4.100224489795918e-05, "loss": 1.3248, "step": 299200 }, { "epoch": 6.33, "learning_rate": 4.098183673469388e-05, "loss": 1.3266, "step": 299300 }, { "epoch": 6.34, "learning_rate": 4.096142857142857e-05, "loss": 1.3258, "step": 299400 }, { "epoch": 6.34, "learning_rate": 4.0941020408163264e-05, "loss": 1.3233, "step": 299500 }, { "epoch": 6.34, "learning_rate": 4.092061224489796e-05, "loss": 1.3349, "step": 299600 }, { "epoch": 6.34, "learning_rate": 4.0900204081632654e-05, "loss": 1.3269, "step": 299700 }, { "epoch": 6.34, "learning_rate": 4.0879795918367346e-05, "loss": 1.3183, "step": 299800 }, { "epoch": 6.35, "learning_rate": 4.0859387755102044e-05, "loss": 1.3233, "step": 299900 }, { "epoch": 6.35, "learning_rate": 4.0838979591836736e-05, "loss": 1.3278, "step": 300000 }, { "epoch": 6.35, "learning_rate": 4.081857142857143e-05, "loss": 1.3298, "step": 300100 }, { "epoch": 6.35, "learning_rate": 4.079816326530612e-05, "loss": 1.3276, "step": 300200 }, { "epoch": 6.36, "learning_rate": 4.0777755102040824e-05, "loss": 1.3239, "step": 300300 }, { "epoch": 6.36, "learning_rate": 4.0757346938775515e-05, "loss": 1.3323, "step": 300400 }, { "epoch": 6.36, "learning_rate": 4.073693877551021e-05, "loss": 1.329, "step": 300500 }, { "epoch": 6.36, "learning_rate": 4.07165306122449e-05, "loss": 1.3304, "step": 300600 }, { "epoch": 6.36, "learning_rate": 4.06961224489796e-05, "loss": 1.3177, "step": 300700 }, { "epoch": 6.37, "learning_rate": 4.067571428571429e-05, "loss": 1.3218, "step": 300800 }, { "epoch": 6.37, "learning_rate": 4.065530612244898e-05, "loss": 1.3271, "step": 300900 }, { "epoch": 6.37, "learning_rate": 4.063489795918368e-05, "loss": 1.3224, "step": 301000 }, { "epoch": 6.37, "learning_rate": 4.0614693877551026e-05, "loss": 1.3159, "step": 301100 }, { "epoch": 6.37, "learning_rate": 4.059428571428572e-05, "loss": 1.3284, "step": 301200 }, { "epoch": 6.38, "learning_rate": 4.057387755102041e-05, "loss": 1.3276, "step": 301300 }, { "epoch": 6.38, "learning_rate": 4.055346938775511e-05, "loss": 1.3288, "step": 301400 }, { "epoch": 6.38, "learning_rate": 4.05330612244898e-05, "loss": 1.3234, "step": 301500 }, { "epoch": 6.38, "learning_rate": 4.051265306122449e-05, "loss": 1.3255, "step": 301600 }, { "epoch": 6.39, "learning_rate": 4.049224489795919e-05, "loss": 1.3282, "step": 301700 }, { "epoch": 6.39, "learning_rate": 4.047183673469388e-05, "loss": 1.3234, "step": 301800 }, { "epoch": 6.39, "learning_rate": 4.045142857142857e-05, "loss": 1.3254, "step": 301900 }, { "epoch": 6.39, "learning_rate": 4.0431020408163264e-05, "loss": 1.3226, "step": 302000 }, { "epoch": 6.39, "learning_rate": 4.041061224489796e-05, "loss": 1.3284, "step": 302100 }, { "epoch": 6.4, "learning_rate": 4.0390204081632654e-05, "loss": 1.3249, "step": 302200 }, { "epoch": 6.4, "learning_rate": 4.0369795918367345e-05, "loss": 1.3257, "step": 302300 }, { "epoch": 6.4, "learning_rate": 4.0349387755102044e-05, "loss": 1.3319, "step": 302400 }, { "epoch": 6.4, "learning_rate": 4.0328979591836735e-05, "loss": 1.3203, "step": 302500 }, { "epoch": 6.4, "learning_rate": 4.030857142857143e-05, "loss": 1.3167, "step": 302600 }, { "epoch": 6.41, "learning_rate": 4.0288163265306125e-05, "loss": 1.3221, "step": 302700 }, { "epoch": 6.41, "learning_rate": 4.026775510204082e-05, "loss": 1.3205, "step": 302800 }, { "epoch": 6.41, "learning_rate": 4.024734693877551e-05, "loss": 1.328, "step": 302900 }, { "epoch": 6.41, "learning_rate": 4.02269387755102e-05, "loss": 1.3292, "step": 303000 }, { "epoch": 6.41, "learning_rate": 4.0206530612244905e-05, "loss": 1.3229, "step": 303100 }, { "epoch": 6.42, "learning_rate": 4.0186326530612246e-05, "loss": 1.3306, "step": 303200 }, { "epoch": 6.42, "learning_rate": 4.016591836734694e-05, "loss": 1.326, "step": 303300 }, { "epoch": 6.42, "learning_rate": 4.014551020408163e-05, "loss": 1.3293, "step": 303400 }, { "epoch": 6.42, "learning_rate": 4.012510204081633e-05, "loss": 1.3209, "step": 303500 }, { "epoch": 6.43, "learning_rate": 4.0104693877551026e-05, "loss": 1.3244, "step": 303600 }, { "epoch": 6.43, "learning_rate": 4.008428571428572e-05, "loss": 1.3213, "step": 303700 }, { "epoch": 6.43, "learning_rate": 4.006387755102041e-05, "loss": 1.3211, "step": 303800 }, { "epoch": 6.43, "learning_rate": 4.004346938775511e-05, "loss": 1.3206, "step": 303900 }, { "epoch": 6.43, "learning_rate": 4.00230612244898e-05, "loss": 1.3316, "step": 304000 }, { "epoch": 6.44, "learning_rate": 4.000265306122449e-05, "loss": 1.3205, "step": 304100 }, { "epoch": 6.44, "learning_rate": 3.998224489795919e-05, "loss": 1.3256, "step": 304200 }, { "epoch": 6.44, "learning_rate": 3.996183673469388e-05, "loss": 1.3185, "step": 304300 }, { "epoch": 6.44, "learning_rate": 3.994142857142857e-05, "loss": 1.3267, "step": 304400 }, { "epoch": 6.44, "learning_rate": 3.992102040816327e-05, "loss": 1.3304, "step": 304500 }, { "epoch": 6.45, "learning_rate": 3.990061224489796e-05, "loss": 1.3165, "step": 304600 }, { "epoch": 6.45, "learning_rate": 3.9880204081632654e-05, "loss": 1.326, "step": 304700 }, { "epoch": 6.45, "learning_rate": 3.9859795918367345e-05, "loss": 1.3202, "step": 304800 }, { "epoch": 6.45, "learning_rate": 3.9839387755102044e-05, "loss": 1.3171, "step": 304900 }, { "epoch": 6.46, "learning_rate": 3.9818979591836735e-05, "loss": 1.323, "step": 305000 }, { "epoch": 6.46, "learning_rate": 3.979857142857143e-05, "loss": 1.321, "step": 305100 }, { "epoch": 6.46, "learning_rate": 3.9778163265306125e-05, "loss": 1.3249, "step": 305200 }, { "epoch": 6.46, "learning_rate": 3.975775510204082e-05, "loss": 1.3184, "step": 305300 }, { "epoch": 6.46, "learning_rate": 3.973734693877551e-05, "loss": 1.3143, "step": 305400 }, { "epoch": 6.47, "learning_rate": 3.971693877551021e-05, "loss": 1.3207, "step": 305500 }, { "epoch": 6.47, "learning_rate": 3.96965306122449e-05, "loss": 1.3271, "step": 305600 }, { "epoch": 6.47, "learning_rate": 3.967612244897959e-05, "loss": 1.3215, "step": 305700 }, { "epoch": 6.47, "learning_rate": 3.965571428571429e-05, "loss": 1.3137, "step": 305800 }, { "epoch": 6.47, "learning_rate": 3.9635306122448987e-05, "loss": 1.3218, "step": 305900 }, { "epoch": 6.48, "learning_rate": 3.961489795918368e-05, "loss": 1.3197, "step": 306000 }, { "epoch": 6.48, "learning_rate": 3.959448979591837e-05, "loss": 1.3151, "step": 306100 }, { "epoch": 6.48, "learning_rate": 3.957428571428571e-05, "loss": 1.3235, "step": 306200 }, { "epoch": 6.48, "learning_rate": 3.955387755102041e-05, "loss": 1.3356, "step": 306300 }, { "epoch": 6.48, "learning_rate": 3.953346938775511e-05, "loss": 1.3146, "step": 306400 }, { "epoch": 6.49, "learning_rate": 3.95130612244898e-05, "loss": 1.3164, "step": 306500 }, { "epoch": 6.49, "learning_rate": 3.94926530612245e-05, "loss": 1.3195, "step": 306600 }, { "epoch": 6.49, "learning_rate": 3.947224489795919e-05, "loss": 1.3221, "step": 306700 }, { "epoch": 6.49, "learning_rate": 3.945183673469388e-05, "loss": 1.3194, "step": 306800 }, { "epoch": 6.5, "learning_rate": 3.943142857142857e-05, "loss": 1.3209, "step": 306900 }, { "epoch": 6.5, "learning_rate": 3.941102040816327e-05, "loss": 1.3189, "step": 307000 }, { "epoch": 6.5, "learning_rate": 3.939061224489796e-05, "loss": 1.3166, "step": 307100 }, { "epoch": 6.5, "learning_rate": 3.9370204081632653e-05, "loss": 1.3185, "step": 307200 }, { "epoch": 6.5, "learning_rate": 3.934979591836735e-05, "loss": 1.3212, "step": 307300 }, { "epoch": 6.51, "learning_rate": 3.932938775510204e-05, "loss": 1.3183, "step": 307400 }, { "epoch": 6.51, "learning_rate": 3.9308979591836735e-05, "loss": 1.3144, "step": 307500 }, { "epoch": 6.51, "learning_rate": 3.928857142857143e-05, "loss": 1.3148, "step": 307600 }, { "epoch": 6.51, "learning_rate": 3.9268163265306125e-05, "loss": 1.3184, "step": 307700 }, { "epoch": 6.51, "learning_rate": 3.9247755102040816e-05, "loss": 1.3162, "step": 307800 }, { "epoch": 6.52, "learning_rate": 3.922734693877551e-05, "loss": 1.3134, "step": 307900 }, { "epoch": 6.52, "learning_rate": 3.9206938775510206e-05, "loss": 1.3265, "step": 308000 }, { "epoch": 6.52, "learning_rate": 3.91865306122449e-05, "loss": 1.3164, "step": 308100 }, { "epoch": 6.52, "learning_rate": 3.916612244897959e-05, "loss": 1.3278, "step": 308200 }, { "epoch": 6.52, "learning_rate": 3.914571428571429e-05, "loss": 1.3117, "step": 308300 }, { "epoch": 6.53, "learning_rate": 3.912530612244898e-05, "loss": 1.3206, "step": 308400 }, { "epoch": 6.53, "learning_rate": 3.910489795918367e-05, "loss": 1.312, "step": 308500 }, { "epoch": 6.53, "learning_rate": 3.908448979591837e-05, "loss": 1.3182, "step": 308600 }, { "epoch": 6.53, "learning_rate": 3.906408163265307e-05, "loss": 1.3242, "step": 308700 }, { "epoch": 6.54, "learning_rate": 3.904367346938776e-05, "loss": 1.3133, "step": 308800 }, { "epoch": 6.54, "learning_rate": 3.902326530612245e-05, "loss": 1.3246, "step": 308900 }, { "epoch": 6.54, "learning_rate": 3.900285714285715e-05, "loss": 1.319, "step": 309000 }, { "epoch": 6.54, "learning_rate": 3.898244897959184e-05, "loss": 1.3172, "step": 309100 }, { "epoch": 6.54, "learning_rate": 3.896204081632653e-05, "loss": 1.3197, "step": 309200 }, { "epoch": 6.55, "learning_rate": 3.8941632653061224e-05, "loss": 1.3124, "step": 309300 }, { "epoch": 6.55, "learning_rate": 3.892122448979592e-05, "loss": 1.3159, "step": 309400 }, { "epoch": 6.55, "learning_rate": 3.8900816326530614e-05, "loss": 1.3187, "step": 309500 }, { "epoch": 6.55, "learning_rate": 3.8880408163265306e-05, "loss": 1.3232, "step": 309600 }, { "epoch": 6.55, "learning_rate": 3.8860000000000004e-05, "loss": 1.3144, "step": 309700 }, { "epoch": 6.56, "learning_rate": 3.8839591836734696e-05, "loss": 1.3214, "step": 309800 }, { "epoch": 6.56, "learning_rate": 3.881918367346939e-05, "loss": 1.3186, "step": 309900 }, { "epoch": 6.56, "learning_rate": 3.8798775510204086e-05, "loss": 1.3225, "step": 310000 }, { "epoch": 6.56, "learning_rate": 3.877836734693878e-05, "loss": 1.3161, "step": 310100 }, { "epoch": 6.57, "learning_rate": 3.875795918367347e-05, "loss": 1.3167, "step": 310200 }, { "epoch": 6.57, "learning_rate": 3.873755102040817e-05, "loss": 1.3172, "step": 310300 }, { "epoch": 6.57, "learning_rate": 3.871714285714286e-05, "loss": 1.3186, "step": 310400 }, { "epoch": 6.57, "learning_rate": 3.8696938775510206e-05, "loss": 1.3218, "step": 310500 }, { "epoch": 6.57, "learning_rate": 3.86765306122449e-05, "loss": 1.3192, "step": 310600 }, { "epoch": 6.58, "learning_rate": 3.865612244897959e-05, "loss": 1.31, "step": 310700 }, { "epoch": 6.58, "learning_rate": 3.863571428571429e-05, "loss": 1.3186, "step": 310800 }, { "epoch": 6.58, "learning_rate": 3.861530612244898e-05, "loss": 1.3096, "step": 310900 }, { "epoch": 6.58, "learning_rate": 3.859489795918367e-05, "loss": 1.3186, "step": 311000 }, { "epoch": 6.58, "learning_rate": 3.857448979591837e-05, "loss": 1.3208, "step": 311100 }, { "epoch": 6.59, "learning_rate": 3.855408163265306e-05, "loss": 1.3205, "step": 311200 }, { "epoch": 6.59, "learning_rate": 3.853367346938775e-05, "loss": 1.3187, "step": 311300 }, { "epoch": 6.59, "learning_rate": 3.85134693877551e-05, "loss": 1.3121, "step": 311400 }, { "epoch": 6.59, "learning_rate": 3.84930612244898e-05, "loss": 1.3198, "step": 311500 }, { "epoch": 6.59, "learning_rate": 3.847265306122449e-05, "loss": 1.319, "step": 311600 }, { "epoch": 6.6, "learning_rate": 3.845224489795918e-05, "loss": 1.3173, "step": 311700 }, { "epoch": 6.6, "learning_rate": 3.843183673469388e-05, "loss": 1.3244, "step": 311800 }, { "epoch": 6.6, "learning_rate": 3.841142857142858e-05, "loss": 1.315, "step": 311900 }, { "epoch": 6.6, "learning_rate": 3.839102040816327e-05, "loss": 1.3205, "step": 312000 }, { "epoch": 6.61, "learning_rate": 3.837061224489796e-05, "loss": 1.3151, "step": 312100 }, { "epoch": 6.61, "learning_rate": 3.835020408163266e-05, "loss": 1.3139, "step": 312200 }, { "epoch": 6.61, "learning_rate": 3.832979591836735e-05, "loss": 1.3142, "step": 312300 }, { "epoch": 6.61, "learning_rate": 3.830938775510204e-05, "loss": 1.3106, "step": 312400 }, { "epoch": 6.61, "learning_rate": 3.8288979591836735e-05, "loss": 1.3155, "step": 312500 }, { "epoch": 6.62, "learning_rate": 3.826857142857143e-05, "loss": 1.3141, "step": 312600 }, { "epoch": 6.62, "learning_rate": 3.8248163265306125e-05, "loss": 1.3135, "step": 312700 }, { "epoch": 6.62, "learning_rate": 3.8227755102040816e-05, "loss": 1.3199, "step": 312800 }, { "epoch": 6.62, "learning_rate": 3.8207346938775514e-05, "loss": 1.3079, "step": 312900 }, { "epoch": 6.62, "learning_rate": 3.8186938775510206e-05, "loss": 1.3156, "step": 313000 }, { "epoch": 6.63, "learning_rate": 3.81665306122449e-05, "loss": 1.3202, "step": 313100 }, { "epoch": 6.63, "learning_rate": 3.8146122448979596e-05, "loss": 1.3177, "step": 313200 }, { "epoch": 6.63, "learning_rate": 3.812571428571429e-05, "loss": 1.3097, "step": 313300 }, { "epoch": 6.63, "learning_rate": 3.810530612244898e-05, "loss": 1.3173, "step": 313400 }, { "epoch": 6.63, "learning_rate": 3.808489795918367e-05, "loss": 1.3215, "step": 313500 }, { "epoch": 6.64, "learning_rate": 3.806448979591837e-05, "loss": 1.3134, "step": 313600 }, { "epoch": 6.64, "learning_rate": 3.804408163265306e-05, "loss": 1.3152, "step": 313700 }, { "epoch": 6.64, "learning_rate": 3.802367346938775e-05, "loss": 1.3109, "step": 313800 }, { "epoch": 6.64, "learning_rate": 3.800326530612245e-05, "loss": 1.3119, "step": 313900 }, { "epoch": 6.65, "learning_rate": 3.798285714285714e-05, "loss": 1.3127, "step": 314000 }, { "epoch": 6.65, "learning_rate": 3.7962448979591834e-05, "loss": 1.3182, "step": 314100 }, { "epoch": 6.65, "learning_rate": 3.794204081632653e-05, "loss": 1.3185, "step": 314200 }, { "epoch": 6.65, "learning_rate": 3.792163265306123e-05, "loss": 1.3087, "step": 314300 }, { "epoch": 6.65, "learning_rate": 3.790122448979592e-05, "loss": 1.3229, "step": 314400 }, { "epoch": 6.66, "learning_rate": 3.7880816326530614e-05, "loss": 1.3167, "step": 314500 }, { "epoch": 6.66, "learning_rate": 3.786040816326531e-05, "loss": 1.3142, "step": 314600 }, { "epoch": 6.66, "learning_rate": 3.7840000000000004e-05, "loss": 1.3124, "step": 314700 }, { "epoch": 6.66, "learning_rate": 3.781979591836735e-05, "loss": 1.3226, "step": 314800 }, { "epoch": 6.66, "learning_rate": 3.779938775510204e-05, "loss": 1.3105, "step": 314900 }, { "epoch": 6.67, "learning_rate": 3.777897959183674e-05, "loss": 1.3145, "step": 315000 }, { "epoch": 6.67, "learning_rate": 3.775857142857143e-05, "loss": 1.3144, "step": 315100 }, { "epoch": 6.67, "learning_rate": 3.773836734693878e-05, "loss": 1.3147, "step": 315200 }, { "epoch": 6.67, "learning_rate": 3.771795918367347e-05, "loss": 1.3084, "step": 315300 }, { "epoch": 6.68, "learning_rate": 3.769755102040817e-05, "loss": 1.3124, "step": 315400 }, { "epoch": 6.68, "learning_rate": 3.767714285714286e-05, "loss": 1.3171, "step": 315500 }, { "epoch": 6.68, "learning_rate": 3.7656734693877553e-05, "loss": 1.3134, "step": 315600 }, { "epoch": 6.68, "learning_rate": 3.7636326530612245e-05, "loss": 1.3104, "step": 315700 }, { "epoch": 6.68, "learning_rate": 3.761591836734694e-05, "loss": 1.3171, "step": 315800 }, { "epoch": 6.69, "learning_rate": 3.7595510204081635e-05, "loss": 1.3114, "step": 315900 }, { "epoch": 6.69, "learning_rate": 3.7575102040816327e-05, "loss": 1.3132, "step": 316000 }, { "epoch": 6.69, "learning_rate": 3.7554693877551025e-05, "loss": 1.3143, "step": 316100 }, { "epoch": 6.69, "learning_rate": 3.7534285714285716e-05, "loss": 1.3085, "step": 316200 }, { "epoch": 6.69, "learning_rate": 3.751387755102041e-05, "loss": 1.3163, "step": 316300 }, { "epoch": 6.7, "learning_rate": 3.7493469387755106e-05, "loss": 1.3171, "step": 316400 }, { "epoch": 6.7, "learning_rate": 3.74730612244898e-05, "loss": 1.3019, "step": 316500 }, { "epoch": 6.7, "learning_rate": 3.745265306122449e-05, "loss": 1.3107, "step": 316600 }, { "epoch": 6.7, "learning_rate": 3.743224489795918e-05, "loss": 1.314, "step": 316700 }, { "epoch": 6.7, "learning_rate": 3.741183673469388e-05, "loss": 1.3068, "step": 316800 }, { "epoch": 6.71, "learning_rate": 3.739142857142857e-05, "loss": 1.3174, "step": 316900 }, { "epoch": 6.71, "learning_rate": 3.737102040816326e-05, "loss": 1.3094, "step": 317000 }, { "epoch": 6.71, "learning_rate": 3.735061224489796e-05, "loss": 1.3182, "step": 317100 }, { "epoch": 6.71, "learning_rate": 3.733020408163265e-05, "loss": 1.3152, "step": 317200 }, { "epoch": 6.72, "learning_rate": 3.7309795918367344e-05, "loss": 1.3123, "step": 317300 }, { "epoch": 6.72, "learning_rate": 3.728938775510204e-05, "loss": 1.3154, "step": 317400 }, { "epoch": 6.72, "learning_rate": 3.726897959183674e-05, "loss": 1.3166, "step": 317500 }, { "epoch": 6.72, "learning_rate": 3.724857142857143e-05, "loss": 1.3154, "step": 317600 }, { "epoch": 6.72, "learning_rate": 3.7228163265306124e-05, "loss": 1.3135, "step": 317700 }, { "epoch": 6.73, "learning_rate": 3.720775510204082e-05, "loss": 1.3085, "step": 317800 }, { "epoch": 6.73, "learning_rate": 3.7187346938775514e-05, "loss": 1.3059, "step": 317900 }, { "epoch": 6.73, "learning_rate": 3.7166938775510206e-05, "loss": 1.311, "step": 318000 }, { "epoch": 6.73, "learning_rate": 3.7146530612244904e-05, "loss": 1.3134, "step": 318100 }, { "epoch": 6.73, "learning_rate": 3.7126122448979596e-05, "loss": 1.311, "step": 318200 }, { "epoch": 6.74, "learning_rate": 3.710571428571429e-05, "loss": 1.3127, "step": 318300 }, { "epoch": 6.74, "learning_rate": 3.708530612244898e-05, "loss": 1.3133, "step": 318400 }, { "epoch": 6.74, "learning_rate": 3.706489795918368e-05, "loss": 1.3104, "step": 318500 }, { "epoch": 6.74, "learning_rate": 3.704448979591837e-05, "loss": 1.3058, "step": 318600 }, { "epoch": 6.74, "learning_rate": 3.702408163265306e-05, "loss": 1.3101, "step": 318700 }, { "epoch": 6.75, "learning_rate": 3.700367346938776e-05, "loss": 1.3102, "step": 318800 }, { "epoch": 6.75, "learning_rate": 3.698326530612245e-05, "loss": 1.3169, "step": 318900 }, { "epoch": 6.75, "learning_rate": 3.696285714285714e-05, "loss": 1.3097, "step": 319000 }, { "epoch": 6.75, "learning_rate": 3.694244897959184e-05, "loss": 1.3084, "step": 319100 }, { "epoch": 6.76, "learning_rate": 3.692204081632653e-05, "loss": 1.3071, "step": 319200 }, { "epoch": 6.76, "learning_rate": 3.690163265306122e-05, "loss": 1.3156, "step": 319300 }, { "epoch": 6.76, "learning_rate": 3.6881224489795915e-05, "loss": 1.3128, "step": 319400 }, { "epoch": 6.76, "learning_rate": 3.686081632653061e-05, "loss": 1.3094, "step": 319500 }, { "epoch": 6.76, "learning_rate": 3.684061224489796e-05, "loss": 1.3172, "step": 319600 }, { "epoch": 6.77, "learning_rate": 3.682020408163265e-05, "loss": 1.3105, "step": 319700 }, { "epoch": 6.77, "learning_rate": 3.6799795918367344e-05, "loss": 1.3099, "step": 319800 }, { "epoch": 6.77, "learning_rate": 3.677938775510204e-05, "loss": 1.3064, "step": 319900 }, { "epoch": 6.77, "learning_rate": 3.6758979591836734e-05, "loss": 1.3207, "step": 320000 }, { "epoch": 6.77, "learning_rate": 3.6738571428571426e-05, "loss": 1.3059, "step": 320100 }, { "epoch": 6.78, "learning_rate": 3.6718163265306124e-05, "loss": 1.3141, "step": 320200 }, { "epoch": 6.78, "learning_rate": 3.669775510204082e-05, "loss": 1.3138, "step": 320300 }, { "epoch": 6.78, "learning_rate": 3.6677346938775514e-05, "loss": 1.3121, "step": 320400 }, { "epoch": 6.78, "learning_rate": 3.6656938775510205e-05, "loss": 1.3124, "step": 320500 }, { "epoch": 6.79, "learning_rate": 3.6636530612244904e-05, "loss": 1.3116, "step": 320600 }, { "epoch": 6.79, "learning_rate": 3.6616122448979595e-05, "loss": 1.3113, "step": 320700 }, { "epoch": 6.79, "learning_rate": 3.659571428571429e-05, "loss": 1.3055, "step": 320800 }, { "epoch": 6.79, "learning_rate": 3.6575306122448985e-05, "loss": 1.3192, "step": 320900 }, { "epoch": 6.79, "learning_rate": 3.655489795918368e-05, "loss": 1.3088, "step": 321000 }, { "epoch": 6.8, "learning_rate": 3.653448979591837e-05, "loss": 1.3121, "step": 321100 }, { "epoch": 6.8, "learning_rate": 3.651408163265306e-05, "loss": 1.3061, "step": 321200 }, { "epoch": 6.8, "learning_rate": 3.649367346938776e-05, "loss": 1.3089, "step": 321300 }, { "epoch": 6.8, "learning_rate": 3.647326530612245e-05, "loss": 1.3049, "step": 321400 }, { "epoch": 6.8, "learning_rate": 3.645285714285714e-05, "loss": 1.3, "step": 321500 }, { "epoch": 6.81, "learning_rate": 3.643244897959184e-05, "loss": 1.311, "step": 321600 }, { "epoch": 6.81, "learning_rate": 3.641204081632653e-05, "loss": 1.3216, "step": 321700 }, { "epoch": 6.81, "learning_rate": 3.639163265306122e-05, "loss": 1.3031, "step": 321800 }, { "epoch": 6.81, "learning_rate": 3.637142857142857e-05, "loss": 1.3118, "step": 321900 }, { "epoch": 6.81, "learning_rate": 3.635102040816327e-05, "loss": 1.3147, "step": 322000 }, { "epoch": 6.82, "learning_rate": 3.633061224489796e-05, "loss": 1.3097, "step": 322100 }, { "epoch": 6.82, "learning_rate": 3.631020408163265e-05, "loss": 1.3123, "step": 322200 }, { "epoch": 6.82, "learning_rate": 3.628979591836735e-05, "loss": 1.3118, "step": 322300 }, { "epoch": 6.82, "learning_rate": 3.626938775510204e-05, "loss": 1.3127, "step": 322400 }, { "epoch": 6.83, "learning_rate": 3.6248979591836734e-05, "loss": 1.3042, "step": 322500 }, { "epoch": 6.83, "learning_rate": 3.6228571428571425e-05, "loss": 1.311, "step": 322600 }, { "epoch": 6.83, "learning_rate": 3.6208163265306124e-05, "loss": 1.3126, "step": 322700 }, { "epoch": 6.83, "learning_rate": 3.6187755102040815e-05, "loss": 1.3107, "step": 322800 }, { "epoch": 6.83, "learning_rate": 3.616734693877551e-05, "loss": 1.3204, "step": 322900 }, { "epoch": 6.84, "learning_rate": 3.6146938775510205e-05, "loss": 1.3122, "step": 323000 }, { "epoch": 6.84, "learning_rate": 3.6126530612244904e-05, "loss": 1.3043, "step": 323100 }, { "epoch": 6.84, "learning_rate": 3.6106122448979595e-05, "loss": 1.3094, "step": 323200 }, { "epoch": 6.84, "learning_rate": 3.608571428571429e-05, "loss": 1.3133, "step": 323300 }, { "epoch": 6.84, "learning_rate": 3.6065306122448985e-05, "loss": 1.3099, "step": 323400 }, { "epoch": 6.85, "learning_rate": 3.604489795918368e-05, "loss": 1.3089, "step": 323500 }, { "epoch": 6.85, "learning_rate": 3.602448979591837e-05, "loss": 1.3064, "step": 323600 }, { "epoch": 6.85, "learning_rate": 3.600408163265307e-05, "loss": 1.301, "step": 323700 }, { "epoch": 6.85, "learning_rate": 3.598367346938776e-05, "loss": 1.3134, "step": 323800 }, { "epoch": 6.86, "learning_rate": 3.596326530612245e-05, "loss": 1.3116, "step": 323900 }, { "epoch": 6.86, "learning_rate": 3.594285714285714e-05, "loss": 1.3117, "step": 324000 }, { "epoch": 6.86, "learning_rate": 3.592244897959184e-05, "loss": 1.3162, "step": 324100 }, { "epoch": 6.86, "learning_rate": 3.590204081632653e-05, "loss": 1.3099, "step": 324200 }, { "epoch": 6.86, "learning_rate": 3.588163265306122e-05, "loss": 1.315, "step": 324300 }, { "epoch": 6.87, "learning_rate": 3.586122448979592e-05, "loss": 1.3109, "step": 324400 }, { "epoch": 6.87, "learning_rate": 3.584081632653061e-05, "loss": 1.3116, "step": 324500 }, { "epoch": 6.87, "learning_rate": 3.5820408163265304e-05, "loss": 1.3056, "step": 324600 }, { "epoch": 6.87, "learning_rate": 3.58e-05, "loss": 1.3085, "step": 324700 }, { "epoch": 6.87, "learning_rate": 3.577979591836735e-05, "loss": 1.3095, "step": 324800 }, { "epoch": 6.88, "learning_rate": 3.575938775510204e-05, "loss": 1.3091, "step": 324900 }, { "epoch": 6.88, "learning_rate": 3.5738979591836734e-05, "loss": 1.3172, "step": 325000 }, { "epoch": 6.88, "learning_rate": 3.571857142857143e-05, "loss": 1.3093, "step": 325100 }, { "epoch": 6.88, "learning_rate": 3.5698163265306124e-05, "loss": 1.3083, "step": 325200 }, { "epoch": 6.88, "learning_rate": 3.5677755102040815e-05, "loss": 1.3097, "step": 325300 }, { "epoch": 6.89, "learning_rate": 3.565734693877551e-05, "loss": 1.3098, "step": 325400 }, { "epoch": 6.89, "learning_rate": 3.5636938775510205e-05, "loss": 1.2987, "step": 325500 }, { "epoch": 6.89, "learning_rate": 3.56165306122449e-05, "loss": 1.3139, "step": 325600 }, { "epoch": 6.89, "learning_rate": 3.559612244897959e-05, "loss": 1.3119, "step": 325700 }, { "epoch": 6.9, "learning_rate": 3.557571428571429e-05, "loss": 1.3087, "step": 325800 }, { "epoch": 6.9, "learning_rate": 3.5555306122448985e-05, "loss": 1.3107, "step": 325900 }, { "epoch": 6.9, "learning_rate": 3.5534897959183677e-05, "loss": 1.3146, "step": 326000 }, { "epoch": 6.9, "learning_rate": 3.551448979591837e-05, "loss": 1.3141, "step": 326100 }, { "epoch": 6.9, "learning_rate": 3.5494081632653066e-05, "loss": 1.3091, "step": 326200 }, { "epoch": 6.91, "learning_rate": 3.547367346938776e-05, "loss": 1.3104, "step": 326300 }, { "epoch": 6.91, "learning_rate": 3.545326530612245e-05, "loss": 1.3057, "step": 326400 }, { "epoch": 6.91, "learning_rate": 3.543285714285715e-05, "loss": 1.3078, "step": 326500 }, { "epoch": 6.91, "learning_rate": 3.541244897959184e-05, "loss": 1.3033, "step": 326600 }, { "epoch": 6.91, "learning_rate": 3.539204081632653e-05, "loss": 1.3005, "step": 326700 }, { "epoch": 6.92, "learning_rate": 3.537163265306123e-05, "loss": 1.3044, "step": 326800 }, { "epoch": 6.92, "learning_rate": 3.535122448979592e-05, "loss": 1.3139, "step": 326900 }, { "epoch": 6.92, "learning_rate": 3.533081632653061e-05, "loss": 1.3003, "step": 327000 }, { "epoch": 6.92, "learning_rate": 3.5310408163265304e-05, "loss": 1.2999, "step": 327100 }, { "epoch": 6.92, "learning_rate": 3.529e-05, "loss": 1.3091, "step": 327200 }, { "epoch": 6.93, "learning_rate": 3.5269591836734694e-05, "loss": 1.306, "step": 327300 }, { "epoch": 6.93, "learning_rate": 3.5249183673469386e-05, "loss": 1.3001, "step": 327400 }, { "epoch": 6.93, "learning_rate": 3.5228775510204084e-05, "loss": 1.3039, "step": 327500 }, { "epoch": 6.93, "learning_rate": 3.5208367346938776e-05, "loss": 1.3114, "step": 327600 }, { "epoch": 6.94, "learning_rate": 3.518795918367347e-05, "loss": 1.3011, "step": 327700 }, { "epoch": 6.94, "learning_rate": 3.5167755102040815e-05, "loss": 1.31, "step": 327800 }, { "epoch": 6.94, "learning_rate": 3.514734693877551e-05, "loss": 1.3121, "step": 327900 }, { "epoch": 6.94, "learning_rate": 3.5126938775510205e-05, "loss": 1.3061, "step": 328000 }, { "epoch": 6.94, "learning_rate": 3.5106530612244896e-05, "loss": 1.303, "step": 328100 }, { "epoch": 6.95, "learning_rate": 3.5086122448979595e-05, "loss": 1.309, "step": 328200 }, { "epoch": 6.95, "learning_rate": 3.5065714285714286e-05, "loss": 1.3098, "step": 328300 }, { "epoch": 6.95, "learning_rate": 3.504530612244898e-05, "loss": 1.3101, "step": 328400 }, { "epoch": 6.95, "learning_rate": 3.5024897959183676e-05, "loss": 1.3033, "step": 328500 }, { "epoch": 6.95, "learning_rate": 3.5004489795918375e-05, "loss": 1.308, "step": 328600 }, { "epoch": 6.96, "learning_rate": 3.4984081632653066e-05, "loss": 1.3056, "step": 328700 }, { "epoch": 6.96, "learning_rate": 3.496367346938776e-05, "loss": 1.3109, "step": 328800 }, { "epoch": 6.96, "learning_rate": 3.494326530612245e-05, "loss": 1.3041, "step": 328900 }, { "epoch": 6.96, "learning_rate": 3.492285714285715e-05, "loss": 1.3021, "step": 329000 }, { "epoch": 6.97, "learning_rate": 3.490244897959184e-05, "loss": 1.306, "step": 329100 }, { "epoch": 6.97, "learning_rate": 3.488204081632653e-05, "loss": 1.3007, "step": 329200 }, { "epoch": 6.97, "learning_rate": 3.486163265306123e-05, "loss": 1.3063, "step": 329300 }, { "epoch": 6.97, "learning_rate": 3.484122448979592e-05, "loss": 1.3068, "step": 329400 }, { "epoch": 6.97, "learning_rate": 3.482081632653061e-05, "loss": 1.3096, "step": 329500 }, { "epoch": 6.98, "learning_rate": 3.480040816326531e-05, "loss": 1.3119, "step": 329600 }, { "epoch": 6.98, "learning_rate": 3.478e-05, "loss": 1.3046, "step": 329700 }, { "epoch": 6.98, "learning_rate": 3.4759591836734694e-05, "loss": 1.3103, "step": 329800 }, { "epoch": 6.98, "learning_rate": 3.4739183673469386e-05, "loss": 1.3052, "step": 329900 }, { "epoch": 6.98, "learning_rate": 3.4718775510204084e-05, "loss": 1.3097, "step": 330000 }, { "epoch": 6.99, "learning_rate": 3.4698367346938776e-05, "loss": 1.3139, "step": 330100 }, { "epoch": 6.99, "learning_rate": 3.467795918367347e-05, "loss": 1.3062, "step": 330200 }, { "epoch": 6.99, "learning_rate": 3.4657551020408165e-05, "loss": 1.3089, "step": 330300 }, { "epoch": 6.99, "learning_rate": 3.463714285714286e-05, "loss": 1.3098, "step": 330400 }, { "epoch": 6.99, "learning_rate": 3.4616938775510205e-05, "loss": 1.3033, "step": 330500 }, { "epoch": 7.0, "learning_rate": 3.4596530612244896e-05, "loss": 1.307, "step": 330600 }, { "epoch": 7.0, "learning_rate": 3.4576122448979595e-05, "loss": 1.3047, "step": 330700 }, { "epoch": 7.0, "learning_rate": 3.4555714285714286e-05, "loss": 1.3163, "step": 330800 }, { "epoch": 7.0, "learning_rate": 3.453530612244898e-05, "loss": 1.3105, "step": 330900 }, { "epoch": 7.01, "learning_rate": 3.4514897959183676e-05, "loss": 1.3024, "step": 331000 }, { "epoch": 7.01, "learning_rate": 3.449448979591837e-05, "loss": 1.2983, "step": 331100 }, { "epoch": 7.01, "learning_rate": 3.447408163265306e-05, "loss": 1.3086, "step": 331200 }, { "epoch": 7.01, "learning_rate": 3.445367346938776e-05, "loss": 1.301, "step": 331300 }, { "epoch": 7.01, "learning_rate": 3.4433265306122456e-05, "loss": 1.3044, "step": 331400 }, { "epoch": 7.02, "learning_rate": 3.441285714285715e-05, "loss": 1.3011, "step": 331500 }, { "epoch": 7.02, "learning_rate": 3.439244897959184e-05, "loss": 1.3021, "step": 331600 }, { "epoch": 7.02, "learning_rate": 3.437204081632653e-05, "loss": 1.3072, "step": 331700 }, { "epoch": 7.02, "learning_rate": 3.435163265306123e-05, "loss": 1.2992, "step": 331800 }, { "epoch": 7.02, "learning_rate": 3.433122448979592e-05, "loss": 1.2948, "step": 331900 }, { "epoch": 7.03, "learning_rate": 3.431081632653061e-05, "loss": 1.3023, "step": 332000 }, { "epoch": 7.03, "learning_rate": 3.429040816326531e-05, "loss": 1.2973, "step": 332100 }, { "epoch": 7.03, "learning_rate": 3.427e-05, "loss": 1.308, "step": 332200 }, { "epoch": 7.03, "learning_rate": 3.4249591836734694e-05, "loss": 1.2983, "step": 332300 }, { "epoch": 7.03, "learning_rate": 3.422918367346939e-05, "loss": 1.3004, "step": 332400 }, { "epoch": 7.04, "learning_rate": 3.4208775510204084e-05, "loss": 1.2989, "step": 332500 }, { "epoch": 7.04, "learning_rate": 3.4188367346938775e-05, "loss": 1.298, "step": 332600 }, { "epoch": 7.04, "learning_rate": 3.416795918367347e-05, "loss": 1.2955, "step": 332700 }, { "epoch": 7.04, "learning_rate": 3.4147551020408165e-05, "loss": 1.3023, "step": 332800 }, { "epoch": 7.05, "learning_rate": 3.412714285714286e-05, "loss": 1.3, "step": 332900 }, { "epoch": 7.05, "learning_rate": 3.410673469387755e-05, "loss": 1.2944, "step": 333000 }, { "epoch": 7.05, "learning_rate": 3.408632653061225e-05, "loss": 1.3052, "step": 333100 }, { "epoch": 7.05, "learning_rate": 3.406591836734694e-05, "loss": 1.3006, "step": 333200 }, { "epoch": 7.05, "learning_rate": 3.4045714285714286e-05, "loss": 1.2956, "step": 333300 }, { "epoch": 7.06, "learning_rate": 3.402530612244898e-05, "loss": 1.3024, "step": 333400 }, { "epoch": 7.06, "learning_rate": 3.4004897959183676e-05, "loss": 1.3043, "step": 333500 }, { "epoch": 7.06, "learning_rate": 3.398448979591837e-05, "loss": 1.2999, "step": 333600 }, { "epoch": 7.06, "learning_rate": 3.396408163265306e-05, "loss": 1.3052, "step": 333700 }, { "epoch": 7.06, "learning_rate": 3.394367346938776e-05, "loss": 1.2954, "step": 333800 }, { "epoch": 7.07, "learning_rate": 3.392326530612245e-05, "loss": 1.3117, "step": 333900 }, { "epoch": 7.07, "learning_rate": 3.390285714285714e-05, "loss": 1.3017, "step": 334000 }, { "epoch": 7.07, "learning_rate": 3.388244897959184e-05, "loss": 1.3048, "step": 334100 }, { "epoch": 7.07, "learning_rate": 3.386204081632654e-05, "loss": 1.299, "step": 334200 }, { "epoch": 7.08, "learning_rate": 3.384163265306123e-05, "loss": 1.3038, "step": 334300 }, { "epoch": 7.08, "learning_rate": 3.382122448979592e-05, "loss": 1.3089, "step": 334400 }, { "epoch": 7.08, "learning_rate": 3.380081632653061e-05, "loss": 1.2962, "step": 334500 }, { "epoch": 7.08, "learning_rate": 3.378040816326531e-05, "loss": 1.2973, "step": 334600 }, { "epoch": 7.08, "learning_rate": 3.376e-05, "loss": 1.3028, "step": 334700 }, { "epoch": 7.09, "learning_rate": 3.3739591836734694e-05, "loss": 1.2994, "step": 334800 }, { "epoch": 7.09, "learning_rate": 3.371918367346939e-05, "loss": 1.2988, "step": 334900 }, { "epoch": 7.09, "learning_rate": 3.3698775510204084e-05, "loss": 1.301, "step": 335000 }, { "epoch": 7.09, "learning_rate": 3.3678367346938775e-05, "loss": 1.3051, "step": 335100 }, { "epoch": 7.09, "learning_rate": 3.3657959183673474e-05, "loss": 1.3061, "step": 335200 }, { "epoch": 7.1, "learning_rate": 3.3637551020408165e-05, "loss": 1.3036, "step": 335300 }, { "epoch": 7.1, "learning_rate": 3.361714285714286e-05, "loss": 1.3026, "step": 335400 }, { "epoch": 7.1, "learning_rate": 3.3596734693877555e-05, "loss": 1.3025, "step": 335500 }, { "epoch": 7.1, "learning_rate": 3.357632653061225e-05, "loss": 1.2965, "step": 335600 }, { "epoch": 7.1, "learning_rate": 3.355591836734694e-05, "loss": 1.2959, "step": 335700 }, { "epoch": 7.11, "learning_rate": 3.353551020408163e-05, "loss": 1.2977, "step": 335800 }, { "epoch": 7.11, "learning_rate": 3.351510204081633e-05, "loss": 1.305, "step": 335900 }, { "epoch": 7.11, "learning_rate": 3.349469387755102e-05, "loss": 1.3095, "step": 336000 }, { "epoch": 7.11, "learning_rate": 3.347428571428571e-05, "loss": 1.2972, "step": 336100 }, { "epoch": 7.12, "learning_rate": 3.345387755102041e-05, "loss": 1.293, "step": 336200 }, { "epoch": 7.12, "learning_rate": 3.343346938775511e-05, "loss": 1.3013, "step": 336300 }, { "epoch": 7.12, "learning_rate": 3.34130612244898e-05, "loss": 1.2942, "step": 336400 }, { "epoch": 7.12, "learning_rate": 3.339265306122449e-05, "loss": 1.3028, "step": 336500 }, { "epoch": 7.12, "learning_rate": 3.337224489795919e-05, "loss": 1.2994, "step": 336600 }, { "epoch": 7.13, "learning_rate": 3.335183673469388e-05, "loss": 1.2935, "step": 336700 }, { "epoch": 7.13, "learning_rate": 3.333142857142857e-05, "loss": 1.2984, "step": 336800 }, { "epoch": 7.13, "learning_rate": 3.331102040816327e-05, "loss": 1.293, "step": 336900 }, { "epoch": 7.13, "learning_rate": 3.329061224489796e-05, "loss": 1.2963, "step": 337000 }, { "epoch": 7.13, "learning_rate": 3.3270204081632654e-05, "loss": 1.2949, "step": 337100 }, { "epoch": 7.14, "learning_rate": 3.3249795918367346e-05, "loss": 1.3018, "step": 337200 }, { "epoch": 7.14, "learning_rate": 3.3229387755102044e-05, "loss": 1.301, "step": 337300 }, { "epoch": 7.14, "learning_rate": 3.320918367346939e-05, "loss": 1.2994, "step": 337400 }, { "epoch": 7.14, "learning_rate": 3.318877551020408e-05, "loss": 1.2977, "step": 337500 }, { "epoch": 7.14, "learning_rate": 3.3168367346938775e-05, "loss": 1.2964, "step": 337600 }, { "epoch": 7.15, "learning_rate": 3.314795918367347e-05, "loss": 1.3011, "step": 337700 }, { "epoch": 7.15, "learning_rate": 3.3127551020408165e-05, "loss": 1.2955, "step": 337800 }, { "epoch": 7.15, "learning_rate": 3.3107142857142856e-05, "loss": 1.3005, "step": 337900 }, { "epoch": 7.15, "learning_rate": 3.3086734693877555e-05, "loss": 1.2973, "step": 338000 }, { "epoch": 7.16, "learning_rate": 3.3066326530612246e-05, "loss": 1.2966, "step": 338100 }, { "epoch": 7.16, "learning_rate": 3.304591836734694e-05, "loss": 1.309, "step": 338200 }, { "epoch": 7.16, "learning_rate": 3.3025510204081636e-05, "loss": 1.301, "step": 338300 }, { "epoch": 7.16, "learning_rate": 3.300510204081633e-05, "loss": 1.2924, "step": 338400 }, { "epoch": 7.16, "learning_rate": 3.298469387755102e-05, "loss": 1.2964, "step": 338500 }, { "epoch": 7.17, "learning_rate": 3.296428571428571e-05, "loss": 1.3052, "step": 338600 }, { "epoch": 7.17, "learning_rate": 3.294387755102041e-05, "loss": 1.2924, "step": 338700 }, { "epoch": 7.17, "learning_rate": 3.29234693877551e-05, "loss": 1.3044, "step": 338800 }, { "epoch": 7.17, "learning_rate": 3.290306122448979e-05, "loss": 1.2986, "step": 338900 }, { "epoch": 7.17, "learning_rate": 3.288265306122449e-05, "loss": 1.2973, "step": 339000 }, { "epoch": 7.18, "learning_rate": 3.286224489795919e-05, "loss": 1.2934, "step": 339100 }, { "epoch": 7.18, "learning_rate": 3.284183673469388e-05, "loss": 1.2973, "step": 339200 }, { "epoch": 7.18, "learning_rate": 3.282142857142857e-05, "loss": 1.3004, "step": 339300 }, { "epoch": 7.18, "learning_rate": 3.280102040816327e-05, "loss": 1.3021, "step": 339400 }, { "epoch": 7.19, "learning_rate": 3.278061224489796e-05, "loss": 1.2899, "step": 339500 }, { "epoch": 7.19, "learning_rate": 3.276040816326531e-05, "loss": 1.2916, "step": 339600 }, { "epoch": 7.19, "learning_rate": 3.274e-05, "loss": 1.2915, "step": 339700 }, { "epoch": 7.19, "learning_rate": 3.27195918367347e-05, "loss": 1.2867, "step": 339800 }, { "epoch": 7.19, "learning_rate": 3.269918367346939e-05, "loss": 1.2958, "step": 339900 }, { "epoch": 7.2, "learning_rate": 3.267877551020408e-05, "loss": 1.2957, "step": 340000 }, { "epoch": 7.2, "learning_rate": 3.265836734693878e-05, "loss": 1.2919, "step": 340100 }, { "epoch": 7.2, "learning_rate": 3.263795918367347e-05, "loss": 1.2972, "step": 340200 }, { "epoch": 7.2, "learning_rate": 3.2617551020408165e-05, "loss": 1.2976, "step": 340300 }, { "epoch": 7.2, "learning_rate": 3.2597142857142856e-05, "loss": 1.2939, "step": 340400 }, { "epoch": 7.21, "learning_rate": 3.2576734693877555e-05, "loss": 1.2917, "step": 340500 }, { "epoch": 7.21, "learning_rate": 3.2556326530612246e-05, "loss": 1.2998, "step": 340600 }, { "epoch": 7.21, "learning_rate": 3.253591836734694e-05, "loss": 1.3076, "step": 340700 }, { "epoch": 7.21, "learning_rate": 3.2515510204081636e-05, "loss": 1.3019, "step": 340800 }, { "epoch": 7.21, "learning_rate": 3.2495306122448984e-05, "loss": 1.2993, "step": 340900 }, { "epoch": 7.22, "learning_rate": 3.2474897959183675e-05, "loss": 1.3, "step": 341000 }, { "epoch": 7.22, "learning_rate": 3.245448979591837e-05, "loss": 1.3019, "step": 341100 }, { "epoch": 7.22, "learning_rate": 3.2434081632653065e-05, "loss": 1.297, "step": 341200 }, { "epoch": 7.22, "learning_rate": 3.241367346938776e-05, "loss": 1.2942, "step": 341300 }, { "epoch": 7.23, "learning_rate": 3.239326530612245e-05, "loss": 1.3009, "step": 341400 }, { "epoch": 7.23, "learning_rate": 3.237285714285715e-05, "loss": 1.2914, "step": 341500 }, { "epoch": 7.23, "learning_rate": 3.235244897959184e-05, "loss": 1.3011, "step": 341600 }, { "epoch": 7.23, "learning_rate": 3.233204081632653e-05, "loss": 1.2988, "step": 341700 }, { "epoch": 7.23, "learning_rate": 3.231163265306122e-05, "loss": 1.2929, "step": 341800 }, { "epoch": 7.24, "learning_rate": 3.229122448979592e-05, "loss": 1.2994, "step": 341900 }, { "epoch": 7.24, "learning_rate": 3.227081632653061e-05, "loss": 1.2908, "step": 342000 }, { "epoch": 7.24, "learning_rate": 3.22504081632653e-05, "loss": 1.296, "step": 342100 }, { "epoch": 7.24, "learning_rate": 3.223e-05, "loss": 1.3053, "step": 342200 }, { "epoch": 7.24, "learning_rate": 3.220959183673469e-05, "loss": 1.2978, "step": 342300 }, { "epoch": 7.25, "learning_rate": 3.218918367346939e-05, "loss": 1.291, "step": 342400 }, { "epoch": 7.25, "learning_rate": 3.216877551020408e-05, "loss": 1.2957, "step": 342500 }, { "epoch": 7.25, "learning_rate": 3.214836734693878e-05, "loss": 1.2954, "step": 342600 }, { "epoch": 7.25, "learning_rate": 3.212816326530612e-05, "loss": 1.2906, "step": 342700 }, { "epoch": 7.26, "learning_rate": 3.2107755102040814e-05, "loss": 1.303, "step": 342800 }, { "epoch": 7.26, "learning_rate": 3.208734693877551e-05, "loss": 1.294, "step": 342900 }, { "epoch": 7.26, "learning_rate": 3.206693877551021e-05, "loss": 1.2978, "step": 343000 }, { "epoch": 7.26, "learning_rate": 3.20465306122449e-05, "loss": 1.3041, "step": 343100 }, { "epoch": 7.26, "learning_rate": 3.2026122448979594e-05, "loss": 1.2926, "step": 343200 }, { "epoch": 7.27, "learning_rate": 3.200571428571429e-05, "loss": 1.2953, "step": 343300 }, { "epoch": 7.27, "learning_rate": 3.1985306122448984e-05, "loss": 1.2961, "step": 343400 }, { "epoch": 7.27, "learning_rate": 3.1964897959183675e-05, "loss": 1.2883, "step": 343500 }, { "epoch": 7.27, "learning_rate": 3.194448979591837e-05, "loss": 1.2871, "step": 343600 }, { "epoch": 7.27, "learning_rate": 3.1924081632653065e-05, "loss": 1.2943, "step": 343700 }, { "epoch": 7.28, "learning_rate": 3.190367346938776e-05, "loss": 1.2927, "step": 343800 }, { "epoch": 7.28, "learning_rate": 3.188326530612245e-05, "loss": 1.296, "step": 343900 }, { "epoch": 7.28, "learning_rate": 3.1862857142857147e-05, "loss": 1.3018, "step": 344000 }, { "epoch": 7.28, "learning_rate": 3.184244897959184e-05, "loss": 1.2908, "step": 344100 }, { "epoch": 7.28, "learning_rate": 3.182204081632653e-05, "loss": 1.2863, "step": 344200 }, { "epoch": 7.29, "learning_rate": 3.180163265306123e-05, "loss": 1.2922, "step": 344300 }, { "epoch": 7.29, "learning_rate": 3.178122448979592e-05, "loss": 1.2941, "step": 344400 }, { "epoch": 7.29, "learning_rate": 3.176081632653061e-05, "loss": 1.2915, "step": 344500 }, { "epoch": 7.29, "learning_rate": 3.17404081632653e-05, "loss": 1.2908, "step": 344600 }, { "epoch": 7.3, "learning_rate": 3.172e-05, "loss": 1.2966, "step": 344700 }, { "epoch": 7.3, "learning_rate": 3.169959183673469e-05, "loss": 1.2973, "step": 344800 }, { "epoch": 7.3, "learning_rate": 3.1679183673469384e-05, "loss": 1.2879, "step": 344900 }, { "epoch": 7.3, "learning_rate": 3.165877551020408e-05, "loss": 1.297, "step": 345000 }, { "epoch": 7.3, "learning_rate": 3.163836734693878e-05, "loss": 1.2983, "step": 345100 }, { "epoch": 7.31, "learning_rate": 3.161795918367347e-05, "loss": 1.295, "step": 345200 }, { "epoch": 7.31, "learning_rate": 3.1597551020408164e-05, "loss": 1.291, "step": 345300 }, { "epoch": 7.31, "learning_rate": 3.157714285714286e-05, "loss": 1.2889, "step": 345400 }, { "epoch": 7.31, "learning_rate": 3.1556734693877554e-05, "loss": 1.2996, "step": 345500 }, { "epoch": 7.31, "learning_rate": 3.1536326530612246e-05, "loss": 1.2922, "step": 345600 }, { "epoch": 7.32, "learning_rate": 3.1515918367346944e-05, "loss": 1.2941, "step": 345700 }, { "epoch": 7.32, "learning_rate": 3.1495510204081636e-05, "loss": 1.2859, "step": 345800 }, { "epoch": 7.32, "learning_rate": 3.147510204081633e-05, "loss": 1.2844, "step": 345900 }, { "epoch": 7.32, "learning_rate": 3.1454693877551026e-05, "loss": 1.2953, "step": 346000 }, { "epoch": 7.32, "learning_rate": 3.143428571428572e-05, "loss": 1.2956, "step": 346100 }, { "epoch": 7.33, "learning_rate": 3.141387755102041e-05, "loss": 1.29, "step": 346200 }, { "epoch": 7.33, "learning_rate": 3.13934693877551e-05, "loss": 1.2898, "step": 346300 }, { "epoch": 7.33, "learning_rate": 3.13730612244898e-05, "loss": 1.2926, "step": 346400 }, { "epoch": 7.33, "learning_rate": 3.135265306122449e-05, "loss": 1.2953, "step": 346500 }, { "epoch": 7.34, "learning_rate": 3.133224489795918e-05, "loss": 1.2867, "step": 346600 }, { "epoch": 7.34, "learning_rate": 3.131183673469388e-05, "loss": 1.288, "step": 346700 }, { "epoch": 7.34, "learning_rate": 3.129142857142857e-05, "loss": 1.2917, "step": 346800 }, { "epoch": 7.34, "learning_rate": 3.1271020408163264e-05, "loss": 1.2945, "step": 346900 }, { "epoch": 7.34, "learning_rate": 3.125061224489796e-05, "loss": 1.2923, "step": 347000 }, { "epoch": 7.35, "learning_rate": 3.1230204081632653e-05, "loss": 1.2895, "step": 347100 }, { "epoch": 7.35, "learning_rate": 3.1209795918367345e-05, "loss": 1.2969, "step": 347200 }, { "epoch": 7.35, "learning_rate": 3.118938775510204e-05, "loss": 1.3009, "step": 347300 }, { "epoch": 7.35, "learning_rate": 3.116897959183674e-05, "loss": 1.2961, "step": 347400 }, { "epoch": 7.35, "learning_rate": 3.114857142857143e-05, "loss": 1.2925, "step": 347500 }, { "epoch": 7.36, "learning_rate": 3.1128163265306125e-05, "loss": 1.294, "step": 347600 }, { "epoch": 7.36, "learning_rate": 3.1107755102040817e-05, "loss": 1.2896, "step": 347700 }, { "epoch": 7.36, "learning_rate": 3.1087346938775515e-05, "loss": 1.2941, "step": 347800 }, { "epoch": 7.36, "learning_rate": 3.106714285714286e-05, "loss": 1.2905, "step": 347900 }, { "epoch": 7.37, "learning_rate": 3.1046734693877554e-05, "loss": 1.2898, "step": 348000 }, { "epoch": 7.37, "learning_rate": 3.1026326530612246e-05, "loss": 1.2947, "step": 348100 }, { "epoch": 7.37, "learning_rate": 3.1005918367346944e-05, "loss": 1.2925, "step": 348200 }, { "epoch": 7.37, "learning_rate": 3.0985510204081636e-05, "loss": 1.2983, "step": 348300 }, { "epoch": 7.37, "learning_rate": 3.096510204081633e-05, "loss": 1.292, "step": 348400 }, { "epoch": 7.38, "learning_rate": 3.0944693877551026e-05, "loss": 1.2885, "step": 348500 }, { "epoch": 7.38, "learning_rate": 3.092428571428572e-05, "loss": 1.3001, "step": 348600 }, { "epoch": 7.38, "learning_rate": 3.090387755102041e-05, "loss": 1.2919, "step": 348700 }, { "epoch": 7.38, "learning_rate": 3.088346938775511e-05, "loss": 1.292, "step": 348800 }, { "epoch": 7.38, "learning_rate": 3.08630612244898e-05, "loss": 1.2971, "step": 348900 }, { "epoch": 7.39, "learning_rate": 3.084265306122449e-05, "loss": 1.2983, "step": 349000 }, { "epoch": 7.39, "learning_rate": 3.082224489795918e-05, "loss": 1.2907, "step": 349100 }, { "epoch": 7.39, "learning_rate": 3.080183673469388e-05, "loss": 1.2945, "step": 349200 }, { "epoch": 7.39, "learning_rate": 3.078142857142857e-05, "loss": 1.2935, "step": 349300 }, { "epoch": 7.39, "learning_rate": 3.076102040816326e-05, "loss": 1.2901, "step": 349400 }, { "epoch": 7.4, "learning_rate": 3.074061224489796e-05, "loss": 1.2951, "step": 349500 }, { "epoch": 7.4, "learning_rate": 3.072020408163265e-05, "loss": 1.2873, "step": 349600 }, { "epoch": 7.4, "learning_rate": 3.0699795918367345e-05, "loss": 1.2834, "step": 349700 }, { "epoch": 7.4, "learning_rate": 3.067938775510204e-05, "loss": 1.293, "step": 349800 }, { "epoch": 7.41, "learning_rate": 3.0658979591836735e-05, "loss": 1.2874, "step": 349900 }, { "epoch": 7.41, "learning_rate": 3.063877551020408e-05, "loss": 1.2897, "step": 350000 }, { "epoch": 7.41, "learning_rate": 3.0618367346938774e-05, "loss": 1.2959, "step": 350100 }, { "epoch": 7.41, "learning_rate": 3.059795918367347e-05, "loss": 1.293, "step": 350200 }, { "epoch": 7.41, "learning_rate": 3.0577551020408164e-05, "loss": 1.2925, "step": 350300 }, { "epoch": 7.42, "learning_rate": 3.0557142857142855e-05, "loss": 1.285, "step": 350400 }, { "epoch": 7.42, "learning_rate": 3.053673469387755e-05, "loss": 1.2925, "step": 350500 }, { "epoch": 7.42, "learning_rate": 3.0516326530612242e-05, "loss": 1.2962, "step": 350600 }, { "epoch": 7.42, "learning_rate": 3.0495918367346944e-05, "loss": 1.2846, "step": 350700 }, { "epoch": 7.42, "learning_rate": 3.0475510204081635e-05, "loss": 1.2923, "step": 350800 }, { "epoch": 7.43, "learning_rate": 3.045510204081633e-05, "loss": 1.2885, "step": 350900 }, { "epoch": 7.43, "learning_rate": 3.0434693877551025e-05, "loss": 1.285, "step": 351000 }, { "epoch": 7.43, "learning_rate": 3.0414285714285717e-05, "loss": 1.2884, "step": 351100 }, { "epoch": 7.43, "learning_rate": 3.0393877551020412e-05, "loss": 1.2922, "step": 351200 }, { "epoch": 7.43, "learning_rate": 3.0373469387755103e-05, "loss": 1.2928, "step": 351300 }, { "epoch": 7.44, "learning_rate": 3.03530612244898e-05, "loss": 1.2952, "step": 351400 }, { "epoch": 7.44, "learning_rate": 3.0332653061224493e-05, "loss": 1.2903, "step": 351500 }, { "epoch": 7.44, "learning_rate": 3.0312244897959185e-05, "loss": 1.292, "step": 351600 }, { "epoch": 7.44, "learning_rate": 3.029183673469388e-05, "loss": 1.2913, "step": 351700 }, { "epoch": 7.45, "learning_rate": 3.027142857142857e-05, "loss": 1.2856, "step": 351800 }, { "epoch": 7.45, "learning_rate": 3.0251020408163266e-05, "loss": 1.2961, "step": 351900 }, { "epoch": 7.45, "learning_rate": 3.023061224489796e-05, "loss": 1.2861, "step": 352000 }, { "epoch": 7.45, "learning_rate": 3.0210204081632653e-05, "loss": 1.2824, "step": 352100 }, { "epoch": 7.45, "learning_rate": 3.0189795918367348e-05, "loss": 1.2957, "step": 352200 }, { "epoch": 7.46, "learning_rate": 3.016938775510204e-05, "loss": 1.29, "step": 352300 }, { "epoch": 7.46, "learning_rate": 3.0148979591836735e-05, "loss": 1.2917, "step": 352400 }, { "epoch": 7.46, "learning_rate": 3.012857142857143e-05, "loss": 1.295, "step": 352500 }, { "epoch": 7.46, "learning_rate": 3.010816326530612e-05, "loss": 1.2948, "step": 352600 }, { "epoch": 7.46, "learning_rate": 3.0087755102040816e-05, "loss": 1.2868, "step": 352700 }, { "epoch": 7.47, "learning_rate": 3.0067346938775508e-05, "loss": 1.2895, "step": 352800 }, { "epoch": 7.47, "learning_rate": 3.0046938775510203e-05, "loss": 1.2917, "step": 352900 }, { "epoch": 7.47, "learning_rate": 3.00265306122449e-05, "loss": 1.2942, "step": 353000 }, { "epoch": 7.47, "learning_rate": 3.0006122448979596e-05, "loss": 1.2845, "step": 353100 }, { "epoch": 7.48, "learning_rate": 2.9985714285714288e-05, "loss": 1.2915, "step": 353200 }, { "epoch": 7.48, "learning_rate": 2.9965306122448983e-05, "loss": 1.2916, "step": 353300 }, { "epoch": 7.48, "learning_rate": 2.9944897959183678e-05, "loss": 1.2915, "step": 353400 }, { "epoch": 7.48, "learning_rate": 2.992448979591837e-05, "loss": 1.2914, "step": 353500 }, { "epoch": 7.48, "learning_rate": 2.9904081632653064e-05, "loss": 1.2877, "step": 353600 }, { "epoch": 7.49, "learning_rate": 2.988367346938776e-05, "loss": 1.2855, "step": 353700 }, { "epoch": 7.49, "learning_rate": 2.986326530612245e-05, "loss": 1.2848, "step": 353800 }, { "epoch": 7.49, "learning_rate": 2.9842857142857146e-05, "loss": 1.2856, "step": 353900 }, { "epoch": 7.49, "learning_rate": 2.9822448979591837e-05, "loss": 1.286, "step": 354000 }, { "epoch": 7.49, "learning_rate": 2.9802040816326532e-05, "loss": 1.2871, "step": 354100 }, { "epoch": 7.5, "learning_rate": 2.9781632653061227e-05, "loss": 1.2923, "step": 354200 }, { "epoch": 7.5, "learning_rate": 2.9761428571428575e-05, "loss": 1.2878, "step": 354300 }, { "epoch": 7.5, "learning_rate": 2.9741020408163266e-05, "loss": 1.2877, "step": 354400 }, { "epoch": 7.5, "learning_rate": 2.972061224489796e-05, "loss": 1.2887, "step": 354500 }, { "epoch": 7.5, "learning_rate": 2.9700204081632653e-05, "loss": 1.2844, "step": 354600 }, { "epoch": 7.51, "learning_rate": 2.9679795918367348e-05, "loss": 1.2941, "step": 354700 }, { "epoch": 7.51, "learning_rate": 2.9659387755102043e-05, "loss": 1.2875, "step": 354800 }, { "epoch": 7.51, "learning_rate": 2.9638979591836734e-05, "loss": 1.2861, "step": 354900 }, { "epoch": 7.51, "learning_rate": 2.961857142857143e-05, "loss": 1.2888, "step": 355000 }, { "epoch": 7.52, "learning_rate": 2.959816326530612e-05, "loss": 1.2912, "step": 355100 }, { "epoch": 7.52, "learning_rate": 2.9577755102040816e-05, "loss": 1.2893, "step": 355200 }, { "epoch": 7.52, "learning_rate": 2.9557551020408163e-05, "loss": 1.2963, "step": 355300 }, { "epoch": 7.52, "learning_rate": 2.953714285714286e-05, "loss": 1.2893, "step": 355400 }, { "epoch": 7.52, "learning_rate": 2.951673469387755e-05, "loss": 1.2925, "step": 355500 }, { "epoch": 7.53, "learning_rate": 2.9496326530612245e-05, "loss": 1.2886, "step": 355600 }, { "epoch": 7.53, "learning_rate": 2.947591836734694e-05, "loss": 1.2839, "step": 355700 }, { "epoch": 7.53, "learning_rate": 2.945551020408163e-05, "loss": 1.2912, "step": 355800 }, { "epoch": 7.53, "learning_rate": 2.9435102040816327e-05, "loss": 1.2853, "step": 355900 }, { "epoch": 7.53, "learning_rate": 2.9414693877551018e-05, "loss": 1.2883, "step": 356000 }, { "epoch": 7.54, "learning_rate": 2.9394285714285713e-05, "loss": 1.2924, "step": 356100 }, { "epoch": 7.54, "learning_rate": 2.937387755102041e-05, "loss": 1.2887, "step": 356200 }, { "epoch": 7.54, "learning_rate": 2.9353469387755106e-05, "loss": 1.2871, "step": 356300 }, { "epoch": 7.54, "learning_rate": 2.9333061224489798e-05, "loss": 1.2898, "step": 356400 }, { "epoch": 7.54, "learning_rate": 2.9312653061224493e-05, "loss": 1.2974, "step": 356500 }, { "epoch": 7.55, "learning_rate": 2.9292244897959188e-05, "loss": 1.2853, "step": 356600 }, { "epoch": 7.55, "learning_rate": 2.927183673469388e-05, "loss": 1.2917, "step": 356700 }, { "epoch": 7.55, "learning_rate": 2.9251428571428575e-05, "loss": 1.2844, "step": 356800 }, { "epoch": 7.55, "learning_rate": 2.9231020408163266e-05, "loss": 1.2825, "step": 356900 }, { "epoch": 7.56, "learning_rate": 2.921061224489796e-05, "loss": 1.2787, "step": 357000 }, { "epoch": 7.56, "learning_rate": 2.9190204081632656e-05, "loss": 1.2816, "step": 357100 }, { "epoch": 7.56, "learning_rate": 2.9169795918367348e-05, "loss": 1.2835, "step": 357200 }, { "epoch": 7.56, "learning_rate": 2.9149387755102043e-05, "loss": 1.288, "step": 357300 }, { "epoch": 7.56, "learning_rate": 2.9128979591836734e-05, "loss": 1.2838, "step": 357400 }, { "epoch": 7.57, "learning_rate": 2.910857142857143e-05, "loss": 1.2791, "step": 357500 }, { "epoch": 7.57, "learning_rate": 2.9088163265306124e-05, "loss": 1.2919, "step": 357600 }, { "epoch": 7.57, "learning_rate": 2.9067755102040816e-05, "loss": 1.2882, "step": 357700 }, { "epoch": 7.57, "learning_rate": 2.904734693877551e-05, "loss": 1.2906, "step": 357800 }, { "epoch": 7.57, "learning_rate": 2.9026938775510206e-05, "loss": 1.2883, "step": 357900 }, { "epoch": 7.58, "learning_rate": 2.9006530612244897e-05, "loss": 1.2857, "step": 358000 }, { "epoch": 7.58, "learning_rate": 2.8986122448979592e-05, "loss": 1.2797, "step": 358100 }, { "epoch": 7.58, "learning_rate": 2.8965714285714284e-05, "loss": 1.2811, "step": 358200 }, { "epoch": 7.58, "learning_rate": 2.894551020408163e-05, "loss": 1.2908, "step": 358300 }, { "epoch": 7.59, "learning_rate": 2.8925102040816326e-05, "loss": 1.2872, "step": 358400 }, { "epoch": 7.59, "learning_rate": 2.890469387755102e-05, "loss": 1.2767, "step": 358500 }, { "epoch": 7.59, "learning_rate": 2.8884285714285713e-05, "loss": 1.2941, "step": 358600 }, { "epoch": 7.59, "learning_rate": 2.8863877551020408e-05, "loss": 1.2854, "step": 358700 }, { "epoch": 7.59, "learning_rate": 2.88434693877551e-05, "loss": 1.2763, "step": 358800 }, { "epoch": 7.6, "learning_rate": 2.8823061224489794e-05, "loss": 1.2837, "step": 358900 }, { "epoch": 7.6, "learning_rate": 2.8802653061224493e-05, "loss": 1.278, "step": 359000 }, { "epoch": 7.6, "learning_rate": 2.8782448979591837e-05, "loss": 1.2906, "step": 359100 }, { "epoch": 7.6, "learning_rate": 2.876204081632653e-05, "loss": 1.2836, "step": 359200 }, { "epoch": 7.6, "learning_rate": 2.8741632653061224e-05, "loss": 1.2908, "step": 359300 }, { "epoch": 7.61, "learning_rate": 2.872122448979592e-05, "loss": 1.2887, "step": 359400 }, { "epoch": 7.61, "learning_rate": 2.8700816326530617e-05, "loss": 1.2855, "step": 359500 }, { "epoch": 7.61, "learning_rate": 2.868040816326531e-05, "loss": 1.2855, "step": 359600 }, { "epoch": 7.61, "learning_rate": 2.8660000000000003e-05, "loss": 1.2858, "step": 359700 }, { "epoch": 7.61, "learning_rate": 2.86395918367347e-05, "loss": 1.284, "step": 359800 }, { "epoch": 7.62, "learning_rate": 2.861918367346939e-05, "loss": 1.2785, "step": 359900 }, { "epoch": 7.62, "learning_rate": 2.8598775510204085e-05, "loss": 1.2803, "step": 360000 }, { "epoch": 7.62, "learning_rate": 2.8578367346938777e-05, "loss": 1.2821, "step": 360100 }, { "epoch": 7.62, "learning_rate": 2.855795918367347e-05, "loss": 1.2841, "step": 360200 }, { "epoch": 7.63, "learning_rate": 2.8537551020408166e-05, "loss": 1.28, "step": 360300 }, { "epoch": 7.63, "learning_rate": 2.8517142857142858e-05, "loss": 1.2831, "step": 360400 }, { "epoch": 7.63, "learning_rate": 2.8496734693877553e-05, "loss": 1.287, "step": 360500 }, { "epoch": 7.63, "learning_rate": 2.8476326530612245e-05, "loss": 1.2871, "step": 360600 }, { "epoch": 7.63, "learning_rate": 2.845591836734694e-05, "loss": 1.2758, "step": 360700 }, { "epoch": 7.64, "learning_rate": 2.8435510204081635e-05, "loss": 1.2892, "step": 360800 }, { "epoch": 7.64, "learning_rate": 2.8415102040816326e-05, "loss": 1.2854, "step": 360900 }, { "epoch": 7.64, "learning_rate": 2.839469387755102e-05, "loss": 1.2857, "step": 361000 }, { "epoch": 7.64, "learning_rate": 2.8374285714285713e-05, "loss": 1.2808, "step": 361100 }, { "epoch": 7.64, "learning_rate": 2.8353877551020408e-05, "loss": 1.2867, "step": 361200 }, { "epoch": 7.65, "learning_rate": 2.8333469387755103e-05, "loss": 1.2836, "step": 361300 }, { "epoch": 7.65, "learning_rate": 2.8313061224489794e-05, "loss": 1.29, "step": 361400 }, { "epoch": 7.65, "learning_rate": 2.829265306122449e-05, "loss": 1.28, "step": 361500 }, { "epoch": 7.65, "learning_rate": 2.8272244897959184e-05, "loss": 1.2846, "step": 361600 }, { "epoch": 7.66, "learning_rate": 2.8251836734693876e-05, "loss": 1.283, "step": 361700 }, { "epoch": 7.66, "learning_rate": 2.8231428571428574e-05, "loss": 1.2807, "step": 361800 }, { "epoch": 7.66, "learning_rate": 2.821102040816327e-05, "loss": 1.2994, "step": 361900 }, { "epoch": 7.66, "learning_rate": 2.8190612244897964e-05, "loss": 1.2895, "step": 362000 }, { "epoch": 7.66, "learning_rate": 2.8170204081632656e-05, "loss": 1.2841, "step": 362100 }, { "epoch": 7.67, "learning_rate": 2.814979591836735e-05, "loss": 1.2795, "step": 362200 }, { "epoch": 7.67, "learning_rate": 2.8129387755102042e-05, "loss": 1.2858, "step": 362300 }, { "epoch": 7.67, "learning_rate": 2.8108979591836737e-05, "loss": 1.2847, "step": 362400 }, { "epoch": 7.67, "learning_rate": 2.8088571428571432e-05, "loss": 1.2844, "step": 362500 }, { "epoch": 7.67, "learning_rate": 2.8068163265306124e-05, "loss": 1.2881, "step": 362600 }, { "epoch": 7.68, "learning_rate": 2.804775510204082e-05, "loss": 1.2865, "step": 362700 }, { "epoch": 7.68, "learning_rate": 2.802734693877551e-05, "loss": 1.2924, "step": 362800 }, { "epoch": 7.68, "learning_rate": 2.8006938775510205e-05, "loss": 1.2798, "step": 362900 }, { "epoch": 7.68, "learning_rate": 2.79865306122449e-05, "loss": 1.2857, "step": 363000 }, { "epoch": 7.68, "learning_rate": 2.7966122448979592e-05, "loss": 1.2842, "step": 363100 }, { "epoch": 7.69, "learning_rate": 2.7945714285714287e-05, "loss": 1.2828, "step": 363200 }, { "epoch": 7.69, "learning_rate": 2.792530612244898e-05, "loss": 1.2823, "step": 363300 }, { "epoch": 7.69, "learning_rate": 2.7904897959183673e-05, "loss": 1.2853, "step": 363400 }, { "epoch": 7.69, "learning_rate": 2.788448979591837e-05, "loss": 1.2882, "step": 363500 }, { "epoch": 7.7, "learning_rate": 2.786408163265306e-05, "loss": 1.2871, "step": 363600 }, { "epoch": 7.7, "learning_rate": 2.7843673469387755e-05, "loss": 1.2749, "step": 363700 }, { "epoch": 7.7, "learning_rate": 2.7823265306122446e-05, "loss": 1.2753, "step": 363800 }, { "epoch": 7.7, "learning_rate": 2.780285714285714e-05, "loss": 1.277, "step": 363900 }, { "epoch": 7.7, "learning_rate": 2.778244897959184e-05, "loss": 1.2835, "step": 364000 }, { "epoch": 7.71, "learning_rate": 2.7762040816326535e-05, "loss": 1.2791, "step": 364100 }, { "epoch": 7.71, "learning_rate": 2.774163265306123e-05, "loss": 1.287, "step": 364200 }, { "epoch": 7.71, "learning_rate": 2.772122448979592e-05, "loss": 1.2695, "step": 364300 }, { "epoch": 7.71, "learning_rate": 2.7700816326530616e-05, "loss": 1.2803, "step": 364400 }, { "epoch": 7.71, "learning_rate": 2.7680408163265308e-05, "loss": 1.2806, "step": 364500 }, { "epoch": 7.72, "learning_rate": 2.7660000000000003e-05, "loss": 1.2846, "step": 364600 }, { "epoch": 7.72, "learning_rate": 2.7639591836734698e-05, "loss": 1.2824, "step": 364700 }, { "epoch": 7.72, "learning_rate": 2.761918367346939e-05, "loss": 1.2764, "step": 364800 }, { "epoch": 7.72, "learning_rate": 2.7598775510204084e-05, "loss": 1.2811, "step": 364900 }, { "epoch": 7.72, "learning_rate": 2.7578367346938776e-05, "loss": 1.2876, "step": 365000 }, { "epoch": 7.73, "learning_rate": 2.755795918367347e-05, "loss": 1.2791, "step": 365100 }, { "epoch": 7.73, "learning_rate": 2.7537551020408166e-05, "loss": 1.2867, "step": 365200 }, { "epoch": 7.73, "learning_rate": 2.7517142857142857e-05, "loss": 1.2803, "step": 365300 }, { "epoch": 7.73, "learning_rate": 2.7496734693877552e-05, "loss": 1.2844, "step": 365400 }, { "epoch": 7.74, "learning_rate": 2.74765306122449e-05, "loss": 1.287, "step": 365500 }, { "epoch": 7.74, "learning_rate": 2.745612244897959e-05, "loss": 1.2741, "step": 365600 }, { "epoch": 7.74, "learning_rate": 2.7435714285714287e-05, "loss": 1.2776, "step": 365700 }, { "epoch": 7.74, "learning_rate": 2.7415510204081634e-05, "loss": 1.2867, "step": 365800 }, { "epoch": 7.74, "learning_rate": 2.739510204081633e-05, "loss": 1.2806, "step": 365900 }, { "epoch": 7.75, "learning_rate": 2.737469387755102e-05, "loss": 1.2823, "step": 366000 }, { "epoch": 7.75, "learning_rate": 2.7354285714285716e-05, "loss": 1.2802, "step": 366100 }, { "epoch": 7.75, "learning_rate": 2.733387755102041e-05, "loss": 1.2765, "step": 366200 }, { "epoch": 7.75, "learning_rate": 2.7313469387755102e-05, "loss": 1.2814, "step": 366300 }, { "epoch": 7.75, "learning_rate": 2.7293061224489797e-05, "loss": 1.2769, "step": 366400 }, { "epoch": 7.76, "learning_rate": 2.7272857142857145e-05, "loss": 1.2855, "step": 366500 }, { "epoch": 7.76, "learning_rate": 2.7252448979591836e-05, "loss": 1.2811, "step": 366600 }, { "epoch": 7.76, "learning_rate": 2.723204081632653e-05, "loss": 1.2833, "step": 366700 }, { "epoch": 7.76, "learning_rate": 2.7211632653061226e-05, "loss": 1.2842, "step": 366800 }, { "epoch": 7.77, "learning_rate": 2.7191224489795918e-05, "loss": 1.2856, "step": 366900 }, { "epoch": 7.77, "learning_rate": 2.7170816326530613e-05, "loss": 1.2788, "step": 367000 }, { "epoch": 7.77, "learning_rate": 2.7150408163265308e-05, "loss": 1.276, "step": 367100 }, { "epoch": 7.77, "learning_rate": 2.713e-05, "loss": 1.2867, "step": 367200 }, { "epoch": 7.77, "learning_rate": 2.7109591836734694e-05, "loss": 1.2834, "step": 367300 }, { "epoch": 7.78, "learning_rate": 2.7089183673469386e-05, "loss": 1.2718, "step": 367400 }, { "epoch": 7.78, "learning_rate": 2.706877551020408e-05, "loss": 1.2791, "step": 367500 }, { "epoch": 7.78, "learning_rate": 2.7048367346938776e-05, "loss": 1.2779, "step": 367600 }, { "epoch": 7.78, "learning_rate": 2.7027959183673468e-05, "loss": 1.2858, "step": 367700 }, { "epoch": 7.78, "learning_rate": 2.7007551020408166e-05, "loss": 1.286, "step": 367800 }, { "epoch": 7.79, "learning_rate": 2.698714285714286e-05, "loss": 1.2821, "step": 367900 }, { "epoch": 7.79, "learning_rate": 2.6966734693877556e-05, "loss": 1.2819, "step": 368000 }, { "epoch": 7.79, "learning_rate": 2.6946326530612247e-05, "loss": 1.2838, "step": 368100 }, { "epoch": 7.79, "learning_rate": 2.6925918367346942e-05, "loss": 1.2796, "step": 368200 }, { "epoch": 7.79, "learning_rate": 2.6905510204081634e-05, "loss": 1.2813, "step": 368300 }, { "epoch": 7.8, "learning_rate": 2.688510204081633e-05, "loss": 1.2715, "step": 368400 }, { "epoch": 7.8, "learning_rate": 2.6864693877551024e-05, "loss": 1.2788, "step": 368500 }, { "epoch": 7.8, "learning_rate": 2.6844285714285715e-05, "loss": 1.2862, "step": 368600 }, { "epoch": 7.8, "learning_rate": 2.682387755102041e-05, "loss": 1.2743, "step": 368700 }, { "epoch": 7.81, "learning_rate": 2.6803469387755102e-05, "loss": 1.28, "step": 368800 }, { "epoch": 7.81, "learning_rate": 2.6783061224489797e-05, "loss": 1.2773, "step": 368900 }, { "epoch": 7.81, "learning_rate": 2.6762653061224492e-05, "loss": 1.2807, "step": 369000 }, { "epoch": 7.81, "learning_rate": 2.6742244897959184e-05, "loss": 1.2764, "step": 369100 }, { "epoch": 7.81, "learning_rate": 2.672183673469388e-05, "loss": 1.2843, "step": 369200 }, { "epoch": 7.82, "learning_rate": 2.670142857142857e-05, "loss": 1.2796, "step": 369300 }, { "epoch": 7.82, "learning_rate": 2.6681020408163265e-05, "loss": 1.2856, "step": 369400 }, { "epoch": 7.82, "learning_rate": 2.666061224489796e-05, "loss": 1.2846, "step": 369500 }, { "epoch": 7.82, "learning_rate": 2.664020408163265e-05, "loss": 1.282, "step": 369600 }, { "epoch": 7.82, "learning_rate": 2.6619795918367347e-05, "loss": 1.2746, "step": 369700 }, { "epoch": 7.83, "learning_rate": 2.6599387755102038e-05, "loss": 1.2804, "step": 369800 }, { "epoch": 7.83, "learning_rate": 2.6578979591836733e-05, "loss": 1.2804, "step": 369900 }, { "epoch": 7.83, "learning_rate": 2.6558571428571428e-05, "loss": 1.2805, "step": 370000 }, { "epoch": 7.83, "learning_rate": 2.6538163265306127e-05, "loss": 1.2806, "step": 370100 }, { "epoch": 7.83, "learning_rate": 2.651775510204082e-05, "loss": 1.2758, "step": 370200 }, { "epoch": 7.84, "learning_rate": 2.6497346938775513e-05, "loss": 1.2783, "step": 370300 }, { "epoch": 7.84, "learning_rate": 2.6476938775510208e-05, "loss": 1.2883, "step": 370400 }, { "epoch": 7.84, "learning_rate": 2.64565306122449e-05, "loss": 1.2812, "step": 370500 }, { "epoch": 7.84, "learning_rate": 2.6436122448979595e-05, "loss": 1.2784, "step": 370600 }, { "epoch": 7.85, "learning_rate": 2.641571428571429e-05, "loss": 1.278, "step": 370700 }, { "epoch": 7.85, "learning_rate": 2.639530612244898e-05, "loss": 1.2798, "step": 370800 }, { "epoch": 7.85, "learning_rate": 2.6374897959183676e-05, "loss": 1.2777, "step": 370900 }, { "epoch": 7.85, "learning_rate": 2.6354489795918368e-05, "loss": 1.2721, "step": 371000 }, { "epoch": 7.85, "learning_rate": 2.6334081632653063e-05, "loss": 1.2754, "step": 371100 }, { "epoch": 7.86, "learning_rate": 2.6313673469387758e-05, "loss": 1.2762, "step": 371200 }, { "epoch": 7.86, "learning_rate": 2.629326530612245e-05, "loss": 1.2827, "step": 371300 }, { "epoch": 7.86, "learning_rate": 2.6273061224489797e-05, "loss": 1.2791, "step": 371400 }, { "epoch": 7.86, "learning_rate": 2.6252653061224492e-05, "loss": 1.277, "step": 371500 }, { "epoch": 7.86, "learning_rate": 2.6232244897959183e-05, "loss": 1.28, "step": 371600 }, { "epoch": 7.87, "learning_rate": 2.621183673469388e-05, "loss": 1.2834, "step": 371700 }, { "epoch": 7.87, "learning_rate": 2.6191428571428573e-05, "loss": 1.2721, "step": 371800 }, { "epoch": 7.87, "learning_rate": 2.6171020408163265e-05, "loss": 1.2729, "step": 371900 }, { "epoch": 7.87, "learning_rate": 2.615061224489796e-05, "loss": 1.2694, "step": 372000 }, { "epoch": 7.88, "learning_rate": 2.6130204081632655e-05, "loss": 1.2768, "step": 372100 }, { "epoch": 7.88, "learning_rate": 2.6109795918367346e-05, "loss": 1.277, "step": 372200 }, { "epoch": 7.88, "learning_rate": 2.608938775510204e-05, "loss": 1.2818, "step": 372300 }, { "epoch": 7.88, "learning_rate": 2.6068979591836733e-05, "loss": 1.2887, "step": 372400 }, { "epoch": 7.88, "learning_rate": 2.6048571428571428e-05, "loss": 1.2802, "step": 372500 }, { "epoch": 7.89, "learning_rate": 2.6028163265306123e-05, "loss": 1.2816, "step": 372600 }, { "epoch": 7.89, "learning_rate": 2.6007755102040815e-05, "loss": 1.2844, "step": 372700 }, { "epoch": 7.89, "learning_rate": 2.5987346938775513e-05, "loss": 1.2803, "step": 372800 }, { "epoch": 7.89, "learning_rate": 2.5966938775510208e-05, "loss": 1.2753, "step": 372900 }, { "epoch": 7.89, "learning_rate": 2.5946530612244903e-05, "loss": 1.2785, "step": 373000 }, { "epoch": 7.9, "learning_rate": 2.5926122448979594e-05, "loss": 1.2811, "step": 373100 }, { "epoch": 7.9, "learning_rate": 2.590571428571429e-05, "loss": 1.2787, "step": 373200 }, { "epoch": 7.9, "learning_rate": 2.588530612244898e-05, "loss": 1.2811, "step": 373300 }, { "epoch": 7.9, "learning_rate": 2.5864897959183676e-05, "loss": 1.2761, "step": 373400 }, { "epoch": 7.9, "learning_rate": 2.584448979591837e-05, "loss": 1.2775, "step": 373500 }, { "epoch": 7.91, "learning_rate": 2.5824081632653062e-05, "loss": 1.2855, "step": 373600 }, { "epoch": 7.91, "learning_rate": 2.5803673469387757e-05, "loss": 1.2823, "step": 373700 }, { "epoch": 7.91, "learning_rate": 2.578326530612245e-05, "loss": 1.2706, "step": 373800 }, { "epoch": 7.91, "learning_rate": 2.5762857142857144e-05, "loss": 1.2717, "step": 373900 }, { "epoch": 7.92, "learning_rate": 2.574244897959184e-05, "loss": 1.2783, "step": 374000 }, { "epoch": 7.92, "learning_rate": 2.572204081632653e-05, "loss": 1.2737, "step": 374100 }, { "epoch": 7.92, "learning_rate": 2.5701632653061226e-05, "loss": 1.2832, "step": 374200 }, { "epoch": 7.92, "learning_rate": 2.5681224489795917e-05, "loss": 1.2828, "step": 374300 }, { "epoch": 7.92, "learning_rate": 2.5660816326530612e-05, "loss": 1.2812, "step": 374400 }, { "epoch": 7.93, "learning_rate": 2.5640408163265307e-05, "loss": 1.2722, "step": 374500 }, { "epoch": 7.93, "learning_rate": 2.562e-05, "loss": 1.2758, "step": 374600 }, { "epoch": 7.93, "learning_rate": 2.5599591836734694e-05, "loss": 1.2754, "step": 374700 }, { "epoch": 7.93, "learning_rate": 2.5579183673469385e-05, "loss": 1.2757, "step": 374800 }, { "epoch": 7.93, "learning_rate": 2.555877551020408e-05, "loss": 1.284, "step": 374900 }, { "epoch": 7.94, "learning_rate": 2.5538367346938775e-05, "loss": 1.2697, "step": 375000 }, { "epoch": 7.94, "learning_rate": 2.5517959183673474e-05, "loss": 1.2736, "step": 375100 }, { "epoch": 7.94, "learning_rate": 2.5497755102040814e-05, "loss": 1.2727, "step": 375200 }, { "epoch": 7.94, "learning_rate": 2.547734693877551e-05, "loss": 1.2769, "step": 375300 }, { "epoch": 7.94, "learning_rate": 2.5456938775510204e-05, "loss": 1.2802, "step": 375400 }, { "epoch": 7.95, "learning_rate": 2.5436530612244896e-05, "loss": 1.2828, "step": 375500 }, { "epoch": 7.95, "learning_rate": 2.5416122448979594e-05, "loss": 1.2714, "step": 375600 }, { "epoch": 7.95, "learning_rate": 2.539571428571429e-05, "loss": 1.2751, "step": 375700 }, { "epoch": 7.95, "learning_rate": 2.5375306122448984e-05, "loss": 1.2703, "step": 375800 }, { "epoch": 7.96, "learning_rate": 2.5354897959183676e-05, "loss": 1.2795, "step": 375900 }, { "epoch": 7.96, "learning_rate": 2.533448979591837e-05, "loss": 1.2723, "step": 376000 }, { "epoch": 7.96, "learning_rate": 2.5314285714285718e-05, "loss": 1.2766, "step": 376100 }, { "epoch": 7.96, "learning_rate": 2.5293877551020413e-05, "loss": 1.273, "step": 376200 }, { "epoch": 7.96, "learning_rate": 2.5273469387755105e-05, "loss": 1.2848, "step": 376300 }, { "epoch": 7.97, "learning_rate": 2.52530612244898e-05, "loss": 1.2706, "step": 376400 }, { "epoch": 7.97, "learning_rate": 2.523265306122449e-05, "loss": 1.2772, "step": 376500 }, { "epoch": 7.97, "learning_rate": 2.5212244897959186e-05, "loss": 1.2788, "step": 376600 }, { "epoch": 7.97, "learning_rate": 2.519183673469388e-05, "loss": 1.2772, "step": 376700 }, { "epoch": 7.97, "learning_rate": 2.5171428571428573e-05, "loss": 1.2746, "step": 376800 }, { "epoch": 7.98, "learning_rate": 2.5151020408163268e-05, "loss": 1.2796, "step": 376900 }, { "epoch": 7.98, "learning_rate": 2.513061224489796e-05, "loss": 1.2695, "step": 377000 }, { "epoch": 7.98, "learning_rate": 2.5110204081632654e-05, "loss": 1.2724, "step": 377100 }, { "epoch": 7.98, "learning_rate": 2.508979591836735e-05, "loss": 1.2707, "step": 377200 }, { "epoch": 7.99, "learning_rate": 2.506938775510204e-05, "loss": 1.2725, "step": 377300 }, { "epoch": 7.99, "learning_rate": 2.5048979591836736e-05, "loss": 1.2809, "step": 377400 }, { "epoch": 7.99, "learning_rate": 2.5028571428571428e-05, "loss": 1.2766, "step": 377500 }, { "epoch": 7.99, "learning_rate": 2.5008163265306123e-05, "loss": 1.2769, "step": 377600 }, { "epoch": 7.99, "learning_rate": 2.4987755102040818e-05, "loss": 1.2773, "step": 377700 }, { "epoch": 8.0, "learning_rate": 2.4967346938775512e-05, "loss": 1.2796, "step": 377800 }, { "epoch": 8.0, "learning_rate": 2.4946938775510207e-05, "loss": 1.2747, "step": 377900 }, { "epoch": 8.0, "learning_rate": 2.49265306122449e-05, "loss": 1.2734, "step": 378000 }, { "epoch": 8.0, "learning_rate": 2.4906122448979594e-05, "loss": 1.2695, "step": 378100 }, { "epoch": 8.0, "learning_rate": 2.4885714285714286e-05, "loss": 1.273, "step": 378200 }, { "epoch": 8.01, "learning_rate": 2.486530612244898e-05, "loss": 1.2782, "step": 378300 }, { "epoch": 8.01, "learning_rate": 2.4844897959183676e-05, "loss": 1.2723, "step": 378400 }, { "epoch": 8.01, "learning_rate": 2.4824489795918367e-05, "loss": 1.2732, "step": 378500 }, { "epoch": 8.01, "learning_rate": 2.4804081632653062e-05, "loss": 1.2817, "step": 378600 }, { "epoch": 8.01, "learning_rate": 2.4783673469387754e-05, "loss": 1.274, "step": 378700 }, { "epoch": 8.02, "learning_rate": 2.476326530612245e-05, "loss": 1.2809, "step": 378800 }, { "epoch": 8.02, "learning_rate": 2.4742857142857147e-05, "loss": 1.2733, "step": 378900 }, { "epoch": 8.02, "learning_rate": 2.472244897959184e-05, "loss": 1.2722, "step": 379000 }, { "epoch": 8.02, "learning_rate": 2.4702040816326534e-05, "loss": 1.2782, "step": 379100 }, { "epoch": 8.03, "learning_rate": 2.4681632653061225e-05, "loss": 1.2677, "step": 379200 }, { "epoch": 8.03, "learning_rate": 2.466122448979592e-05, "loss": 1.2856, "step": 379300 }, { "epoch": 8.03, "learning_rate": 2.4640816326530615e-05, "loss": 1.2705, "step": 379400 }, { "epoch": 8.03, "learning_rate": 2.4620408163265307e-05, "loss": 1.2663, "step": 379500 }, { "epoch": 8.03, "learning_rate": 2.46e-05, "loss": 1.2757, "step": 379600 }, { "epoch": 8.04, "learning_rate": 2.4579591836734693e-05, "loss": 1.2639, "step": 379700 }, { "epoch": 8.04, "learning_rate": 2.4559183673469388e-05, "loss": 1.2714, "step": 379800 }, { "epoch": 8.04, "learning_rate": 2.4538775510204083e-05, "loss": 1.2703, "step": 379900 }, { "epoch": 8.04, "learning_rate": 2.4518367346938775e-05, "loss": 1.2746, "step": 380000 }, { "epoch": 8.04, "learning_rate": 2.4497959183673473e-05, "loss": 1.2744, "step": 380100 }, { "epoch": 8.05, "learning_rate": 2.4477551020408165e-05, "loss": 1.2748, "step": 380200 }, { "epoch": 8.05, "learning_rate": 2.445714285714286e-05, "loss": 1.2734, "step": 380300 }, { "epoch": 8.05, "learning_rate": 2.443673469387755e-05, "loss": 1.2663, "step": 380400 }, { "epoch": 8.05, "learning_rate": 2.4416326530612246e-05, "loss": 1.2649, "step": 380500 }, { "epoch": 8.06, "learning_rate": 2.439591836734694e-05, "loss": 1.2684, "step": 380600 }, { "epoch": 8.06, "learning_rate": 2.4375510204081633e-05, "loss": 1.2642, "step": 380700 }, { "epoch": 8.06, "learning_rate": 2.4355102040816328e-05, "loss": 1.2774, "step": 380800 }, { "epoch": 8.06, "learning_rate": 2.433469387755102e-05, "loss": 1.2657, "step": 380900 }, { "epoch": 8.06, "learning_rate": 2.4314285714285714e-05, "loss": 1.2709, "step": 381000 }, { "epoch": 8.07, "learning_rate": 2.429387755102041e-05, "loss": 1.2718, "step": 381100 }, { "epoch": 8.07, "learning_rate": 2.4273469387755104e-05, "loss": 1.2666, "step": 381200 }, { "epoch": 8.07, "learning_rate": 2.42530612244898e-05, "loss": 1.2714, "step": 381300 }, { "epoch": 8.07, "learning_rate": 2.4232857142857143e-05, "loss": 1.2711, "step": 381400 }, { "epoch": 8.07, "learning_rate": 2.421244897959184e-05, "loss": 1.2745, "step": 381500 }, { "epoch": 8.08, "learning_rate": 2.419204081632653e-05, "loss": 1.2711, "step": 381600 }, { "epoch": 8.08, "learning_rate": 2.417163265306123e-05, "loss": 1.2681, "step": 381700 }, { "epoch": 8.08, "learning_rate": 2.415122448979592e-05, "loss": 1.2698, "step": 381800 }, { "epoch": 8.08, "learning_rate": 2.4130816326530615e-05, "loss": 1.2719, "step": 381900 }, { "epoch": 8.08, "learning_rate": 2.4110408163265306e-05, "loss": 1.2721, "step": 382000 }, { "epoch": 8.09, "learning_rate": 2.409e-05, "loss": 1.2688, "step": 382100 }, { "epoch": 8.09, "learning_rate": 2.4069591836734696e-05, "loss": 1.2674, "step": 382200 }, { "epoch": 8.09, "learning_rate": 2.4049183673469388e-05, "loss": 1.267, "step": 382300 }, { "epoch": 8.09, "learning_rate": 2.4028775510204083e-05, "loss": 1.2732, "step": 382400 }, { "epoch": 8.1, "learning_rate": 2.4008367346938775e-05, "loss": 1.2744, "step": 382500 }, { "epoch": 8.1, "learning_rate": 2.398795918367347e-05, "loss": 1.271, "step": 382600 }, { "epoch": 8.1, "learning_rate": 2.3967551020408164e-05, "loss": 1.2749, "step": 382700 }, { "epoch": 8.1, "learning_rate": 2.394714285714286e-05, "loss": 1.2704, "step": 382800 }, { "epoch": 8.1, "learning_rate": 2.3926734693877554e-05, "loss": 1.2655, "step": 382900 }, { "epoch": 8.11, "learning_rate": 2.3906326530612246e-05, "loss": 1.2684, "step": 383000 }, { "epoch": 8.11, "learning_rate": 2.388591836734694e-05, "loss": 1.2765, "step": 383100 }, { "epoch": 8.11, "learning_rate": 2.3865510204081633e-05, "loss": 1.2726, "step": 383200 }, { "epoch": 8.11, "learning_rate": 2.3845102040816328e-05, "loss": 1.2724, "step": 383300 }, { "epoch": 8.11, "learning_rate": 2.3824693877551023e-05, "loss": 1.2608, "step": 383400 }, { "epoch": 8.12, "learning_rate": 2.3804285714285714e-05, "loss": 1.2764, "step": 383500 }, { "epoch": 8.12, "learning_rate": 2.378387755102041e-05, "loss": 1.2758, "step": 383600 }, { "epoch": 8.12, "learning_rate": 2.37634693877551e-05, "loss": 1.2716, "step": 383700 }, { "epoch": 8.12, "learning_rate": 2.3743061224489796e-05, "loss": 1.2664, "step": 383800 }, { "epoch": 8.12, "learning_rate": 2.372265306122449e-05, "loss": 1.2686, "step": 383900 }, { "epoch": 8.13, "learning_rate": 2.3702244897959186e-05, "loss": 1.2711, "step": 384000 }, { "epoch": 8.13, "learning_rate": 2.368183673469388e-05, "loss": 1.2676, "step": 384100 }, { "epoch": 8.13, "learning_rate": 2.3661428571428572e-05, "loss": 1.2675, "step": 384200 }, { "epoch": 8.13, "learning_rate": 2.3641020408163267e-05, "loss": 1.2674, "step": 384300 }, { "epoch": 8.14, "learning_rate": 2.3620612244897962e-05, "loss": 1.2728, "step": 384400 }, { "epoch": 8.14, "learning_rate": 2.3600204081632654e-05, "loss": 1.2676, "step": 384500 }, { "epoch": 8.14, "learning_rate": 2.357979591836735e-05, "loss": 1.2728, "step": 384600 }, { "epoch": 8.14, "learning_rate": 2.355938775510204e-05, "loss": 1.2663, "step": 384700 }, { "epoch": 8.14, "learning_rate": 2.3538979591836735e-05, "loss": 1.2724, "step": 384800 }, { "epoch": 8.15, "learning_rate": 2.351857142857143e-05, "loss": 1.2703, "step": 384900 }, { "epoch": 8.15, "learning_rate": 2.3498163265306122e-05, "loss": 1.2651, "step": 385000 }, { "epoch": 8.15, "learning_rate": 2.347775510204082e-05, "loss": 1.2631, "step": 385100 }, { "epoch": 8.15, "learning_rate": 2.345734693877551e-05, "loss": 1.2661, "step": 385200 }, { "epoch": 8.15, "learning_rate": 2.3436938775510207e-05, "loss": 1.27, "step": 385300 }, { "epoch": 8.16, "learning_rate": 2.3416530612244898e-05, "loss": 1.2687, "step": 385400 }, { "epoch": 8.16, "learning_rate": 2.3396122448979593e-05, "loss": 1.2703, "step": 385500 }, { "epoch": 8.16, "learning_rate": 2.337591836734694e-05, "loss": 1.27, "step": 385600 }, { "epoch": 8.16, "learning_rate": 2.3355510204081636e-05, "loss": 1.2641, "step": 385700 }, { "epoch": 8.17, "learning_rate": 2.3335102040816327e-05, "loss": 1.2655, "step": 385800 }, { "epoch": 8.17, "learning_rate": 2.3314693877551022e-05, "loss": 1.2691, "step": 385900 }, { "epoch": 8.17, "learning_rate": 2.3294285714285717e-05, "loss": 1.2696, "step": 386000 }, { "epoch": 8.17, "learning_rate": 2.327387755102041e-05, "loss": 1.2634, "step": 386100 }, { "epoch": 8.17, "learning_rate": 2.3253469387755104e-05, "loss": 1.2582, "step": 386200 }, { "epoch": 8.18, "learning_rate": 2.3233061224489795e-05, "loss": 1.2701, "step": 386300 }, { "epoch": 8.18, "learning_rate": 2.3212857142857143e-05, "loss": 1.2726, "step": 386400 }, { "epoch": 8.18, "learning_rate": 2.3192448979591838e-05, "loss": 1.2661, "step": 386500 }, { "epoch": 8.18, "learning_rate": 2.3172244897959186e-05, "loss": 1.2686, "step": 386600 }, { "epoch": 8.18, "learning_rate": 2.315183673469388e-05, "loss": 1.2653, "step": 386700 }, { "epoch": 8.19, "learning_rate": 2.3131428571428572e-05, "loss": 1.2711, "step": 386800 }, { "epoch": 8.19, "learning_rate": 2.3111020408163267e-05, "loss": 1.2652, "step": 386900 }, { "epoch": 8.19, "learning_rate": 2.3090612244897962e-05, "loss": 1.2709, "step": 387000 }, { "epoch": 8.19, "learning_rate": 2.3070204081632654e-05, "loss": 1.2628, "step": 387100 }, { "epoch": 8.19, "learning_rate": 2.304979591836735e-05, "loss": 1.2654, "step": 387200 }, { "epoch": 8.2, "learning_rate": 2.302938775510204e-05, "loss": 1.2686, "step": 387300 }, { "epoch": 8.2, "learning_rate": 2.3008979591836735e-05, "loss": 1.2716, "step": 387400 }, { "epoch": 8.2, "learning_rate": 2.298857142857143e-05, "loss": 1.2732, "step": 387500 }, { "epoch": 8.2, "learning_rate": 2.2968163265306122e-05, "loss": 1.2631, "step": 387600 }, { "epoch": 8.21, "learning_rate": 2.294775510204082e-05, "loss": 1.2714, "step": 387700 }, { "epoch": 8.21, "learning_rate": 2.292734693877551e-05, "loss": 1.2724, "step": 387800 }, { "epoch": 8.21, "learning_rate": 2.2906938775510207e-05, "loss": 1.2706, "step": 387900 }, { "epoch": 8.21, "learning_rate": 2.2886530612244898e-05, "loss": 1.2693, "step": 388000 }, { "epoch": 8.21, "learning_rate": 2.2866122448979593e-05, "loss": 1.2707, "step": 388100 }, { "epoch": 8.22, "learning_rate": 2.2845714285714288e-05, "loss": 1.2726, "step": 388200 }, { "epoch": 8.22, "learning_rate": 2.282530612244898e-05, "loss": 1.2703, "step": 388300 }, { "epoch": 8.22, "learning_rate": 2.2804897959183675e-05, "loss": 1.2675, "step": 388400 }, { "epoch": 8.22, "learning_rate": 2.2784489795918366e-05, "loss": 1.2677, "step": 388500 }, { "epoch": 8.22, "learning_rate": 2.276408163265306e-05, "loss": 1.273, "step": 388600 }, { "epoch": 8.23, "learning_rate": 2.2743673469387756e-05, "loss": 1.2677, "step": 388700 }, { "epoch": 8.23, "learning_rate": 2.2723265306122448e-05, "loss": 1.2606, "step": 388800 }, { "epoch": 8.23, "learning_rate": 2.2702857142857146e-05, "loss": 1.2746, "step": 388900 }, { "epoch": 8.23, "learning_rate": 2.2682448979591838e-05, "loss": 1.2675, "step": 389000 }, { "epoch": 8.23, "learning_rate": 2.2662040816326533e-05, "loss": 1.2648, "step": 389100 }, { "epoch": 8.24, "learning_rate": 2.2641632653061228e-05, "loss": 1.2704, "step": 389200 }, { "epoch": 8.24, "learning_rate": 2.262122448979592e-05, "loss": 1.2692, "step": 389300 }, { "epoch": 8.24, "learning_rate": 2.2600816326530614e-05, "loss": 1.2691, "step": 389400 }, { "epoch": 8.24, "learning_rate": 2.2580408163265306e-05, "loss": 1.2689, "step": 389500 }, { "epoch": 8.25, "learning_rate": 2.256e-05, "loss": 1.2681, "step": 389600 }, { "epoch": 8.25, "learning_rate": 2.2539591836734696e-05, "loss": 1.261, "step": 389700 }, { "epoch": 8.25, "learning_rate": 2.2519183673469387e-05, "loss": 1.2676, "step": 389800 }, { "epoch": 8.25, "learning_rate": 2.2498775510204082e-05, "loss": 1.2589, "step": 389900 }, { "epoch": 8.25, "learning_rate": 2.2478367346938777e-05, "loss": 1.2675, "step": 390000 }, { "epoch": 8.26, "learning_rate": 2.2457959183673472e-05, "loss": 1.2669, "step": 390100 }, { "epoch": 8.26, "learning_rate": 2.2437551020408164e-05, "loss": 1.265, "step": 390200 }, { "epoch": 8.26, "learning_rate": 2.241714285714286e-05, "loss": 1.2673, "step": 390300 }, { "epoch": 8.26, "learning_rate": 2.2396734693877554e-05, "loss": 1.2676, "step": 390400 }, { "epoch": 8.26, "learning_rate": 2.2376326530612245e-05, "loss": 1.2659, "step": 390500 }, { "epoch": 8.27, "learning_rate": 2.2356122448979593e-05, "loss": 1.2664, "step": 390600 }, { "epoch": 8.27, "learning_rate": 2.2335714285714288e-05, "loss": 1.2708, "step": 390700 }, { "epoch": 8.27, "learning_rate": 2.231530612244898e-05, "loss": 1.2633, "step": 390800 }, { "epoch": 8.27, "learning_rate": 2.2294897959183675e-05, "loss": 1.2667, "step": 390900 }, { "epoch": 8.28, "learning_rate": 2.227448979591837e-05, "loss": 1.2632, "step": 391000 }, { "epoch": 8.28, "learning_rate": 2.225408163265306e-05, "loss": 1.2655, "step": 391100 }, { "epoch": 8.28, "learning_rate": 2.2233673469387756e-05, "loss": 1.2609, "step": 391200 }, { "epoch": 8.28, "learning_rate": 2.221326530612245e-05, "loss": 1.2611, "step": 391300 }, { "epoch": 8.28, "learning_rate": 2.2192857142857143e-05, "loss": 1.2707, "step": 391400 }, { "epoch": 8.29, "learning_rate": 2.2172448979591838e-05, "loss": 1.2668, "step": 391500 }, { "epoch": 8.29, "learning_rate": 2.2152040816326533e-05, "loss": 1.2655, "step": 391600 }, { "epoch": 8.29, "learning_rate": 2.2131632653061228e-05, "loss": 1.264, "step": 391700 }, { "epoch": 8.29, "learning_rate": 2.211122448979592e-05, "loss": 1.2739, "step": 391800 }, { "epoch": 8.29, "learning_rate": 2.2090816326530614e-05, "loss": 1.2578, "step": 391900 }, { "epoch": 8.3, "learning_rate": 2.207040816326531e-05, "loss": 1.2735, "step": 392000 }, { "epoch": 8.3, "learning_rate": 2.205e-05, "loss": 1.263, "step": 392100 }, { "epoch": 8.3, "learning_rate": 2.2029591836734696e-05, "loss": 1.2637, "step": 392200 }, { "epoch": 8.3, "learning_rate": 2.2009183673469387e-05, "loss": 1.2771, "step": 392300 }, { "epoch": 8.3, "learning_rate": 2.1988775510204082e-05, "loss": 1.2645, "step": 392400 }, { "epoch": 8.31, "learning_rate": 2.1968367346938777e-05, "loss": 1.2677, "step": 392500 }, { "epoch": 8.31, "learning_rate": 2.194795918367347e-05, "loss": 1.2612, "step": 392600 }, { "epoch": 8.31, "learning_rate": 2.1927551020408164e-05, "loss": 1.2616, "step": 392700 }, { "epoch": 8.31, "learning_rate": 2.190714285714286e-05, "loss": 1.2549, "step": 392800 }, { "epoch": 8.32, "learning_rate": 2.1886734693877554e-05, "loss": 1.2601, "step": 392900 }, { "epoch": 8.32, "learning_rate": 2.1866326530612245e-05, "loss": 1.2708, "step": 393000 }, { "epoch": 8.32, "learning_rate": 2.184591836734694e-05, "loss": 1.2736, "step": 393100 }, { "epoch": 8.32, "learning_rate": 2.1825510204081635e-05, "loss": 1.2712, "step": 393200 }, { "epoch": 8.32, "learning_rate": 2.1805102040816327e-05, "loss": 1.2668, "step": 393300 }, { "epoch": 8.33, "learning_rate": 2.1784693877551022e-05, "loss": 1.2512, "step": 393400 }, { "epoch": 8.33, "learning_rate": 2.1764285714285713e-05, "loss": 1.2725, "step": 393500 }, { "epoch": 8.33, "learning_rate": 2.1743877551020408e-05, "loss": 1.2645, "step": 393600 }, { "epoch": 8.33, "learning_rate": 2.1723469387755103e-05, "loss": 1.2671, "step": 393700 }, { "epoch": 8.33, "learning_rate": 2.1703061224489795e-05, "loss": 1.2671, "step": 393800 }, { "epoch": 8.34, "learning_rate": 2.1682653061224493e-05, "loss": 1.2667, "step": 393900 }, { "epoch": 8.34, "learning_rate": 2.1662244897959185e-05, "loss": 1.2628, "step": 394000 }, { "epoch": 8.34, "learning_rate": 2.1642040816326532e-05, "loss": 1.2645, "step": 394100 }, { "epoch": 8.34, "learning_rate": 2.1621632653061224e-05, "loss": 1.2584, "step": 394200 }, { "epoch": 8.34, "learning_rate": 2.160122448979592e-05, "loss": 1.2632, "step": 394300 }, { "epoch": 8.35, "learning_rate": 2.1580816326530614e-05, "loss": 1.2646, "step": 394400 }, { "epoch": 8.35, "learning_rate": 2.156040816326531e-05, "loss": 1.2643, "step": 394500 }, { "epoch": 8.35, "learning_rate": 2.154e-05, "loss": 1.2643, "step": 394600 }, { "epoch": 8.35, "learning_rate": 2.1519591836734695e-05, "loss": 1.2687, "step": 394700 }, { "epoch": 8.36, "learning_rate": 2.149918367346939e-05, "loss": 1.2673, "step": 394800 }, { "epoch": 8.36, "learning_rate": 2.1478775510204082e-05, "loss": 1.2607, "step": 394900 }, { "epoch": 8.36, "learning_rate": 2.1458367346938777e-05, "loss": 1.2668, "step": 395000 }, { "epoch": 8.36, "learning_rate": 2.143795918367347e-05, "loss": 1.2635, "step": 395100 }, { "epoch": 8.36, "learning_rate": 2.1417551020408163e-05, "loss": 1.2705, "step": 395200 }, { "epoch": 8.37, "learning_rate": 2.139714285714286e-05, "loss": 1.2689, "step": 395300 }, { "epoch": 8.37, "learning_rate": 2.137673469387755e-05, "loss": 1.2594, "step": 395400 }, { "epoch": 8.37, "learning_rate": 2.135632653061225e-05, "loss": 1.2576, "step": 395500 }, { "epoch": 8.37, "learning_rate": 2.133591836734694e-05, "loss": 1.2606, "step": 395600 }, { "epoch": 8.37, "learning_rate": 2.1315510204081635e-05, "loss": 1.2633, "step": 395700 }, { "epoch": 8.38, "learning_rate": 2.1295102040816327e-05, "loss": 1.257, "step": 395800 }, { "epoch": 8.38, "learning_rate": 2.127469387755102e-05, "loss": 1.2553, "step": 395900 }, { "epoch": 8.38, "learning_rate": 2.1254285714285716e-05, "loss": 1.2625, "step": 396000 }, { "epoch": 8.38, "learning_rate": 2.1233877551020408e-05, "loss": 1.2609, "step": 396100 }, { "epoch": 8.39, "learning_rate": 2.1213673469387756e-05, "loss": 1.2607, "step": 396200 }, { "epoch": 8.39, "learning_rate": 2.119326530612245e-05, "loss": 1.2657, "step": 396300 }, { "epoch": 8.39, "learning_rate": 2.1172857142857146e-05, "loss": 1.2626, "step": 396400 }, { "epoch": 8.39, "learning_rate": 2.1152448979591837e-05, "loss": 1.27, "step": 396500 }, { "epoch": 8.39, "learning_rate": 2.1132040816326532e-05, "loss": 1.263, "step": 396600 }, { "epoch": 8.4, "learning_rate": 2.1111632653061224e-05, "loss": 1.2595, "step": 396700 }, { "epoch": 8.4, "learning_rate": 2.109122448979592e-05, "loss": 1.2583, "step": 396800 }, { "epoch": 8.4, "learning_rate": 2.1070816326530614e-05, "loss": 1.2562, "step": 396900 }, { "epoch": 8.4, "learning_rate": 2.1050408163265305e-05, "loss": 1.2682, "step": 397000 }, { "epoch": 8.4, "learning_rate": 2.103e-05, "loss": 1.2611, "step": 397100 }, { "epoch": 8.41, "learning_rate": 2.1009591836734695e-05, "loss": 1.2685, "step": 397200 }, { "epoch": 8.41, "learning_rate": 2.098918367346939e-05, "loss": 1.2673, "step": 397300 }, { "epoch": 8.41, "learning_rate": 2.0968775510204082e-05, "loss": 1.2659, "step": 397400 }, { "epoch": 8.41, "learning_rate": 2.0948367346938777e-05, "loss": 1.2589, "step": 397500 }, { "epoch": 8.41, "learning_rate": 2.0927959183673472e-05, "loss": 1.2624, "step": 397600 }, { "epoch": 8.42, "learning_rate": 2.0907551020408163e-05, "loss": 1.2723, "step": 397700 }, { "epoch": 8.42, "learning_rate": 2.0887142857142858e-05, "loss": 1.258, "step": 397800 }, { "epoch": 8.42, "learning_rate": 2.086673469387755e-05, "loss": 1.2599, "step": 397900 }, { "epoch": 8.42, "learning_rate": 2.0846326530612245e-05, "loss": 1.2705, "step": 398000 }, { "epoch": 8.43, "learning_rate": 2.082591836734694e-05, "loss": 1.2635, "step": 398100 }, { "epoch": 8.43, "learning_rate": 2.080551020408163e-05, "loss": 1.2548, "step": 398200 }, { "epoch": 8.43, "learning_rate": 2.078510204081633e-05, "loss": 1.2636, "step": 398300 }, { "epoch": 8.43, "learning_rate": 2.076469387755102e-05, "loss": 1.2629, "step": 398400 }, { "epoch": 8.43, "learning_rate": 2.0744285714285716e-05, "loss": 1.2608, "step": 398500 }, { "epoch": 8.44, "learning_rate": 2.072387755102041e-05, "loss": 1.2602, "step": 398600 }, { "epoch": 8.44, "learning_rate": 2.0703673469387755e-05, "loss": 1.2627, "step": 398700 }, { "epoch": 8.44, "learning_rate": 2.068326530612245e-05, "loss": 1.2626, "step": 398800 }, { "epoch": 8.44, "learning_rate": 2.0662857142857145e-05, "loss": 1.2648, "step": 398900 }, { "epoch": 8.44, "learning_rate": 2.0642448979591837e-05, "loss": 1.2617, "step": 399000 }, { "epoch": 8.45, "learning_rate": 2.0622040816326532e-05, "loss": 1.2639, "step": 399100 }, { "epoch": 8.45, "learning_rate": 2.0601632653061227e-05, "loss": 1.2627, "step": 399200 }, { "epoch": 8.45, "learning_rate": 2.058122448979592e-05, "loss": 1.26, "step": 399300 }, { "epoch": 8.45, "learning_rate": 2.0560816326530613e-05, "loss": 1.262, "step": 399400 }, { "epoch": 8.46, "learning_rate": 2.0540408163265305e-05, "loss": 1.2644, "step": 399500 }, { "epoch": 8.46, "learning_rate": 2.052e-05, "loss": 1.2592, "step": 399600 }, { "epoch": 8.46, "learning_rate": 2.0499591836734695e-05, "loss": 1.2655, "step": 399700 }, { "epoch": 8.46, "learning_rate": 2.0479183673469387e-05, "loss": 1.2594, "step": 399800 }, { "epoch": 8.46, "learning_rate": 2.045877551020408e-05, "loss": 1.2693, "step": 399900 }, { "epoch": 8.47, "learning_rate": 2.0438367346938777e-05, "loss": 1.258, "step": 400000 }, { "epoch": 8.47, "learning_rate": 2.041795918367347e-05, "loss": 1.2611, "step": 400100 }, { "epoch": 8.47, "learning_rate": 2.0397551020408166e-05, "loss": 1.2568, "step": 400200 }, { "epoch": 8.47, "learning_rate": 2.0377142857142858e-05, "loss": 1.2601, "step": 400300 }, { "epoch": 8.47, "learning_rate": 2.0356734693877553e-05, "loss": 1.2633, "step": 400400 }, { "epoch": 8.48, "learning_rate": 2.0336326530612245e-05, "loss": 1.264, "step": 400500 }, { "epoch": 8.48, "learning_rate": 2.031591836734694e-05, "loss": 1.259, "step": 400600 }, { "epoch": 8.48, "learning_rate": 2.0295510204081635e-05, "loss": 1.2678, "step": 400700 }, { "epoch": 8.48, "learning_rate": 2.0275102040816326e-05, "loss": 1.2579, "step": 400800 }, { "epoch": 8.48, "learning_rate": 2.025469387755102e-05, "loss": 1.2642, "step": 400900 }, { "epoch": 8.49, "learning_rate": 2.0234285714285713e-05, "loss": 1.2625, "step": 401000 }, { "epoch": 8.49, "learning_rate": 2.021408163265306e-05, "loss": 1.2733, "step": 401100 }, { "epoch": 8.49, "learning_rate": 2.0193673469387755e-05, "loss": 1.2567, "step": 401200 }, { "epoch": 8.49, "learning_rate": 2.017326530612245e-05, "loss": 1.2723, "step": 401300 }, { "epoch": 8.5, "learning_rate": 2.0152857142857142e-05, "loss": 1.2624, "step": 401400 }, { "epoch": 8.5, "learning_rate": 2.0132448979591837e-05, "loss": 1.2674, "step": 401500 }, { "epoch": 8.5, "learning_rate": 2.0112040816326532e-05, "loss": 1.259, "step": 401600 }, { "epoch": 8.5, "learning_rate": 2.0091632653061227e-05, "loss": 1.2631, "step": 401700 }, { "epoch": 8.5, "learning_rate": 2.0071224489795922e-05, "loss": 1.2579, "step": 401800 }, { "epoch": 8.51, "learning_rate": 2.0051020408163266e-05, "loss": 1.2579, "step": 401900 }, { "epoch": 8.51, "learning_rate": 2.0030612244897957e-05, "loss": 1.268, "step": 402000 }, { "epoch": 8.51, "learning_rate": 2.0010204081632656e-05, "loss": 1.2664, "step": 402100 }, { "epoch": 8.51, "learning_rate": 1.9989795918367347e-05, "loss": 1.2609, "step": 402200 }, { "epoch": 8.51, "learning_rate": 1.9969387755102042e-05, "loss": 1.2514, "step": 402300 }, { "epoch": 8.52, "learning_rate": 1.9948979591836737e-05, "loss": 1.2603, "step": 402400 }, { "epoch": 8.52, "learning_rate": 1.992857142857143e-05, "loss": 1.2527, "step": 402500 }, { "epoch": 8.52, "learning_rate": 1.9908163265306124e-05, "loss": 1.2599, "step": 402600 }, { "epoch": 8.52, "learning_rate": 1.9887755102040816e-05, "loss": 1.2546, "step": 402700 }, { "epoch": 8.52, "learning_rate": 1.986734693877551e-05, "loss": 1.2524, "step": 402800 }, { "epoch": 8.53, "learning_rate": 1.9846938775510205e-05, "loss": 1.263, "step": 402900 }, { "epoch": 8.53, "learning_rate": 1.9826530612244897e-05, "loss": 1.2614, "step": 403000 }, { "epoch": 8.53, "learning_rate": 1.9806122448979592e-05, "loss": 1.2598, "step": 403100 }, { "epoch": 8.53, "learning_rate": 1.9785714285714287e-05, "loss": 1.2624, "step": 403200 }, { "epoch": 8.54, "learning_rate": 1.9765306122448982e-05, "loss": 1.2539, "step": 403300 }, { "epoch": 8.54, "learning_rate": 1.9744897959183677e-05, "loss": 1.2634, "step": 403400 }, { "epoch": 8.54, "learning_rate": 1.972448979591837e-05, "loss": 1.261, "step": 403500 }, { "epoch": 8.54, "learning_rate": 1.9704081632653063e-05, "loss": 1.2612, "step": 403600 }, { "epoch": 8.54, "learning_rate": 1.9683673469387755e-05, "loss": 1.2622, "step": 403700 }, { "epoch": 8.55, "learning_rate": 1.966326530612245e-05, "loss": 1.2593, "step": 403800 }, { "epoch": 8.55, "learning_rate": 1.9642857142857145e-05, "loss": 1.259, "step": 403900 }, { "epoch": 8.55, "learning_rate": 1.9622448979591837e-05, "loss": 1.2549, "step": 404000 }, { "epoch": 8.55, "learning_rate": 1.960204081632653e-05, "loss": 1.2541, "step": 404100 }, { "epoch": 8.55, "learning_rate": 1.9581632653061223e-05, "loss": 1.2608, "step": 404200 }, { "epoch": 8.56, "learning_rate": 1.9561224489795918e-05, "loss": 1.2674, "step": 404300 }, { "epoch": 8.56, "learning_rate": 1.9540816326530613e-05, "loss": 1.2603, "step": 404400 }, { "epoch": 8.56, "learning_rate": 1.9520408163265308e-05, "loss": 1.2555, "step": 404500 }, { "epoch": 8.56, "learning_rate": 1.9500000000000003e-05, "loss": 1.2582, "step": 404600 }, { "epoch": 8.57, "learning_rate": 1.9479591836734695e-05, "loss": 1.2605, "step": 404700 }, { "epoch": 8.57, "learning_rate": 1.945918367346939e-05, "loss": 1.26, "step": 404800 }, { "epoch": 8.57, "learning_rate": 1.943877551020408e-05, "loss": 1.2607, "step": 404900 }, { "epoch": 8.57, "learning_rate": 1.9418367346938776e-05, "loss": 1.256, "step": 405000 }, { "epoch": 8.57, "learning_rate": 1.939795918367347e-05, "loss": 1.2628, "step": 405100 }, { "epoch": 8.58, "learning_rate": 1.9377551020408163e-05, "loss": 1.2578, "step": 405200 }, { "epoch": 8.58, "learning_rate": 1.9357142857142858e-05, "loss": 1.2588, "step": 405300 }, { "epoch": 8.58, "learning_rate": 1.933673469387755e-05, "loss": 1.2634, "step": 405400 }, { "epoch": 8.58, "learning_rate": 1.9316326530612248e-05, "loss": 1.2607, "step": 405500 }, { "epoch": 8.58, "learning_rate": 1.929591836734694e-05, "loss": 1.255, "step": 405600 }, { "epoch": 8.59, "learning_rate": 1.9275510204081634e-05, "loss": 1.2534, "step": 405700 }, { "epoch": 8.59, "learning_rate": 1.925510204081633e-05, "loss": 1.2635, "step": 405800 }, { "epoch": 8.59, "learning_rate": 1.923469387755102e-05, "loss": 1.2602, "step": 405900 }, { "epoch": 8.59, "learning_rate": 1.9214285714285716e-05, "loss": 1.2696, "step": 406000 }, { "epoch": 8.59, "learning_rate": 1.9193877551020407e-05, "loss": 1.2571, "step": 406100 }, { "epoch": 8.6, "learning_rate": 1.9173469387755102e-05, "loss": 1.2476, "step": 406200 }, { "epoch": 8.6, "learning_rate": 1.9153061224489797e-05, "loss": 1.2572, "step": 406300 }, { "epoch": 8.6, "learning_rate": 1.913265306122449e-05, "loss": 1.2598, "step": 406400 }, { "epoch": 8.6, "learning_rate": 1.9112244897959184e-05, "loss": 1.2498, "step": 406500 }, { "epoch": 8.61, "learning_rate": 1.909183673469388e-05, "loss": 1.2602, "step": 406600 }, { "epoch": 8.61, "learning_rate": 1.9071428571428574e-05, "loss": 1.2536, "step": 406700 }, { "epoch": 8.61, "learning_rate": 1.905102040816327e-05, "loss": 1.2625, "step": 406800 }, { "epoch": 8.61, "learning_rate": 1.903061224489796e-05, "loss": 1.2586, "step": 406900 }, { "epoch": 8.61, "learning_rate": 1.9010204081632655e-05, "loss": 1.2504, "step": 407000 }, { "epoch": 8.62, "learning_rate": 1.8989795918367347e-05, "loss": 1.2601, "step": 407100 }, { "epoch": 8.62, "learning_rate": 1.8969387755102042e-05, "loss": 1.258, "step": 407200 }, { "epoch": 8.62, "learning_rate": 1.8948979591836737e-05, "loss": 1.259, "step": 407300 }, { "epoch": 8.62, "learning_rate": 1.892857142857143e-05, "loss": 1.2561, "step": 407400 }, { "epoch": 8.62, "learning_rate": 1.8908163265306123e-05, "loss": 1.2664, "step": 407500 }, { "epoch": 8.63, "learning_rate": 1.8887755102040815e-05, "loss": 1.263, "step": 407600 }, { "epoch": 8.63, "learning_rate": 1.886734693877551e-05, "loss": 1.2656, "step": 407700 }, { "epoch": 8.63, "learning_rate": 1.8846938775510205e-05, "loss": 1.2585, "step": 407800 }, { "epoch": 8.63, "learning_rate": 1.8826734693877552e-05, "loss": 1.261, "step": 407900 }, { "epoch": 8.63, "learning_rate": 1.8806326530612244e-05, "loss": 1.2562, "step": 408000 }, { "epoch": 8.64, "learning_rate": 1.878591836734694e-05, "loss": 1.2523, "step": 408100 }, { "epoch": 8.64, "learning_rate": 1.876551020408163e-05, "loss": 1.2603, "step": 408200 }, { "epoch": 8.64, "learning_rate": 1.874510204081633e-05, "loss": 1.261, "step": 408300 }, { "epoch": 8.64, "learning_rate": 1.8724693877551024e-05, "loss": 1.2556, "step": 408400 }, { "epoch": 8.65, "learning_rate": 1.8704285714285716e-05, "loss": 1.2585, "step": 408500 }, { "epoch": 8.65, "learning_rate": 1.868387755102041e-05, "loss": 1.2595, "step": 408600 }, { "epoch": 8.65, "learning_rate": 1.8663673469387755e-05, "loss": 1.2546, "step": 408700 }, { "epoch": 8.65, "learning_rate": 1.864326530612245e-05, "loss": 1.2516, "step": 408800 }, { "epoch": 8.65, "learning_rate": 1.8622857142857145e-05, "loss": 1.2588, "step": 408900 }, { "epoch": 8.66, "learning_rate": 1.860244897959184e-05, "loss": 1.2629, "step": 409000 }, { "epoch": 8.66, "learning_rate": 1.858204081632653e-05, "loss": 1.2602, "step": 409100 }, { "epoch": 8.66, "learning_rate": 1.8561632653061226e-05, "loss": 1.2556, "step": 409200 }, { "epoch": 8.66, "learning_rate": 1.8541224489795918e-05, "loss": 1.2625, "step": 409300 }, { "epoch": 8.66, "learning_rate": 1.8520816326530613e-05, "loss": 1.2564, "step": 409400 }, { "epoch": 8.67, "learning_rate": 1.8500408163265308e-05, "loss": 1.2562, "step": 409500 }, { "epoch": 8.67, "learning_rate": 1.848e-05, "loss": 1.2538, "step": 409600 }, { "epoch": 8.67, "learning_rate": 1.8459591836734694e-05, "loss": 1.2593, "step": 409700 }, { "epoch": 8.67, "learning_rate": 1.8439183673469386e-05, "loss": 1.2531, "step": 409800 }, { "epoch": 8.68, "learning_rate": 1.8418775510204084e-05, "loss": 1.2618, "step": 409900 }, { "epoch": 8.68, "learning_rate": 1.8398367346938776e-05, "loss": 1.255, "step": 410000 }, { "epoch": 8.68, "learning_rate": 1.837795918367347e-05, "loss": 1.255, "step": 410100 }, { "epoch": 8.68, "learning_rate": 1.8357551020408166e-05, "loss": 1.2565, "step": 410200 }, { "epoch": 8.68, "learning_rate": 1.8337142857142857e-05, "loss": 1.25, "step": 410300 }, { "epoch": 8.69, "learning_rate": 1.8316734693877552e-05, "loss": 1.2487, "step": 410400 }, { "epoch": 8.69, "learning_rate": 1.8296326530612247e-05, "loss": 1.2644, "step": 410500 }, { "epoch": 8.69, "learning_rate": 1.827591836734694e-05, "loss": 1.2544, "step": 410600 }, { "epoch": 8.69, "learning_rate": 1.8255510204081634e-05, "loss": 1.2551, "step": 410700 }, { "epoch": 8.69, "learning_rate": 1.8235102040816325e-05, "loss": 1.2503, "step": 410800 }, { "epoch": 8.7, "learning_rate": 1.821469387755102e-05, "loss": 1.2652, "step": 410900 }, { "epoch": 8.7, "learning_rate": 1.8194285714285715e-05, "loss": 1.2552, "step": 411000 }, { "epoch": 8.7, "learning_rate": 1.817387755102041e-05, "loss": 1.2582, "step": 411100 }, { "epoch": 8.7, "learning_rate": 1.8153469387755105e-05, "loss": 1.2576, "step": 411200 }, { "epoch": 8.7, "learning_rate": 1.8133061224489797e-05, "loss": 1.2581, "step": 411300 }, { "epoch": 8.71, "learning_rate": 1.8112653061224492e-05, "loss": 1.2441, "step": 411400 }, { "epoch": 8.71, "learning_rate": 1.8092244897959183e-05, "loss": 1.2618, "step": 411500 }, { "epoch": 8.71, "learning_rate": 1.807183673469388e-05, "loss": 1.2513, "step": 411600 }, { "epoch": 8.71, "learning_rate": 1.8051428571428573e-05, "loss": 1.2618, "step": 411700 }, { "epoch": 8.72, "learning_rate": 1.8031020408163265e-05, "loss": 1.2569, "step": 411800 }, { "epoch": 8.72, "learning_rate": 1.801061224489796e-05, "loss": 1.2483, "step": 411900 }, { "epoch": 8.72, "learning_rate": 1.799020408163265e-05, "loss": 1.2642, "step": 412000 }, { "epoch": 8.72, "learning_rate": 1.7969795918367346e-05, "loss": 1.2622, "step": 412100 }, { "epoch": 8.72, "learning_rate": 1.794938775510204e-05, "loss": 1.2524, "step": 412200 }, { "epoch": 8.73, "learning_rate": 1.7928979591836736e-05, "loss": 1.2497, "step": 412300 }, { "epoch": 8.73, "learning_rate": 1.790857142857143e-05, "loss": 1.2536, "step": 412400 }, { "epoch": 8.73, "learning_rate": 1.7888163265306123e-05, "loss": 1.2561, "step": 412500 }, { "epoch": 8.73, "learning_rate": 1.7867755102040818e-05, "loss": 1.2552, "step": 412600 }, { "epoch": 8.73, "learning_rate": 1.7847551020408165e-05, "loss": 1.2535, "step": 412700 }, { "epoch": 8.74, "learning_rate": 1.782714285714286e-05, "loss": 1.2486, "step": 412800 }, { "epoch": 8.74, "learning_rate": 1.7806734693877552e-05, "loss": 1.2536, "step": 412900 }, { "epoch": 8.74, "learning_rate": 1.7786326530612247e-05, "loss": 1.2563, "step": 413000 }, { "epoch": 8.74, "learning_rate": 1.776591836734694e-05, "loss": 1.2471, "step": 413100 }, { "epoch": 8.74, "learning_rate": 1.7745510204081634e-05, "loss": 1.2577, "step": 413200 }, { "epoch": 8.75, "learning_rate": 1.772530612244898e-05, "loss": 1.2535, "step": 413300 }, { "epoch": 8.75, "learning_rate": 1.7704897959183676e-05, "loss": 1.247, "step": 413400 }, { "epoch": 8.75, "learning_rate": 1.7684489795918368e-05, "loss": 1.2554, "step": 413500 }, { "epoch": 8.75, "learning_rate": 1.7664081632653063e-05, "loss": 1.2545, "step": 413600 }, { "epoch": 8.76, "learning_rate": 1.7643673469387754e-05, "loss": 1.26, "step": 413700 }, { "epoch": 8.76, "learning_rate": 1.762326530612245e-05, "loss": 1.2521, "step": 413800 }, { "epoch": 8.76, "learning_rate": 1.7602857142857144e-05, "loss": 1.2505, "step": 413900 }, { "epoch": 8.76, "learning_rate": 1.7582448979591836e-05, "loss": 1.2529, "step": 414000 }, { "epoch": 8.76, "learning_rate": 1.756204081632653e-05, "loss": 1.2488, "step": 414100 }, { "epoch": 8.77, "learning_rate": 1.7541632653061226e-05, "loss": 1.2591, "step": 414200 }, { "epoch": 8.77, "learning_rate": 1.752122448979592e-05, "loss": 1.2518, "step": 414300 }, { "epoch": 8.77, "learning_rate": 1.7500816326530616e-05, "loss": 1.2535, "step": 414400 }, { "epoch": 8.77, "learning_rate": 1.7480408163265307e-05, "loss": 1.2596, "step": 414500 }, { "epoch": 8.77, "learning_rate": 1.7460000000000002e-05, "loss": 1.2469, "step": 414600 }, { "epoch": 8.78, "learning_rate": 1.7439591836734694e-05, "loss": 1.257, "step": 414700 }, { "epoch": 8.78, "learning_rate": 1.741918367346939e-05, "loss": 1.2585, "step": 414800 }, { "epoch": 8.78, "learning_rate": 1.7398775510204084e-05, "loss": 1.2557, "step": 414900 }, { "epoch": 8.78, "learning_rate": 1.7378367346938775e-05, "loss": 1.2614, "step": 415000 }, { "epoch": 8.79, "learning_rate": 1.735795918367347e-05, "loss": 1.259, "step": 415100 }, { "epoch": 8.79, "learning_rate": 1.7337551020408162e-05, "loss": 1.2583, "step": 415200 }, { "epoch": 8.79, "learning_rate": 1.731734693877551e-05, "loss": 1.25, "step": 415300 }, { "epoch": 8.79, "learning_rate": 1.7296938775510204e-05, "loss": 1.2558, "step": 415400 }, { "epoch": 8.79, "learning_rate": 1.72765306122449e-05, "loss": 1.2593, "step": 415500 }, { "epoch": 8.8, "learning_rate": 1.725612244897959e-05, "loss": 1.2559, "step": 415600 }, { "epoch": 8.8, "learning_rate": 1.7235714285714286e-05, "loss": 1.2592, "step": 415700 }, { "epoch": 8.8, "learning_rate": 1.7215306122448978e-05, "loss": 1.2569, "step": 415800 }, { "epoch": 8.8, "learning_rate": 1.7194897959183676e-05, "loss": 1.2492, "step": 415900 }, { "epoch": 8.8, "learning_rate": 1.717448979591837e-05, "loss": 1.2555, "step": 416000 }, { "epoch": 8.81, "learning_rate": 1.7154081632653062e-05, "loss": 1.26, "step": 416100 }, { "epoch": 8.81, "learning_rate": 1.7133673469387757e-05, "loss": 1.2585, "step": 416200 }, { "epoch": 8.81, "learning_rate": 1.711326530612245e-05, "loss": 1.251, "step": 416300 }, { "epoch": 8.81, "learning_rate": 1.7092857142857144e-05, "loss": 1.2587, "step": 416400 }, { "epoch": 8.81, "learning_rate": 1.707244897959184e-05, "loss": 1.2496, "step": 416500 }, { "epoch": 8.82, "learning_rate": 1.705204081632653e-05, "loss": 1.2498, "step": 416600 }, { "epoch": 8.82, "learning_rate": 1.7031632653061226e-05, "loss": 1.2545, "step": 416700 }, { "epoch": 8.82, "learning_rate": 1.7011224489795917e-05, "loss": 1.2533, "step": 416800 }, { "epoch": 8.82, "learning_rate": 1.6990816326530612e-05, "loss": 1.2556, "step": 416900 }, { "epoch": 8.83, "learning_rate": 1.6970408163265307e-05, "loss": 1.2513, "step": 417000 }, { "epoch": 8.83, "learning_rate": 1.6950000000000002e-05, "loss": 1.2494, "step": 417100 }, { "epoch": 8.83, "learning_rate": 1.6929591836734697e-05, "loss": 1.257, "step": 417200 }, { "epoch": 8.83, "learning_rate": 1.690918367346939e-05, "loss": 1.2473, "step": 417300 }, { "epoch": 8.83, "learning_rate": 1.6888775510204084e-05, "loss": 1.2577, "step": 417400 }, { "epoch": 8.84, "learning_rate": 1.6868367346938775e-05, "loss": 1.251, "step": 417500 }, { "epoch": 8.84, "learning_rate": 1.684795918367347e-05, "loss": 1.2515, "step": 417600 }, { "epoch": 8.84, "learning_rate": 1.6827551020408165e-05, "loss": 1.2551, "step": 417700 }, { "epoch": 8.84, "learning_rate": 1.6807142857142857e-05, "loss": 1.2595, "step": 417800 }, { "epoch": 8.84, "learning_rate": 1.678673469387755e-05, "loss": 1.2507, "step": 417900 }, { "epoch": 8.85, "learning_rate": 1.6766326530612243e-05, "loss": 1.2466, "step": 418000 }, { "epoch": 8.85, "learning_rate": 1.6745918367346938e-05, "loss": 1.2532, "step": 418100 }, { "epoch": 8.85, "learning_rate": 1.6725714285714286e-05, "loss": 1.241, "step": 418200 }, { "epoch": 8.85, "learning_rate": 1.670530612244898e-05, "loss": 1.2578, "step": 418300 }, { "epoch": 8.86, "learning_rate": 1.6684897959183672e-05, "loss": 1.2522, "step": 418400 }, { "epoch": 8.86, "learning_rate": 1.6664489795918367e-05, "loss": 1.2526, "step": 418500 }, { "epoch": 8.86, "learning_rate": 1.6644081632653062e-05, "loss": 1.2487, "step": 418600 }, { "epoch": 8.86, "learning_rate": 1.6623673469387757e-05, "loss": 1.2462, "step": 418700 }, { "epoch": 8.86, "learning_rate": 1.6603265306122452e-05, "loss": 1.2541, "step": 418800 }, { "epoch": 8.87, "learning_rate": 1.6582857142857144e-05, "loss": 1.2481, "step": 418900 }, { "epoch": 8.87, "learning_rate": 1.656244897959184e-05, "loss": 1.2549, "step": 419000 }, { "epoch": 8.87, "learning_rate": 1.654204081632653e-05, "loss": 1.252, "step": 419100 }, { "epoch": 8.87, "learning_rate": 1.6521632653061225e-05, "loss": 1.2552, "step": 419200 }, { "epoch": 8.87, "learning_rate": 1.650122448979592e-05, "loss": 1.2527, "step": 419300 }, { "epoch": 8.88, "learning_rate": 1.6480816326530612e-05, "loss": 1.2525, "step": 419400 }, { "epoch": 8.88, "learning_rate": 1.6460408163265307e-05, "loss": 1.2543, "step": 419500 }, { "epoch": 8.88, "learning_rate": 1.644e-05, "loss": 1.2489, "step": 419600 }, { "epoch": 8.88, "learning_rate": 1.6419591836734693e-05, "loss": 1.2588, "step": 419700 }, { "epoch": 8.88, "learning_rate": 1.639918367346939e-05, "loss": 1.2491, "step": 419800 }, { "epoch": 8.89, "learning_rate": 1.6378775510204083e-05, "loss": 1.2531, "step": 419900 }, { "epoch": 8.89, "learning_rate": 1.635836734693878e-05, "loss": 1.2512, "step": 420000 }, { "epoch": 8.89, "learning_rate": 1.633795918367347e-05, "loss": 1.2617, "step": 420100 }, { "epoch": 8.89, "learning_rate": 1.6317551020408165e-05, "loss": 1.252, "step": 420200 }, { "epoch": 8.9, "learning_rate": 1.6297142857142856e-05, "loss": 1.2526, "step": 420300 }, { "epoch": 8.9, "learning_rate": 1.627673469387755e-05, "loss": 1.2501, "step": 420400 }, { "epoch": 8.9, "learning_rate": 1.6256326530612246e-05, "loss": 1.25, "step": 420500 }, { "epoch": 8.9, "learning_rate": 1.6235918367346938e-05, "loss": 1.2583, "step": 420600 }, { "epoch": 8.9, "learning_rate": 1.6215510204081633e-05, "loss": 1.2578, "step": 420700 }, { "epoch": 8.91, "learning_rate": 1.6195102040816325e-05, "loss": 1.2493, "step": 420800 }, { "epoch": 8.91, "learning_rate": 1.617469387755102e-05, "loss": 1.2547, "step": 420900 }, { "epoch": 8.91, "learning_rate": 1.6154285714285718e-05, "loss": 1.2509, "step": 421000 }, { "epoch": 8.91, "learning_rate": 1.613387755102041e-05, "loss": 1.2547, "step": 421100 }, { "epoch": 8.91, "learning_rate": 1.6113469387755104e-05, "loss": 1.2496, "step": 421200 }, { "epoch": 8.92, "learning_rate": 1.6093061224489796e-05, "loss": 1.2533, "step": 421300 }, { "epoch": 8.92, "learning_rate": 1.607265306122449e-05, "loss": 1.2485, "step": 421400 }, { "epoch": 8.92, "learning_rate": 1.6052244897959186e-05, "loss": 1.246, "step": 421500 }, { "epoch": 8.92, "learning_rate": 1.6031836734693878e-05, "loss": 1.2499, "step": 421600 }, { "epoch": 8.92, "learning_rate": 1.6011428571428573e-05, "loss": 1.2493, "step": 421700 }, { "epoch": 8.93, "learning_rate": 1.5991020408163264e-05, "loss": 1.2488, "step": 421800 }, { "epoch": 8.93, "learning_rate": 1.597061224489796e-05, "loss": 1.2498, "step": 421900 }, { "epoch": 8.93, "learning_rate": 1.5950204081632654e-05, "loss": 1.2426, "step": 422000 }, { "epoch": 8.93, "learning_rate": 1.592979591836735e-05, "loss": 1.2524, "step": 422100 }, { "epoch": 8.94, "learning_rate": 1.5909387755102044e-05, "loss": 1.2535, "step": 422200 }, { "epoch": 8.94, "learning_rate": 1.5889183673469388e-05, "loss": 1.2581, "step": 422300 }, { "epoch": 8.94, "learning_rate": 1.586877551020408e-05, "loss": 1.252, "step": 422400 }, { "epoch": 8.94, "learning_rate": 1.5848367346938775e-05, "loss": 1.2556, "step": 422500 }, { "epoch": 8.94, "learning_rate": 1.582795918367347e-05, "loss": 1.252, "step": 422600 }, { "epoch": 8.95, "learning_rate": 1.5807551020408165e-05, "loss": 1.2431, "step": 422700 }, { "epoch": 8.95, "learning_rate": 1.578714285714286e-05, "loss": 1.2529, "step": 422800 }, { "epoch": 8.95, "learning_rate": 1.576673469387755e-05, "loss": 1.2541, "step": 422900 }, { "epoch": 8.95, "learning_rate": 1.57465306122449e-05, "loss": 1.2466, "step": 423000 }, { "epoch": 8.95, "learning_rate": 1.5726122448979594e-05, "loss": 1.252, "step": 423100 }, { "epoch": 8.96, "learning_rate": 1.570571428571429e-05, "loss": 1.2509, "step": 423200 }, { "epoch": 8.96, "learning_rate": 1.568530612244898e-05, "loss": 1.2527, "step": 423300 }, { "epoch": 8.96, "learning_rate": 1.5664897959183675e-05, "loss": 1.2534, "step": 423400 }, { "epoch": 8.96, "learning_rate": 1.5644489795918367e-05, "loss": 1.2526, "step": 423500 }, { "epoch": 8.97, "learning_rate": 1.5624081632653062e-05, "loss": 1.2479, "step": 423600 }, { "epoch": 8.97, "learning_rate": 1.5603673469387757e-05, "loss": 1.2484, "step": 423700 }, { "epoch": 8.97, "learning_rate": 1.558326530612245e-05, "loss": 1.2543, "step": 423800 }, { "epoch": 8.97, "learning_rate": 1.5562857142857143e-05, "loss": 1.2522, "step": 423900 }, { "epoch": 8.97, "learning_rate": 1.5542448979591835e-05, "loss": 1.2415, "step": 424000 }, { "epoch": 8.98, "learning_rate": 1.552204081632653e-05, "loss": 1.2497, "step": 424100 }, { "epoch": 8.98, "learning_rate": 1.5501632653061225e-05, "loss": 1.2466, "step": 424200 }, { "epoch": 8.98, "learning_rate": 1.548122448979592e-05, "loss": 1.2482, "step": 424300 }, { "epoch": 8.98, "learning_rate": 1.5460816326530615e-05, "loss": 1.2536, "step": 424400 }, { "epoch": 8.98, "learning_rate": 1.5440408163265306e-05, "loss": 1.254, "step": 424500 }, { "epoch": 8.99, "learning_rate": 1.542e-05, "loss": 1.2451, "step": 424600 }, { "epoch": 8.99, "learning_rate": 1.5399591836734696e-05, "loss": 1.2566, "step": 424700 }, { "epoch": 8.99, "learning_rate": 1.5379183673469388e-05, "loss": 1.2447, "step": 424800 }, { "epoch": 8.99, "learning_rate": 1.5358775510204083e-05, "loss": 1.2508, "step": 424900 }, { "epoch": 8.99, "learning_rate": 1.5338367346938775e-05, "loss": 1.2528, "step": 425000 }, { "epoch": 9.0, "learning_rate": 1.531795918367347e-05, "loss": 1.2482, "step": 425100 }, { "epoch": 9.0, "learning_rate": 1.5297551020408164e-05, "loss": 1.2503, "step": 425200 }, { "epoch": 9.0, "learning_rate": 1.5277142857142856e-05, "loss": 1.2447, "step": 425300 }, { "epoch": 9.0, "learning_rate": 1.5256734693877553e-05, "loss": 1.2446, "step": 425400 }, { "epoch": 9.01, "learning_rate": 1.5236326530612246e-05, "loss": 1.2468, "step": 425500 }, { "epoch": 9.01, "learning_rate": 1.5215918367346941e-05, "loss": 1.2499, "step": 425600 }, { "epoch": 9.01, "learning_rate": 1.5195510204081634e-05, "loss": 1.24, "step": 425700 }, { "epoch": 9.01, "learning_rate": 1.5175102040816328e-05, "loss": 1.2441, "step": 425800 }, { "epoch": 9.01, "learning_rate": 1.515469387755102e-05, "loss": 1.2437, "step": 425900 }, { "epoch": 9.02, "learning_rate": 1.5134285714285714e-05, "loss": 1.2485, "step": 426000 }, { "epoch": 9.02, "learning_rate": 1.5113877551020409e-05, "loss": 1.2501, "step": 426100 }, { "epoch": 9.02, "learning_rate": 1.5093469387755102e-05, "loss": 1.249, "step": 426200 }, { "epoch": 9.02, "learning_rate": 1.5073061224489796e-05, "loss": 1.2437, "step": 426300 }, { "epoch": 9.02, "learning_rate": 1.5052653061224489e-05, "loss": 1.244, "step": 426400 }, { "epoch": 9.03, "learning_rate": 1.5032244897959186e-05, "loss": 1.2462, "step": 426500 }, { "epoch": 9.03, "learning_rate": 1.5011836734693879e-05, "loss": 1.2543, "step": 426600 }, { "epoch": 9.03, "learning_rate": 1.4991428571428572e-05, "loss": 1.2522, "step": 426700 }, { "epoch": 9.03, "learning_rate": 1.4971020408163267e-05, "loss": 1.2431, "step": 426800 }, { "epoch": 9.03, "learning_rate": 1.495061224489796e-05, "loss": 1.2455, "step": 426900 }, { "epoch": 9.04, "learning_rate": 1.4930408163265308e-05, "loss": 1.2441, "step": 427000 }, { "epoch": 9.04, "learning_rate": 1.4910000000000001e-05, "loss": 1.2413, "step": 427100 }, { "epoch": 9.04, "learning_rate": 1.4889591836734696e-05, "loss": 1.2441, "step": 427200 }, { "epoch": 9.04, "learning_rate": 1.486918367346939e-05, "loss": 1.2464, "step": 427300 }, { "epoch": 9.05, "learning_rate": 1.4848775510204083e-05, "loss": 1.2408, "step": 427400 }, { "epoch": 9.05, "learning_rate": 1.4828367346938776e-05, "loss": 1.2466, "step": 427500 }, { "epoch": 9.05, "learning_rate": 1.480795918367347e-05, "loss": 1.2478, "step": 427600 }, { "epoch": 9.05, "learning_rate": 1.4787551020408164e-05, "loss": 1.2459, "step": 427700 }, { "epoch": 9.05, "learning_rate": 1.4767142857142858e-05, "loss": 1.2411, "step": 427800 }, { "epoch": 9.06, "learning_rate": 1.4746734693877551e-05, "loss": 1.2473, "step": 427900 }, { "epoch": 9.06, "learning_rate": 1.4726326530612244e-05, "loss": 1.2414, "step": 428000 }, { "epoch": 9.06, "learning_rate": 1.470591836734694e-05, "loss": 1.2479, "step": 428100 }, { "epoch": 9.06, "learning_rate": 1.4685510204081634e-05, "loss": 1.2459, "step": 428200 }, { "epoch": 9.06, "learning_rate": 1.4665102040816327e-05, "loss": 1.2477, "step": 428300 }, { "epoch": 9.07, "learning_rate": 1.4644693877551022e-05, "loss": 1.2413, "step": 428400 }, { "epoch": 9.07, "learning_rate": 1.4624285714285716e-05, "loss": 1.2492, "step": 428500 }, { "epoch": 9.07, "learning_rate": 1.4603877551020409e-05, "loss": 1.2411, "step": 428600 }, { "epoch": 9.07, "learning_rate": 1.4583469387755102e-05, "loss": 1.2534, "step": 428700 }, { "epoch": 9.08, "learning_rate": 1.4563061224489795e-05, "loss": 1.2448, "step": 428800 }, { "epoch": 9.08, "learning_rate": 1.454265306122449e-05, "loss": 1.2504, "step": 428900 }, { "epoch": 9.08, "learning_rate": 1.4522244897959184e-05, "loss": 1.2511, "step": 429000 }, { "epoch": 9.08, "learning_rate": 1.4501836734693877e-05, "loss": 1.2464, "step": 429100 }, { "epoch": 9.08, "learning_rate": 1.448142857142857e-05, "loss": 1.2422, "step": 429200 }, { "epoch": 9.09, "learning_rate": 1.446122448979592e-05, "loss": 1.2493, "step": 429300 }, { "epoch": 9.09, "learning_rate": 1.4440816326530613e-05, "loss": 1.2444, "step": 429400 }, { "epoch": 9.09, "learning_rate": 1.4420408163265306e-05, "loss": 1.2438, "step": 429500 }, { "epoch": 9.09, "learning_rate": 1.44e-05, "loss": 1.244, "step": 429600 }, { "epoch": 9.09, "learning_rate": 1.4379591836734693e-05, "loss": 1.2438, "step": 429700 }, { "epoch": 9.1, "learning_rate": 1.435918367346939e-05, "loss": 1.2469, "step": 429800 }, { "epoch": 9.1, "learning_rate": 1.4338775510204083e-05, "loss": 1.2465, "step": 429900 }, { "epoch": 9.1, "learning_rate": 1.4318367346938778e-05, "loss": 1.2478, "step": 430000 }, { "epoch": 9.1, "learning_rate": 1.429795918367347e-05, "loss": 1.2451, "step": 430100 }, { "epoch": 9.1, "learning_rate": 1.4277551020408164e-05, "loss": 1.2459, "step": 430200 }, { "epoch": 9.11, "learning_rate": 1.4257142857142857e-05, "loss": 1.2414, "step": 430300 }, { "epoch": 9.11, "learning_rate": 1.423673469387755e-05, "loss": 1.2511, "step": 430400 }, { "epoch": 9.11, "learning_rate": 1.4216326530612246e-05, "loss": 1.2425, "step": 430500 }, { "epoch": 9.11, "learning_rate": 1.4195918367346939e-05, "loss": 1.2427, "step": 430600 }, { "epoch": 9.12, "learning_rate": 1.4175510204081632e-05, "loss": 1.2444, "step": 430700 }, { "epoch": 9.12, "learning_rate": 1.4155102040816325e-05, "loss": 1.2515, "step": 430800 }, { "epoch": 9.12, "learning_rate": 1.4134693877551022e-05, "loss": 1.2433, "step": 430900 }, { "epoch": 9.12, "learning_rate": 1.4114285714285715e-05, "loss": 1.2427, "step": 431000 }, { "epoch": 9.12, "learning_rate": 1.409387755102041e-05, "loss": 1.2457, "step": 431100 }, { "epoch": 9.13, "learning_rate": 1.4073469387755104e-05, "loss": 1.2503, "step": 431200 }, { "epoch": 9.13, "learning_rate": 1.4053265306122448e-05, "loss": 1.2489, "step": 431300 }, { "epoch": 9.13, "learning_rate": 1.4032857142857145e-05, "loss": 1.2492, "step": 431400 }, { "epoch": 9.13, "learning_rate": 1.4012448979591838e-05, "loss": 1.2427, "step": 431500 }, { "epoch": 9.13, "learning_rate": 1.3992040816326533e-05, "loss": 1.2501, "step": 431600 }, { "epoch": 9.14, "learning_rate": 1.3971632653061226e-05, "loss": 1.2509, "step": 431700 }, { "epoch": 9.14, "learning_rate": 1.395122448979592e-05, "loss": 1.2448, "step": 431800 }, { "epoch": 9.14, "learning_rate": 1.3930816326530613e-05, "loss": 1.2509, "step": 431900 }, { "epoch": 9.14, "learning_rate": 1.3910408163265306e-05, "loss": 1.2475, "step": 432000 }, { "epoch": 9.14, "learning_rate": 1.389e-05, "loss": 1.2423, "step": 432100 }, { "epoch": 9.15, "learning_rate": 1.3869591836734694e-05, "loss": 1.2425, "step": 432200 }, { "epoch": 9.15, "learning_rate": 1.3849183673469387e-05, "loss": 1.2463, "step": 432300 }, { "epoch": 9.15, "learning_rate": 1.382877551020408e-05, "loss": 1.2475, "step": 432400 }, { "epoch": 9.15, "learning_rate": 1.3808367346938777e-05, "loss": 1.2456, "step": 432500 }, { "epoch": 9.16, "learning_rate": 1.378795918367347e-05, "loss": 1.2465, "step": 432600 }, { "epoch": 9.16, "learning_rate": 1.3767551020408166e-05, "loss": 1.241, "step": 432700 }, { "epoch": 9.16, "learning_rate": 1.3747142857142859e-05, "loss": 1.2522, "step": 432800 }, { "epoch": 9.16, "learning_rate": 1.3726734693877552e-05, "loss": 1.2393, "step": 432900 }, { "epoch": 9.16, "learning_rate": 1.3706326530612245e-05, "loss": 1.2474, "step": 433000 }, { "epoch": 9.17, "learning_rate": 1.3685918367346939e-05, "loss": 1.2451, "step": 433100 }, { "epoch": 9.17, "learning_rate": 1.3665510204081634e-05, "loss": 1.2423, "step": 433200 }, { "epoch": 9.17, "learning_rate": 1.3645306122448981e-05, "loss": 1.2449, "step": 433300 }, { "epoch": 9.17, "learning_rate": 1.3624897959183675e-05, "loss": 1.2491, "step": 433400 }, { "epoch": 9.17, "learning_rate": 1.3604489795918368e-05, "loss": 1.2475, "step": 433500 }, { "epoch": 9.18, "learning_rate": 1.3584081632653061e-05, "loss": 1.246, "step": 433600 }, { "epoch": 9.18, "learning_rate": 1.3563673469387756e-05, "loss": 1.2427, "step": 433700 }, { "epoch": 9.18, "learning_rate": 1.354326530612245e-05, "loss": 1.2478, "step": 433800 }, { "epoch": 9.18, "learning_rate": 1.3522857142857143e-05, "loss": 1.2471, "step": 433900 }, { "epoch": 9.19, "learning_rate": 1.3502448979591836e-05, "loss": 1.2414, "step": 434000 }, { "epoch": 9.19, "learning_rate": 1.348204081632653e-05, "loss": 1.239, "step": 434100 }, { "epoch": 9.19, "learning_rate": 1.3461632653061226e-05, "loss": 1.2398, "step": 434200 }, { "epoch": 9.19, "learning_rate": 1.344122448979592e-05, "loss": 1.247, "step": 434300 }, { "epoch": 9.19, "learning_rate": 1.3420816326530614e-05, "loss": 1.2393, "step": 434400 }, { "epoch": 9.2, "learning_rate": 1.3400408163265307e-05, "loss": 1.2441, "step": 434500 }, { "epoch": 9.2, "learning_rate": 1.338e-05, "loss": 1.2365, "step": 434600 }, { "epoch": 9.2, "learning_rate": 1.3359591836734694e-05, "loss": 1.2406, "step": 434700 }, { "epoch": 9.2, "learning_rate": 1.3339183673469389e-05, "loss": 1.2336, "step": 434800 }, { "epoch": 9.2, "learning_rate": 1.3318775510204082e-05, "loss": 1.2418, "step": 434900 }, { "epoch": 9.21, "learning_rate": 1.3298367346938775e-05, "loss": 1.2416, "step": 435000 }, { "epoch": 9.21, "learning_rate": 1.3277959183673469e-05, "loss": 1.2439, "step": 435100 }, { "epoch": 9.21, "learning_rate": 1.3257551020408162e-05, "loss": 1.245, "step": 435200 }, { "epoch": 9.21, "learning_rate": 1.3237142857142859e-05, "loss": 1.2494, "step": 435300 }, { "epoch": 9.21, "learning_rate": 1.3216938775510205e-05, "loss": 1.239, "step": 435400 }, { "epoch": 9.22, "learning_rate": 1.3196530612244898e-05, "loss": 1.2371, "step": 435500 }, { "epoch": 9.22, "learning_rate": 1.3176122448979591e-05, "loss": 1.2432, "step": 435600 }, { "epoch": 9.22, "learning_rate": 1.3155714285714284e-05, "loss": 1.2382, "step": 435700 }, { "epoch": 9.22, "learning_rate": 1.3135306122448981e-05, "loss": 1.2415, "step": 435800 }, { "epoch": 9.23, "learning_rate": 1.3114897959183674e-05, "loss": 1.2455, "step": 435900 }, { "epoch": 9.23, "learning_rate": 1.309448979591837e-05, "loss": 1.2492, "step": 436000 }, { "epoch": 9.23, "learning_rate": 1.3074081632653063e-05, "loss": 1.2541, "step": 436100 }, { "epoch": 9.23, "learning_rate": 1.3053673469387756e-05, "loss": 1.2342, "step": 436200 }, { "epoch": 9.23, "learning_rate": 1.3033265306122449e-05, "loss": 1.2483, "step": 436300 }, { "epoch": 9.24, "learning_rate": 1.3012857142857144e-05, "loss": 1.2407, "step": 436400 }, { "epoch": 9.24, "learning_rate": 1.2992448979591837e-05, "loss": 1.2419, "step": 436500 }, { "epoch": 9.24, "learning_rate": 1.297204081632653e-05, "loss": 1.2389, "step": 436600 }, { "epoch": 9.24, "learning_rate": 1.2951632653061224e-05, "loss": 1.2445, "step": 436700 }, { "epoch": 9.24, "learning_rate": 1.2931224489795917e-05, "loss": 1.2438, "step": 436800 }, { "epoch": 9.25, "learning_rate": 1.2910816326530614e-05, "loss": 1.2435, "step": 436900 }, { "epoch": 9.25, "learning_rate": 1.2890408163265307e-05, "loss": 1.2434, "step": 437000 }, { "epoch": 9.25, "learning_rate": 1.2870000000000002e-05, "loss": 1.2368, "step": 437100 }, { "epoch": 9.25, "learning_rate": 1.2849591836734695e-05, "loss": 1.2445, "step": 437200 }, { "epoch": 9.26, "learning_rate": 1.2829183673469389e-05, "loss": 1.2442, "step": 437300 }, { "epoch": 9.26, "learning_rate": 1.2808775510204082e-05, "loss": 1.2458, "step": 437400 }, { "epoch": 9.26, "learning_rate": 1.278857142857143e-05, "loss": 1.247, "step": 437500 }, { "epoch": 9.26, "learning_rate": 1.2768163265306125e-05, "loss": 1.2343, "step": 437600 }, { "epoch": 9.26, "learning_rate": 1.2747755102040818e-05, "loss": 1.2412, "step": 437700 }, { "epoch": 9.27, "learning_rate": 1.2727346938775511e-05, "loss": 1.2437, "step": 437800 }, { "epoch": 9.27, "learning_rate": 1.2706938775510204e-05, "loss": 1.2473, "step": 437900 }, { "epoch": 9.27, "learning_rate": 1.2686530612244898e-05, "loss": 1.2436, "step": 438000 }, { "epoch": 9.27, "learning_rate": 1.2666122448979593e-05, "loss": 1.2375, "step": 438100 }, { "epoch": 9.27, "learning_rate": 1.2645714285714286e-05, "loss": 1.25, "step": 438200 }, { "epoch": 9.28, "learning_rate": 1.262530612244898e-05, "loss": 1.2376, "step": 438300 }, { "epoch": 9.28, "learning_rate": 1.2604897959183672e-05, "loss": 1.2371, "step": 438400 }, { "epoch": 9.28, "learning_rate": 1.2584489795918367e-05, "loss": 1.248, "step": 438500 }, { "epoch": 9.28, "learning_rate": 1.2564081632653062e-05, "loss": 1.242, "step": 438600 }, { "epoch": 9.28, "learning_rate": 1.2543673469387757e-05, "loss": 1.2369, "step": 438700 }, { "epoch": 9.29, "learning_rate": 1.252326530612245e-05, "loss": 1.2444, "step": 438800 }, { "epoch": 9.29, "learning_rate": 1.2502857142857144e-05, "loss": 1.2433, "step": 438900 }, { "epoch": 9.29, "learning_rate": 1.2482448979591837e-05, "loss": 1.2451, "step": 439000 }, { "epoch": 9.29, "learning_rate": 1.246204081632653e-05, "loss": 1.243, "step": 439100 }, { "epoch": 9.3, "learning_rate": 1.2441632653061225e-05, "loss": 1.2411, "step": 439200 }, { "epoch": 9.3, "learning_rate": 1.2421224489795919e-05, "loss": 1.236, "step": 439300 }, { "epoch": 9.3, "learning_rate": 1.2400816326530614e-05, "loss": 1.2418, "step": 439400 }, { "epoch": 9.3, "learning_rate": 1.2380408163265307e-05, "loss": 1.2419, "step": 439500 }, { "epoch": 9.3, "learning_rate": 1.2360204081632653e-05, "loss": 1.2371, "step": 439600 }, { "epoch": 9.31, "learning_rate": 1.2339795918367348e-05, "loss": 1.249, "step": 439700 }, { "epoch": 9.31, "learning_rate": 1.2319591836734694e-05, "loss": 1.25, "step": 439800 }, { "epoch": 9.31, "learning_rate": 1.2299183673469389e-05, "loss": 1.2408, "step": 439900 }, { "epoch": 9.31, "learning_rate": 1.2278775510204082e-05, "loss": 1.2405, "step": 440000 }, { "epoch": 9.31, "learning_rate": 1.2258367346938775e-05, "loss": 1.239, "step": 440100 }, { "epoch": 9.32, "learning_rate": 1.223795918367347e-05, "loss": 1.2454, "step": 440200 }, { "epoch": 9.32, "learning_rate": 1.2217551020408163e-05, "loss": 1.2353, "step": 440300 }, { "epoch": 9.32, "learning_rate": 1.2197142857142858e-05, "loss": 1.2421, "step": 440400 }, { "epoch": 9.32, "learning_rate": 1.2176734693877552e-05, "loss": 1.2458, "step": 440500 }, { "epoch": 9.32, "learning_rate": 1.2156326530612245e-05, "loss": 1.2406, "step": 440600 }, { "epoch": 9.33, "learning_rate": 1.213591836734694e-05, "loss": 1.2481, "step": 440700 }, { "epoch": 9.33, "learning_rate": 1.2115510204081633e-05, "loss": 1.2404, "step": 440800 }, { "epoch": 9.33, "learning_rate": 1.2095102040816327e-05, "loss": 1.2408, "step": 440900 }, { "epoch": 9.33, "learning_rate": 1.2074693877551022e-05, "loss": 1.2416, "step": 441000 }, { "epoch": 9.34, "learning_rate": 1.2054285714285715e-05, "loss": 1.2391, "step": 441100 }, { "epoch": 9.34, "learning_rate": 1.2033877551020408e-05, "loss": 1.2511, "step": 441200 }, { "epoch": 9.34, "learning_rate": 1.2013469387755103e-05, "loss": 1.2394, "step": 441300 }, { "epoch": 9.34, "learning_rate": 1.1993061224489796e-05, "loss": 1.2472, "step": 441400 }, { "epoch": 9.34, "learning_rate": 1.197265306122449e-05, "loss": 1.2344, "step": 441500 }, { "epoch": 9.35, "learning_rate": 1.1952244897959185e-05, "loss": 1.2471, "step": 441600 }, { "epoch": 9.35, "learning_rate": 1.1931836734693878e-05, "loss": 1.2389, "step": 441700 }, { "epoch": 9.35, "learning_rate": 1.1911428571428573e-05, "loss": 1.2424, "step": 441800 }, { "epoch": 9.35, "learning_rate": 1.1891020408163266e-05, "loss": 1.2401, "step": 441900 }, { "epoch": 9.35, "learning_rate": 1.187061224489796e-05, "loss": 1.2402, "step": 442000 }, { "epoch": 9.36, "learning_rate": 1.1850204081632654e-05, "loss": 1.239, "step": 442100 }, { "epoch": 9.36, "learning_rate": 1.1829795918367348e-05, "loss": 1.2472, "step": 442200 }, { "epoch": 9.36, "learning_rate": 1.1809387755102041e-05, "loss": 1.2464, "step": 442300 }, { "epoch": 9.36, "learning_rate": 1.1788979591836736e-05, "loss": 1.2418, "step": 442400 }, { "epoch": 9.37, "learning_rate": 1.176857142857143e-05, "loss": 1.2398, "step": 442500 }, { "epoch": 9.37, "learning_rate": 1.1748163265306122e-05, "loss": 1.2416, "step": 442600 }, { "epoch": 9.37, "learning_rate": 1.1727755102040817e-05, "loss": 1.2363, "step": 442700 }, { "epoch": 9.37, "learning_rate": 1.170734693877551e-05, "loss": 1.2457, "step": 442800 }, { "epoch": 9.37, "learning_rate": 1.1686938775510204e-05, "loss": 1.2488, "step": 442900 }, { "epoch": 9.38, "learning_rate": 1.1666530612244899e-05, "loss": 1.2418, "step": 443000 }, { "epoch": 9.38, "learning_rate": 1.1646122448979592e-05, "loss": 1.2437, "step": 443100 }, { "epoch": 9.38, "learning_rate": 1.1625714285714285e-05, "loss": 1.2398, "step": 443200 }, { "epoch": 9.38, "learning_rate": 1.160530612244898e-05, "loss": 1.2464, "step": 443300 }, { "epoch": 9.38, "learning_rate": 1.1584897959183674e-05, "loss": 1.2376, "step": 443400 }, { "epoch": 9.39, "learning_rate": 1.1564489795918369e-05, "loss": 1.247, "step": 443500 }, { "epoch": 9.39, "learning_rate": 1.1544081632653062e-05, "loss": 1.2382, "step": 443600 }, { "epoch": 9.39, "learning_rate": 1.1523673469387755e-05, "loss": 1.2397, "step": 443700 }, { "epoch": 9.39, "learning_rate": 1.150326530612245e-05, "loss": 1.2359, "step": 443800 }, { "epoch": 9.39, "learning_rate": 1.1483061224489796e-05, "loss": 1.2326, "step": 443900 }, { "epoch": 9.4, "learning_rate": 1.1462653061224491e-05, "loss": 1.2398, "step": 444000 }, { "epoch": 9.4, "learning_rate": 1.1442244897959184e-05, "loss": 1.243, "step": 444100 }, { "epoch": 9.4, "learning_rate": 1.1421836734693878e-05, "loss": 1.2349, "step": 444200 }, { "epoch": 9.4, "learning_rate": 1.1401428571428573e-05, "loss": 1.2441, "step": 444300 }, { "epoch": 9.41, "learning_rate": 1.1381020408163266e-05, "loss": 1.2426, "step": 444400 }, { "epoch": 9.41, "learning_rate": 1.136061224489796e-05, "loss": 1.2412, "step": 444500 }, { "epoch": 9.41, "learning_rate": 1.1340204081632654e-05, "loss": 1.2404, "step": 444600 }, { "epoch": 9.41, "learning_rate": 1.1319795918367347e-05, "loss": 1.2443, "step": 444700 }, { "epoch": 9.41, "learning_rate": 1.129938775510204e-05, "loss": 1.2444, "step": 444800 }, { "epoch": 9.42, "learning_rate": 1.1278979591836736e-05, "loss": 1.2364, "step": 444900 }, { "epoch": 9.42, "learning_rate": 1.1258571428571429e-05, "loss": 1.2353, "step": 445000 }, { "epoch": 9.42, "learning_rate": 1.1238163265306122e-05, "loss": 1.236, "step": 445100 }, { "epoch": 9.42, "learning_rate": 1.1217755102040817e-05, "loss": 1.2363, "step": 445200 }, { "epoch": 9.42, "learning_rate": 1.119734693877551e-05, "loss": 1.2415, "step": 445300 }, { "epoch": 9.43, "learning_rate": 1.1176938775510204e-05, "loss": 1.2381, "step": 445400 }, { "epoch": 9.43, "learning_rate": 1.1156530612244899e-05, "loss": 1.2474, "step": 445500 }, { "epoch": 9.43, "learning_rate": 1.1136122448979592e-05, "loss": 1.249, "step": 445600 }, { "epoch": 9.43, "learning_rate": 1.1115714285714287e-05, "loss": 1.2396, "step": 445700 }, { "epoch": 9.43, "learning_rate": 1.109530612244898e-05, "loss": 1.2368, "step": 445800 }, { "epoch": 9.44, "learning_rate": 1.1074897959183674e-05, "loss": 1.2354, "step": 445900 }, { "epoch": 9.44, "learning_rate": 1.1054489795918369e-05, "loss": 1.2342, "step": 446000 }, { "epoch": 9.44, "learning_rate": 1.1034081632653062e-05, "loss": 1.2404, "step": 446100 }, { "epoch": 9.44, "learning_rate": 1.1013673469387755e-05, "loss": 1.2393, "step": 446200 }, { "epoch": 9.45, "learning_rate": 1.0993469387755103e-05, "loss": 1.2364, "step": 446300 }, { "epoch": 9.45, "learning_rate": 1.0973265306122449e-05, "loss": 1.234, "step": 446400 }, { "epoch": 9.45, "learning_rate": 1.0952857142857144e-05, "loss": 1.2337, "step": 446500 }, { "epoch": 9.45, "learning_rate": 1.0932448979591837e-05, "loss": 1.2395, "step": 446600 }, { "epoch": 9.45, "learning_rate": 1.0912040816326532e-05, "loss": 1.2408, "step": 446700 }, { "epoch": 9.46, "learning_rate": 1.0891632653061225e-05, "loss": 1.2434, "step": 446800 }, { "epoch": 9.46, "learning_rate": 1.0871224489795918e-05, "loss": 1.2354, "step": 446900 }, { "epoch": 9.46, "learning_rate": 1.0850816326530613e-05, "loss": 1.2333, "step": 447000 }, { "epoch": 9.46, "learning_rate": 1.0830408163265307e-05, "loss": 1.2321, "step": 447100 }, { "epoch": 9.46, "learning_rate": 1.081e-05, "loss": 1.2326, "step": 447200 }, { "epoch": 9.47, "learning_rate": 1.0789591836734695e-05, "loss": 1.2303, "step": 447300 }, { "epoch": 9.47, "learning_rate": 1.0769183673469388e-05, "loss": 1.2372, "step": 447400 }, { "epoch": 9.47, "learning_rate": 1.0748775510204081e-05, "loss": 1.2471, "step": 447500 }, { "epoch": 9.47, "learning_rate": 1.0728367346938776e-05, "loss": 1.237, "step": 447600 }, { "epoch": 9.48, "learning_rate": 1.070795918367347e-05, "loss": 1.2402, "step": 447700 }, { "epoch": 9.48, "learning_rate": 1.0687551020408165e-05, "loss": 1.2388, "step": 447800 }, { "epoch": 9.48, "learning_rate": 1.0667142857142858e-05, "loss": 1.2264, "step": 447900 }, { "epoch": 9.48, "learning_rate": 1.0646734693877551e-05, "loss": 1.239, "step": 448000 }, { "epoch": 9.48, "learning_rate": 1.0626326530612244e-05, "loss": 1.2327, "step": 448100 }, { "epoch": 9.49, "learning_rate": 1.060591836734694e-05, "loss": 1.2407, "step": 448200 }, { "epoch": 9.49, "learning_rate": 1.0585510204081633e-05, "loss": 1.2407, "step": 448300 }, { "epoch": 9.49, "learning_rate": 1.0565102040816328e-05, "loss": 1.2353, "step": 448400 }, { "epoch": 9.49, "learning_rate": 1.0544693877551021e-05, "loss": 1.2456, "step": 448500 }, { "epoch": 9.49, "learning_rate": 1.0524285714285714e-05, "loss": 1.2371, "step": 448600 }, { "epoch": 9.5, "learning_rate": 1.050387755102041e-05, "loss": 1.2392, "step": 448700 }, { "epoch": 9.5, "learning_rate": 1.0483469387755102e-05, "loss": 1.2384, "step": 448800 }, { "epoch": 9.5, "learning_rate": 1.0463061224489797e-05, "loss": 1.2366, "step": 448900 }, { "epoch": 9.5, "learning_rate": 1.044265306122449e-05, "loss": 1.2367, "step": 449000 }, { "epoch": 9.5, "learning_rate": 1.0422244897959184e-05, "loss": 1.2388, "step": 449100 }, { "epoch": 9.51, "learning_rate": 1.0401836734693877e-05, "loss": 1.2501, "step": 449200 }, { "epoch": 9.51, "learning_rate": 1.0381428571428572e-05, "loss": 1.2356, "step": 449300 }, { "epoch": 9.51, "learning_rate": 1.0361020408163266e-05, "loss": 1.2407, "step": 449400 }, { "epoch": 9.51, "learning_rate": 1.034061224489796e-05, "loss": 1.2405, "step": 449500 }, { "epoch": 9.52, "learning_rate": 1.0320204081632654e-05, "loss": 1.2381, "step": 449600 }, { "epoch": 9.52, "learning_rate": 1.0299795918367347e-05, "loss": 1.2382, "step": 449700 }, { "epoch": 9.52, "learning_rate": 1.027938775510204e-05, "loss": 1.2388, "step": 449800 }, { "epoch": 9.52, "learning_rate": 1.0258979591836735e-05, "loss": 1.2367, "step": 449900 }, { "epoch": 9.52, "learning_rate": 1.0238571428571429e-05, "loss": 1.2336, "step": 450000 }, { "epoch": 9.53, "learning_rate": 1.0218163265306124e-05, "loss": 1.239, "step": 450100 }, { "epoch": 9.53, "learning_rate": 1.0197755102040817e-05, "loss": 1.2423, "step": 450200 }, { "epoch": 9.53, "learning_rate": 1.017734693877551e-05, "loss": 1.2413, "step": 450300 }, { "epoch": 9.53, "learning_rate": 1.0157142857142858e-05, "loss": 1.2273, "step": 450400 }, { "epoch": 9.53, "learning_rate": 1.0136734693877551e-05, "loss": 1.2404, "step": 450500 }, { "epoch": 9.54, "learning_rate": 1.0116326530612246e-05, "loss": 1.2397, "step": 450600 }, { "epoch": 9.54, "learning_rate": 1.009591836734694e-05, "loss": 1.2398, "step": 450700 }, { "epoch": 9.54, "learning_rate": 1.0075510204081632e-05, "loss": 1.237, "step": 450800 }, { "epoch": 9.54, "learning_rate": 1.0055102040816327e-05, "loss": 1.238, "step": 450900 }, { "epoch": 9.54, "learning_rate": 1.003469387755102e-05, "loss": 1.2335, "step": 451000 }, { "epoch": 9.55, "learning_rate": 1.0014285714285716e-05, "loss": 1.2384, "step": 451100 }, { "epoch": 9.55, "learning_rate": 9.993877551020409e-06, "loss": 1.2306, "step": 451200 }, { "epoch": 9.55, "learning_rate": 9.973469387755102e-06, "loss": 1.2378, "step": 451300 }, { "epoch": 9.55, "learning_rate": 9.953061224489796e-06, "loss": 1.2386, "step": 451400 }, { "epoch": 9.56, "learning_rate": 9.93265306122449e-06, "loss": 1.2336, "step": 451500 }, { "epoch": 9.56, "learning_rate": 9.912244897959184e-06, "loss": 1.2373, "step": 451600 }, { "epoch": 9.56, "learning_rate": 9.891836734693879e-06, "loss": 1.2421, "step": 451700 }, { "epoch": 9.56, "learning_rate": 9.871428571428572e-06, "loss": 1.249, "step": 451800 }, { "epoch": 9.56, "learning_rate": 9.851020408163265e-06, "loss": 1.2417, "step": 451900 }, { "epoch": 9.57, "learning_rate": 9.830612244897959e-06, "loss": 1.232, "step": 452000 }, { "epoch": 9.57, "learning_rate": 9.810204081632654e-06, "loss": 1.2359, "step": 452100 }, { "epoch": 9.57, "learning_rate": 9.789795918367347e-06, "loss": 1.2358, "step": 452200 }, { "epoch": 9.57, "learning_rate": 9.769387755102042e-06, "loss": 1.236, "step": 452300 }, { "epoch": 9.57, "learning_rate": 9.748979591836735e-06, "loss": 1.2347, "step": 452400 }, { "epoch": 9.58, "learning_rate": 9.728571428571428e-06, "loss": 1.2311, "step": 452500 }, { "epoch": 9.58, "learning_rate": 9.708163265306123e-06, "loss": 1.2371, "step": 452600 }, { "epoch": 9.58, "learning_rate": 9.687959183673471e-06, "loss": 1.2427, "step": 452700 }, { "epoch": 9.58, "learning_rate": 9.667551020408164e-06, "loss": 1.2357, "step": 452800 }, { "epoch": 9.59, "learning_rate": 9.647142857142857e-06, "loss": 1.2333, "step": 452900 }, { "epoch": 9.59, "learning_rate": 9.62673469387755e-06, "loss": 1.2403, "step": 453000 }, { "epoch": 9.59, "learning_rate": 9.606326530612246e-06, "loss": 1.2368, "step": 453100 }, { "epoch": 9.59, "learning_rate": 9.585918367346939e-06, "loss": 1.2322, "step": 453200 }, { "epoch": 9.59, "learning_rate": 9.565510204081634e-06, "loss": 1.2443, "step": 453300 }, { "epoch": 9.6, "learning_rate": 9.545102040816327e-06, "loss": 1.2406, "step": 453400 }, { "epoch": 9.6, "learning_rate": 9.52469387755102e-06, "loss": 1.2337, "step": 453500 }, { "epoch": 9.6, "learning_rate": 9.504285714285714e-06, "loss": 1.2353, "step": 453600 }, { "epoch": 9.6, "learning_rate": 9.483877551020409e-06, "loss": 1.2402, "step": 453700 }, { "epoch": 9.6, "learning_rate": 9.463469387755102e-06, "loss": 1.2325, "step": 453800 }, { "epoch": 9.61, "learning_rate": 9.443061224489797e-06, "loss": 1.2299, "step": 453900 }, { "epoch": 9.61, "learning_rate": 9.42265306122449e-06, "loss": 1.2384, "step": 454000 }, { "epoch": 9.61, "learning_rate": 9.402244897959184e-06, "loss": 1.2388, "step": 454100 }, { "epoch": 9.61, "learning_rate": 9.381836734693877e-06, "loss": 1.234, "step": 454200 }, { "epoch": 9.61, "learning_rate": 9.361428571428572e-06, "loss": 1.2338, "step": 454300 }, { "epoch": 9.62, "learning_rate": 9.341020408163267e-06, "loss": 1.2374, "step": 454400 }, { "epoch": 9.62, "learning_rate": 9.32061224489796e-06, "loss": 1.2315, "step": 454500 }, { "epoch": 9.62, "learning_rate": 9.300204081632653e-06, "loss": 1.2373, "step": 454600 }, { "epoch": 9.62, "learning_rate": 9.28e-06, "loss": 1.2415, "step": 454700 }, { "epoch": 9.63, "learning_rate": 9.259591836734694e-06, "loss": 1.2385, "step": 454800 }, { "epoch": 9.63, "learning_rate": 9.23918367346939e-06, "loss": 1.2395, "step": 454900 }, { "epoch": 9.63, "learning_rate": 9.218775510204082e-06, "loss": 1.2405, "step": 455000 }, { "epoch": 9.63, "learning_rate": 9.198367346938776e-06, "loss": 1.232, "step": 455100 }, { "epoch": 9.63, "learning_rate": 9.177959183673469e-06, "loss": 1.2298, "step": 455200 }, { "epoch": 9.64, "learning_rate": 9.157551020408164e-06, "loss": 1.231, "step": 455300 }, { "epoch": 9.64, "learning_rate": 9.137142857142857e-06, "loss": 1.2352, "step": 455400 }, { "epoch": 9.64, "learning_rate": 9.116734693877552e-06, "loss": 1.2358, "step": 455500 }, { "epoch": 9.64, "learning_rate": 9.096326530612246e-06, "loss": 1.2365, "step": 455600 }, { "epoch": 9.64, "learning_rate": 9.075918367346939e-06, "loss": 1.2348, "step": 455700 }, { "epoch": 9.65, "learning_rate": 9.055510204081632e-06, "loss": 1.2391, "step": 455800 }, { "epoch": 9.65, "learning_rate": 9.035102040816327e-06, "loss": 1.2354, "step": 455900 }, { "epoch": 9.65, "learning_rate": 9.014693877551022e-06, "loss": 1.2323, "step": 456000 }, { "epoch": 9.65, "learning_rate": 8.994285714285715e-06, "loss": 1.231, "step": 456100 }, { "epoch": 9.66, "learning_rate": 8.973877551020409e-06, "loss": 1.2351, "step": 456200 }, { "epoch": 9.66, "learning_rate": 8.953469387755102e-06, "loss": 1.2354, "step": 456300 }, { "epoch": 9.66, "learning_rate": 8.933061224489795e-06, "loss": 1.2342, "step": 456400 }, { "epoch": 9.66, "learning_rate": 8.91265306122449e-06, "loss": 1.2367, "step": 456500 }, { "epoch": 9.66, "learning_rate": 8.892244897959185e-06, "loss": 1.2297, "step": 456600 }, { "epoch": 9.67, "learning_rate": 8.871836734693878e-06, "loss": 1.2352, "step": 456700 }, { "epoch": 9.67, "learning_rate": 8.851632653061224e-06, "loss": 1.2217, "step": 456800 }, { "epoch": 9.67, "learning_rate": 8.831224489795918e-06, "loss": 1.2358, "step": 456900 }, { "epoch": 9.67, "learning_rate": 8.810816326530612e-06, "loss": 1.2382, "step": 457000 }, { "epoch": 9.67, "learning_rate": 8.790408163265307e-06, "loss": 1.234, "step": 457100 }, { "epoch": 9.68, "learning_rate": 8.77e-06, "loss": 1.2403, "step": 457200 }, { "epoch": 9.68, "learning_rate": 8.749591836734694e-06, "loss": 1.2419, "step": 457300 }, { "epoch": 9.68, "learning_rate": 8.729183673469387e-06, "loss": 1.2362, "step": 457400 }, { "epoch": 9.68, "learning_rate": 8.708775510204082e-06, "loss": 1.2348, "step": 457500 }, { "epoch": 9.68, "learning_rate": 8.688367346938776e-06, "loss": 1.2304, "step": 457600 }, { "epoch": 9.69, "learning_rate": 8.66795918367347e-06, "loss": 1.2397, "step": 457700 }, { "epoch": 9.69, "learning_rate": 8.647551020408164e-06, "loss": 1.2366, "step": 457800 }, { "epoch": 9.69, "learning_rate": 8.627142857142857e-06, "loss": 1.2333, "step": 457900 }, { "epoch": 9.69, "learning_rate": 8.60673469387755e-06, "loss": 1.2369, "step": 458000 }, { "epoch": 9.7, "learning_rate": 8.586326530612245e-06, "loss": 1.2423, "step": 458100 }, { "epoch": 9.7, "learning_rate": 8.56591836734694e-06, "loss": 1.2342, "step": 458200 }, { "epoch": 9.7, "learning_rate": 8.545510204081634e-06, "loss": 1.2385, "step": 458300 }, { "epoch": 9.7, "learning_rate": 8.525102040816327e-06, "loss": 1.2378, "step": 458400 }, { "epoch": 9.7, "learning_rate": 8.50469387755102e-06, "loss": 1.2301, "step": 458500 }, { "epoch": 9.71, "learning_rate": 8.484285714285713e-06, "loss": 1.2421, "step": 458600 }, { "epoch": 9.71, "learning_rate": 8.463877551020408e-06, "loss": 1.2405, "step": 458700 }, { "epoch": 9.71, "learning_rate": 8.443673469387756e-06, "loss": 1.2233, "step": 458800 }, { "epoch": 9.71, "learning_rate": 8.42326530612245e-06, "loss": 1.2338, "step": 458900 }, { "epoch": 9.71, "learning_rate": 8.402857142857143e-06, "loss": 1.2351, "step": 459000 }, { "epoch": 9.72, "learning_rate": 8.382448979591836e-06, "loss": 1.2339, "step": 459100 }, { "epoch": 9.72, "learning_rate": 8.36204081632653e-06, "loss": 1.2354, "step": 459200 }, { "epoch": 9.72, "learning_rate": 8.341632653061226e-06, "loss": 1.2351, "step": 459300 }, { "epoch": 9.72, "learning_rate": 8.321224489795919e-06, "loss": 1.2375, "step": 459400 }, { "epoch": 9.72, "learning_rate": 8.300816326530612e-06, "loss": 1.2342, "step": 459500 }, { "epoch": 9.73, "learning_rate": 8.280408163265306e-06, "loss": 1.2346, "step": 459600 }, { "epoch": 9.73, "learning_rate": 8.26e-06, "loss": 1.2345, "step": 459700 }, { "epoch": 9.73, "learning_rate": 8.239591836734696e-06, "loss": 1.235, "step": 459800 }, { "epoch": 9.73, "learning_rate": 8.219183673469389e-06, "loss": 1.2278, "step": 459900 }, { "epoch": 9.74, "learning_rate": 8.198775510204082e-06, "loss": 1.2402, "step": 460000 }, { "epoch": 9.74, "learning_rate": 8.178367346938775e-06, "loss": 1.236, "step": 460100 }, { "epoch": 9.74, "learning_rate": 8.157959183673469e-06, "loss": 1.2323, "step": 460200 }, { "epoch": 9.74, "learning_rate": 8.137551020408164e-06, "loss": 1.2335, "step": 460300 }, { "epoch": 9.74, "learning_rate": 8.117346938775511e-06, "loss": 1.2294, "step": 460400 }, { "epoch": 9.75, "learning_rate": 8.096938775510204e-06, "loss": 1.2386, "step": 460500 }, { "epoch": 9.75, "learning_rate": 8.076530612244898e-06, "loss": 1.2357, "step": 460600 }, { "epoch": 9.75, "learning_rate": 8.056122448979591e-06, "loss": 1.2268, "step": 460700 }, { "epoch": 9.75, "learning_rate": 8.035714285714286e-06, "loss": 1.2276, "step": 460800 }, { "epoch": 9.75, "learning_rate": 8.015306122448981e-06, "loss": 1.2283, "step": 460900 }, { "epoch": 9.76, "learning_rate": 7.994897959183674e-06, "loss": 1.2358, "step": 461000 }, { "epoch": 9.76, "learning_rate": 7.974489795918368e-06, "loss": 1.2315, "step": 461100 }, { "epoch": 9.76, "learning_rate": 7.95408163265306e-06, "loss": 1.2306, "step": 461200 }, { "epoch": 9.76, "learning_rate": 7.933673469387754e-06, "loss": 1.2313, "step": 461300 }, { "epoch": 9.77, "learning_rate": 7.913265306122449e-06, "loss": 1.2294, "step": 461400 }, { "epoch": 9.77, "learning_rate": 7.892857142857144e-06, "loss": 1.2403, "step": 461500 }, { "epoch": 9.77, "learning_rate": 7.872448979591837e-06, "loss": 1.2268, "step": 461600 }, { "epoch": 9.77, "learning_rate": 7.85204081632653e-06, "loss": 1.2377, "step": 461700 }, { "epoch": 9.77, "learning_rate": 7.831632653061224e-06, "loss": 1.2258, "step": 461800 }, { "epoch": 9.78, "learning_rate": 7.811224489795919e-06, "loss": 1.2371, "step": 461900 }, { "epoch": 9.78, "learning_rate": 7.790816326530614e-06, "loss": 1.2308, "step": 462000 }, { "epoch": 9.78, "learning_rate": 7.770408163265307e-06, "loss": 1.2333, "step": 462100 }, { "epoch": 9.78, "learning_rate": 7.75e-06, "loss": 1.2338, "step": 462200 }, { "epoch": 9.78, "learning_rate": 7.729591836734694e-06, "loss": 1.232, "step": 462300 }, { "epoch": 9.79, "learning_rate": 7.709183673469387e-06, "loss": 1.2401, "step": 462400 }, { "epoch": 9.79, "learning_rate": 7.688775510204082e-06, "loss": 1.2345, "step": 462500 }, { "epoch": 9.79, "learning_rate": 7.668367346938777e-06, "loss": 1.2323, "step": 462600 }, { "epoch": 9.79, "learning_rate": 7.64795918367347e-06, "loss": 1.2383, "step": 462700 }, { "epoch": 9.79, "learning_rate": 7.627551020408163e-06, "loss": 1.2403, "step": 462800 }, { "epoch": 9.8, "learning_rate": 7.6071428571428575e-06, "loss": 1.2365, "step": 462900 }, { "epoch": 9.8, "learning_rate": 7.586734693877551e-06, "loss": 1.2308, "step": 463000 }, { "epoch": 9.8, "learning_rate": 7.566326530612246e-06, "loss": 1.2328, "step": 463100 }, { "epoch": 9.8, "learning_rate": 7.545918367346939e-06, "loss": 1.2273, "step": 463200 }, { "epoch": 9.81, "learning_rate": 7.525510204081633e-06, "loss": 1.2312, "step": 463300 }, { "epoch": 9.81, "learning_rate": 7.5051020408163264e-06, "loss": 1.2311, "step": 463400 }, { "epoch": 9.81, "learning_rate": 7.4846938775510206e-06, "loss": 1.2328, "step": 463500 }, { "epoch": 9.81, "learning_rate": 7.4642857142857155e-06, "loss": 1.2314, "step": 463600 }, { "epoch": 9.81, "learning_rate": 7.443877551020409e-06, "loss": 1.2309, "step": 463700 }, { "epoch": 9.82, "learning_rate": 7.423469387755103e-06, "loss": 1.2321, "step": 463800 }, { "epoch": 9.82, "learning_rate": 7.403061224489796e-06, "loss": 1.2434, "step": 463900 }, { "epoch": 9.82, "learning_rate": 7.3826530612244895e-06, "loss": 1.2362, "step": 464000 }, { "epoch": 9.82, "learning_rate": 7.362244897959184e-06, "loss": 1.2339, "step": 464100 }, { "epoch": 9.82, "learning_rate": 7.341836734693879e-06, "loss": 1.2285, "step": 464200 }, { "epoch": 9.83, "learning_rate": 7.321428571428572e-06, "loss": 1.2407, "step": 464300 }, { "epoch": 9.83, "learning_rate": 7.301020408163266e-06, "loss": 1.2298, "step": 464400 }, { "epoch": 9.83, "learning_rate": 7.280816326530612e-06, "loss": 1.2265, "step": 464500 }, { "epoch": 9.83, "learning_rate": 7.260408163265306e-06, "loss": 1.2275, "step": 464600 }, { "epoch": 9.83, "learning_rate": 7.240000000000001e-06, "loss": 1.2379, "step": 464700 }, { "epoch": 9.84, "learning_rate": 7.219591836734694e-06, "loss": 1.2394, "step": 464800 }, { "epoch": 9.84, "learning_rate": 7.199183673469388e-06, "loss": 1.2284, "step": 464900 }, { "epoch": 9.84, "learning_rate": 7.178775510204082e-06, "loss": 1.234, "step": 465000 }, { "epoch": 9.84, "learning_rate": 7.158367346938776e-06, "loss": 1.2377, "step": 465100 }, { "epoch": 9.85, "learning_rate": 7.137959183673469e-06, "loss": 1.223, "step": 465200 }, { "epoch": 9.85, "learning_rate": 7.117551020408164e-06, "loss": 1.2339, "step": 465300 }, { "epoch": 9.85, "learning_rate": 7.097142857142858e-06, "loss": 1.228, "step": 465400 }, { "epoch": 9.85, "learning_rate": 7.076938775510204e-06, "loss": 1.2262, "step": 465500 }, { "epoch": 9.85, "learning_rate": 7.056530612244898e-06, "loss": 1.2399, "step": 465600 }, { "epoch": 9.86, "learning_rate": 7.0361224489795915e-06, "loss": 1.2311, "step": 465700 }, { "epoch": 9.86, "learning_rate": 7.0157142857142864e-06, "loss": 1.2337, "step": 465800 }, { "epoch": 9.86, "learning_rate": 6.9953061224489806e-06, "loss": 1.2323, "step": 465900 }, { "epoch": 9.86, "learning_rate": 6.974897959183674e-06, "loss": 1.2335, "step": 466000 }, { "epoch": 9.86, "learning_rate": 6.954489795918367e-06, "loss": 1.2329, "step": 466100 }, { "epoch": 9.87, "learning_rate": 6.934081632653061e-06, "loss": 1.2296, "step": 466200 }, { "epoch": 9.87, "learning_rate": 6.913673469387756e-06, "loss": 1.2313, "step": 466300 }, { "epoch": 9.87, "learning_rate": 6.8932653061224495e-06, "loss": 1.2323, "step": 466400 }, { "epoch": 9.87, "learning_rate": 6.872857142857144e-06, "loss": 1.2332, "step": 466500 }, { "epoch": 9.88, "learning_rate": 6.852448979591837e-06, "loss": 1.2308, "step": 466600 }, { "epoch": 9.88, "learning_rate": 6.832040816326531e-06, "loss": 1.2311, "step": 466700 }, { "epoch": 9.88, "learning_rate": 6.811632653061224e-06, "loss": 1.2272, "step": 466800 }, { "epoch": 9.88, "learning_rate": 6.791224489795919e-06, "loss": 1.2278, "step": 466900 }, { "epoch": 9.88, "learning_rate": 6.7708163265306125e-06, "loss": 1.2325, "step": 467000 }, { "epoch": 9.89, "learning_rate": 6.750408163265307e-06, "loss": 1.2324, "step": 467100 }, { "epoch": 9.89, "learning_rate": 6.73e-06, "loss": 1.2384, "step": 467200 }, { "epoch": 9.89, "learning_rate": 6.709591836734694e-06, "loss": 1.23, "step": 467300 }, { "epoch": 9.89, "learning_rate": 6.689183673469387e-06, "loss": 1.2333, "step": 467400 }, { "epoch": 9.89, "learning_rate": 6.668775510204082e-06, "loss": 1.2248, "step": 467500 }, { "epoch": 9.9, "learning_rate": 6.6483673469387764e-06, "loss": 1.2297, "step": 467600 }, { "epoch": 9.9, "learning_rate": 6.62795918367347e-06, "loss": 1.235, "step": 467700 }, { "epoch": 9.9, "learning_rate": 6.607551020408163e-06, "loss": 1.2267, "step": 467800 }, { "epoch": 9.9, "learning_rate": 6.587142857142857e-06, "loss": 1.231, "step": 467900 }, { "epoch": 9.9, "learning_rate": 6.566734693877552e-06, "loss": 1.2336, "step": 468000 }, { "epoch": 9.91, "learning_rate": 6.546326530612245e-06, "loss": 1.2284, "step": 468100 }, { "epoch": 9.91, "learning_rate": 6.5259183673469395e-06, "loss": 1.2295, "step": 468200 }, { "epoch": 9.91, "learning_rate": 6.505510204081633e-06, "loss": 1.2237, "step": 468300 }, { "epoch": 9.91, "learning_rate": 6.485102040816327e-06, "loss": 1.228, "step": 468400 }, { "epoch": 9.92, "learning_rate": 6.46469387755102e-06, "loss": 1.231, "step": 468500 }, { "epoch": 9.92, "learning_rate": 6.444285714285715e-06, "loss": 1.2329, "step": 468600 }, { "epoch": 9.92, "learning_rate": 6.423877551020408e-06, "loss": 1.229, "step": 468700 }, { "epoch": 9.92, "learning_rate": 6.4034693877551025e-06, "loss": 1.2292, "step": 468800 }, { "epoch": 9.92, "learning_rate": 6.383061224489796e-06, "loss": 1.2316, "step": 468900 }, { "epoch": 9.93, "learning_rate": 6.36265306122449e-06, "loss": 1.23, "step": 469000 }, { "epoch": 9.93, "learning_rate": 6.342244897959183e-06, "loss": 1.232, "step": 469100 }, { "epoch": 9.93, "learning_rate": 6.321836734693878e-06, "loss": 1.2288, "step": 469200 }, { "epoch": 9.93, "learning_rate": 6.301428571428572e-06, "loss": 1.234, "step": 469300 }, { "epoch": 9.93, "learning_rate": 6.281224489795918e-06, "loss": 1.2364, "step": 469400 }, { "epoch": 9.94, "learning_rate": 6.260816326530612e-06, "loss": 1.2225, "step": 469500 }, { "epoch": 9.94, "learning_rate": 6.2404081632653065e-06, "loss": 1.2374, "step": 469600 }, { "epoch": 9.94, "learning_rate": 6.22e-06, "loss": 1.2283, "step": 469700 }, { "epoch": 9.94, "learning_rate": 6.199591836734695e-06, "loss": 1.2271, "step": 469800 }, { "epoch": 9.94, "learning_rate": 6.179183673469388e-06, "loss": 1.2299, "step": 469900 }, { "epoch": 9.95, "learning_rate": 6.158775510204082e-06, "loss": 1.2249, "step": 470000 }, { "epoch": 9.95, "learning_rate": 6.138367346938776e-06, "loss": 1.2313, "step": 470100 }, { "epoch": 9.95, "learning_rate": 6.1179591836734695e-06, "loss": 1.2344, "step": 470200 }, { "epoch": 9.95, "learning_rate": 6.097551020408164e-06, "loss": 1.2297, "step": 470300 }, { "epoch": 9.96, "learning_rate": 6.077142857142858e-06, "loss": 1.2277, "step": 470400 }, { "epoch": 9.96, "learning_rate": 6.056734693877551e-06, "loss": 1.2318, "step": 470500 }, { "epoch": 9.96, "learning_rate": 6.036326530612245e-06, "loss": 1.2318, "step": 470600 }, { "epoch": 9.96, "learning_rate": 6.015918367346939e-06, "loss": 1.2237, "step": 470700 }, { "epoch": 9.96, "learning_rate": 5.9955102040816326e-06, "loss": 1.2307, "step": 470800 }, { "epoch": 9.97, "learning_rate": 5.975102040816327e-06, "loss": 1.2302, "step": 470900 }, { "epoch": 9.97, "learning_rate": 5.954693877551021e-06, "loss": 1.2313, "step": 471000 }, { "epoch": 9.97, "learning_rate": 5.934285714285714e-06, "loss": 1.2205, "step": 471100 }, { "epoch": 9.97, "learning_rate": 5.913877551020409e-06, "loss": 1.2286, "step": 471200 }, { "epoch": 9.97, "learning_rate": 5.893469387755102e-06, "loss": 1.2247, "step": 471300 }, { "epoch": 9.98, "learning_rate": 5.873061224489796e-06, "loss": 1.2393, "step": 471400 }, { "epoch": 9.98, "learning_rate": 5.852653061224491e-06, "loss": 1.2316, "step": 471500 }, { "epoch": 9.98, "learning_rate": 5.832244897959184e-06, "loss": 1.2247, "step": 471600 }, { "epoch": 9.98, "learning_rate": 5.811836734693878e-06, "loss": 1.2227, "step": 471700 }, { "epoch": 9.99, "learning_rate": 5.791428571428572e-06, "loss": 1.2309, "step": 471800 }, { "epoch": 9.99, "learning_rate": 5.771020408163265e-06, "loss": 1.2262, "step": 471900 }, { "epoch": 9.99, "learning_rate": 5.7506122448979595e-06, "loss": 1.2302, "step": 472000 }, { "epoch": 9.99, "learning_rate": 5.730204081632654e-06, "loss": 1.2347, "step": 472100 }, { "epoch": 9.99, "learning_rate": 5.709795918367347e-06, "loss": 1.2315, "step": 472200 }, { "epoch": 10.0, "learning_rate": 5.689387755102041e-06, "loss": 1.2299, "step": 472300 }, { "epoch": 10.0, "learning_rate": 5.668979591836735e-06, "loss": 1.2335, "step": 472400 }, { "epoch": 10.0, "learning_rate": 5.6485714285714285e-06, "loss": 1.2314, "step": 472500 }, { "epoch": 10.0, "learning_rate": 5.628163265306123e-06, "loss": 1.2308, "step": 472600 }, { "epoch": 10.0, "learning_rate": 5.607755102040817e-06, "loss": 1.2312, "step": 472700 }, { "epoch": 10.01, "learning_rate": 5.58734693877551e-06, "loss": 1.2307, "step": 472800 }, { "epoch": 10.01, "learning_rate": 5.566938775510205e-06, "loss": 1.2278, "step": 472900 }, { "epoch": 10.01, "learning_rate": 5.546530612244898e-06, "loss": 1.2265, "step": 473000 }, { "epoch": 10.01, "learning_rate": 5.526122448979592e-06, "loss": 1.2263, "step": 473100 }, { "epoch": 10.01, "learning_rate": 5.5057142857142865e-06, "loss": 1.2235, "step": 473200 }, { "epoch": 10.02, "learning_rate": 5.485510204081633e-06, "loss": 1.2288, "step": 473300 }, { "epoch": 10.02, "learning_rate": 5.465102040816327e-06, "loss": 1.2196, "step": 473400 }, { "epoch": 10.02, "learning_rate": 5.444693877551021e-06, "loss": 1.2312, "step": 473500 }, { "epoch": 10.02, "learning_rate": 5.424285714285715e-06, "loss": 1.2283, "step": 473600 }, { "epoch": 10.03, "learning_rate": 5.403877551020409e-06, "loss": 1.2309, "step": 473700 }, { "epoch": 10.03, "learning_rate": 5.383469387755102e-06, "loss": 1.2182, "step": 473800 }, { "epoch": 10.03, "learning_rate": 5.363061224489796e-06, "loss": 1.2239, "step": 473900 }, { "epoch": 10.03, "learning_rate": 5.34265306122449e-06, "loss": 1.2318, "step": 474000 }, { "epoch": 10.03, "learning_rate": 5.322244897959184e-06, "loss": 1.2261, "step": 474100 }, { "epoch": 10.04, "learning_rate": 5.301836734693878e-06, "loss": 1.2312, "step": 474200 }, { "epoch": 10.04, "learning_rate": 5.281428571428572e-06, "loss": 1.2369, "step": 474300 }, { "epoch": 10.04, "learning_rate": 5.261020408163265e-06, "loss": 1.2279, "step": 474400 }, { "epoch": 10.04, "learning_rate": 5.240612244897959e-06, "loss": 1.2224, "step": 474500 }, { "epoch": 10.04, "learning_rate": 5.2202040816326535e-06, "loss": 1.231, "step": 474600 }, { "epoch": 10.05, "learning_rate": 5.199795918367347e-06, "loss": 1.2239, "step": 474700 }, { "epoch": 10.05, "learning_rate": 5.179387755102041e-06, "loss": 1.2269, "step": 474800 }, { "epoch": 10.05, "learning_rate": 5.158979591836735e-06, "loss": 1.2187, "step": 474900 }, { "epoch": 10.05, "learning_rate": 5.138571428571429e-06, "loss": 1.2271, "step": 475000 }, { "epoch": 10.06, "learning_rate": 5.118163265306123e-06, "loss": 1.2337, "step": 475100 }, { "epoch": 10.06, "learning_rate": 5.0977551020408165e-06, "loss": 1.229, "step": 475200 }, { "epoch": 10.06, "learning_rate": 5.077346938775511e-06, "loss": 1.2337, "step": 475300 }, { "epoch": 10.06, "learning_rate": 5.056938775510205e-06, "loss": 1.2321, "step": 475400 }, { "epoch": 10.06, "learning_rate": 5.036530612244898e-06, "loss": 1.228, "step": 475500 }, { "epoch": 10.07, "learning_rate": 5.016122448979592e-06, "loss": 1.2311, "step": 475600 }, { "epoch": 10.07, "learning_rate": 4.995714285714286e-06, "loss": 1.2198, "step": 475700 }, { "epoch": 10.07, "learning_rate": 4.9753061224489796e-06, "loss": 1.229, "step": 475800 }, { "epoch": 10.07, "learning_rate": 4.954897959183674e-06, "loss": 1.2195, "step": 475900 }, { "epoch": 10.07, "learning_rate": 4.934489795918368e-06, "loss": 1.2255, "step": 476000 }, { "epoch": 10.08, "learning_rate": 4.914081632653061e-06, "loss": 1.2266, "step": 476100 }, { "epoch": 10.08, "learning_rate": 4.893673469387755e-06, "loss": 1.225, "step": 476200 }, { "epoch": 10.08, "learning_rate": 4.873265306122449e-06, "loss": 1.2273, "step": 476300 }, { "epoch": 10.08, "learning_rate": 4.852857142857143e-06, "loss": 1.2315, "step": 476400 }, { "epoch": 10.08, "learning_rate": 4.832448979591837e-06, "loss": 1.2277, "step": 476500 }, { "epoch": 10.09, "learning_rate": 4.812040816326531e-06, "loss": 1.2338, "step": 476600 }, { "epoch": 10.09, "learning_rate": 4.791632653061225e-06, "loss": 1.2265, "step": 476700 }, { "epoch": 10.09, "learning_rate": 4.771224489795919e-06, "loss": 1.2267, "step": 476800 }, { "epoch": 10.09, "learning_rate": 4.750816326530612e-06, "loss": 1.2231, "step": 476900 }, { "epoch": 10.1, "learning_rate": 4.7304081632653065e-06, "loss": 1.2203, "step": 477000 }, { "epoch": 10.1, "learning_rate": 4.710000000000001e-06, "loss": 1.2236, "step": 477100 }, { "epoch": 10.1, "learning_rate": 4.689591836734694e-06, "loss": 1.2229, "step": 477200 }, { "epoch": 10.1, "learning_rate": 4.669183673469388e-06, "loss": 1.2329, "step": 477300 }, { "epoch": 10.1, "learning_rate": 4.648979591836735e-06, "loss": 1.2221, "step": 477400 }, { "epoch": 10.11, "learning_rate": 4.628571428571429e-06, "loss": 1.2226, "step": 477500 }, { "epoch": 10.11, "learning_rate": 4.608163265306123e-06, "loss": 1.2282, "step": 477600 }, { "epoch": 10.11, "learning_rate": 4.587755102040816e-06, "loss": 1.2237, "step": 477700 }, { "epoch": 10.11, "learning_rate": 4.5673469387755104e-06, "loss": 1.2311, "step": 477800 }, { "epoch": 10.11, "learning_rate": 4.5469387755102046e-06, "loss": 1.2274, "step": 477900 }, { "epoch": 10.12, "learning_rate": 4.526530612244898e-06, "loss": 1.2282, "step": 478000 }, { "epoch": 10.12, "learning_rate": 4.506122448979592e-06, "loss": 1.2281, "step": 478100 }, { "epoch": 10.12, "learning_rate": 4.485714285714286e-06, "loss": 1.2275, "step": 478200 }, { "epoch": 10.12, "learning_rate": 4.465306122448979e-06, "loss": 1.22, "step": 478300 }, { "epoch": 10.12, "learning_rate": 4.4448979591836735e-06, "loss": 1.2232, "step": 478400 }, { "epoch": 10.13, "learning_rate": 4.424489795918368e-06, "loss": 1.2264, "step": 478500 }, { "epoch": 10.13, "learning_rate": 4.404081632653062e-06, "loss": 1.2218, "step": 478600 }, { "epoch": 10.13, "learning_rate": 4.383673469387755e-06, "loss": 1.2193, "step": 478700 }, { "epoch": 10.13, "learning_rate": 4.363265306122449e-06, "loss": 1.2281, "step": 478800 }, { "epoch": 10.14, "learning_rate": 4.342857142857143e-06, "loss": 1.2275, "step": 478900 }, { "epoch": 10.14, "learning_rate": 4.322448979591837e-06, "loss": 1.2196, "step": 479000 }, { "epoch": 10.14, "learning_rate": 4.302040816326531e-06, "loss": 1.2273, "step": 479100 }, { "epoch": 10.14, "learning_rate": 4.281632653061225e-06, "loss": 1.2242, "step": 479200 }, { "epoch": 10.14, "learning_rate": 4.261224489795919e-06, "loss": 1.2295, "step": 479300 }, { "epoch": 10.15, "learning_rate": 4.240816326530612e-06, "loss": 1.2229, "step": 479400 }, { "epoch": 10.15, "learning_rate": 4.22061224489796e-06, "loss": 1.2248, "step": 479500 }, { "epoch": 10.15, "learning_rate": 4.200204081632653e-06, "loss": 1.2287, "step": 479600 }, { "epoch": 10.15, "learning_rate": 4.179795918367347e-06, "loss": 1.228, "step": 479700 }, { "epoch": 10.15, "learning_rate": 4.159387755102041e-06, "loss": 1.2262, "step": 479800 }, { "epoch": 10.16, "learning_rate": 4.138979591836735e-06, "loss": 1.2226, "step": 479900 }, { "epoch": 10.16, "learning_rate": 4.118571428571429e-06, "loss": 1.2266, "step": 480000 }, { "epoch": 10.16, "learning_rate": 4.098163265306123e-06, "loss": 1.2271, "step": 480100 }, { "epoch": 10.16, "learning_rate": 4.077755102040817e-06, "loss": 1.2317, "step": 480200 }, { "epoch": 10.17, "learning_rate": 4.05734693877551e-06, "loss": 1.2212, "step": 480300 }, { "epoch": 10.17, "learning_rate": 4.036938775510204e-06, "loss": 1.2195, "step": 480400 }, { "epoch": 10.17, "learning_rate": 4.0165306122448985e-06, "loss": 1.2338, "step": 480500 }, { "epoch": 10.17, "learning_rate": 3.996122448979592e-06, "loss": 1.2159, "step": 480600 }, { "epoch": 10.17, "learning_rate": 3.975714285714286e-06, "loss": 1.2264, "step": 480700 }, { "epoch": 10.18, "learning_rate": 3.95530612244898e-06, "loss": 1.229, "step": 480800 }, { "epoch": 10.18, "learning_rate": 3.934897959183673e-06, "loss": 1.2207, "step": 480900 }, { "epoch": 10.18, "learning_rate": 3.914489795918367e-06, "loss": 1.2257, "step": 481000 }, { "epoch": 10.18, "learning_rate": 3.8940816326530615e-06, "loss": 1.2255, "step": 481100 }, { "epoch": 10.18, "learning_rate": 3.873673469387756e-06, "loss": 1.2307, "step": 481200 }, { "epoch": 10.19, "learning_rate": 3.853265306122449e-06, "loss": 1.2227, "step": 481300 }, { "epoch": 10.19, "learning_rate": 3.832857142857143e-06, "loss": 1.2256, "step": 481400 }, { "epoch": 10.19, "learning_rate": 3.812448979591837e-06, "loss": 1.2249, "step": 481500 }, { "epoch": 10.19, "learning_rate": 3.792244897959184e-06, "loss": 1.2172, "step": 481600 }, { "epoch": 10.19, "learning_rate": 3.771836734693878e-06, "loss": 1.222, "step": 481700 }, { "epoch": 10.2, "learning_rate": 3.7514285714285718e-06, "loss": 1.2157, "step": 481800 }, { "epoch": 10.2, "learning_rate": 3.7310204081632655e-06, "loss": 1.237, "step": 481900 }, { "epoch": 10.2, "learning_rate": 3.7106122448979596e-06, "loss": 1.2277, "step": 482000 }, { "epoch": 10.2, "learning_rate": 3.6902040816326533e-06, "loss": 1.2335, "step": 482100 }, { "epoch": 10.21, "learning_rate": 3.669795918367347e-06, "loss": 1.2252, "step": 482200 }, { "epoch": 10.21, "learning_rate": 3.649591836734694e-06, "loss": 1.2236, "step": 482300 }, { "epoch": 10.21, "learning_rate": 3.629183673469388e-06, "loss": 1.2247, "step": 482400 }, { "epoch": 10.21, "learning_rate": 3.608775510204082e-06, "loss": 1.2235, "step": 482500 }, { "epoch": 10.21, "learning_rate": 3.5883673469387757e-06, "loss": 1.2295, "step": 482600 }, { "epoch": 10.22, "learning_rate": 3.5679591836734694e-06, "loss": 1.2266, "step": 482700 }, { "epoch": 10.22, "learning_rate": 3.5475510204081635e-06, "loss": 1.2223, "step": 482800 }, { "epoch": 10.22, "learning_rate": 3.527142857142857e-06, "loss": 1.2246, "step": 482900 }, { "epoch": 10.22, "learning_rate": 3.506734693877551e-06, "loss": 1.2246, "step": 483000 }, { "epoch": 10.22, "learning_rate": 3.4863265306122454e-06, "loss": 1.2276, "step": 483100 }, { "epoch": 10.23, "learning_rate": 3.4659183673469387e-06, "loss": 1.2216, "step": 483200 }, { "epoch": 10.23, "learning_rate": 3.4455102040816324e-06, "loss": 1.2202, "step": 483300 }, { "epoch": 10.23, "learning_rate": 3.425102040816327e-06, "loss": 1.2219, "step": 483400 }, { "epoch": 10.23, "learning_rate": 3.4046938775510207e-06, "loss": 1.2339, "step": 483500 }, { "epoch": 10.23, "learning_rate": 3.384285714285714e-06, "loss": 1.2185, "step": 483600 }, { "epoch": 10.24, "learning_rate": 3.3638775510204085e-06, "loss": 1.2231, "step": 483700 }, { "epoch": 10.24, "learning_rate": 3.343469387755102e-06, "loss": 1.2306, "step": 483800 }, { "epoch": 10.24, "learning_rate": 3.323061224489796e-06, "loss": 1.2254, "step": 483900 }, { "epoch": 10.24, "learning_rate": 3.30265306122449e-06, "loss": 1.2276, "step": 484000 }, { "epoch": 10.25, "learning_rate": 3.2822448979591837e-06, "loss": 1.2308, "step": 484100 }, { "epoch": 10.25, "learning_rate": 3.261836734693878e-06, "loss": 1.2268, "step": 484200 }, { "epoch": 10.25, "learning_rate": 3.2414285714285716e-06, "loss": 1.2258, "step": 484300 }, { "epoch": 10.25, "learning_rate": 3.2212244897959183e-06, "loss": 1.2246, "step": 484400 }, { "epoch": 10.25, "learning_rate": 3.2008163265306124e-06, "loss": 1.2265, "step": 484500 }, { "epoch": 10.26, "learning_rate": 3.180408163265306e-06, "loss": 1.2245, "step": 484600 }, { "epoch": 10.26, "learning_rate": 3.1600000000000007e-06, "loss": 1.2169, "step": 484700 }, { "epoch": 10.26, "learning_rate": 3.139591836734694e-06, "loss": 1.2282, "step": 484800 }, { "epoch": 10.26, "learning_rate": 3.119183673469388e-06, "loss": 1.2285, "step": 484900 }, { "epoch": 10.26, "learning_rate": 3.0987755102040818e-06, "loss": 1.223, "step": 485000 }, { "epoch": 10.27, "learning_rate": 3.078367346938776e-06, "loss": 1.2171, "step": 485100 }, { "epoch": 10.27, "learning_rate": 3.0579591836734696e-06, "loss": 1.2261, "step": 485200 }, { "epoch": 10.27, "learning_rate": 3.0375510204081633e-06, "loss": 1.2176, "step": 485300 }, { "epoch": 10.27, "learning_rate": 3.0171428571428574e-06, "loss": 1.2157, "step": 485400 }, { "epoch": 10.28, "learning_rate": 2.996734693877551e-06, "loss": 1.2245, "step": 485500 }, { "epoch": 10.28, "learning_rate": 2.976326530612245e-06, "loss": 1.2222, "step": 485600 }, { "epoch": 10.28, "learning_rate": 2.955918367346939e-06, "loss": 1.2213, "step": 485700 }, { "epoch": 10.28, "learning_rate": 2.9355102040816326e-06, "loss": 1.219, "step": 485800 }, { "epoch": 10.28, "learning_rate": 2.9151020408163268e-06, "loss": 1.222, "step": 485900 }, { "epoch": 10.29, "learning_rate": 2.8946938775510205e-06, "loss": 1.2206, "step": 486000 }, { "epoch": 10.29, "learning_rate": 2.8742857142857146e-06, "loss": 1.2267, "step": 486100 }, { "epoch": 10.29, "learning_rate": 2.8538775510204083e-06, "loss": 1.2256, "step": 486200 }, { "epoch": 10.29, "learning_rate": 2.833469387755102e-06, "loss": 1.226, "step": 486300 }, { "epoch": 10.29, "learning_rate": 2.813061224489796e-06, "loss": 1.2251, "step": 486400 }, { "epoch": 10.3, "learning_rate": 2.79265306122449e-06, "loss": 1.2263, "step": 486500 }, { "epoch": 10.3, "learning_rate": 2.772244897959184e-06, "loss": 1.2272, "step": 486600 }, { "epoch": 10.3, "learning_rate": 2.7518367346938777e-06, "loss": 1.2217, "step": 486700 }, { "epoch": 10.3, "learning_rate": 2.7314285714285718e-06, "loss": 1.2232, "step": 486800 }, { "epoch": 10.3, "learning_rate": 2.7110204081632655e-06, "loss": 1.2339, "step": 486900 }, { "epoch": 10.31, "learning_rate": 2.690612244897959e-06, "loss": 1.2232, "step": 487000 }, { "epoch": 10.31, "learning_rate": 2.6702040816326533e-06, "loss": 1.2222, "step": 487100 }, { "epoch": 10.31, "learning_rate": 2.649795918367347e-06, "loss": 1.2229, "step": 487200 }, { "epoch": 10.31, "learning_rate": 2.6293877551020407e-06, "loss": 1.2266, "step": 487300 }, { "epoch": 10.32, "learning_rate": 2.608979591836735e-06, "loss": 1.2234, "step": 487400 }, { "epoch": 10.32, "learning_rate": 2.5885714285714285e-06, "loss": 1.2215, "step": 487500 }, { "epoch": 10.32, "learning_rate": 2.5681632653061227e-06, "loss": 1.2245, "step": 487600 }, { "epoch": 10.32, "learning_rate": 2.5477551020408164e-06, "loss": 1.2196, "step": 487700 }, { "epoch": 10.32, "learning_rate": 2.5273469387755105e-06, "loss": 1.2243, "step": 487800 }, { "epoch": 10.33, "learning_rate": 2.506938775510204e-06, "loss": 1.2196, "step": 487900 }, { "epoch": 10.33, "learning_rate": 2.486530612244898e-06, "loss": 1.2181, "step": 488000 }, { "epoch": 10.33, "learning_rate": 2.466122448979592e-06, "loss": 1.2224, "step": 488100 }, { "epoch": 10.33, "learning_rate": 2.4457142857142857e-06, "loss": 1.2226, "step": 488200 }, { "epoch": 10.33, "learning_rate": 2.42530612244898e-06, "loss": 1.2301, "step": 488300 }, { "epoch": 10.34, "learning_rate": 2.4048979591836735e-06, "loss": 1.2223, "step": 488400 }, { "epoch": 10.34, "learning_rate": 2.3844897959183677e-06, "loss": 1.2235, "step": 488500 }, { "epoch": 10.34, "learning_rate": 2.3642857142857144e-06, "loss": 1.2188, "step": 488600 }, { "epoch": 10.34, "learning_rate": 2.3438775510204085e-06, "loss": 1.2206, "step": 488700 }, { "epoch": 10.34, "learning_rate": 2.3234693877551022e-06, "loss": 1.2249, "step": 488800 }, { "epoch": 10.35, "learning_rate": 2.303061224489796e-06, "loss": 1.2195, "step": 488900 }, { "epoch": 10.35, "learning_rate": 2.28265306122449e-06, "loss": 1.2179, "step": 489000 }, { "epoch": 10.35, "learning_rate": 2.2622448979591838e-06, "loss": 1.2227, "step": 489100 }, { "epoch": 10.35, "learning_rate": 2.2418367346938775e-06, "loss": 1.2185, "step": 489200 }, { "epoch": 10.36, "learning_rate": 2.2214285714285716e-06, "loss": 1.2242, "step": 489300 }, { "epoch": 10.36, "learning_rate": 2.2010204081632657e-06, "loss": 1.2216, "step": 489400 }, { "epoch": 10.36, "learning_rate": 2.180612244897959e-06, "loss": 1.2285, "step": 489500 }, { "epoch": 10.36, "learning_rate": 2.160204081632653e-06, "loss": 1.2212, "step": 489600 }, { "epoch": 10.36, "learning_rate": 2.1397959183673472e-06, "loss": 1.2219, "step": 489700 }, { "epoch": 10.37, "learning_rate": 2.119387755102041e-06, "loss": 1.2212, "step": 489800 }, { "epoch": 10.37, "learning_rate": 2.0989795918367346e-06, "loss": 1.2282, "step": 489900 }, { "epoch": 10.37, "learning_rate": 2.0785714285714288e-06, "loss": 1.2213, "step": 490000 }, { "epoch": 10.37, "learning_rate": 2.0581632653061225e-06, "loss": 1.2181, "step": 490100 }, { "epoch": 10.37, "learning_rate": 2.037755102040816e-06, "loss": 1.2207, "step": 490200 }, { "epoch": 10.38, "learning_rate": 2.0173469387755103e-06, "loss": 1.2176, "step": 490300 }, { "epoch": 10.38, "learning_rate": 1.9969387755102044e-06, "loss": 1.2252, "step": 490400 }, { "epoch": 10.38, "learning_rate": 1.9765306122448977e-06, "loss": 1.2247, "step": 490500 }, { "epoch": 10.38, "learning_rate": 1.9563265306122453e-06, "loss": 1.2245, "step": 490600 }, { "epoch": 10.39, "learning_rate": 1.9359183673469385e-06, "loss": 1.2242, "step": 490700 }, { "epoch": 10.39, "learning_rate": 1.9155102040816327e-06, "loss": 1.2224, "step": 490800 }, { "epoch": 10.39, "learning_rate": 1.8951020408163268e-06, "loss": 1.2253, "step": 490900 }, { "epoch": 10.39, "learning_rate": 1.8746938775510203e-06, "loss": 1.2197, "step": 491000 }, { "epoch": 10.39, "learning_rate": 1.8542857142857144e-06, "loss": 1.2264, "step": 491100 }, { "epoch": 10.4, "learning_rate": 1.8338775510204083e-06, "loss": 1.2218, "step": 491200 }, { "epoch": 10.4, "learning_rate": 1.8134693877551022e-06, "loss": 1.2184, "step": 491300 }, { "epoch": 10.4, "learning_rate": 1.793061224489796e-06, "loss": 1.2283, "step": 491400 }, { "epoch": 10.4, "learning_rate": 1.7726530612244899e-06, "loss": 1.2203, "step": 491500 }, { "epoch": 10.4, "learning_rate": 1.7522448979591838e-06, "loss": 1.2184, "step": 491600 }, { "epoch": 10.41, "learning_rate": 1.7318367346938775e-06, "loss": 1.2223, "step": 491700 }, { "epoch": 10.41, "learning_rate": 1.7114285714285714e-06, "loss": 1.2258, "step": 491800 }, { "epoch": 10.41, "learning_rate": 1.6910204081632655e-06, "loss": 1.2317, "step": 491900 }, { "epoch": 10.41, "learning_rate": 1.6706122448979594e-06, "loss": 1.2195, "step": 492000 }, { "epoch": 10.41, "learning_rate": 1.6502040816326531e-06, "loss": 1.2249, "step": 492100 }, { "epoch": 10.42, "learning_rate": 1.629795918367347e-06, "loss": 1.2281, "step": 492200 }, { "epoch": 10.42, "learning_rate": 1.609387755102041e-06, "loss": 1.2272, "step": 492300 }, { "epoch": 10.42, "learning_rate": 1.5889795918367346e-06, "loss": 1.2195, "step": 492400 }, { "epoch": 10.42, "learning_rate": 1.5685714285714286e-06, "loss": 1.2191, "step": 492500 }, { "epoch": 10.43, "learning_rate": 1.5481632653061227e-06, "loss": 1.2325, "step": 492600 }, { "epoch": 10.43, "learning_rate": 1.5277551020408164e-06, "loss": 1.2208, "step": 492700 }, { "epoch": 10.43, "learning_rate": 1.5075510204081633e-06, "loss": 1.2266, "step": 492800 }, { "epoch": 10.43, "learning_rate": 1.4871428571428572e-06, "loss": 1.2201, "step": 492900 }, { "epoch": 10.43, "learning_rate": 1.4667346938775512e-06, "loss": 1.2213, "step": 493000 }, { "epoch": 10.44, "learning_rate": 1.446326530612245e-06, "loss": 1.2284, "step": 493100 }, { "epoch": 10.44, "learning_rate": 1.426122448979592e-06, "loss": 1.2221, "step": 493200 }, { "epoch": 10.44, "learning_rate": 1.4057142857142857e-06, "loss": 1.2205, "step": 493300 }, { "epoch": 10.44, "learning_rate": 1.3853061224489796e-06, "loss": 1.219, "step": 493400 }, { "epoch": 10.44, "learning_rate": 1.3648979591836735e-06, "loss": 1.2211, "step": 493500 }, { "epoch": 10.45, "learning_rate": 1.3444897959183675e-06, "loss": 1.2197, "step": 493600 }, { "epoch": 10.45, "learning_rate": 1.3240816326530614e-06, "loss": 1.2257, "step": 493700 }, { "epoch": 10.45, "learning_rate": 1.303673469387755e-06, "loss": 1.2174, "step": 493800 }, { "epoch": 10.45, "learning_rate": 1.283265306122449e-06, "loss": 1.218, "step": 493900 }, { "epoch": 10.46, "learning_rate": 1.262857142857143e-06, "loss": 1.2164, "step": 494000 }, { "epoch": 10.46, "learning_rate": 1.2424489795918368e-06, "loss": 1.2246, "step": 494100 }, { "epoch": 10.46, "learning_rate": 1.2220408163265307e-06, "loss": 1.2148, "step": 494200 }, { "epoch": 10.46, "learning_rate": 1.2016326530612244e-06, "loss": 1.2194, "step": 494300 }, { "epoch": 10.46, "learning_rate": 1.1812244897959185e-06, "loss": 1.2263, "step": 494400 }, { "epoch": 10.47, "learning_rate": 1.1608163265306123e-06, "loss": 1.2212, "step": 494500 }, { "epoch": 10.47, "learning_rate": 1.1404081632653062e-06, "loss": 1.2207, "step": 494600 }, { "epoch": 10.47, "learning_rate": 1.12e-06, "loss": 1.2134, "step": 494700 }, { "epoch": 10.47, "learning_rate": 1.0995918367346938e-06, "loss": 1.2241, "step": 494800 }, { "epoch": 10.47, "learning_rate": 1.079183673469388e-06, "loss": 1.2178, "step": 494900 }, { "epoch": 10.48, "learning_rate": 1.0587755102040816e-06, "loss": 1.2188, "step": 495000 }, { "epoch": 10.48, "learning_rate": 1.0383673469387755e-06, "loss": 1.2221, "step": 495100 }, { "epoch": 10.48, "learning_rate": 1.0179591836734694e-06, "loss": 1.2234, "step": 495200 }, { "epoch": 10.48, "learning_rate": 9.975510204081633e-07, "loss": 1.226, "step": 495300 }, { "epoch": 10.48, "learning_rate": 9.771428571428573e-07, "loss": 1.2343, "step": 495400 }, { "epoch": 10.49, "learning_rate": 9.56734693877551e-07, "loss": 1.2268, "step": 495500 }, { "epoch": 10.49, "learning_rate": 9.36326530612245e-07, "loss": 1.2226, "step": 495600 }, { "epoch": 10.49, "learning_rate": 9.159183673469388e-07, "loss": 1.2204, "step": 495700 }, { "epoch": 10.49, "learning_rate": 8.955102040816328e-07, "loss": 1.2191, "step": 495800 }, { "epoch": 10.5, "learning_rate": 8.751020408163266e-07, "loss": 1.2223, "step": 495900 }, { "epoch": 10.5, "learning_rate": 8.546938775510204e-07, "loss": 1.2241, "step": 496000 }, { "epoch": 10.5, "learning_rate": 8.342857142857143e-07, "loss": 1.2242, "step": 496100 }, { "epoch": 10.5, "learning_rate": 8.138775510204081e-07, "loss": 1.2202, "step": 496200 }, { "epoch": 10.5, "learning_rate": 7.934693877551022e-07, "loss": 1.2285, "step": 496300 }, { "epoch": 10.51, "learning_rate": 7.73061224489796e-07, "loss": 1.2218, "step": 496400 }, { "epoch": 10.51, "learning_rate": 7.526530612244899e-07, "loss": 1.2196, "step": 496500 }, { "epoch": 10.51, "learning_rate": 7.322448979591837e-07, "loss": 1.227, "step": 496600 }, { "epoch": 10.51, "learning_rate": 7.118367346938776e-07, "loss": 1.2142, "step": 496700 }, { "epoch": 10.51, "learning_rate": 6.914285714285714e-07, "loss": 1.225, "step": 496800 }, { "epoch": 10.52, "learning_rate": 6.710204081632653e-07, "loss": 1.2231, "step": 496900 }, { "epoch": 10.52, "learning_rate": 6.506122448979592e-07, "loss": 1.2213, "step": 497000 }, { "epoch": 10.52, "learning_rate": 6.302040816326531e-07, "loss": 1.2222, "step": 497100 }, { "epoch": 10.52, "learning_rate": 6.100000000000001e-07, "loss": 1.2253, "step": 497200 }, { "epoch": 10.52, "learning_rate": 5.895918367346939e-07, "loss": 1.2174, "step": 497300 }, { "epoch": 10.53, "learning_rate": 5.691836734693878e-07, "loss": 1.2149, "step": 497400 }, { "epoch": 10.53, "learning_rate": 5.487755102040816e-07, "loss": 1.2161, "step": 497500 }, { "epoch": 10.53, "learning_rate": 5.283673469387755e-07, "loss": 1.2249, "step": 497600 }, { "epoch": 10.53, "learning_rate": 5.079591836734694e-07, "loss": 1.2213, "step": 497700 }, { "epoch": 10.54, "learning_rate": 4.875510204081632e-07, "loss": 1.2238, "step": 497800 }, { "epoch": 10.54, "learning_rate": 4.6714285714285716e-07, "loss": 1.2297, "step": 497900 }, { "epoch": 10.54, "learning_rate": 4.469387755102041e-07, "loss": 1.2267, "step": 498000 }, { "epoch": 10.54, "learning_rate": 4.2653061224489797e-07, "loss": 1.2237, "step": 498100 }, { "epoch": 10.54, "learning_rate": 4.061224489795919e-07, "loss": 1.2292, "step": 498200 }, { "epoch": 10.55, "learning_rate": 3.8571428571428574e-07, "loss": 1.2176, "step": 498300 }, { "epoch": 10.55, "learning_rate": 3.653061224489796e-07, "loss": 1.223, "step": 498400 }, { "epoch": 10.55, "learning_rate": 3.4489795918367346e-07, "loss": 1.2245, "step": 498500 }, { "epoch": 10.55, "learning_rate": 3.244897959183674e-07, "loss": 1.2216, "step": 498600 }, { "epoch": 10.55, "learning_rate": 3.0408163265306124e-07, "loss": 1.2207, "step": 498700 }, { "epoch": 10.56, "learning_rate": 2.836734693877551e-07, "loss": 1.223, "step": 498800 }, { "epoch": 10.56, "learning_rate": 2.63265306122449e-07, "loss": 1.2292, "step": 498900 }, { "epoch": 10.56, "learning_rate": 2.4285714285714287e-07, "loss": 1.2224, "step": 499000 }, { "epoch": 10.56, "learning_rate": 2.2244897959183673e-07, "loss": 1.2235, "step": 499100 }, { "epoch": 10.57, "learning_rate": 2.0204081632653064e-07, "loss": 1.2154, "step": 499200 }, { "epoch": 10.57, "learning_rate": 1.816326530612245e-07, "loss": 1.2203, "step": 499300 }, { "epoch": 10.57, "learning_rate": 1.6122448979591836e-07, "loss": 1.2276, "step": 499400 }, { "epoch": 10.57, "learning_rate": 1.4081632653061225e-07, "loss": 1.2178, "step": 499500 }, { "epoch": 10.57, "learning_rate": 1.2040816326530614e-07, "loss": 1.2197, "step": 499600 }, { "epoch": 10.58, "learning_rate": 1.0000000000000001e-07, "loss": 1.2206, "step": 499700 }, { "epoch": 10.58, "learning_rate": 7.959183673469388e-08, "loss": 1.2273, "step": 499800 }, { "epoch": 10.58, "learning_rate": 5.9183673469387755e-08, "loss": 1.2239, "step": 499900 }, { "epoch": 10.58, "learning_rate": 3.8775510204081635e-08, "loss": 1.2166, "step": 500000 } ], "max_steps": 500000, "num_train_epochs": 11, "total_flos": 1.6845286741848883e+19, "trial_name": null, "trial_params": null }