diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,30016 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 10.582010582010582, + "global_step": 500000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.0000000000000002e-06, + "loss": 10.2046, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.0000000000000003e-06, + "loss": 9.366, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 8.9116, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 4.000000000000001e-06, + "loss": 8.5519, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 5e-06, + "loss": 8.1293, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 6e-06, + "loss": 7.6255, + "step": 600 + }, + { + "epoch": 0.01, + "learning_rate": 7.000000000000001e-06, + "loss": 7.1213, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 8.000000000000001e-06, + "loss": 6.7092, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9e-06, + "loss": 6.4643, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 1e-05, + "loss": 6.311, + "step": 1000 + }, + { + "epoch": 0.02, + "learning_rate": 1.1000000000000001e-05, + "loss": 6.2174, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 1.2e-05, + "loss": 6.1559, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 1.3000000000000001e-05, + "loss": 6.1035, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 1.4000000000000001e-05, + "loss": 6.0585, + "step": 1400 + }, + { + "epoch": 0.03, + "learning_rate": 1.5e-05, + "loss": 6.0161, + "step": 1500 + }, + { + "epoch": 0.03, + "learning_rate": 1.6000000000000003e-05, + "loss": 5.9838, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 1.7000000000000003e-05, + "loss": 5.9601, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 1.8e-05, + "loss": 5.9298, + "step": 1800 + }, + { + "epoch": 0.04, + "learning_rate": 1.9e-05, + "loss": 5.9038, + "step": 1900 + }, + { + "epoch": 0.04, + "learning_rate": 2e-05, + "loss": 5.8812, + "step": 2000 + }, + { + "epoch": 0.04, + "learning_rate": 2.1e-05, + "loss": 5.8652, + "step": 2100 + }, + { + "epoch": 0.05, + "learning_rate": 2.2000000000000003e-05, + "loss": 5.8491, + "step": 2200 + }, + { + "epoch": 0.05, + "learning_rate": 2.3000000000000003e-05, + "loss": 5.8159, + "step": 2300 + }, + { + "epoch": 0.05, + "learning_rate": 2.4e-05, + "loss": 5.8046, + "step": 2400 + }, + { + "epoch": 0.05, + "learning_rate": 2.5e-05, + "loss": 5.7949, + "step": 2500 + }, + { + "epoch": 0.06, + "learning_rate": 2.6000000000000002e-05, + "loss": 5.7781, + "step": 2600 + }, + { + "epoch": 0.06, + "learning_rate": 2.7000000000000002e-05, + "loss": 5.7541, + "step": 2700 + }, + { + "epoch": 0.06, + "learning_rate": 2.8000000000000003e-05, + "loss": 5.7444, + "step": 2800 + }, + { + "epoch": 0.06, + "learning_rate": 2.9e-05, + "loss": 5.7199, + "step": 2900 + }, + { + "epoch": 0.06, + "learning_rate": 3e-05, + "loss": 5.7058, + "step": 3000 + }, + { + "epoch": 0.07, + "learning_rate": 3.1e-05, + "loss": 5.6956, + "step": 3100 + }, + { + "epoch": 0.07, + "learning_rate": 3.2000000000000005e-05, + "loss": 5.6666, + "step": 3200 + }, + { + "epoch": 0.07, + "learning_rate": 3.3e-05, + "loss": 5.6611, + "step": 3300 + }, + { + "epoch": 0.07, + "learning_rate": 3.4000000000000007e-05, + "loss": 5.6509, + "step": 3400 + }, + { + "epoch": 0.07, + "learning_rate": 3.5e-05, + "loss": 5.6268, + "step": 3500 + }, + { + "epoch": 0.08, + "learning_rate": 3.6e-05, + "loss": 5.6069, + "step": 3600 + }, + { + "epoch": 0.08, + "learning_rate": 3.7e-05, + "loss": 5.595, + "step": 3700 + }, + { + "epoch": 0.08, + "learning_rate": 3.8e-05, + "loss": 5.5835, + "step": 3800 + }, + { + "epoch": 0.08, + "learning_rate": 3.9000000000000006e-05, + "loss": 5.5517, + "step": 3900 + }, + { + "epoch": 0.08, + "learning_rate": 4e-05, + "loss": 5.538, + "step": 4000 + }, + { + "epoch": 0.09, + "learning_rate": 4.1e-05, + "loss": 5.508, + "step": 4100 + }, + { + "epoch": 0.09, + "learning_rate": 4.2e-05, + "loss": 5.4989, + "step": 4200 + }, + { + "epoch": 0.09, + "learning_rate": 4.3e-05, + "loss": 5.4678, + "step": 4300 + }, + { + "epoch": 0.09, + "learning_rate": 4.4000000000000006e-05, + "loss": 5.4355, + "step": 4400 + }, + { + "epoch": 0.1, + "learning_rate": 4.5e-05, + "loss": 5.3605, + "step": 4500 + }, + { + "epoch": 0.1, + "learning_rate": 4.600000000000001e-05, + "loss": 5.2785, + "step": 4600 + }, + { + "epoch": 0.1, + "learning_rate": 4.7e-05, + "loss": 5.2045, + "step": 4700 + }, + { + "epoch": 0.1, + "learning_rate": 4.8e-05, + "loss": 5.1103, + "step": 4800 + }, + { + "epoch": 0.1, + "learning_rate": 4.9e-05, + "loss": 5.0325, + "step": 4900 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 4.9641, + "step": 5000 + }, + { + "epoch": 0.11, + "learning_rate": 5.1000000000000006e-05, + "loss": 4.8674, + "step": 5100 + }, + { + "epoch": 0.11, + "learning_rate": 5.2000000000000004e-05, + "loss": 4.7503, + "step": 5200 + }, + { + "epoch": 0.11, + "learning_rate": 5.300000000000001e-05, + "loss": 4.6245, + "step": 5300 + }, + { + "epoch": 0.11, + "learning_rate": 5.4000000000000005e-05, + "loss": 4.5214, + "step": 5400 + }, + { + "epoch": 0.12, + "learning_rate": 5.500000000000001e-05, + "loss": 4.4142, + "step": 5500 + }, + { + "epoch": 0.12, + "learning_rate": 5.6000000000000006e-05, + "loss": 4.3115, + "step": 5600 + }, + { + "epoch": 0.12, + "learning_rate": 5.6999999999999996e-05, + "loss": 4.2359, + "step": 5700 + }, + { + "epoch": 0.12, + "learning_rate": 5.8e-05, + "loss": 4.1521, + "step": 5800 + }, + { + "epoch": 0.12, + "learning_rate": 5.9e-05, + "loss": 4.0841, + "step": 5900 + }, + { + "epoch": 0.13, + "learning_rate": 6e-05, + "loss": 4.0192, + "step": 6000 + }, + { + "epoch": 0.13, + "learning_rate": 6.1e-05, + "loss": 3.9532, + "step": 6100 + }, + { + "epoch": 0.13, + "learning_rate": 6.2e-05, + "loss": 3.8956, + "step": 6200 + }, + { + "epoch": 0.13, + "learning_rate": 6.3e-05, + "loss": 3.8454, + "step": 6300 + }, + { + "epoch": 0.14, + "learning_rate": 6.400000000000001e-05, + "loss": 3.7992, + "step": 6400 + }, + { + "epoch": 0.14, + "learning_rate": 6.500000000000001e-05, + "loss": 3.753, + "step": 6500 + }, + { + "epoch": 0.14, + "learning_rate": 6.6e-05, + "loss": 3.7035, + "step": 6600 + }, + { + "epoch": 0.14, + "learning_rate": 6.7e-05, + "loss": 3.6755, + "step": 6700 + }, + { + "epoch": 0.14, + "learning_rate": 6.800000000000001e-05, + "loss": 3.6067, + "step": 6800 + }, + { + "epoch": 0.15, + "learning_rate": 6.9e-05, + "loss": 3.5867, + "step": 6900 + }, + { + "epoch": 0.15, + "learning_rate": 7e-05, + "loss": 3.5409, + "step": 7000 + }, + { + "epoch": 0.15, + "learning_rate": 7.1e-05, + "loss": 3.4989, + "step": 7100 + }, + { + "epoch": 0.15, + "learning_rate": 7.2e-05, + "loss": 3.463, + "step": 7200 + }, + { + "epoch": 0.15, + "learning_rate": 7.3e-05, + "loss": 3.4175, + "step": 7300 + }, + { + "epoch": 0.16, + "learning_rate": 7.4e-05, + "loss": 3.3896, + "step": 7400 + }, + { + "epoch": 0.16, + "learning_rate": 7.500000000000001e-05, + "loss": 3.3591, + "step": 7500 + }, + { + "epoch": 0.16, + "learning_rate": 7.6e-05, + "loss": 3.3243, + "step": 7600 + }, + { + "epoch": 0.16, + "learning_rate": 7.7e-05, + "loss": 3.284, + "step": 7700 + }, + { + "epoch": 0.17, + "learning_rate": 7.800000000000001e-05, + "loss": 3.2571, + "step": 7800 + }, + { + "epoch": 0.17, + "learning_rate": 7.900000000000001e-05, + "loss": 3.2228, + "step": 7900 + }, + { + "epoch": 0.17, + "learning_rate": 8e-05, + "loss": 3.1848, + "step": 8000 + }, + { + "epoch": 0.17, + "learning_rate": 8.1e-05, + "loss": 3.1748, + "step": 8100 + }, + { + "epoch": 0.17, + "learning_rate": 8.2e-05, + "loss": 3.1406, + "step": 8200 + }, + { + "epoch": 0.18, + "learning_rate": 8.3e-05, + "loss": 3.123, + "step": 8300 + }, + { + "epoch": 0.18, + "learning_rate": 8.4e-05, + "loss": 3.1, + "step": 8400 + }, + { + "epoch": 0.18, + "learning_rate": 8.5e-05, + "loss": 3.0738, + "step": 8500 + }, + { + "epoch": 0.18, + "learning_rate": 8.6e-05, + "loss": 3.05, + "step": 8600 + }, + { + "epoch": 0.18, + "learning_rate": 8.7e-05, + "loss": 3.0246, + "step": 8700 + }, + { + "epoch": 0.19, + "learning_rate": 8.800000000000001e-05, + "loss": 2.9948, + "step": 8800 + }, + { + "epoch": 0.19, + "learning_rate": 8.900000000000001e-05, + "loss": 2.9928, + "step": 8900 + }, + { + "epoch": 0.19, + "learning_rate": 9e-05, + "loss": 2.9668, + "step": 9000 + }, + { + "epoch": 0.19, + "learning_rate": 9.1e-05, + "loss": 2.9411, + "step": 9100 + }, + { + "epoch": 0.19, + "learning_rate": 9.200000000000001e-05, + "loss": 2.9251, + "step": 9200 + }, + { + "epoch": 0.2, + "learning_rate": 9.300000000000001e-05, + "loss": 2.9019, + "step": 9300 + }, + { + "epoch": 0.2, + "learning_rate": 9.4e-05, + "loss": 2.8918, + "step": 9400 + }, + { + "epoch": 0.2, + "learning_rate": 9.5e-05, + "loss": 2.8718, + "step": 9500 + }, + { + "epoch": 0.2, + "learning_rate": 9.6e-05, + "loss": 2.8649, + "step": 9600 + }, + { + "epoch": 0.21, + "learning_rate": 9.7e-05, + "loss": 2.8558, + "step": 9700 + }, + { + "epoch": 0.21, + "learning_rate": 9.8e-05, + "loss": 2.8363, + "step": 9800 + }, + { + "epoch": 0.21, + "learning_rate": 9.900000000000001e-05, + "loss": 2.8145, + "step": 9900 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001, + "loss": 2.8047, + "step": 10000 + }, + { + "epoch": 0.21, + "learning_rate": 9.99795918367347e-05, + "loss": 2.785, + "step": 10100 + }, + { + "epoch": 0.22, + "learning_rate": 9.995918367346939e-05, + "loss": 2.7711, + "step": 10200 + }, + { + "epoch": 0.22, + "learning_rate": 9.993877551020409e-05, + "loss": 2.7626, + "step": 10300 + }, + { + "epoch": 0.22, + "learning_rate": 9.991836734693878e-05, + "loss": 2.7413, + "step": 10400 + }, + { + "epoch": 0.22, + "learning_rate": 9.989795918367347e-05, + "loss": 2.7286, + "step": 10500 + }, + { + "epoch": 0.22, + "learning_rate": 9.987755102040817e-05, + "loss": 2.7203, + "step": 10600 + }, + { + "epoch": 0.23, + "learning_rate": 9.985714285714287e-05, + "loss": 2.6978, + "step": 10700 + }, + { + "epoch": 0.23, + "learning_rate": 9.983673469387755e-05, + "loss": 2.6935, + "step": 10800 + }, + { + "epoch": 0.23, + "learning_rate": 9.981632653061225e-05, + "loss": 2.6829, + "step": 10900 + }, + { + "epoch": 0.23, + "learning_rate": 9.979591836734695e-05, + "loss": 2.6686, + "step": 11000 + }, + { + "epoch": 0.23, + "learning_rate": 9.977551020408163e-05, + "loss": 2.6699, + "step": 11100 + }, + { + "epoch": 0.24, + "learning_rate": 9.975510204081633e-05, + "loss": 2.6482, + "step": 11200 + }, + { + "epoch": 0.24, + "learning_rate": 9.973469387755102e-05, + "loss": 2.637, + "step": 11300 + }, + { + "epoch": 0.24, + "learning_rate": 9.971428571428571e-05, + "loss": 2.6153, + "step": 11400 + }, + { + "epoch": 0.24, + "learning_rate": 9.969387755102041e-05, + "loss": 2.612, + "step": 11500 + }, + { + "epoch": 0.25, + "learning_rate": 9.96734693877551e-05, + "loss": 2.6096, + "step": 11600 + }, + { + "epoch": 0.25, + "learning_rate": 9.96530612244898e-05, + "loss": 2.5922, + "step": 11700 + }, + { + "epoch": 0.25, + "learning_rate": 9.96326530612245e-05, + "loss": 2.5857, + "step": 11800 + }, + { + "epoch": 0.25, + "learning_rate": 9.961224489795918e-05, + "loss": 2.5749, + "step": 11900 + }, + { + "epoch": 0.25, + "learning_rate": 9.959183673469388e-05, + "loss": 2.5663, + "step": 12000 + }, + { + "epoch": 0.26, + "learning_rate": 9.957142857142858e-05, + "loss": 2.5588, + "step": 12100 + }, + { + "epoch": 0.26, + "learning_rate": 9.955102040816326e-05, + "loss": 2.5513, + "step": 12200 + }, + { + "epoch": 0.26, + "learning_rate": 9.953061224489797e-05, + "loss": 2.5398, + "step": 12300 + }, + { + "epoch": 0.26, + "learning_rate": 9.951020408163266e-05, + "loss": 2.5352, + "step": 12400 + }, + { + "epoch": 0.26, + "learning_rate": 9.948979591836736e-05, + "loss": 2.5278, + "step": 12500 + }, + { + "epoch": 0.27, + "learning_rate": 9.946938775510205e-05, + "loss": 2.5242, + "step": 12600 + }, + { + "epoch": 0.27, + "learning_rate": 9.944897959183674e-05, + "loss": 2.5046, + "step": 12700 + }, + { + "epoch": 0.27, + "learning_rate": 9.942857142857144e-05, + "loss": 2.4943, + "step": 12800 + }, + { + "epoch": 0.27, + "learning_rate": 9.940816326530614e-05, + "loss": 2.4989, + "step": 12900 + }, + { + "epoch": 0.28, + "learning_rate": 9.938775510204082e-05, + "loss": 2.4866, + "step": 13000 + }, + { + "epoch": 0.28, + "learning_rate": 9.936734693877552e-05, + "loss": 2.4764, + "step": 13100 + }, + { + "epoch": 0.28, + "learning_rate": 9.934693877551022e-05, + "loss": 2.4728, + "step": 13200 + }, + { + "epoch": 0.28, + "learning_rate": 9.93265306122449e-05, + "loss": 2.4532, + "step": 13300 + }, + { + "epoch": 0.28, + "learning_rate": 9.93061224489796e-05, + "loss": 2.4532, + "step": 13400 + }, + { + "epoch": 0.29, + "learning_rate": 9.92857142857143e-05, + "loss": 2.464, + "step": 13500 + }, + { + "epoch": 0.29, + "learning_rate": 9.926530612244898e-05, + "loss": 2.4405, + "step": 13600 + }, + { + "epoch": 0.29, + "learning_rate": 9.924489795918368e-05, + "loss": 2.4397, + "step": 13700 + }, + { + "epoch": 0.29, + "learning_rate": 9.922448979591838e-05, + "loss": 2.4286, + "step": 13800 + }, + { + "epoch": 0.29, + "learning_rate": 9.920408163265306e-05, + "loss": 2.4184, + "step": 13900 + }, + { + "epoch": 0.3, + "learning_rate": 9.91838775510204e-05, + "loss": 2.4207, + "step": 14000 + }, + { + "epoch": 0.3, + "learning_rate": 9.91634693877551e-05, + "loss": 2.4132, + "step": 14100 + }, + { + "epoch": 0.3, + "learning_rate": 9.91430612244898e-05, + "loss": 2.4126, + "step": 14200 + }, + { + "epoch": 0.3, + "learning_rate": 9.912265306122449e-05, + "loss": 2.4032, + "step": 14300 + }, + { + "epoch": 0.3, + "learning_rate": 9.910224489795919e-05, + "loss": 2.3902, + "step": 14400 + }, + { + "epoch": 0.31, + "learning_rate": 9.908183673469388e-05, + "loss": 2.3934, + "step": 14500 + }, + { + "epoch": 0.31, + "learning_rate": 9.906142857142857e-05, + "loss": 2.3873, + "step": 14600 + }, + { + "epoch": 0.31, + "learning_rate": 9.904102040816327e-05, + "loss": 2.3686, + "step": 14700 + }, + { + "epoch": 0.31, + "learning_rate": 9.902061224489797e-05, + "loss": 2.3741, + "step": 14800 + }, + { + "epoch": 0.32, + "learning_rate": 9.900020408163265e-05, + "loss": 2.3662, + "step": 14900 + }, + { + "epoch": 0.32, + "learning_rate": 9.897979591836735e-05, + "loss": 2.3625, + "step": 15000 + }, + { + "epoch": 0.32, + "learning_rate": 9.895938775510205e-05, + "loss": 2.3601, + "step": 15100 + }, + { + "epoch": 0.32, + "learning_rate": 9.893897959183675e-05, + "loss": 2.36, + "step": 15200 + }, + { + "epoch": 0.32, + "learning_rate": 9.891857142857144e-05, + "loss": 2.3391, + "step": 15300 + }, + { + "epoch": 0.33, + "learning_rate": 9.889816326530613e-05, + "loss": 2.3479, + "step": 15400 + }, + { + "epoch": 0.33, + "learning_rate": 9.887775510204083e-05, + "loss": 2.3399, + "step": 15500 + }, + { + "epoch": 0.33, + "learning_rate": 9.885734693877553e-05, + "loss": 2.3281, + "step": 15600 + }, + { + "epoch": 0.33, + "learning_rate": 9.883693877551021e-05, + "loss": 2.3202, + "step": 15700 + }, + { + "epoch": 0.33, + "learning_rate": 9.881653061224491e-05, + "loss": 2.3204, + "step": 15800 + }, + { + "epoch": 0.34, + "learning_rate": 9.87961224489796e-05, + "loss": 2.3178, + "step": 15900 + }, + { + "epoch": 0.34, + "learning_rate": 9.877571428571429e-05, + "loss": 2.3155, + "step": 16000 + }, + { + "epoch": 0.34, + "learning_rate": 9.875530612244899e-05, + "loss": 2.3132, + "step": 16100 + }, + { + "epoch": 0.34, + "learning_rate": 9.873489795918367e-05, + "loss": 2.3096, + "step": 16200 + }, + { + "epoch": 0.34, + "learning_rate": 9.871448979591837e-05, + "loss": 2.298, + "step": 16300 + }, + { + "epoch": 0.35, + "learning_rate": 9.869408163265307e-05, + "loss": 2.3049, + "step": 16400 + }, + { + "epoch": 0.35, + "learning_rate": 9.867367346938776e-05, + "loss": 2.2992, + "step": 16500 + }, + { + "epoch": 0.35, + "learning_rate": 9.865326530612245e-05, + "loss": 2.2926, + "step": 16600 + }, + { + "epoch": 0.35, + "learning_rate": 9.863285714285715e-05, + "loss": 2.2843, + "step": 16700 + }, + { + "epoch": 0.36, + "learning_rate": 9.861244897959184e-05, + "loss": 2.2873, + "step": 16800 + }, + { + "epoch": 0.36, + "learning_rate": 9.859204081632654e-05, + "loss": 2.2797, + "step": 16900 + }, + { + "epoch": 0.36, + "learning_rate": 9.857163265306123e-05, + "loss": 2.2736, + "step": 17000 + }, + { + "epoch": 0.36, + "learning_rate": 9.855122448979592e-05, + "loss": 2.2744, + "step": 17100 + }, + { + "epoch": 0.36, + "learning_rate": 9.853081632653062e-05, + "loss": 2.2719, + "step": 17200 + }, + { + "epoch": 0.37, + "learning_rate": 9.851040816326532e-05, + "loss": 2.2618, + "step": 17300 + }, + { + "epoch": 0.37, + "learning_rate": 9.849e-05, + "loss": 2.2613, + "step": 17400 + }, + { + "epoch": 0.37, + "learning_rate": 9.84695918367347e-05, + "loss": 2.2648, + "step": 17500 + }, + { + "epoch": 0.37, + "learning_rate": 9.844938775510204e-05, + "loss": 2.2479, + "step": 17600 + }, + { + "epoch": 0.37, + "learning_rate": 9.842897959183674e-05, + "loss": 2.2456, + "step": 17700 + }, + { + "epoch": 0.38, + "learning_rate": 9.840857142857142e-05, + "loss": 2.2376, + "step": 17800 + }, + { + "epoch": 0.38, + "learning_rate": 9.838816326530613e-05, + "loss": 2.2329, + "step": 17900 + }, + { + "epoch": 0.38, + "learning_rate": 9.836775510204082e-05, + "loss": 2.2304, + "step": 18000 + }, + { + "epoch": 0.38, + "learning_rate": 9.834734693877552e-05, + "loss": 2.2331, + "step": 18100 + }, + { + "epoch": 0.39, + "learning_rate": 9.832693877551022e-05, + "loss": 2.2272, + "step": 18200 + }, + { + "epoch": 0.39, + "learning_rate": 9.83065306122449e-05, + "loss": 2.2291, + "step": 18300 + }, + { + "epoch": 0.39, + "learning_rate": 9.82861224489796e-05, + "loss": 2.2169, + "step": 18400 + }, + { + "epoch": 0.39, + "learning_rate": 9.82657142857143e-05, + "loss": 2.2227, + "step": 18500 + }, + { + "epoch": 0.39, + "learning_rate": 9.824530612244898e-05, + "loss": 2.2115, + "step": 18600 + }, + { + "epoch": 0.4, + "learning_rate": 9.822489795918368e-05, + "loss": 2.2094, + "step": 18700 + }, + { + "epoch": 0.4, + "learning_rate": 9.820448979591838e-05, + "loss": 2.2061, + "step": 18800 + }, + { + "epoch": 0.4, + "learning_rate": 9.818408163265306e-05, + "loss": 2.1997, + "step": 18900 + }, + { + "epoch": 0.4, + "learning_rate": 9.816367346938776e-05, + "loss": 2.1995, + "step": 19000 + }, + { + "epoch": 0.4, + "learning_rate": 9.814326530612246e-05, + "loss": 2.1977, + "step": 19100 + }, + { + "epoch": 0.41, + "learning_rate": 9.812285714285715e-05, + "loss": 2.1991, + "step": 19200 + }, + { + "epoch": 0.41, + "learning_rate": 9.810244897959184e-05, + "loss": 2.1947, + "step": 19300 + }, + { + "epoch": 0.41, + "learning_rate": 9.808204081632654e-05, + "loss": 2.1963, + "step": 19400 + }, + { + "epoch": 0.41, + "learning_rate": 9.806163265306123e-05, + "loss": 2.1837, + "step": 19500 + }, + { + "epoch": 0.41, + "learning_rate": 9.804122448979593e-05, + "loss": 2.1804, + "step": 19600 + }, + { + "epoch": 0.42, + "learning_rate": 9.802081632653062e-05, + "loss": 2.1775, + "step": 19700 + }, + { + "epoch": 0.42, + "learning_rate": 9.800040816326531e-05, + "loss": 2.1752, + "step": 19800 + }, + { + "epoch": 0.42, + "learning_rate": 9.798000000000001e-05, + "loss": 2.1774, + "step": 19900 + }, + { + "epoch": 0.42, + "learning_rate": 9.795959183673469e-05, + "loss": 2.1665, + "step": 20000 + }, + { + "epoch": 0.43, + "learning_rate": 9.793918367346939e-05, + "loss": 2.1627, + "step": 20100 + }, + { + "epoch": 0.43, + "learning_rate": 9.791877551020409e-05, + "loss": 2.1643, + "step": 20200 + }, + { + "epoch": 0.43, + "learning_rate": 9.789857142857143e-05, + "loss": 2.1728, + "step": 20300 + }, + { + "epoch": 0.43, + "learning_rate": 9.787816326530613e-05, + "loss": 2.1629, + "step": 20400 + }, + { + "epoch": 0.43, + "learning_rate": 9.785775510204081e-05, + "loss": 2.1569, + "step": 20500 + }, + { + "epoch": 0.44, + "learning_rate": 9.783734693877552e-05, + "loss": 2.1567, + "step": 20600 + }, + { + "epoch": 0.44, + "learning_rate": 9.781693877551021e-05, + "loss": 2.1566, + "step": 20700 + }, + { + "epoch": 0.44, + "learning_rate": 9.779673469387755e-05, + "loss": 2.1454, + "step": 20800 + }, + { + "epoch": 0.44, + "learning_rate": 9.777632653061225e-05, + "loss": 2.157, + "step": 20900 + }, + { + "epoch": 0.44, + "learning_rate": 9.775591836734695e-05, + "loss": 2.1419, + "step": 21000 + }, + { + "epoch": 0.45, + "learning_rate": 9.773551020408163e-05, + "loss": 2.1389, + "step": 21100 + }, + { + "epoch": 0.45, + "learning_rate": 9.771510204081633e-05, + "loss": 2.1391, + "step": 21200 + }, + { + "epoch": 0.45, + "learning_rate": 9.769469387755103e-05, + "loss": 2.1343, + "step": 21300 + }, + { + "epoch": 0.45, + "learning_rate": 9.767428571428571e-05, + "loss": 2.1381, + "step": 21400 + }, + { + "epoch": 0.46, + "learning_rate": 9.765387755102041e-05, + "loss": 2.1352, + "step": 21500 + }, + { + "epoch": 0.46, + "learning_rate": 9.763346938775511e-05, + "loss": 2.1352, + "step": 21600 + }, + { + "epoch": 0.46, + "learning_rate": 9.76130612244898e-05, + "loss": 2.1262, + "step": 21700 + }, + { + "epoch": 0.46, + "learning_rate": 9.759265306122449e-05, + "loss": 2.133, + "step": 21800 + }, + { + "epoch": 0.46, + "learning_rate": 9.757224489795919e-05, + "loss": 2.1228, + "step": 21900 + }, + { + "epoch": 0.47, + "learning_rate": 9.755183673469388e-05, + "loss": 2.1245, + "step": 22000 + }, + { + "epoch": 0.47, + "learning_rate": 9.753142857142857e-05, + "loss": 2.1176, + "step": 22100 + }, + { + "epoch": 0.47, + "learning_rate": 9.751102040816327e-05, + "loss": 2.1164, + "step": 22200 + }, + { + "epoch": 0.47, + "learning_rate": 9.749061224489796e-05, + "loss": 2.1119, + "step": 22300 + }, + { + "epoch": 0.47, + "learning_rate": 9.747020408163266e-05, + "loss": 2.1172, + "step": 22400 + }, + { + "epoch": 0.48, + "learning_rate": 9.744979591836735e-05, + "loss": 2.1059, + "step": 22500 + }, + { + "epoch": 0.48, + "learning_rate": 9.742938775510204e-05, + "loss": 2.1087, + "step": 22600 + }, + { + "epoch": 0.48, + "learning_rate": 9.740897959183674e-05, + "loss": 2.1014, + "step": 22700 + }, + { + "epoch": 0.48, + "learning_rate": 9.738857142857142e-05, + "loss": 2.11, + "step": 22800 + }, + { + "epoch": 0.48, + "learning_rate": 9.736816326530612e-05, + "loss": 2.0921, + "step": 22900 + }, + { + "epoch": 0.49, + "learning_rate": 9.734775510204082e-05, + "loss": 2.0993, + "step": 23000 + }, + { + "epoch": 0.49, + "learning_rate": 9.73273469387755e-05, + "loss": 2.1137, + "step": 23100 + }, + { + "epoch": 0.49, + "learning_rate": 9.73069387755102e-05, + "loss": 2.0831, + "step": 23200 + }, + { + "epoch": 0.49, + "learning_rate": 9.72865306122449e-05, + "loss": 2.0913, + "step": 23300 + }, + { + "epoch": 0.5, + "learning_rate": 9.72661224489796e-05, + "loss": 2.0882, + "step": 23400 + }, + { + "epoch": 0.5, + "learning_rate": 9.72457142857143e-05, + "loss": 2.0866, + "step": 23500 + }, + { + "epoch": 0.5, + "learning_rate": 9.7225306122449e-05, + "loss": 2.0787, + "step": 23600 + }, + { + "epoch": 0.5, + "learning_rate": 9.720489795918368e-05, + "loss": 2.0838, + "step": 23700 + }, + { + "epoch": 0.5, + "learning_rate": 9.718448979591838e-05, + "loss": 2.0832, + "step": 23800 + }, + { + "epoch": 0.51, + "learning_rate": 9.716408163265306e-05, + "loss": 2.0752, + "step": 23900 + }, + { + "epoch": 0.51, + "learning_rate": 9.714387755102042e-05, + "loss": 2.0825, + "step": 24000 + }, + { + "epoch": 0.51, + "learning_rate": 9.71234693877551e-05, + "loss": 2.0727, + "step": 24100 + }, + { + "epoch": 0.51, + "learning_rate": 9.71030612244898e-05, + "loss": 2.0677, + "step": 24200 + }, + { + "epoch": 0.51, + "learning_rate": 9.70826530612245e-05, + "loss": 2.0616, + "step": 24300 + }, + { + "epoch": 0.52, + "learning_rate": 9.706224489795918e-05, + "loss": 2.0599, + "step": 24400 + }, + { + "epoch": 0.52, + "learning_rate": 9.704183673469388e-05, + "loss": 2.0761, + "step": 24500 + }, + { + "epoch": 0.52, + "learning_rate": 9.702142857142857e-05, + "loss": 2.0647, + "step": 24600 + }, + { + "epoch": 0.52, + "learning_rate": 9.700102040816327e-05, + "loss": 2.064, + "step": 24700 + }, + { + "epoch": 0.52, + "learning_rate": 9.698061224489796e-05, + "loss": 2.0585, + "step": 24800 + }, + { + "epoch": 0.53, + "learning_rate": 9.696020408163265e-05, + "loss": 2.058, + "step": 24900 + }, + { + "epoch": 0.53, + "learning_rate": 9.693979591836735e-05, + "loss": 2.0575, + "step": 25000 + }, + { + "epoch": 0.53, + "learning_rate": 9.691938775510205e-05, + "loss": 2.0467, + "step": 25100 + }, + { + "epoch": 0.53, + "learning_rate": 9.689897959183673e-05, + "loss": 2.0557, + "step": 25200 + }, + { + "epoch": 0.54, + "learning_rate": 9.687857142857143e-05, + "loss": 2.0391, + "step": 25300 + }, + { + "epoch": 0.54, + "learning_rate": 9.685816326530613e-05, + "loss": 2.0377, + "step": 25400 + }, + { + "epoch": 0.54, + "learning_rate": 9.683775510204081e-05, + "loss": 2.0564, + "step": 25500 + }, + { + "epoch": 0.54, + "learning_rate": 9.681734693877551e-05, + "loss": 2.0432, + "step": 25600 + }, + { + "epoch": 0.54, + "learning_rate": 9.679693877551021e-05, + "loss": 2.0504, + "step": 25700 + }, + { + "epoch": 0.55, + "learning_rate": 9.67765306122449e-05, + "loss": 2.0465, + "step": 25800 + }, + { + "epoch": 0.55, + "learning_rate": 9.675612244897959e-05, + "loss": 2.0407, + "step": 25900 + }, + { + "epoch": 0.55, + "learning_rate": 9.673571428571429e-05, + "loss": 2.0432, + "step": 26000 + }, + { + "epoch": 0.55, + "learning_rate": 9.671530612244898e-05, + "loss": 2.0386, + "step": 26100 + }, + { + "epoch": 0.55, + "learning_rate": 9.669489795918369e-05, + "loss": 2.023, + "step": 26200 + }, + { + "epoch": 0.56, + "learning_rate": 9.667469387755103e-05, + "loss": 2.0236, + "step": 26300 + }, + { + "epoch": 0.56, + "learning_rate": 9.665428571428571e-05, + "loss": 2.023, + "step": 26400 + }, + { + "epoch": 0.56, + "learning_rate": 9.663387755102041e-05, + "loss": 2.032, + "step": 26500 + }, + { + "epoch": 0.56, + "learning_rate": 9.661367346938775e-05, + "loss": 2.0356, + "step": 26600 + }, + { + "epoch": 0.57, + "learning_rate": 9.659326530612245e-05, + "loss": 2.0287, + "step": 26700 + }, + { + "epoch": 0.57, + "learning_rate": 9.657285714285715e-05, + "loss": 2.0249, + "step": 26800 + }, + { + "epoch": 0.57, + "learning_rate": 9.655244897959183e-05, + "loss": 2.0241, + "step": 26900 + }, + { + "epoch": 0.57, + "learning_rate": 9.653204081632653e-05, + "loss": 2.0162, + "step": 27000 + }, + { + "epoch": 0.57, + "learning_rate": 9.651163265306123e-05, + "loss": 2.0054, + "step": 27100 + }, + { + "epoch": 0.58, + "learning_rate": 9.649122448979593e-05, + "loss": 2.0278, + "step": 27200 + }, + { + "epoch": 0.58, + "learning_rate": 9.647081632653063e-05, + "loss": 2.0101, + "step": 27300 + }, + { + "epoch": 0.58, + "learning_rate": 9.645040816326531e-05, + "loss": 2.0098, + "step": 27400 + }, + { + "epoch": 0.58, + "learning_rate": 9.643000000000001e-05, + "loss": 2.0176, + "step": 27500 + }, + { + "epoch": 0.58, + "learning_rate": 9.640959183673471e-05, + "loss": 2.0118, + "step": 27600 + }, + { + "epoch": 0.59, + "learning_rate": 9.63891836734694e-05, + "loss": 1.999, + "step": 27700 + }, + { + "epoch": 0.59, + "learning_rate": 9.636877551020409e-05, + "loss": 2.0061, + "step": 27800 + }, + { + "epoch": 0.59, + "learning_rate": 9.634836734693879e-05, + "loss": 1.9981, + "step": 27900 + }, + { + "epoch": 0.59, + "learning_rate": 9.632795918367347e-05, + "loss": 2.0041, + "step": 28000 + }, + { + "epoch": 0.59, + "learning_rate": 9.630755102040817e-05, + "loss": 1.9992, + "step": 28100 + }, + { + "epoch": 0.6, + "learning_rate": 9.628714285714286e-05, + "loss": 2.0004, + "step": 28200 + }, + { + "epoch": 0.6, + "learning_rate": 9.626673469387756e-05, + "loss": 2.0035, + "step": 28300 + }, + { + "epoch": 0.6, + "learning_rate": 9.624632653061225e-05, + "loss": 1.99, + "step": 28400 + }, + { + "epoch": 0.6, + "learning_rate": 9.622591836734694e-05, + "loss": 2.0057, + "step": 28500 + }, + { + "epoch": 0.61, + "learning_rate": 9.620551020408164e-05, + "loss": 1.9969, + "step": 28600 + }, + { + "epoch": 0.61, + "learning_rate": 9.618510204081634e-05, + "loss": 2.0034, + "step": 28700 + }, + { + "epoch": 0.61, + "learning_rate": 9.616469387755102e-05, + "loss": 1.9936, + "step": 28800 + }, + { + "epoch": 0.61, + "learning_rate": 9.614428571428572e-05, + "loss": 1.9894, + "step": 28900 + }, + { + "epoch": 0.61, + "learning_rate": 9.612387755102042e-05, + "loss": 1.9868, + "step": 29000 + }, + { + "epoch": 0.62, + "learning_rate": 9.61034693877551e-05, + "loss": 1.986, + "step": 29100 + }, + { + "epoch": 0.62, + "learning_rate": 9.60830612244898e-05, + "loss": 1.9815, + "step": 29200 + }, + { + "epoch": 0.62, + "learning_rate": 9.60626530612245e-05, + "loss": 1.9786, + "step": 29300 + }, + { + "epoch": 0.62, + "learning_rate": 9.604224489795918e-05, + "loss": 1.9814, + "step": 29400 + }, + { + "epoch": 0.62, + "learning_rate": 9.602183673469388e-05, + "loss": 1.9706, + "step": 29500 + }, + { + "epoch": 0.63, + "learning_rate": 9.600142857142858e-05, + "loss": 1.9836, + "step": 29600 + }, + { + "epoch": 0.63, + "learning_rate": 9.598102040816327e-05, + "loss": 1.9775, + "step": 29700 + }, + { + "epoch": 0.63, + "learning_rate": 9.596061224489796e-05, + "loss": 1.9759, + "step": 29800 + }, + { + "epoch": 0.63, + "learning_rate": 9.594020408163266e-05, + "loss": 1.9742, + "step": 29900 + }, + { + "epoch": 0.63, + "learning_rate": 9.591979591836735e-05, + "loss": 1.9741, + "step": 30000 + }, + { + "epoch": 0.64, + "learning_rate": 9.589938775510205e-05, + "loss": 1.9729, + "step": 30100 + }, + { + "epoch": 0.64, + "learning_rate": 9.587897959183674e-05, + "loss": 1.9715, + "step": 30200 + }, + { + "epoch": 0.64, + "learning_rate": 9.585857142857143e-05, + "loss": 1.9599, + "step": 30300 + }, + { + "epoch": 0.64, + "learning_rate": 9.583816326530613e-05, + "loss": 1.969, + "step": 30400 + }, + { + "epoch": 0.65, + "learning_rate": 9.581775510204081e-05, + "loss": 1.9579, + "step": 30500 + }, + { + "epoch": 0.65, + "learning_rate": 9.579734693877551e-05, + "loss": 1.9747, + "step": 30600 + }, + { + "epoch": 0.65, + "learning_rate": 9.577693877551021e-05, + "loss": 1.9598, + "step": 30700 + }, + { + "epoch": 0.65, + "learning_rate": 9.57565306122449e-05, + "loss": 1.9604, + "step": 30800 + }, + { + "epoch": 0.65, + "learning_rate": 9.573612244897959e-05, + "loss": 1.9619, + "step": 30900 + }, + { + "epoch": 0.66, + "learning_rate": 9.571591836734695e-05, + "loss": 1.9631, + "step": 31000 + }, + { + "epoch": 0.66, + "learning_rate": 9.569551020408164e-05, + "loss": 1.9627, + "step": 31100 + }, + { + "epoch": 0.66, + "learning_rate": 9.567510204081633e-05, + "loss": 1.9624, + "step": 31200 + }, + { + "epoch": 0.66, + "learning_rate": 9.565469387755103e-05, + "loss": 1.957, + "step": 31300 + }, + { + "epoch": 0.66, + "learning_rate": 9.563428571428573e-05, + "loss": 1.9582, + "step": 31400 + }, + { + "epoch": 0.67, + "learning_rate": 9.561387755102041e-05, + "loss": 1.951, + "step": 31500 + }, + { + "epoch": 0.67, + "learning_rate": 9.559346938775511e-05, + "loss": 1.9566, + "step": 31600 + }, + { + "epoch": 0.67, + "learning_rate": 9.557306122448981e-05, + "loss": 1.9542, + "step": 31700 + }, + { + "epoch": 0.67, + "learning_rate": 9.555265306122449e-05, + "loss": 1.9523, + "step": 31800 + }, + { + "epoch": 0.68, + "learning_rate": 9.553224489795919e-05, + "loss": 1.95, + "step": 31900 + }, + { + "epoch": 0.68, + "learning_rate": 9.551183673469389e-05, + "loss": 1.9511, + "step": 32000 + }, + { + "epoch": 0.68, + "learning_rate": 9.549142857142857e-05, + "loss": 1.9521, + "step": 32100 + }, + { + "epoch": 0.68, + "learning_rate": 9.547102040816327e-05, + "loss": 1.9352, + "step": 32200 + }, + { + "epoch": 0.68, + "learning_rate": 9.545061224489796e-05, + "loss": 1.9526, + "step": 32300 + }, + { + "epoch": 0.69, + "learning_rate": 9.543020408163266e-05, + "loss": 1.949, + "step": 32400 + }, + { + "epoch": 0.69, + "learning_rate": 9.540979591836735e-05, + "loss": 1.9462, + "step": 32500 + }, + { + "epoch": 0.69, + "learning_rate": 9.538938775510204e-05, + "loss": 1.9307, + "step": 32600 + }, + { + "epoch": 0.69, + "learning_rate": 9.536897959183674e-05, + "loss": 1.9331, + "step": 32700 + }, + { + "epoch": 0.69, + "learning_rate": 9.534877551020409e-05, + "loss": 1.9348, + "step": 32800 + }, + { + "epoch": 0.7, + "learning_rate": 9.532836734693879e-05, + "loss": 1.9337, + "step": 32900 + }, + { + "epoch": 0.7, + "learning_rate": 9.530795918367347e-05, + "loss": 1.9364, + "step": 33000 + }, + { + "epoch": 0.7, + "learning_rate": 9.528755102040817e-05, + "loss": 1.9354, + "step": 33100 + }, + { + "epoch": 0.7, + "learning_rate": 9.526714285714287e-05, + "loss": 1.9338, + "step": 33200 + }, + { + "epoch": 0.7, + "learning_rate": 9.524673469387756e-05, + "loss": 1.9324, + "step": 33300 + }, + { + "epoch": 0.71, + "learning_rate": 9.522632653061225e-05, + "loss": 1.9245, + "step": 33400 + }, + { + "epoch": 0.71, + "learning_rate": 9.520591836734695e-05, + "loss": 1.9257, + "step": 33500 + }, + { + "epoch": 0.71, + "learning_rate": 9.518551020408164e-05, + "loss": 1.9342, + "step": 33600 + }, + { + "epoch": 0.71, + "learning_rate": 9.516510204081634e-05, + "loss": 1.9234, + "step": 33700 + }, + { + "epoch": 0.72, + "learning_rate": 9.514469387755103e-05, + "loss": 1.9315, + "step": 33800 + }, + { + "epoch": 0.72, + "learning_rate": 9.512428571428572e-05, + "loss": 1.9247, + "step": 33900 + }, + { + "epoch": 0.72, + "learning_rate": 9.510387755102042e-05, + "loss": 1.9185, + "step": 34000 + }, + { + "epoch": 0.72, + "learning_rate": 9.50834693877551e-05, + "loss": 1.9257, + "step": 34100 + }, + { + "epoch": 0.72, + "learning_rate": 9.50630612244898e-05, + "loss": 1.9159, + "step": 34200 + }, + { + "epoch": 0.73, + "learning_rate": 9.50426530612245e-05, + "loss": 1.9157, + "step": 34300 + }, + { + "epoch": 0.73, + "learning_rate": 9.502224489795918e-05, + "loss": 1.9157, + "step": 34400 + }, + { + "epoch": 0.73, + "learning_rate": 9.500183673469388e-05, + "loss": 1.9231, + "step": 34500 + }, + { + "epoch": 0.73, + "learning_rate": 9.498142857142858e-05, + "loss": 1.9168, + "step": 34600 + }, + { + "epoch": 0.73, + "learning_rate": 9.496102040816327e-05, + "loss": 1.9025, + "step": 34700 + }, + { + "epoch": 0.74, + "learning_rate": 9.494061224489796e-05, + "loss": 1.915, + "step": 34800 + }, + { + "epoch": 0.74, + "learning_rate": 9.492020408163266e-05, + "loss": 1.9145, + "step": 34900 + }, + { + "epoch": 0.74, + "learning_rate": 9.489979591836735e-05, + "loss": 1.9116, + "step": 35000 + }, + { + "epoch": 0.74, + "learning_rate": 9.487938775510205e-05, + "loss": 1.9127, + "step": 35100 + }, + { + "epoch": 0.74, + "learning_rate": 9.485897959183674e-05, + "loss": 1.9149, + "step": 35200 + }, + { + "epoch": 0.75, + "learning_rate": 9.483877551020408e-05, + "loss": 1.8986, + "step": 35300 + }, + { + "epoch": 0.75, + "learning_rate": 9.481836734693877e-05, + "loss": 1.9075, + "step": 35400 + }, + { + "epoch": 0.75, + "learning_rate": 9.479795918367348e-05, + "loss": 1.9033, + "step": 35500 + }, + { + "epoch": 0.75, + "learning_rate": 9.477755102040818e-05, + "loss": 1.907, + "step": 35600 + }, + { + "epoch": 0.76, + "learning_rate": 9.475714285714286e-05, + "loss": 1.8974, + "step": 35700 + }, + { + "epoch": 0.76, + "learning_rate": 9.473673469387756e-05, + "loss": 1.9046, + "step": 35800 + }, + { + "epoch": 0.76, + "learning_rate": 9.471632653061225e-05, + "loss": 1.902, + "step": 35900 + }, + { + "epoch": 0.76, + "learning_rate": 9.469591836734695e-05, + "loss": 1.9043, + "step": 36000 + }, + { + "epoch": 0.76, + "learning_rate": 9.467551020408164e-05, + "loss": 1.9067, + "step": 36100 + }, + { + "epoch": 0.77, + "learning_rate": 9.465510204081633e-05, + "loss": 1.8967, + "step": 36200 + }, + { + "epoch": 0.77, + "learning_rate": 9.463469387755103e-05, + "loss": 1.8988, + "step": 36300 + }, + { + "epoch": 0.77, + "learning_rate": 9.461428571428573e-05, + "loss": 1.8956, + "step": 36400 + }, + { + "epoch": 0.77, + "learning_rate": 9.459387755102041e-05, + "loss": 1.9023, + "step": 36500 + }, + { + "epoch": 0.77, + "learning_rate": 9.457346938775511e-05, + "loss": 1.8922, + "step": 36600 + }, + { + "epoch": 0.78, + "learning_rate": 9.455306122448981e-05, + "loss": 1.8957, + "step": 36700 + }, + { + "epoch": 0.78, + "learning_rate": 9.453265306122449e-05, + "loss": 1.8954, + "step": 36800 + }, + { + "epoch": 0.78, + "learning_rate": 9.451224489795919e-05, + "loss": 1.8881, + "step": 36900 + }, + { + "epoch": 0.78, + "learning_rate": 9.449183673469389e-05, + "loss": 1.8904, + "step": 37000 + }, + { + "epoch": 0.79, + "learning_rate": 9.447142857142857e-05, + "loss": 1.8925, + "step": 37100 + }, + { + "epoch": 0.79, + "learning_rate": 9.445102040816327e-05, + "loss": 1.8846, + "step": 37200 + }, + { + "epoch": 0.79, + "learning_rate": 9.443061224489797e-05, + "loss": 1.8902, + "step": 37300 + }, + { + "epoch": 0.79, + "learning_rate": 9.441020408163266e-05, + "loss": 1.891, + "step": 37400 + }, + { + "epoch": 0.79, + "learning_rate": 9.438979591836735e-05, + "loss": 1.8818, + "step": 37500 + }, + { + "epoch": 0.8, + "learning_rate": 9.436938775510205e-05, + "loss": 1.8877, + "step": 37600 + }, + { + "epoch": 0.8, + "learning_rate": 9.434897959183674e-05, + "loss": 1.8846, + "step": 37700 + }, + { + "epoch": 0.8, + "learning_rate": 9.432857142857143e-05, + "loss": 1.8811, + "step": 37800 + }, + { + "epoch": 0.8, + "learning_rate": 9.430816326530612e-05, + "loss": 1.8765, + "step": 37900 + }, + { + "epoch": 0.8, + "learning_rate": 9.428775510204082e-05, + "loss": 1.8753, + "step": 38000 + }, + { + "epoch": 0.81, + "learning_rate": 9.426734693877552e-05, + "loss": 1.8803, + "step": 38100 + }, + { + "epoch": 0.81, + "learning_rate": 9.424714285714287e-05, + "loss": 1.869, + "step": 38200 + }, + { + "epoch": 0.81, + "learning_rate": 9.422673469387756e-05, + "loss": 1.879, + "step": 38300 + }, + { + "epoch": 0.81, + "learning_rate": 9.420632653061225e-05, + "loss": 1.8771, + "step": 38400 + }, + { + "epoch": 0.81, + "learning_rate": 9.418591836734695e-05, + "loss": 1.874, + "step": 38500 + }, + { + "epoch": 0.82, + "learning_rate": 9.416551020408164e-05, + "loss": 1.8774, + "step": 38600 + }, + { + "epoch": 0.82, + "learning_rate": 9.414510204081634e-05, + "loss": 1.8701, + "step": 38700 + }, + { + "epoch": 0.82, + "learning_rate": 9.412469387755103e-05, + "loss": 1.8816, + "step": 38800 + }, + { + "epoch": 0.82, + "learning_rate": 9.410428571428572e-05, + "loss": 1.8751, + "step": 38900 + }, + { + "epoch": 0.83, + "learning_rate": 9.408387755102042e-05, + "loss": 1.8697, + "step": 39000 + }, + { + "epoch": 0.83, + "learning_rate": 9.406346938775512e-05, + "loss": 1.8759, + "step": 39100 + }, + { + "epoch": 0.83, + "learning_rate": 9.40430612244898e-05, + "loss": 1.868, + "step": 39200 + }, + { + "epoch": 0.83, + "learning_rate": 9.40226530612245e-05, + "loss": 1.877, + "step": 39300 + }, + { + "epoch": 0.83, + "learning_rate": 9.40022448979592e-05, + "loss": 1.8579, + "step": 39400 + }, + { + "epoch": 0.84, + "learning_rate": 9.398183673469388e-05, + "loss": 1.864, + "step": 39500 + }, + { + "epoch": 0.84, + "learning_rate": 9.396142857142858e-05, + "loss": 1.8668, + "step": 39600 + }, + { + "epoch": 0.84, + "learning_rate": 9.394102040816328e-05, + "loss": 1.8649, + "step": 39700 + }, + { + "epoch": 0.84, + "learning_rate": 9.392061224489796e-05, + "loss": 1.8577, + "step": 39800 + }, + { + "epoch": 0.84, + "learning_rate": 9.390020408163266e-05, + "loss": 1.8644, + "step": 39900 + }, + { + "epoch": 0.85, + "learning_rate": 9.387979591836735e-05, + "loss": 1.8623, + "step": 40000 + }, + { + "epoch": 0.85, + "learning_rate": 9.385938775510204e-05, + "loss": 1.8702, + "step": 40100 + }, + { + "epoch": 0.85, + "learning_rate": 9.383918367346939e-05, + "loss": 1.8701, + "step": 40200 + }, + { + "epoch": 0.85, + "learning_rate": 9.381877551020408e-05, + "loss": 1.8628, + "step": 40300 + }, + { + "epoch": 0.86, + "learning_rate": 9.379836734693878e-05, + "loss": 1.8651, + "step": 40400 + }, + { + "epoch": 0.86, + "learning_rate": 9.377795918367347e-05, + "loss": 1.8549, + "step": 40500 + }, + { + "epoch": 0.86, + "learning_rate": 9.375755102040817e-05, + "loss": 1.8583, + "step": 40600 + }, + { + "epoch": 0.86, + "learning_rate": 9.373714285714285e-05, + "loss": 1.8585, + "step": 40700 + }, + { + "epoch": 0.86, + "learning_rate": 9.371673469387755e-05, + "loss": 1.8534, + "step": 40800 + }, + { + "epoch": 0.87, + "learning_rate": 9.369632653061225e-05, + "loss": 1.8506, + "step": 40900 + }, + { + "epoch": 0.87, + "learning_rate": 9.367591836734695e-05, + "loss": 1.8517, + "step": 41000 + }, + { + "epoch": 0.87, + "learning_rate": 9.365551020408164e-05, + "loss": 1.8433, + "step": 41100 + }, + { + "epoch": 0.87, + "learning_rate": 9.363510204081634e-05, + "loss": 1.8522, + "step": 41200 + }, + { + "epoch": 0.87, + "learning_rate": 9.361469387755103e-05, + "loss": 1.8476, + "step": 41300 + }, + { + "epoch": 0.88, + "learning_rate": 9.359428571428573e-05, + "loss": 1.8474, + "step": 41400 + }, + { + "epoch": 0.88, + "learning_rate": 9.357387755102042e-05, + "loss": 1.8538, + "step": 41500 + }, + { + "epoch": 0.88, + "learning_rate": 9.355346938775511e-05, + "loss": 1.8529, + "step": 41600 + }, + { + "epoch": 0.88, + "learning_rate": 9.35330612244898e-05, + "loss": 1.8456, + "step": 41700 + }, + { + "epoch": 0.88, + "learning_rate": 9.351265306122449e-05, + "loss": 1.8524, + "step": 41800 + }, + { + "epoch": 0.89, + "learning_rate": 9.349224489795919e-05, + "loss": 1.8396, + "step": 41900 + }, + { + "epoch": 0.89, + "learning_rate": 9.347183673469389e-05, + "loss": 1.8365, + "step": 42000 + }, + { + "epoch": 0.89, + "learning_rate": 9.345142857142857e-05, + "loss": 1.8428, + "step": 42100 + }, + { + "epoch": 0.89, + "learning_rate": 9.343102040816327e-05, + "loss": 1.8381, + "step": 42200 + }, + { + "epoch": 0.9, + "learning_rate": 9.341061224489797e-05, + "loss": 1.8452, + "step": 42300 + }, + { + "epoch": 0.9, + "learning_rate": 9.339020408163265e-05, + "loss": 1.8318, + "step": 42400 + }, + { + "epoch": 0.9, + "learning_rate": 9.336979591836735e-05, + "loss": 1.8449, + "step": 42500 + }, + { + "epoch": 0.9, + "learning_rate": 9.334938775510205e-05, + "loss": 1.8372, + "step": 42600 + }, + { + "epoch": 0.9, + "learning_rate": 9.332897959183674e-05, + "loss": 1.8401, + "step": 42700 + }, + { + "epoch": 0.91, + "learning_rate": 9.330857142857143e-05, + "loss": 1.8383, + "step": 42800 + }, + { + "epoch": 0.91, + "learning_rate": 9.328816326530613e-05, + "loss": 1.8455, + "step": 42900 + }, + { + "epoch": 0.91, + "learning_rate": 9.326775510204082e-05, + "loss": 1.8365, + "step": 43000 + }, + { + "epoch": 0.91, + "learning_rate": 9.324734693877552e-05, + "loss": 1.8363, + "step": 43100 + }, + { + "epoch": 0.91, + "learning_rate": 9.322693877551021e-05, + "loss": 1.8314, + "step": 43200 + }, + { + "epoch": 0.92, + "learning_rate": 9.32065306122449e-05, + "loss": 1.8402, + "step": 43300 + }, + { + "epoch": 0.92, + "learning_rate": 9.31861224489796e-05, + "loss": 1.8266, + "step": 43400 + }, + { + "epoch": 0.92, + "learning_rate": 9.31657142857143e-05, + "loss": 1.8325, + "step": 43500 + }, + { + "epoch": 0.92, + "learning_rate": 9.314530612244898e-05, + "loss": 1.8309, + "step": 43600 + }, + { + "epoch": 0.92, + "learning_rate": 9.312489795918368e-05, + "loss": 1.8327, + "step": 43700 + }, + { + "epoch": 0.93, + "learning_rate": 9.310448979591836e-05, + "loss": 1.831, + "step": 43800 + }, + { + "epoch": 0.93, + "learning_rate": 9.308408163265306e-05, + "loss": 1.8328, + "step": 43900 + }, + { + "epoch": 0.93, + "learning_rate": 9.306367346938776e-05, + "loss": 1.8282, + "step": 44000 + }, + { + "epoch": 0.93, + "learning_rate": 9.304326530612245e-05, + "loss": 1.8296, + "step": 44100 + }, + { + "epoch": 0.94, + "learning_rate": 9.302285714285714e-05, + "loss": 1.8363, + "step": 44200 + }, + { + "epoch": 0.94, + "learning_rate": 9.30026530612245e-05, + "loss": 1.8332, + "step": 44300 + }, + { + "epoch": 0.94, + "learning_rate": 9.29822448979592e-05, + "loss": 1.8283, + "step": 44400 + }, + { + "epoch": 0.94, + "learning_rate": 9.296183673469388e-05, + "loss": 1.8268, + "step": 44500 + }, + { + "epoch": 0.94, + "learning_rate": 9.294142857142858e-05, + "loss": 1.8202, + "step": 44600 + }, + { + "epoch": 0.95, + "learning_rate": 9.292102040816328e-05, + "loss": 1.8201, + "step": 44700 + }, + { + "epoch": 0.95, + "learning_rate": 9.290061224489796e-05, + "loss": 1.8212, + "step": 44800 + }, + { + "epoch": 0.95, + "learning_rate": 9.28804081632653e-05, + "loss": 1.8187, + "step": 44900 + }, + { + "epoch": 0.95, + "learning_rate": 9.286e-05, + "loss": 1.8206, + "step": 45000 + }, + { + "epoch": 0.95, + "learning_rate": 9.28395918367347e-05, + "loss": 1.8195, + "step": 45100 + }, + { + "epoch": 0.96, + "learning_rate": 9.281918367346939e-05, + "loss": 1.8204, + "step": 45200 + }, + { + "epoch": 0.96, + "learning_rate": 9.279877551020408e-05, + "loss": 1.8205, + "step": 45300 + }, + { + "epoch": 0.96, + "learning_rate": 9.277836734693878e-05, + "loss": 1.8046, + "step": 45400 + }, + { + "epoch": 0.96, + "learning_rate": 9.275795918367347e-05, + "loss": 1.8107, + "step": 45500 + }, + { + "epoch": 0.97, + "learning_rate": 9.273755102040817e-05, + "loss": 1.8048, + "step": 45600 + }, + { + "epoch": 0.97, + "learning_rate": 9.271714285714286e-05, + "loss": 1.8062, + "step": 45700 + }, + { + "epoch": 0.97, + "learning_rate": 9.269673469387755e-05, + "loss": 1.8215, + "step": 45800 + }, + { + "epoch": 0.97, + "learning_rate": 9.267632653061225e-05, + "loss": 1.8174, + "step": 45900 + }, + { + "epoch": 0.97, + "learning_rate": 9.265591836734694e-05, + "loss": 1.8117, + "step": 46000 + }, + { + "epoch": 0.98, + "learning_rate": 9.263551020408163e-05, + "loss": 1.814, + "step": 46100 + }, + { + "epoch": 0.98, + "learning_rate": 9.261510204081633e-05, + "loss": 1.8195, + "step": 46200 + }, + { + "epoch": 0.98, + "learning_rate": 9.259469387755103e-05, + "loss": 1.8141, + "step": 46300 + }, + { + "epoch": 0.98, + "learning_rate": 9.257428571428571e-05, + "loss": 1.8168, + "step": 46400 + }, + { + "epoch": 0.98, + "learning_rate": 9.255387755102042e-05, + "loss": 1.8141, + "step": 46500 + }, + { + "epoch": 0.99, + "learning_rate": 9.253346938775511e-05, + "loss": 1.8012, + "step": 46600 + }, + { + "epoch": 0.99, + "learning_rate": 9.25130612244898e-05, + "loss": 1.8045, + "step": 46700 + }, + { + "epoch": 0.99, + "learning_rate": 9.24926530612245e-05, + "loss": 1.802, + "step": 46800 + }, + { + "epoch": 0.99, + "learning_rate": 9.247224489795919e-05, + "loss": 1.7992, + "step": 46900 + }, + { + "epoch": 0.99, + "learning_rate": 9.245183673469389e-05, + "loss": 1.802, + "step": 47000 + }, + { + "epoch": 1.0, + "learning_rate": 9.243142857142859e-05, + "loss": 1.8078, + "step": 47100 + }, + { + "epoch": 1.0, + "learning_rate": 9.241122448979593e-05, + "loss": 1.8033, + "step": 47200 + }, + { + "epoch": 1.0, + "learning_rate": 9.239081632653061e-05, + "loss": 1.8092, + "step": 47300 + }, + { + "epoch": 1.0, + "learning_rate": 9.237040816326531e-05, + "loss": 1.8075, + "step": 47400 + }, + { + "epoch": 1.01, + "learning_rate": 9.235000000000001e-05, + "loss": 1.8011, + "step": 47500 + }, + { + "epoch": 1.01, + "learning_rate": 9.23295918367347e-05, + "loss": 1.7966, + "step": 47600 + }, + { + "epoch": 1.01, + "learning_rate": 9.230918367346939e-05, + "loss": 1.7932, + "step": 47700 + }, + { + "epoch": 1.01, + "learning_rate": 9.228877551020409e-05, + "loss": 1.7997, + "step": 47800 + }, + { + "epoch": 1.01, + "learning_rate": 9.226836734693877e-05, + "loss": 1.7804, + "step": 47900 + }, + { + "epoch": 1.02, + "learning_rate": 9.224795918367347e-05, + "loss": 1.7995, + "step": 48000 + }, + { + "epoch": 1.02, + "learning_rate": 9.222755102040817e-05, + "loss": 1.7975, + "step": 48100 + }, + { + "epoch": 1.02, + "learning_rate": 9.220714285714286e-05, + "loss": 1.7941, + "step": 48200 + }, + { + "epoch": 1.02, + "learning_rate": 9.218673469387755e-05, + "loss": 1.7945, + "step": 48300 + }, + { + "epoch": 1.02, + "learning_rate": 9.216632653061224e-05, + "loss": 1.8069, + "step": 48400 + }, + { + "epoch": 1.03, + "learning_rate": 9.214591836734694e-05, + "loss": 1.7861, + "step": 48500 + }, + { + "epoch": 1.03, + "learning_rate": 9.212551020408164e-05, + "loss": 1.7893, + "step": 48600 + }, + { + "epoch": 1.03, + "learning_rate": 9.210510204081632e-05, + "loss": 1.7968, + "step": 48700 + }, + { + "epoch": 1.03, + "learning_rate": 9.208469387755102e-05, + "loss": 1.7837, + "step": 48800 + }, + { + "epoch": 1.03, + "learning_rate": 9.206428571428572e-05, + "loss": 1.7927, + "step": 48900 + }, + { + "epoch": 1.04, + "learning_rate": 9.20438775510204e-05, + "loss": 1.7835, + "step": 49000 + }, + { + "epoch": 1.04, + "learning_rate": 9.20234693877551e-05, + "loss": 1.786, + "step": 49100 + }, + { + "epoch": 1.04, + "learning_rate": 9.20030612244898e-05, + "loss": 1.7881, + "step": 49200 + }, + { + "epoch": 1.04, + "learning_rate": 9.198285714285715e-05, + "loss": 1.7879, + "step": 49300 + }, + { + "epoch": 1.05, + "learning_rate": 9.196244897959184e-05, + "loss": 1.7805, + "step": 49400 + }, + { + "epoch": 1.05, + "learning_rate": 9.194204081632654e-05, + "loss": 1.7913, + "step": 49500 + }, + { + "epoch": 1.05, + "learning_rate": 9.192163265306124e-05, + "loss": 1.7811, + "step": 49600 + }, + { + "epoch": 1.05, + "learning_rate": 9.190122448979592e-05, + "loss": 1.7742, + "step": 49700 + }, + { + "epoch": 1.05, + "learning_rate": 9.188081632653062e-05, + "loss": 1.7848, + "step": 49800 + }, + { + "epoch": 1.06, + "learning_rate": 9.186040816326532e-05, + "loss": 1.7778, + "step": 49900 + }, + { + "epoch": 1.06, + "learning_rate": 9.184020408163266e-05, + "loss": 1.7794, + "step": 50000 + }, + { + "epoch": 1.06, + "learning_rate": 9.181979591836734e-05, + "loss": 1.781, + "step": 50100 + }, + { + "epoch": 1.06, + "learning_rate": 9.179938775510204e-05, + "loss": 1.7792, + "step": 50200 + }, + { + "epoch": 1.06, + "learning_rate": 9.177897959183674e-05, + "loss": 1.7769, + "step": 50300 + }, + { + "epoch": 1.07, + "learning_rate": 9.175857142857144e-05, + "loss": 1.7735, + "step": 50400 + }, + { + "epoch": 1.07, + "learning_rate": 9.173816326530614e-05, + "loss": 1.7859, + "step": 50500 + }, + { + "epoch": 1.07, + "learning_rate": 9.171775510204082e-05, + "loss": 1.7767, + "step": 50600 + }, + { + "epoch": 1.07, + "learning_rate": 9.169734693877552e-05, + "loss": 1.7792, + "step": 50700 + }, + { + "epoch": 1.08, + "learning_rate": 9.167693877551022e-05, + "loss": 1.7791, + "step": 50800 + }, + { + "epoch": 1.08, + "learning_rate": 9.16565306122449e-05, + "loss": 1.7757, + "step": 50900 + }, + { + "epoch": 1.08, + "learning_rate": 9.16361224489796e-05, + "loss": 1.7723, + "step": 51000 + }, + { + "epoch": 1.08, + "learning_rate": 9.16157142857143e-05, + "loss": 1.7866, + "step": 51100 + }, + { + "epoch": 1.08, + "learning_rate": 9.159530612244898e-05, + "loss": 1.775, + "step": 51200 + }, + { + "epoch": 1.09, + "learning_rate": 9.157489795918368e-05, + "loss": 1.7649, + "step": 51300 + }, + { + "epoch": 1.09, + "learning_rate": 9.155448979591838e-05, + "loss": 1.7735, + "step": 51400 + }, + { + "epoch": 1.09, + "learning_rate": 9.153408163265307e-05, + "loss": 1.7694, + "step": 51500 + }, + { + "epoch": 1.09, + "learning_rate": 9.151367346938776e-05, + "loss": 1.7719, + "step": 51600 + }, + { + "epoch": 1.09, + "learning_rate": 9.149326530612246e-05, + "loss": 1.7653, + "step": 51700 + }, + { + "epoch": 1.1, + "learning_rate": 9.147285714285715e-05, + "loss": 1.7661, + "step": 51800 + }, + { + "epoch": 1.1, + "learning_rate": 9.145244897959185e-05, + "loss": 1.7754, + "step": 51900 + }, + { + "epoch": 1.1, + "learning_rate": 9.143204081632653e-05, + "loss": 1.7763, + "step": 52000 + }, + { + "epoch": 1.1, + "learning_rate": 9.141163265306123e-05, + "loss": 1.7632, + "step": 52100 + }, + { + "epoch": 1.1, + "learning_rate": 9.139122448979593e-05, + "loss": 1.7677, + "step": 52200 + }, + { + "epoch": 1.11, + "learning_rate": 9.137081632653061e-05, + "loss": 1.7678, + "step": 52300 + }, + { + "epoch": 1.11, + "learning_rate": 9.135040816326531e-05, + "loss": 1.7653, + "step": 52400 + }, + { + "epoch": 1.11, + "learning_rate": 9.133000000000001e-05, + "loss": 1.7606, + "step": 52500 + }, + { + "epoch": 1.11, + "learning_rate": 9.130959183673469e-05, + "loss": 1.763, + "step": 52600 + }, + { + "epoch": 1.12, + "learning_rate": 9.128918367346939e-05, + "loss": 1.7578, + "step": 52700 + }, + { + "epoch": 1.12, + "learning_rate": 9.126877551020409e-05, + "loss": 1.7688, + "step": 52800 + }, + { + "epoch": 1.12, + "learning_rate": 9.124836734693877e-05, + "loss": 1.7617, + "step": 52900 + }, + { + "epoch": 1.12, + "learning_rate": 9.122795918367347e-05, + "loss": 1.7671, + "step": 53000 + }, + { + "epoch": 1.12, + "learning_rate": 9.120755102040817e-05, + "loss": 1.7587, + "step": 53100 + }, + { + "epoch": 1.13, + "learning_rate": 9.118714285714286e-05, + "loss": 1.7603, + "step": 53200 + }, + { + "epoch": 1.13, + "learning_rate": 9.116673469387755e-05, + "loss": 1.7619, + "step": 53300 + }, + { + "epoch": 1.13, + "learning_rate": 9.114632653061225e-05, + "loss": 1.7661, + "step": 53400 + }, + { + "epoch": 1.13, + "learning_rate": 9.112591836734694e-05, + "loss": 1.7594, + "step": 53500 + }, + { + "epoch": 1.13, + "learning_rate": 9.110551020408164e-05, + "loss": 1.7596, + "step": 53600 + }, + { + "epoch": 1.14, + "learning_rate": 9.108510204081633e-05, + "loss": 1.7566, + "step": 53700 + }, + { + "epoch": 1.14, + "learning_rate": 9.106469387755102e-05, + "loss": 1.7634, + "step": 53800 + }, + { + "epoch": 1.14, + "learning_rate": 9.104428571428572e-05, + "loss": 1.7567, + "step": 53900 + }, + { + "epoch": 1.14, + "learning_rate": 9.10238775510204e-05, + "loss": 1.7582, + "step": 54000 + }, + { + "epoch": 1.14, + "learning_rate": 9.10034693877551e-05, + "loss": 1.7571, + "step": 54100 + }, + { + "epoch": 1.15, + "learning_rate": 9.09830612244898e-05, + "loss": 1.7602, + "step": 54200 + }, + { + "epoch": 1.15, + "learning_rate": 9.096265306122448e-05, + "loss": 1.7547, + "step": 54300 + }, + { + "epoch": 1.15, + "learning_rate": 9.094224489795918e-05, + "loss": 1.756, + "step": 54400 + }, + { + "epoch": 1.15, + "learning_rate": 9.092204081632654e-05, + "loss": 1.7572, + "step": 54500 + }, + { + "epoch": 1.16, + "learning_rate": 9.090163265306123e-05, + "loss": 1.7537, + "step": 54600 + }, + { + "epoch": 1.16, + "learning_rate": 9.088122448979592e-05, + "loss": 1.7488, + "step": 54700 + }, + { + "epoch": 1.16, + "learning_rate": 9.086081632653062e-05, + "loss": 1.749, + "step": 54800 + }, + { + "epoch": 1.16, + "learning_rate": 9.084040816326532e-05, + "loss": 1.7595, + "step": 54900 + }, + { + "epoch": 1.16, + "learning_rate": 9.082e-05, + "loss": 1.748, + "step": 55000 + }, + { + "epoch": 1.17, + "learning_rate": 9.07995918367347e-05, + "loss": 1.7583, + "step": 55100 + }, + { + "epoch": 1.17, + "learning_rate": 9.07791836734694e-05, + "loss": 1.7517, + "step": 55200 + }, + { + "epoch": 1.17, + "learning_rate": 9.075877551020408e-05, + "loss": 1.7597, + "step": 55300 + }, + { + "epoch": 1.17, + "learning_rate": 9.073836734693878e-05, + "loss": 1.7466, + "step": 55400 + }, + { + "epoch": 1.17, + "learning_rate": 9.071795918367348e-05, + "loss": 1.7591, + "step": 55500 + }, + { + "epoch": 1.18, + "learning_rate": 9.069755102040816e-05, + "loss": 1.7553, + "step": 55600 + }, + { + "epoch": 1.18, + "learning_rate": 9.067714285714286e-05, + "loss": 1.7522, + "step": 55700 + }, + { + "epoch": 1.18, + "learning_rate": 9.065673469387755e-05, + "loss": 1.7545, + "step": 55800 + }, + { + "epoch": 1.18, + "learning_rate": 9.063632653061225e-05, + "loss": 1.7465, + "step": 55900 + }, + { + "epoch": 1.19, + "learning_rate": 9.061591836734694e-05, + "loss": 1.7485, + "step": 56000 + }, + { + "epoch": 1.19, + "learning_rate": 9.059551020408163e-05, + "loss": 1.7433, + "step": 56100 + }, + { + "epoch": 1.19, + "learning_rate": 9.057510204081633e-05, + "loss": 1.7459, + "step": 56200 + }, + { + "epoch": 1.19, + "learning_rate": 9.055469387755103e-05, + "loss": 1.7517, + "step": 56300 + }, + { + "epoch": 1.19, + "learning_rate": 9.053428571428571e-05, + "loss": 1.7461, + "step": 56400 + }, + { + "epoch": 1.2, + "learning_rate": 9.051387755102041e-05, + "loss": 1.7453, + "step": 56500 + }, + { + "epoch": 1.2, + "learning_rate": 9.049346938775511e-05, + "loss": 1.7475, + "step": 56600 + }, + { + "epoch": 1.2, + "learning_rate": 9.047326530612246e-05, + "loss": 1.7362, + "step": 56700 + }, + { + "epoch": 1.2, + "learning_rate": 9.045285714285715e-05, + "loss": 1.7442, + "step": 56800 + }, + { + "epoch": 1.2, + "learning_rate": 9.043244897959184e-05, + "loss": 1.7404, + "step": 56900 + }, + { + "epoch": 1.21, + "learning_rate": 9.041204081632654e-05, + "loss": 1.7508, + "step": 57000 + }, + { + "epoch": 1.21, + "learning_rate": 9.039163265306123e-05, + "loss": 1.74, + "step": 57100 + }, + { + "epoch": 1.21, + "learning_rate": 9.037122448979593e-05, + "loss": 1.7411, + "step": 57200 + }, + { + "epoch": 1.21, + "learning_rate": 9.035081632653062e-05, + "loss": 1.7488, + "step": 57300 + }, + { + "epoch": 1.21, + "learning_rate": 9.033040816326531e-05, + "loss": 1.7416, + "step": 57400 + }, + { + "epoch": 1.22, + "learning_rate": 9.031000000000001e-05, + "loss": 1.7335, + "step": 57500 + }, + { + "epoch": 1.22, + "learning_rate": 9.028959183673469e-05, + "loss": 1.7294, + "step": 57600 + }, + { + "epoch": 1.22, + "learning_rate": 9.026918367346939e-05, + "loss": 1.7356, + "step": 57700 + }, + { + "epoch": 1.22, + "learning_rate": 9.024877551020409e-05, + "loss": 1.737, + "step": 57800 + }, + { + "epoch": 1.23, + "learning_rate": 9.022836734693877e-05, + "loss": 1.7334, + "step": 57900 + }, + { + "epoch": 1.23, + "learning_rate": 9.020795918367347e-05, + "loss": 1.739, + "step": 58000 + }, + { + "epoch": 1.23, + "learning_rate": 9.018755102040817e-05, + "loss": 1.7376, + "step": 58100 + }, + { + "epoch": 1.23, + "learning_rate": 9.016714285714286e-05, + "loss": 1.7282, + "step": 58200 + }, + { + "epoch": 1.23, + "learning_rate": 9.014673469387755e-05, + "loss": 1.7373, + "step": 58300 + }, + { + "epoch": 1.24, + "learning_rate": 9.012632653061225e-05, + "loss": 1.7267, + "step": 58400 + }, + { + "epoch": 1.24, + "learning_rate": 9.010591836734694e-05, + "loss": 1.7359, + "step": 58500 + }, + { + "epoch": 1.24, + "learning_rate": 9.008551020408164e-05, + "loss": 1.7355, + "step": 58600 + }, + { + "epoch": 1.24, + "learning_rate": 9.006510204081633e-05, + "loss": 1.7232, + "step": 58700 + }, + { + "epoch": 1.24, + "learning_rate": 9.004469387755102e-05, + "loss": 1.7437, + "step": 58800 + }, + { + "epoch": 1.25, + "learning_rate": 9.002428571428572e-05, + "loss": 1.7298, + "step": 58900 + }, + { + "epoch": 1.25, + "learning_rate": 9.000387755102042e-05, + "loss": 1.741, + "step": 59000 + }, + { + "epoch": 1.25, + "learning_rate": 8.998367346938777e-05, + "loss": 1.7277, + "step": 59100 + }, + { + "epoch": 1.25, + "learning_rate": 8.996326530612245e-05, + "loss": 1.7175, + "step": 59200 + }, + { + "epoch": 1.26, + "learning_rate": 8.994285714285715e-05, + "loss": 1.7265, + "step": 59300 + }, + { + "epoch": 1.26, + "learning_rate": 8.992244897959185e-05, + "loss": 1.7356, + "step": 59400 + }, + { + "epoch": 1.26, + "learning_rate": 8.990204081632654e-05, + "loss": 1.7379, + "step": 59500 + }, + { + "epoch": 1.26, + "learning_rate": 8.988163265306123e-05, + "loss": 1.7247, + "step": 59600 + }, + { + "epoch": 1.26, + "learning_rate": 8.986122448979592e-05, + "loss": 1.7357, + "step": 59700 + }, + { + "epoch": 1.27, + "learning_rate": 8.984081632653062e-05, + "loss": 1.7294, + "step": 59800 + }, + { + "epoch": 1.27, + "learning_rate": 8.982040816326532e-05, + "loss": 1.7194, + "step": 59900 + }, + { + "epoch": 1.27, + "learning_rate": 8.98e-05, + "loss": 1.732, + "step": 60000 + }, + { + "epoch": 1.27, + "learning_rate": 8.97795918367347e-05, + "loss": 1.7268, + "step": 60100 + }, + { + "epoch": 1.27, + "learning_rate": 8.97591836734694e-05, + "loss": 1.7266, + "step": 60200 + }, + { + "epoch": 1.28, + "learning_rate": 8.973877551020408e-05, + "loss": 1.7285, + "step": 60300 + }, + { + "epoch": 1.28, + "learning_rate": 8.971836734693878e-05, + "loss": 1.7191, + "step": 60400 + }, + { + "epoch": 1.28, + "learning_rate": 8.969795918367348e-05, + "loss": 1.7208, + "step": 60500 + }, + { + "epoch": 1.28, + "learning_rate": 8.967755102040816e-05, + "loss": 1.7213, + "step": 60600 + }, + { + "epoch": 1.28, + "learning_rate": 8.965714285714286e-05, + "loss": 1.7199, + "step": 60700 + }, + { + "epoch": 1.29, + "learning_rate": 8.963673469387756e-05, + "loss": 1.7276, + "step": 60800 + }, + { + "epoch": 1.29, + "learning_rate": 8.961632653061225e-05, + "loss": 1.725, + "step": 60900 + }, + { + "epoch": 1.29, + "learning_rate": 8.959591836734694e-05, + "loss": 1.7227, + "step": 61000 + }, + { + "epoch": 1.29, + "learning_rate": 8.957551020408164e-05, + "loss": 1.7272, + "step": 61100 + }, + { + "epoch": 1.3, + "learning_rate": 8.955510204081633e-05, + "loss": 1.715, + "step": 61200 + }, + { + "epoch": 1.3, + "learning_rate": 8.953489795918367e-05, + "loss": 1.7319, + "step": 61300 + }, + { + "epoch": 1.3, + "learning_rate": 8.951448979591838e-05, + "loss": 1.7271, + "step": 61400 + }, + { + "epoch": 1.3, + "learning_rate": 8.949408163265306e-05, + "loss": 1.7232, + "step": 61500 + }, + { + "epoch": 1.3, + "learning_rate": 8.947367346938776e-05, + "loss": 1.7178, + "step": 61600 + }, + { + "epoch": 1.31, + "learning_rate": 8.945326530612246e-05, + "loss": 1.7232, + "step": 61700 + }, + { + "epoch": 1.31, + "learning_rate": 8.943285714285715e-05, + "loss": 1.7197, + "step": 61800 + }, + { + "epoch": 1.31, + "learning_rate": 8.941244897959184e-05, + "loss": 1.7137, + "step": 61900 + }, + { + "epoch": 1.31, + "learning_rate": 8.939204081632654e-05, + "loss": 1.7153, + "step": 62000 + }, + { + "epoch": 1.31, + "learning_rate": 8.937183673469388e-05, + "loss": 1.716, + "step": 62100 + }, + { + "epoch": 1.32, + "learning_rate": 8.935142857142857e-05, + "loss": 1.7187, + "step": 62200 + }, + { + "epoch": 1.32, + "learning_rate": 8.933102040816327e-05, + "loss": 1.7137, + "step": 62300 + }, + { + "epoch": 1.32, + "learning_rate": 8.931061224489797e-05, + "loss": 1.7138, + "step": 62400 + }, + { + "epoch": 1.32, + "learning_rate": 8.929020408163265e-05, + "loss": 1.7162, + "step": 62500 + }, + { + "epoch": 1.32, + "learning_rate": 8.926979591836735e-05, + "loss": 1.7143, + "step": 62600 + }, + { + "epoch": 1.33, + "learning_rate": 8.924938775510205e-05, + "loss": 1.7056, + "step": 62700 + }, + { + "epoch": 1.33, + "learning_rate": 8.922897959183673e-05, + "loss": 1.7168, + "step": 62800 + }, + { + "epoch": 1.33, + "learning_rate": 8.920857142857143e-05, + "loss": 1.7115, + "step": 62900 + }, + { + "epoch": 1.33, + "learning_rate": 8.918816326530613e-05, + "loss": 1.7186, + "step": 63000 + }, + { + "epoch": 1.34, + "learning_rate": 8.916775510204081e-05, + "loss": 1.7073, + "step": 63100 + }, + { + "epoch": 1.34, + "learning_rate": 8.914734693877551e-05, + "loss": 1.706, + "step": 63200 + }, + { + "epoch": 1.34, + "learning_rate": 8.912693877551021e-05, + "loss": 1.7135, + "step": 63300 + }, + { + "epoch": 1.34, + "learning_rate": 8.91065306122449e-05, + "loss": 1.7051, + "step": 63400 + }, + { + "epoch": 1.34, + "learning_rate": 8.908612244897959e-05, + "loss": 1.702, + "step": 63500 + }, + { + "epoch": 1.35, + "learning_rate": 8.906571428571429e-05, + "loss": 1.7151, + "step": 63600 + }, + { + "epoch": 1.35, + "learning_rate": 8.904530612244898e-05, + "loss": 1.706, + "step": 63700 + }, + { + "epoch": 1.35, + "learning_rate": 8.902489795918367e-05, + "loss": 1.7042, + "step": 63800 + }, + { + "epoch": 1.35, + "learning_rate": 8.900448979591837e-05, + "loss": 1.7106, + "step": 63900 + }, + { + "epoch": 1.35, + "learning_rate": 8.898408163265306e-05, + "loss": 1.7157, + "step": 64000 + }, + { + "epoch": 1.36, + "learning_rate": 8.896367346938776e-05, + "loss": 1.707, + "step": 64100 + }, + { + "epoch": 1.36, + "learning_rate": 8.894326530612245e-05, + "loss": 1.7065, + "step": 64200 + }, + { + "epoch": 1.36, + "learning_rate": 8.892285714285715e-05, + "loss": 1.7028, + "step": 64300 + }, + { + "epoch": 1.36, + "learning_rate": 8.890244897959185e-05, + "loss": 1.7071, + "step": 64400 + }, + { + "epoch": 1.37, + "learning_rate": 8.888204081632654e-05, + "loss": 1.7122, + "step": 64500 + }, + { + "epoch": 1.37, + "learning_rate": 8.886163265306123e-05, + "loss": 1.7008, + "step": 64600 + }, + { + "epoch": 1.37, + "learning_rate": 8.884122448979593e-05, + "loss": 1.6919, + "step": 64700 + }, + { + "epoch": 1.37, + "learning_rate": 8.882081632653062e-05, + "loss": 1.6971, + "step": 64800 + }, + { + "epoch": 1.37, + "learning_rate": 8.880061224489796e-05, + "loss": 1.6916, + "step": 64900 + }, + { + "epoch": 1.38, + "learning_rate": 8.878020408163266e-05, + "loss": 1.7018, + "step": 65000 + }, + { + "epoch": 1.38, + "learning_rate": 8.875979591836735e-05, + "loss": 1.6992, + "step": 65100 + }, + { + "epoch": 1.38, + "learning_rate": 8.873938775510204e-05, + "loss": 1.7048, + "step": 65200 + }, + { + "epoch": 1.38, + "learning_rate": 8.871897959183674e-05, + "loss": 1.698, + "step": 65300 + }, + { + "epoch": 1.38, + "learning_rate": 8.869857142857144e-05, + "loss": 1.7036, + "step": 65400 + }, + { + "epoch": 1.39, + "learning_rate": 8.867816326530612e-05, + "loss": 1.7019, + "step": 65500 + }, + { + "epoch": 1.39, + "learning_rate": 8.865775510204082e-05, + "loss": 1.7049, + "step": 65600 + }, + { + "epoch": 1.39, + "learning_rate": 8.863734693877552e-05, + "loss": 1.6903, + "step": 65700 + }, + { + "epoch": 1.39, + "learning_rate": 8.86169387755102e-05, + "loss": 1.7013, + "step": 65800 + }, + { + "epoch": 1.39, + "learning_rate": 8.85965306122449e-05, + "loss": 1.6985, + "step": 65900 + }, + { + "epoch": 1.4, + "learning_rate": 8.85761224489796e-05, + "loss": 1.6899, + "step": 66000 + }, + { + "epoch": 1.4, + "learning_rate": 8.855571428571428e-05, + "loss": 1.7036, + "step": 66100 + }, + { + "epoch": 1.4, + "learning_rate": 8.853530612244898e-05, + "loss": 1.6986, + "step": 66200 + }, + { + "epoch": 1.4, + "learning_rate": 8.851489795918367e-05, + "loss": 1.6894, + "step": 66300 + }, + { + "epoch": 1.41, + "learning_rate": 8.849448979591837e-05, + "loss": 1.6953, + "step": 66400 + }, + { + "epoch": 1.41, + "learning_rate": 8.847408163265306e-05, + "loss": 1.6989, + "step": 66500 + }, + { + "epoch": 1.41, + "learning_rate": 8.845367346938775e-05, + "loss": 1.6929, + "step": 66600 + }, + { + "epoch": 1.41, + "learning_rate": 8.843326530612245e-05, + "loss": 1.6943, + "step": 66700 + }, + { + "epoch": 1.41, + "learning_rate": 8.841285714285715e-05, + "loss": 1.6957, + "step": 66800 + }, + { + "epoch": 1.42, + "learning_rate": 8.839244897959184e-05, + "loss": 1.6895, + "step": 66900 + }, + { + "epoch": 1.42, + "learning_rate": 8.837204081632654e-05, + "loss": 1.6964, + "step": 67000 + }, + { + "epoch": 1.42, + "learning_rate": 8.835163265306123e-05, + "loss": 1.6901, + "step": 67100 + }, + { + "epoch": 1.42, + "learning_rate": 8.833122448979593e-05, + "loss": 1.7021, + "step": 67200 + }, + { + "epoch": 1.42, + "learning_rate": 8.831081632653062e-05, + "loss": 1.6999, + "step": 67300 + }, + { + "epoch": 1.43, + "learning_rate": 8.829040816326531e-05, + "loss": 1.695, + "step": 67400 + }, + { + "epoch": 1.43, + "learning_rate": 8.827000000000001e-05, + "loss": 1.6981, + "step": 67500 + }, + { + "epoch": 1.43, + "learning_rate": 8.82495918367347e-05, + "loss": 1.6905, + "step": 67600 + }, + { + "epoch": 1.43, + "learning_rate": 8.822918367346939e-05, + "loss": 1.6954, + "step": 67700 + }, + { + "epoch": 1.43, + "learning_rate": 8.820877551020409e-05, + "loss": 1.6976, + "step": 67800 + }, + { + "epoch": 1.44, + "learning_rate": 8.818836734693879e-05, + "loss": 1.69, + "step": 67900 + }, + { + "epoch": 1.44, + "learning_rate": 8.816795918367347e-05, + "loss": 1.6851, + "step": 68000 + }, + { + "epoch": 1.44, + "learning_rate": 8.814755102040817e-05, + "loss": 1.6923, + "step": 68100 + }, + { + "epoch": 1.44, + "learning_rate": 8.812714285714287e-05, + "loss": 1.6876, + "step": 68200 + }, + { + "epoch": 1.45, + "learning_rate": 8.810673469387755e-05, + "loss": 1.6924, + "step": 68300 + }, + { + "epoch": 1.45, + "learning_rate": 8.808632653061225e-05, + "loss": 1.6842, + "step": 68400 + }, + { + "epoch": 1.45, + "learning_rate": 8.806591836734695e-05, + "loss": 1.6818, + "step": 68500 + }, + { + "epoch": 1.45, + "learning_rate": 8.804551020408163e-05, + "loss": 1.6891, + "step": 68600 + }, + { + "epoch": 1.45, + "learning_rate": 8.802510204081633e-05, + "loss": 1.6815, + "step": 68700 + }, + { + "epoch": 1.46, + "learning_rate": 8.800469387755103e-05, + "loss": 1.6905, + "step": 68800 + }, + { + "epoch": 1.46, + "learning_rate": 8.798428571428572e-05, + "loss": 1.6864, + "step": 68900 + }, + { + "epoch": 1.46, + "learning_rate": 8.796408163265306e-05, + "loss": 1.6832, + "step": 69000 + }, + { + "epoch": 1.46, + "learning_rate": 8.794367346938776e-05, + "loss": 1.6885, + "step": 69100 + }, + { + "epoch": 1.46, + "learning_rate": 8.792326530612245e-05, + "loss": 1.6906, + "step": 69200 + }, + { + "epoch": 1.47, + "learning_rate": 8.790285714285714e-05, + "loss": 1.6818, + "step": 69300 + }, + { + "epoch": 1.47, + "learning_rate": 8.788265306122449e-05, + "loss": 1.6789, + "step": 69400 + }, + { + "epoch": 1.47, + "learning_rate": 8.786224489795919e-05, + "loss": 1.6823, + "step": 69500 + }, + { + "epoch": 1.47, + "learning_rate": 8.784183673469389e-05, + "loss": 1.683, + "step": 69600 + }, + { + "epoch": 1.48, + "learning_rate": 8.782142857142857e-05, + "loss": 1.6849, + "step": 69700 + }, + { + "epoch": 1.48, + "learning_rate": 8.780102040816327e-05, + "loss": 1.6844, + "step": 69800 + }, + { + "epoch": 1.48, + "learning_rate": 8.778061224489796e-05, + "loss": 1.6798, + "step": 69900 + }, + { + "epoch": 1.48, + "learning_rate": 8.776040816326531e-05, + "loss": 1.6715, + "step": 70000 + }, + { + "epoch": 1.48, + "learning_rate": 8.774e-05, + "loss": 1.6838, + "step": 70100 + }, + { + "epoch": 1.49, + "learning_rate": 8.77195918367347e-05, + "loss": 1.6847, + "step": 70200 + }, + { + "epoch": 1.49, + "learning_rate": 8.76991836734694e-05, + "loss": 1.6865, + "step": 70300 + }, + { + "epoch": 1.49, + "learning_rate": 8.767877551020408e-05, + "loss": 1.6788, + "step": 70400 + }, + { + "epoch": 1.49, + "learning_rate": 8.765836734693878e-05, + "loss": 1.6836, + "step": 70500 + }, + { + "epoch": 1.49, + "learning_rate": 8.763795918367346e-05, + "loss": 1.6776, + "step": 70600 + }, + { + "epoch": 1.5, + "learning_rate": 8.761755102040817e-05, + "loss": 1.6734, + "step": 70700 + }, + { + "epoch": 1.5, + "learning_rate": 8.759714285714287e-05, + "loss": 1.6777, + "step": 70800 + }, + { + "epoch": 1.5, + "learning_rate": 8.757673469387756e-05, + "loss": 1.6768, + "step": 70900 + }, + { + "epoch": 1.5, + "learning_rate": 8.755632653061225e-05, + "loss": 1.6786, + "step": 71000 + }, + { + "epoch": 1.5, + "learning_rate": 8.753591836734695e-05, + "loss": 1.6835, + "step": 71100 + }, + { + "epoch": 1.51, + "learning_rate": 8.751551020408164e-05, + "loss": 1.6744, + "step": 71200 + }, + { + "epoch": 1.51, + "learning_rate": 8.749510204081634e-05, + "loss": 1.6714, + "step": 71300 + }, + { + "epoch": 1.51, + "learning_rate": 8.747469387755103e-05, + "loss": 1.6782, + "step": 71400 + }, + { + "epoch": 1.51, + "learning_rate": 8.745428571428572e-05, + "loss": 1.6745, + "step": 71500 + }, + { + "epoch": 1.52, + "learning_rate": 8.743387755102042e-05, + "loss": 1.6676, + "step": 71600 + }, + { + "epoch": 1.52, + "learning_rate": 8.74134693877551e-05, + "loss": 1.6725, + "step": 71700 + }, + { + "epoch": 1.52, + "learning_rate": 8.73930612244898e-05, + "loss": 1.6674, + "step": 71800 + }, + { + "epoch": 1.52, + "learning_rate": 8.73726530612245e-05, + "loss": 1.676, + "step": 71900 + }, + { + "epoch": 1.52, + "learning_rate": 8.735224489795918e-05, + "loss": 1.6717, + "step": 72000 + }, + { + "epoch": 1.53, + "learning_rate": 8.733183673469388e-05, + "loss": 1.6677, + "step": 72100 + }, + { + "epoch": 1.53, + "learning_rate": 8.731142857142858e-05, + "loss": 1.6684, + "step": 72200 + }, + { + "epoch": 1.53, + "learning_rate": 8.729102040816327e-05, + "loss": 1.6703, + "step": 72300 + }, + { + "epoch": 1.53, + "learning_rate": 8.727061224489796e-05, + "loss": 1.6717, + "step": 72400 + }, + { + "epoch": 1.53, + "learning_rate": 8.725020408163266e-05, + "loss": 1.672, + "step": 72500 + }, + { + "epoch": 1.54, + "learning_rate": 8.722979591836735e-05, + "loss": 1.6617, + "step": 72600 + }, + { + "epoch": 1.54, + "learning_rate": 8.720938775510205e-05, + "loss": 1.673, + "step": 72700 + }, + { + "epoch": 1.54, + "learning_rate": 8.718897959183674e-05, + "loss": 1.6649, + "step": 72800 + }, + { + "epoch": 1.54, + "learning_rate": 8.716857142857143e-05, + "loss": 1.6754, + "step": 72900 + }, + { + "epoch": 1.54, + "learning_rate": 8.714816326530613e-05, + "loss": 1.6693, + "step": 73000 + }, + { + "epoch": 1.55, + "learning_rate": 8.712775510204083e-05, + "loss": 1.6612, + "step": 73100 + }, + { + "epoch": 1.55, + "learning_rate": 8.710734693877551e-05, + "loss": 1.6712, + "step": 73200 + }, + { + "epoch": 1.55, + "learning_rate": 8.708693877551021e-05, + "loss": 1.6641, + "step": 73300 + }, + { + "epoch": 1.55, + "learning_rate": 8.706653061224491e-05, + "loss": 1.664, + "step": 73400 + }, + { + "epoch": 1.56, + "learning_rate": 8.704612244897959e-05, + "loss": 1.6667, + "step": 73500 + }, + { + "epoch": 1.56, + "learning_rate": 8.702571428571429e-05, + "loss": 1.6604, + "step": 73600 + }, + { + "epoch": 1.56, + "learning_rate": 8.700530612244898e-05, + "loss": 1.6719, + "step": 73700 + }, + { + "epoch": 1.56, + "learning_rate": 8.698489795918367e-05, + "loss": 1.6651, + "step": 73800 + }, + { + "epoch": 1.56, + "learning_rate": 8.696448979591837e-05, + "loss": 1.6675, + "step": 73900 + }, + { + "epoch": 1.57, + "learning_rate": 8.694408163265306e-05, + "loss": 1.6611, + "step": 74000 + }, + { + "epoch": 1.57, + "learning_rate": 8.692367346938776e-05, + "loss": 1.6659, + "step": 74100 + }, + { + "epoch": 1.57, + "learning_rate": 8.690326530612245e-05, + "loss": 1.6709, + "step": 74200 + }, + { + "epoch": 1.57, + "learning_rate": 8.688285714285714e-05, + "loss": 1.6553, + "step": 74300 + }, + { + "epoch": 1.57, + "learning_rate": 8.686244897959184e-05, + "loss": 1.6654, + "step": 74400 + }, + { + "epoch": 1.58, + "learning_rate": 8.684204081632653e-05, + "loss": 1.6549, + "step": 74500 + }, + { + "epoch": 1.58, + "learning_rate": 8.682183673469389e-05, + "loss": 1.6685, + "step": 74600 + }, + { + "epoch": 1.58, + "learning_rate": 8.680142857142857e-05, + "loss": 1.6552, + "step": 74700 + }, + { + "epoch": 1.58, + "learning_rate": 8.678102040816327e-05, + "loss": 1.6568, + "step": 74800 + }, + { + "epoch": 1.59, + "learning_rate": 8.676061224489797e-05, + "loss": 1.6621, + "step": 74900 + }, + { + "epoch": 1.59, + "learning_rate": 8.674020408163266e-05, + "loss": 1.665, + "step": 75000 + }, + { + "epoch": 1.59, + "learning_rate": 8.671979591836735e-05, + "loss": 1.6602, + "step": 75100 + }, + { + "epoch": 1.59, + "learning_rate": 8.669938775510205e-05, + "loss": 1.6588, + "step": 75200 + }, + { + "epoch": 1.59, + "learning_rate": 8.667897959183674e-05, + "loss": 1.659, + "step": 75300 + }, + { + "epoch": 1.6, + "learning_rate": 8.665857142857144e-05, + "loss": 1.6661, + "step": 75400 + }, + { + "epoch": 1.6, + "learning_rate": 8.663816326530612e-05, + "loss": 1.6632, + "step": 75500 + }, + { + "epoch": 1.6, + "learning_rate": 8.661775510204082e-05, + "loss": 1.6646, + "step": 75600 + }, + { + "epoch": 1.6, + "learning_rate": 8.659734693877552e-05, + "loss": 1.6618, + "step": 75700 + }, + { + "epoch": 1.6, + "learning_rate": 8.65769387755102e-05, + "loss": 1.6599, + "step": 75800 + }, + { + "epoch": 1.61, + "learning_rate": 8.65565306122449e-05, + "loss": 1.6611, + "step": 75900 + }, + { + "epoch": 1.61, + "learning_rate": 8.65361224489796e-05, + "loss": 1.6468, + "step": 76000 + }, + { + "epoch": 1.61, + "learning_rate": 8.651571428571428e-05, + "loss": 1.6629, + "step": 76100 + }, + { + "epoch": 1.61, + "learning_rate": 8.649530612244898e-05, + "loss": 1.6565, + "step": 76200 + }, + { + "epoch": 1.61, + "learning_rate": 8.647489795918368e-05, + "loss": 1.6618, + "step": 76300 + }, + { + "epoch": 1.62, + "learning_rate": 8.645448979591836e-05, + "loss": 1.6577, + "step": 76400 + }, + { + "epoch": 1.62, + "learning_rate": 8.643408163265306e-05, + "loss": 1.6609, + "step": 76500 + }, + { + "epoch": 1.62, + "learning_rate": 8.641367346938776e-05, + "loss": 1.656, + "step": 76600 + }, + { + "epoch": 1.62, + "learning_rate": 8.639326530612245e-05, + "loss": 1.6584, + "step": 76700 + }, + { + "epoch": 1.63, + "learning_rate": 8.637285714285714e-05, + "loss": 1.6525, + "step": 76800 + }, + { + "epoch": 1.63, + "learning_rate": 8.63526530612245e-05, + "loss": 1.6497, + "step": 76900 + }, + { + "epoch": 1.63, + "learning_rate": 8.63322448979592e-05, + "loss": 1.6528, + "step": 77000 + }, + { + "epoch": 1.63, + "learning_rate": 8.631183673469388e-05, + "loss": 1.6534, + "step": 77100 + }, + { + "epoch": 1.63, + "learning_rate": 8.629142857142858e-05, + "loss": 1.6549, + "step": 77200 + }, + { + "epoch": 1.64, + "learning_rate": 8.627102040816328e-05, + "loss": 1.6509, + "step": 77300 + }, + { + "epoch": 1.64, + "learning_rate": 8.625061224489796e-05, + "loss": 1.6506, + "step": 77400 + }, + { + "epoch": 1.64, + "learning_rate": 8.623020408163266e-05, + "loss": 1.6498, + "step": 77500 + }, + { + "epoch": 1.64, + "learning_rate": 8.620979591836735e-05, + "loss": 1.6564, + "step": 77600 + }, + { + "epoch": 1.64, + "learning_rate": 8.618938775510205e-05, + "loss": 1.6501, + "step": 77700 + }, + { + "epoch": 1.65, + "learning_rate": 8.616897959183674e-05, + "loss": 1.6487, + "step": 77800 + }, + { + "epoch": 1.65, + "learning_rate": 8.614857142857143e-05, + "loss": 1.6444, + "step": 77900 + }, + { + "epoch": 1.65, + "learning_rate": 8.612816326530613e-05, + "loss": 1.6475, + "step": 78000 + }, + { + "epoch": 1.65, + "learning_rate": 8.610775510204083e-05, + "loss": 1.6496, + "step": 78100 + }, + { + "epoch": 1.66, + "learning_rate": 8.608734693877551e-05, + "loss": 1.6542, + "step": 78200 + }, + { + "epoch": 1.66, + "learning_rate": 8.606693877551021e-05, + "loss": 1.6604, + "step": 78300 + }, + { + "epoch": 1.66, + "learning_rate": 8.60465306122449e-05, + "loss": 1.6454, + "step": 78400 + }, + { + "epoch": 1.66, + "learning_rate": 8.602612244897959e-05, + "loss": 1.6475, + "step": 78500 + }, + { + "epoch": 1.66, + "learning_rate": 8.600571428571429e-05, + "loss": 1.6516, + "step": 78600 + }, + { + "epoch": 1.67, + "learning_rate": 8.598530612244899e-05, + "loss": 1.6451, + "step": 78700 + }, + { + "epoch": 1.67, + "learning_rate": 8.596489795918367e-05, + "loss": 1.6511, + "step": 78800 + }, + { + "epoch": 1.67, + "learning_rate": 8.594448979591837e-05, + "loss": 1.6444, + "step": 78900 + }, + { + "epoch": 1.67, + "learning_rate": 8.592408163265307e-05, + "loss": 1.6485, + "step": 79000 + }, + { + "epoch": 1.67, + "learning_rate": 8.590367346938775e-05, + "loss": 1.6492, + "step": 79100 + }, + { + "epoch": 1.68, + "learning_rate": 8.588326530612245e-05, + "loss": 1.6386, + "step": 79200 + }, + { + "epoch": 1.68, + "learning_rate": 8.586285714285715e-05, + "loss": 1.6482, + "step": 79300 + }, + { + "epoch": 1.68, + "learning_rate": 8.584244897959184e-05, + "loss": 1.6493, + "step": 79400 + }, + { + "epoch": 1.68, + "learning_rate": 8.582204081632653e-05, + "loss": 1.6512, + "step": 79500 + }, + { + "epoch": 1.68, + "learning_rate": 8.580163265306122e-05, + "loss": 1.6423, + "step": 79600 + }, + { + "epoch": 1.69, + "learning_rate": 8.578122448979592e-05, + "loss": 1.637, + "step": 79700 + }, + { + "epoch": 1.69, + "learning_rate": 8.576081632653062e-05, + "loss": 1.6464, + "step": 79800 + }, + { + "epoch": 1.69, + "learning_rate": 8.57404081632653e-05, + "loss": 1.6383, + "step": 79900 + }, + { + "epoch": 1.69, + "learning_rate": 8.572e-05, + "loss": 1.6486, + "step": 80000 + }, + { + "epoch": 1.7, + "learning_rate": 8.56995918367347e-05, + "loss": 1.6384, + "step": 80100 + }, + { + "epoch": 1.7, + "learning_rate": 8.567918367346938e-05, + "loss": 1.6405, + "step": 80200 + }, + { + "epoch": 1.7, + "learning_rate": 8.565897959183674e-05, + "loss": 1.6375, + "step": 80300 + }, + { + "epoch": 1.7, + "learning_rate": 8.563857142857144e-05, + "loss": 1.6406, + "step": 80400 + }, + { + "epoch": 1.7, + "learning_rate": 8.561816326530613e-05, + "loss": 1.6478, + "step": 80500 + }, + { + "epoch": 1.71, + "learning_rate": 8.559775510204082e-05, + "loss": 1.6418, + "step": 80600 + }, + { + "epoch": 1.71, + "learning_rate": 8.557734693877552e-05, + "loss": 1.6416, + "step": 80700 + }, + { + "epoch": 1.71, + "learning_rate": 8.555693877551021e-05, + "loss": 1.6395, + "step": 80800 + }, + { + "epoch": 1.71, + "learning_rate": 8.55365306122449e-05, + "loss": 1.6435, + "step": 80900 + }, + { + "epoch": 1.71, + "learning_rate": 8.55161224489796e-05, + "loss": 1.6446, + "step": 81000 + }, + { + "epoch": 1.72, + "learning_rate": 8.54957142857143e-05, + "loss": 1.6399, + "step": 81100 + }, + { + "epoch": 1.72, + "learning_rate": 8.547530612244898e-05, + "loss": 1.6338, + "step": 81200 + }, + { + "epoch": 1.72, + "learning_rate": 8.545489795918368e-05, + "loss": 1.6412, + "step": 81300 + }, + { + "epoch": 1.72, + "learning_rate": 8.543448979591836e-05, + "loss": 1.6326, + "step": 81400 + }, + { + "epoch": 1.72, + "learning_rate": 8.541408163265306e-05, + "loss": 1.6488, + "step": 81500 + }, + { + "epoch": 1.73, + "learning_rate": 8.539367346938776e-05, + "loss": 1.6392, + "step": 81600 + }, + { + "epoch": 1.73, + "learning_rate": 8.537326530612245e-05, + "loss": 1.6355, + "step": 81700 + }, + { + "epoch": 1.73, + "learning_rate": 8.535285714285714e-05, + "loss": 1.6371, + "step": 81800 + }, + { + "epoch": 1.73, + "learning_rate": 8.533244897959184e-05, + "loss": 1.6325, + "step": 81900 + }, + { + "epoch": 1.74, + "learning_rate": 8.531204081632653e-05, + "loss": 1.634, + "step": 82000 + }, + { + "epoch": 1.74, + "learning_rate": 8.529163265306123e-05, + "loss": 1.6442, + "step": 82100 + }, + { + "epoch": 1.74, + "learning_rate": 8.527122448979592e-05, + "loss": 1.6302, + "step": 82200 + }, + { + "epoch": 1.74, + "learning_rate": 8.525081632653061e-05, + "loss": 1.6329, + "step": 82300 + }, + { + "epoch": 1.74, + "learning_rate": 8.523040816326531e-05, + "loss": 1.6356, + "step": 82400 + }, + { + "epoch": 1.75, + "learning_rate": 8.521e-05, + "loss": 1.6328, + "step": 82500 + }, + { + "epoch": 1.75, + "learning_rate": 8.518959183673469e-05, + "loss": 1.6346, + "step": 82600 + }, + { + "epoch": 1.75, + "learning_rate": 8.516918367346939e-05, + "loss": 1.6301, + "step": 82700 + }, + { + "epoch": 1.75, + "learning_rate": 8.514877551020409e-05, + "loss": 1.6325, + "step": 82800 + }, + { + "epoch": 1.75, + "learning_rate": 8.512836734693877e-05, + "loss": 1.6276, + "step": 82900 + }, + { + "epoch": 1.76, + "learning_rate": 8.510795918367347e-05, + "loss": 1.638, + "step": 83000 + }, + { + "epoch": 1.76, + "learning_rate": 8.508755102040817e-05, + "loss": 1.6347, + "step": 83100 + }, + { + "epoch": 1.76, + "learning_rate": 8.506734693877551e-05, + "loss": 1.6288, + "step": 83200 + }, + { + "epoch": 1.76, + "learning_rate": 8.504693877551021e-05, + "loss": 1.6267, + "step": 83300 + }, + { + "epoch": 1.77, + "learning_rate": 8.50265306122449e-05, + "loss": 1.6386, + "step": 83400 + }, + { + "epoch": 1.77, + "learning_rate": 8.500612244897959e-05, + "loss": 1.634, + "step": 83500 + }, + { + "epoch": 1.77, + "learning_rate": 8.498591836734695e-05, + "loss": 1.6339, + "step": 83600 + }, + { + "epoch": 1.77, + "learning_rate": 8.496551020408163e-05, + "loss": 1.6281, + "step": 83700 + }, + { + "epoch": 1.77, + "learning_rate": 8.494510204081633e-05, + "loss": 1.6272, + "step": 83800 + }, + { + "epoch": 1.78, + "learning_rate": 8.492469387755103e-05, + "loss": 1.6306, + "step": 83900 + }, + { + "epoch": 1.78, + "learning_rate": 8.490428571428571e-05, + "loss": 1.6375, + "step": 84000 + }, + { + "epoch": 1.78, + "learning_rate": 8.488387755102041e-05, + "loss": 1.6298, + "step": 84100 + }, + { + "epoch": 1.78, + "learning_rate": 8.48634693877551e-05, + "loss": 1.6336, + "step": 84200 + }, + { + "epoch": 1.78, + "learning_rate": 8.48430612244898e-05, + "loss": 1.6233, + "step": 84300 + }, + { + "epoch": 1.79, + "learning_rate": 8.482265306122449e-05, + "loss": 1.6354, + "step": 84400 + }, + { + "epoch": 1.79, + "learning_rate": 8.480224489795919e-05, + "loss": 1.6274, + "step": 84500 + }, + { + "epoch": 1.79, + "learning_rate": 8.478183673469389e-05, + "loss": 1.6248, + "step": 84600 + }, + { + "epoch": 1.79, + "learning_rate": 8.476142857142859e-05, + "loss": 1.6284, + "step": 84700 + }, + { + "epoch": 1.79, + "learning_rate": 8.474102040816327e-05, + "loss": 1.6327, + "step": 84800 + }, + { + "epoch": 1.8, + "learning_rate": 8.472061224489797e-05, + "loss": 1.6178, + "step": 84900 + }, + { + "epoch": 1.8, + "learning_rate": 8.470020408163265e-05, + "loss": 1.6245, + "step": 85000 + }, + { + "epoch": 1.8, + "learning_rate": 8.467979591836735e-05, + "loss": 1.6274, + "step": 85100 + }, + { + "epoch": 1.8, + "learning_rate": 8.465938775510205e-05, + "loss": 1.6248, + "step": 85200 + }, + { + "epoch": 1.81, + "learning_rate": 8.463897959183674e-05, + "loss": 1.6219, + "step": 85300 + }, + { + "epoch": 1.81, + "learning_rate": 8.461857142857143e-05, + "loss": 1.623, + "step": 85400 + }, + { + "epoch": 1.81, + "learning_rate": 8.459816326530613e-05, + "loss": 1.6258, + "step": 85500 + }, + { + "epoch": 1.81, + "learning_rate": 8.457775510204082e-05, + "loss": 1.6235, + "step": 85600 + }, + { + "epoch": 1.81, + "learning_rate": 8.455734693877552e-05, + "loss": 1.6183, + "step": 85700 + }, + { + "epoch": 1.82, + "learning_rate": 8.453693877551021e-05, + "loss": 1.6229, + "step": 85800 + }, + { + "epoch": 1.82, + "learning_rate": 8.45165306122449e-05, + "loss": 1.6289, + "step": 85900 + }, + { + "epoch": 1.82, + "learning_rate": 8.44961224489796e-05, + "loss": 1.6321, + "step": 86000 + }, + { + "epoch": 1.82, + "learning_rate": 8.44757142857143e-05, + "loss": 1.625, + "step": 86100 + }, + { + "epoch": 1.82, + "learning_rate": 8.445530612244898e-05, + "loss": 1.6288, + "step": 86200 + }, + { + "epoch": 1.83, + "learning_rate": 8.443489795918368e-05, + "loss": 1.6262, + "step": 86300 + }, + { + "epoch": 1.83, + "learning_rate": 8.441448979591838e-05, + "loss": 1.6222, + "step": 86400 + }, + { + "epoch": 1.83, + "learning_rate": 8.439408163265306e-05, + "loss": 1.6253, + "step": 86500 + }, + { + "epoch": 1.83, + "learning_rate": 8.437367346938776e-05, + "loss": 1.6219, + "step": 86600 + }, + { + "epoch": 1.83, + "learning_rate": 8.435326530612246e-05, + "loss": 1.6215, + "step": 86700 + }, + { + "epoch": 1.84, + "learning_rate": 8.433285714285714e-05, + "loss": 1.6131, + "step": 86800 + }, + { + "epoch": 1.84, + "learning_rate": 8.431244897959184e-05, + "loss": 1.6179, + "step": 86900 + }, + { + "epoch": 1.84, + "learning_rate": 8.429204081632654e-05, + "loss": 1.6131, + "step": 87000 + }, + { + "epoch": 1.84, + "learning_rate": 8.427163265306123e-05, + "loss": 1.6247, + "step": 87100 + }, + { + "epoch": 1.85, + "learning_rate": 8.425122448979592e-05, + "loss": 1.6259, + "step": 87200 + }, + { + "epoch": 1.85, + "learning_rate": 8.423081632653061e-05, + "loss": 1.6216, + "step": 87300 + }, + { + "epoch": 1.85, + "learning_rate": 8.421040816326531e-05, + "loss": 1.6218, + "step": 87400 + }, + { + "epoch": 1.85, + "learning_rate": 8.419e-05, + "loss": 1.61, + "step": 87500 + }, + { + "epoch": 1.85, + "learning_rate": 8.416959183673469e-05, + "loss": 1.6201, + "step": 87600 + }, + { + "epoch": 1.86, + "learning_rate": 8.414918367346939e-05, + "loss": 1.6161, + "step": 87700 + }, + { + "epoch": 1.86, + "learning_rate": 8.412877551020409e-05, + "loss": 1.617, + "step": 87800 + }, + { + "epoch": 1.86, + "learning_rate": 8.410836734693877e-05, + "loss": 1.622, + "step": 87900 + }, + { + "epoch": 1.86, + "learning_rate": 8.408795918367347e-05, + "loss": 1.6149, + "step": 88000 + }, + { + "epoch": 1.86, + "learning_rate": 8.406755102040817e-05, + "loss": 1.6155, + "step": 88100 + }, + { + "epoch": 1.87, + "learning_rate": 8.404714285714285e-05, + "loss": 1.6139, + "step": 88200 + }, + { + "epoch": 1.87, + "learning_rate": 8.402673469387755e-05, + "loss": 1.6086, + "step": 88300 + }, + { + "epoch": 1.87, + "learning_rate": 8.400632653061225e-05, + "loss": 1.6116, + "step": 88400 + }, + { + "epoch": 1.87, + "learning_rate": 8.39861224489796e-05, + "loss": 1.621, + "step": 88500 + }, + { + "epoch": 1.88, + "learning_rate": 8.396571428571429e-05, + "loss": 1.6108, + "step": 88600 + }, + { + "epoch": 1.88, + "learning_rate": 8.394530612244899e-05, + "loss": 1.6223, + "step": 88700 + }, + { + "epoch": 1.88, + "learning_rate": 8.392489795918369e-05, + "loss": 1.6185, + "step": 88800 + }, + { + "epoch": 1.88, + "learning_rate": 8.390448979591837e-05, + "loss": 1.6239, + "step": 88900 + }, + { + "epoch": 1.88, + "learning_rate": 8.388408163265307e-05, + "loss": 1.605, + "step": 89000 + }, + { + "epoch": 1.89, + "learning_rate": 8.386367346938775e-05, + "loss": 1.6105, + "step": 89100 + }, + { + "epoch": 1.89, + "learning_rate": 8.384326530612245e-05, + "loss": 1.6151, + "step": 89200 + }, + { + "epoch": 1.89, + "learning_rate": 8.382285714285715e-05, + "loss": 1.6145, + "step": 89300 + }, + { + "epoch": 1.89, + "learning_rate": 8.380244897959184e-05, + "loss": 1.623, + "step": 89400 + }, + { + "epoch": 1.89, + "learning_rate": 8.378204081632653e-05, + "loss": 1.6176, + "step": 89500 + }, + { + "epoch": 1.9, + "learning_rate": 8.376163265306123e-05, + "loss": 1.6083, + "step": 89600 + }, + { + "epoch": 1.9, + "learning_rate": 8.374122448979592e-05, + "loss": 1.6112, + "step": 89700 + }, + { + "epoch": 1.9, + "learning_rate": 8.372081632653062e-05, + "loss": 1.613, + "step": 89800 + }, + { + "epoch": 1.9, + "learning_rate": 8.370040816326531e-05, + "loss": 1.6176, + "step": 89900 + }, + { + "epoch": 1.9, + "learning_rate": 8.368e-05, + "loss": 1.6185, + "step": 90000 + }, + { + "epoch": 1.91, + "learning_rate": 8.36595918367347e-05, + "loss": 1.6205, + "step": 90100 + }, + { + "epoch": 1.91, + "learning_rate": 8.36391836734694e-05, + "loss": 1.617, + "step": 90200 + }, + { + "epoch": 1.91, + "learning_rate": 8.361877551020408e-05, + "loss": 1.6221, + "step": 90300 + }, + { + "epoch": 1.91, + "learning_rate": 8.359836734693878e-05, + "loss": 1.6177, + "step": 90400 + }, + { + "epoch": 1.92, + "learning_rate": 8.357816326530613e-05, + "loss": 1.607, + "step": 90500 + }, + { + "epoch": 1.92, + "learning_rate": 8.355775510204083e-05, + "loss": 1.6132, + "step": 90600 + }, + { + "epoch": 1.92, + "learning_rate": 8.353734693877552e-05, + "loss": 1.6007, + "step": 90700 + }, + { + "epoch": 1.92, + "learning_rate": 8.351693877551021e-05, + "loss": 1.6057, + "step": 90800 + }, + { + "epoch": 1.92, + "learning_rate": 8.34965306122449e-05, + "loss": 1.6116, + "step": 90900 + }, + { + "epoch": 1.93, + "learning_rate": 8.34761224489796e-05, + "loss": 1.6121, + "step": 91000 + }, + { + "epoch": 1.93, + "learning_rate": 8.34557142857143e-05, + "loss": 1.6077, + "step": 91100 + }, + { + "epoch": 1.93, + "learning_rate": 8.343530612244898e-05, + "loss": 1.616, + "step": 91200 + }, + { + "epoch": 1.93, + "learning_rate": 8.341489795918368e-05, + "loss": 1.6066, + "step": 91300 + }, + { + "epoch": 1.93, + "learning_rate": 8.339448979591838e-05, + "loss": 1.6093, + "step": 91400 + }, + { + "epoch": 1.94, + "learning_rate": 8.337408163265306e-05, + "loss": 1.6054, + "step": 91500 + }, + { + "epoch": 1.94, + "learning_rate": 8.335367346938776e-05, + "loss": 1.6099, + "step": 91600 + }, + { + "epoch": 1.94, + "learning_rate": 8.333326530612246e-05, + "loss": 1.6069, + "step": 91700 + }, + { + "epoch": 1.94, + "learning_rate": 8.331285714285714e-05, + "loss": 1.6152, + "step": 91800 + }, + { + "epoch": 1.94, + "learning_rate": 8.329244897959184e-05, + "loss": 1.6062, + "step": 91900 + }, + { + "epoch": 1.95, + "learning_rate": 8.327204081632654e-05, + "loss": 1.6017, + "step": 92000 + }, + { + "epoch": 1.95, + "learning_rate": 8.325163265306123e-05, + "loss": 1.6047, + "step": 92100 + }, + { + "epoch": 1.95, + "learning_rate": 8.323122448979592e-05, + "loss": 1.6063, + "step": 92200 + }, + { + "epoch": 1.95, + "learning_rate": 8.321081632653062e-05, + "loss": 1.6036, + "step": 92300 + }, + { + "epoch": 1.96, + "learning_rate": 8.31904081632653e-05, + "loss": 1.6064, + "step": 92400 + }, + { + "epoch": 1.96, + "learning_rate": 8.317e-05, + "loss": 1.6035, + "step": 92500 + }, + { + "epoch": 1.96, + "learning_rate": 8.31495918367347e-05, + "loss": 1.6031, + "step": 92600 + }, + { + "epoch": 1.96, + "learning_rate": 8.312938775510204e-05, + "loss": 1.6052, + "step": 92700 + }, + { + "epoch": 1.96, + "learning_rate": 8.310897959183674e-05, + "loss": 1.6037, + "step": 92800 + }, + { + "epoch": 1.97, + "learning_rate": 8.308857142857144e-05, + "loss": 1.6044, + "step": 92900 + }, + { + "epoch": 1.97, + "learning_rate": 8.306816326530613e-05, + "loss": 1.6047, + "step": 93000 + }, + { + "epoch": 1.97, + "learning_rate": 8.304775510204082e-05, + "loss": 1.601, + "step": 93100 + }, + { + "epoch": 1.97, + "learning_rate": 8.302734693877552e-05, + "loss": 1.5984, + "step": 93200 + }, + { + "epoch": 1.97, + "learning_rate": 8.300693877551021e-05, + "loss": 1.6022, + "step": 93300 + }, + { + "epoch": 1.98, + "learning_rate": 8.29865306122449e-05, + "loss": 1.5999, + "step": 93400 + }, + { + "epoch": 1.98, + "learning_rate": 8.29661224489796e-05, + "loss": 1.6109, + "step": 93500 + }, + { + "epoch": 1.98, + "learning_rate": 8.294571428571429e-05, + "loss": 1.604, + "step": 93600 + }, + { + "epoch": 1.98, + "learning_rate": 8.292551020408163e-05, + "loss": 1.6048, + "step": 93700 + }, + { + "epoch": 1.99, + "learning_rate": 8.290510204081633e-05, + "loss": 1.6008, + "step": 93800 + }, + { + "epoch": 1.99, + "learning_rate": 8.288469387755103e-05, + "loss": 1.6005, + "step": 93900 + }, + { + "epoch": 1.99, + "learning_rate": 8.286428571428571e-05, + "loss": 1.5987, + "step": 94000 + }, + { + "epoch": 1.99, + "learning_rate": 8.284387755102041e-05, + "loss": 1.6, + "step": 94100 + }, + { + "epoch": 1.99, + "learning_rate": 8.282346938775511e-05, + "loss": 1.602, + "step": 94200 + }, + { + "epoch": 2.0, + "learning_rate": 8.280306122448979e-05, + "loss": 1.6111, + "step": 94300 + }, + { + "epoch": 2.0, + "learning_rate": 8.278265306122449e-05, + "loss": 1.599, + "step": 94400 + }, + { + "epoch": 2.0, + "learning_rate": 8.276224489795919e-05, + "loss": 1.606, + "step": 94500 + }, + { + "epoch": 2.0, + "learning_rate": 8.274183673469387e-05, + "loss": 1.5968, + "step": 94600 + }, + { + "epoch": 2.0, + "learning_rate": 8.272142857142857e-05, + "loss": 1.5868, + "step": 94700 + }, + { + "epoch": 2.01, + "learning_rate": 8.270102040816327e-05, + "loss": 1.6044, + "step": 94800 + }, + { + "epoch": 2.01, + "learning_rate": 8.268061224489796e-05, + "loss": 1.594, + "step": 94900 + }, + { + "epoch": 2.01, + "learning_rate": 8.266020408163265e-05, + "loss": 1.5909, + "step": 95000 + }, + { + "epoch": 2.01, + "learning_rate": 8.263979591836735e-05, + "loss": 1.5955, + "step": 95100 + }, + { + "epoch": 2.01, + "learning_rate": 8.261938775510204e-05, + "loss": 1.588, + "step": 95200 + }, + { + "epoch": 2.02, + "learning_rate": 8.259897959183674e-05, + "loss": 1.5873, + "step": 95300 + }, + { + "epoch": 2.02, + "learning_rate": 8.257857142857143e-05, + "loss": 1.5995, + "step": 95400 + }, + { + "epoch": 2.02, + "learning_rate": 8.255816326530612e-05, + "loss": 1.5928, + "step": 95500 + }, + { + "epoch": 2.02, + "learning_rate": 8.253775510204083e-05, + "loss": 1.5925, + "step": 95600 + }, + { + "epoch": 2.03, + "learning_rate": 8.251734693877552e-05, + "loss": 1.5995, + "step": 95700 + }, + { + "epoch": 2.03, + "learning_rate": 8.249693877551021e-05, + "loss": 1.5906, + "step": 95800 + }, + { + "epoch": 2.03, + "learning_rate": 8.247653061224491e-05, + "loss": 1.5933, + "step": 95900 + }, + { + "epoch": 2.03, + "learning_rate": 8.24561224489796e-05, + "loss": 1.5945, + "step": 96000 + }, + { + "epoch": 2.03, + "learning_rate": 8.24357142857143e-05, + "loss": 1.5872, + "step": 96100 + }, + { + "epoch": 2.04, + "learning_rate": 8.2415306122449e-05, + "loss": 1.5889, + "step": 96200 + }, + { + "epoch": 2.04, + "learning_rate": 8.239489795918368e-05, + "loss": 1.5898, + "step": 96300 + }, + { + "epoch": 2.04, + "learning_rate": 8.237448979591838e-05, + "loss": 1.585, + "step": 96400 + }, + { + "epoch": 2.04, + "learning_rate": 8.235408163265306e-05, + "loss": 1.5857, + "step": 96500 + }, + { + "epoch": 2.04, + "learning_rate": 8.233367346938776e-05, + "loss": 1.5885, + "step": 96600 + }, + { + "epoch": 2.05, + "learning_rate": 8.231326530612246e-05, + "loss": 1.5938, + "step": 96700 + }, + { + "epoch": 2.05, + "learning_rate": 8.229285714285714e-05, + "loss": 1.5889, + "step": 96800 + }, + { + "epoch": 2.05, + "learning_rate": 8.227244897959184e-05, + "loss": 1.5863, + "step": 96900 + }, + { + "epoch": 2.05, + "learning_rate": 8.225204081632654e-05, + "loss": 1.5901, + "step": 97000 + }, + { + "epoch": 2.06, + "learning_rate": 8.223163265306122e-05, + "loss": 1.587, + "step": 97100 + }, + { + "epoch": 2.06, + "learning_rate": 8.221122448979592e-05, + "loss": 1.5944, + "step": 97200 + }, + { + "epoch": 2.06, + "learning_rate": 8.219081632653062e-05, + "loss": 1.5867, + "step": 97300 + }, + { + "epoch": 2.06, + "learning_rate": 8.21704081632653e-05, + "loss": 1.5812, + "step": 97400 + }, + { + "epoch": 2.06, + "learning_rate": 8.215e-05, + "loss": 1.5864, + "step": 97500 + }, + { + "epoch": 2.07, + "learning_rate": 8.21295918367347e-05, + "loss": 1.5928, + "step": 97600 + }, + { + "epoch": 2.07, + "learning_rate": 8.210938775510204e-05, + "loss": 1.5907, + "step": 97700 + }, + { + "epoch": 2.07, + "learning_rate": 8.208897959183673e-05, + "loss": 1.5866, + "step": 97800 + }, + { + "epoch": 2.07, + "learning_rate": 8.206857142857143e-05, + "loss": 1.6052, + "step": 97900 + }, + { + "epoch": 2.07, + "learning_rate": 8.204816326530613e-05, + "loss": 1.5815, + "step": 98000 + }, + { + "epoch": 2.08, + "learning_rate": 8.202775510204081e-05, + "loss": 1.5801, + "step": 98100 + }, + { + "epoch": 2.08, + "learning_rate": 8.200734693877551e-05, + "loss": 1.5841, + "step": 98200 + }, + { + "epoch": 2.08, + "learning_rate": 8.198693877551021e-05, + "loss": 1.5855, + "step": 98300 + }, + { + "epoch": 2.08, + "learning_rate": 8.19665306122449e-05, + "loss": 1.584, + "step": 98400 + }, + { + "epoch": 2.08, + "learning_rate": 8.19461224489796e-05, + "loss": 1.5811, + "step": 98500 + }, + { + "epoch": 2.09, + "learning_rate": 8.192571428571429e-05, + "loss": 1.5792, + "step": 98600 + }, + { + "epoch": 2.09, + "learning_rate": 8.190530612244899e-05, + "loss": 1.5816, + "step": 98700 + }, + { + "epoch": 2.09, + "learning_rate": 8.188489795918369e-05, + "loss": 1.5791, + "step": 98800 + }, + { + "epoch": 2.09, + "learning_rate": 8.186448979591837e-05, + "loss": 1.5806, + "step": 98900 + }, + { + "epoch": 2.1, + "learning_rate": 8.184408163265307e-05, + "loss": 1.5861, + "step": 99000 + }, + { + "epoch": 2.1, + "learning_rate": 8.182367346938777e-05, + "loss": 1.5813, + "step": 99100 + }, + { + "epoch": 2.1, + "learning_rate": 8.180326530612245e-05, + "loss": 1.5888, + "step": 99200 + }, + { + "epoch": 2.1, + "learning_rate": 8.178285714285715e-05, + "loss": 1.5832, + "step": 99300 + }, + { + "epoch": 2.1, + "learning_rate": 8.176244897959185e-05, + "loss": 1.5824, + "step": 99400 + }, + { + "epoch": 2.11, + "learning_rate": 8.174204081632653e-05, + "loss": 1.5756, + "step": 99500 + }, + { + "epoch": 2.11, + "learning_rate": 8.172163265306123e-05, + "loss": 1.5854, + "step": 99600 + }, + { + "epoch": 2.11, + "learning_rate": 8.170122448979593e-05, + "loss": 1.5929, + "step": 99700 + }, + { + "epoch": 2.11, + "learning_rate": 8.168081632653061e-05, + "loss": 1.574, + "step": 99800 + }, + { + "epoch": 2.11, + "learning_rate": 8.166040816326531e-05, + "loss": 1.5755, + "step": 99900 + }, + { + "epoch": 2.12, + "learning_rate": 8.164000000000001e-05, + "loss": 1.5811, + "step": 100000 + }, + { + "epoch": 2.12, + "learning_rate": 8.16195918367347e-05, + "loss": 1.5787, + "step": 100100 + }, + { + "epoch": 2.12, + "learning_rate": 8.159938775510204e-05, + "loss": 1.5844, + "step": 100200 + }, + { + "epoch": 2.12, + "learning_rate": 8.157897959183674e-05, + "loss": 1.5802, + "step": 100300 + }, + { + "epoch": 2.12, + "learning_rate": 8.155857142857143e-05, + "loss": 1.5799, + "step": 100400 + }, + { + "epoch": 2.13, + "learning_rate": 8.153816326530612e-05, + "loss": 1.5914, + "step": 100500 + }, + { + "epoch": 2.13, + "learning_rate": 8.151775510204082e-05, + "loss": 1.5856, + "step": 100600 + }, + { + "epoch": 2.13, + "learning_rate": 8.149734693877552e-05, + "loss": 1.5786, + "step": 100700 + }, + { + "epoch": 2.13, + "learning_rate": 8.14769387755102e-05, + "loss": 1.5799, + "step": 100800 + }, + { + "epoch": 2.14, + "learning_rate": 8.14565306122449e-05, + "loss": 1.5756, + "step": 100900 + }, + { + "epoch": 2.14, + "learning_rate": 8.14361224489796e-05, + "loss": 1.5801, + "step": 101000 + }, + { + "epoch": 2.14, + "learning_rate": 8.14157142857143e-05, + "loss": 1.5814, + "step": 101100 + }, + { + "epoch": 2.14, + "learning_rate": 8.1395306122449e-05, + "loss": 1.5797, + "step": 101200 + }, + { + "epoch": 2.14, + "learning_rate": 8.137489795918368e-05, + "loss": 1.58, + "step": 101300 + }, + { + "epoch": 2.15, + "learning_rate": 8.135448979591838e-05, + "loss": 1.5733, + "step": 101400 + }, + { + "epoch": 2.15, + "learning_rate": 8.133408163265307e-05, + "loss": 1.5771, + "step": 101500 + }, + { + "epoch": 2.15, + "learning_rate": 8.131367346938776e-05, + "loss": 1.5803, + "step": 101600 + }, + { + "epoch": 2.15, + "learning_rate": 8.129326530612246e-05, + "loss": 1.5738, + "step": 101700 + }, + { + "epoch": 2.15, + "learning_rate": 8.127285714285716e-05, + "loss": 1.5729, + "step": 101800 + }, + { + "epoch": 2.16, + "learning_rate": 8.125244897959184e-05, + "loss": 1.5772, + "step": 101900 + }, + { + "epoch": 2.16, + "learning_rate": 8.123204081632654e-05, + "loss": 1.5771, + "step": 102000 + }, + { + "epoch": 2.16, + "learning_rate": 8.121163265306124e-05, + "loss": 1.5776, + "step": 102100 + }, + { + "epoch": 2.16, + "learning_rate": 8.119122448979592e-05, + "loss": 1.583, + "step": 102200 + }, + { + "epoch": 2.17, + "learning_rate": 8.117081632653062e-05, + "loss": 1.5745, + "step": 102300 + }, + { + "epoch": 2.17, + "learning_rate": 8.11504081632653e-05, + "loss": 1.5749, + "step": 102400 + }, + { + "epoch": 2.17, + "learning_rate": 8.113e-05, + "loss": 1.5867, + "step": 102500 + }, + { + "epoch": 2.17, + "learning_rate": 8.11095918367347e-05, + "loss": 1.5795, + "step": 102600 + }, + { + "epoch": 2.17, + "learning_rate": 8.108938775510204e-05, + "loss": 1.5763, + "step": 102700 + }, + { + "epoch": 2.18, + "learning_rate": 8.106897959183674e-05, + "loss": 1.5683, + "step": 102800 + }, + { + "epoch": 2.18, + "learning_rate": 8.104857142857143e-05, + "loss": 1.5766, + "step": 102900 + }, + { + "epoch": 2.18, + "learning_rate": 8.102816326530612e-05, + "loss": 1.5716, + "step": 103000 + }, + { + "epoch": 2.18, + "learning_rate": 8.100775510204082e-05, + "loss": 1.581, + "step": 103100 + }, + { + "epoch": 2.18, + "learning_rate": 8.098734693877551e-05, + "loss": 1.578, + "step": 103200 + }, + { + "epoch": 2.19, + "learning_rate": 8.09669387755102e-05, + "loss": 1.5702, + "step": 103300 + }, + { + "epoch": 2.19, + "learning_rate": 8.094653061224489e-05, + "loss": 1.572, + "step": 103400 + }, + { + "epoch": 2.19, + "learning_rate": 8.092612244897959e-05, + "loss": 1.5754, + "step": 103500 + }, + { + "epoch": 2.19, + "learning_rate": 8.090571428571429e-05, + "loss": 1.5761, + "step": 103600 + }, + { + "epoch": 2.19, + "learning_rate": 8.088530612244897e-05, + "loss": 1.5736, + "step": 103700 + }, + { + "epoch": 2.2, + "learning_rate": 8.086489795918367e-05, + "loss": 1.5668, + "step": 103800 + }, + { + "epoch": 2.2, + "learning_rate": 8.084448979591838e-05, + "loss": 1.5761, + "step": 103900 + }, + { + "epoch": 2.2, + "learning_rate": 8.082408163265307e-05, + "loss": 1.5777, + "step": 104000 + }, + { + "epoch": 2.2, + "learning_rate": 8.080367346938777e-05, + "loss": 1.5655, + "step": 104100 + }, + { + "epoch": 2.21, + "learning_rate": 8.078326530612245e-05, + "loss": 1.579, + "step": 104200 + }, + { + "epoch": 2.21, + "learning_rate": 8.076285714285715e-05, + "loss": 1.5827, + "step": 104300 + }, + { + "epoch": 2.21, + "learning_rate": 8.074244897959185e-05, + "loss": 1.5745, + "step": 104400 + }, + { + "epoch": 2.21, + "learning_rate": 8.072204081632653e-05, + "loss": 1.5625, + "step": 104500 + }, + { + "epoch": 2.21, + "learning_rate": 8.070163265306123e-05, + "loss": 1.5732, + "step": 104600 + }, + { + "epoch": 2.22, + "learning_rate": 8.068122448979593e-05, + "loss": 1.5714, + "step": 104700 + }, + { + "epoch": 2.22, + "learning_rate": 8.066081632653061e-05, + "loss": 1.5755, + "step": 104800 + }, + { + "epoch": 2.22, + "learning_rate": 8.064040816326531e-05, + "loss": 1.5785, + "step": 104900 + }, + { + "epoch": 2.22, + "learning_rate": 8.062000000000001e-05, + "loss": 1.5689, + "step": 105000 + }, + { + "epoch": 2.22, + "learning_rate": 8.05995918367347e-05, + "loss": 1.563, + "step": 105100 + }, + { + "epoch": 2.23, + "learning_rate": 8.05791836734694e-05, + "loss": 1.5673, + "step": 105200 + }, + { + "epoch": 2.23, + "learning_rate": 8.055877551020409e-05, + "loss": 1.5768, + "step": 105300 + }, + { + "epoch": 2.23, + "learning_rate": 8.053836734693878e-05, + "loss": 1.57, + "step": 105400 + }, + { + "epoch": 2.23, + "learning_rate": 8.051795918367348e-05, + "loss": 1.568, + "step": 105500 + }, + { + "epoch": 2.23, + "learning_rate": 8.049755102040817e-05, + "loss": 1.567, + "step": 105600 + }, + { + "epoch": 2.24, + "learning_rate": 8.047714285714286e-05, + "loss": 1.5693, + "step": 105700 + }, + { + "epoch": 2.24, + "learning_rate": 8.045673469387756e-05, + "loss": 1.5817, + "step": 105800 + }, + { + "epoch": 2.24, + "learning_rate": 8.043632653061226e-05, + "loss": 1.5695, + "step": 105900 + }, + { + "epoch": 2.24, + "learning_rate": 8.041591836734694e-05, + "loss": 1.5707, + "step": 106000 + }, + { + "epoch": 2.25, + "learning_rate": 8.039551020408164e-05, + "loss": 1.5695, + "step": 106100 + }, + { + "epoch": 2.25, + "learning_rate": 8.037510204081634e-05, + "loss": 1.5738, + "step": 106200 + }, + { + "epoch": 2.25, + "learning_rate": 8.035469387755102e-05, + "loss": 1.5671, + "step": 106300 + }, + { + "epoch": 2.25, + "learning_rate": 8.033428571428572e-05, + "loss": 1.568, + "step": 106400 + }, + { + "epoch": 2.25, + "learning_rate": 8.03138775510204e-05, + "loss": 1.5706, + "step": 106500 + }, + { + "epoch": 2.26, + "learning_rate": 8.02934693877551e-05, + "loss": 1.5664, + "step": 106600 + }, + { + "epoch": 2.26, + "learning_rate": 8.02730612244898e-05, + "loss": 1.5629, + "step": 106700 + }, + { + "epoch": 2.26, + "learning_rate": 8.025265306122449e-05, + "loss": 1.5689, + "step": 106800 + }, + { + "epoch": 2.26, + "learning_rate": 8.023244897959184e-05, + "loss": 1.564, + "step": 106900 + }, + { + "epoch": 2.26, + "learning_rate": 8.021204081632654e-05, + "loss": 1.5708, + "step": 107000 + }, + { + "epoch": 2.27, + "learning_rate": 8.019163265306124e-05, + "loss": 1.5595, + "step": 107100 + }, + { + "epoch": 2.27, + "learning_rate": 8.017122448979592e-05, + "loss": 1.5546, + "step": 107200 + }, + { + "epoch": 2.27, + "learning_rate": 8.015081632653062e-05, + "loss": 1.5598, + "step": 107300 + }, + { + "epoch": 2.27, + "learning_rate": 8.013040816326532e-05, + "loss": 1.5639, + "step": 107400 + }, + { + "epoch": 2.28, + "learning_rate": 8.011e-05, + "loss": 1.558, + "step": 107500 + }, + { + "epoch": 2.28, + "learning_rate": 8.00895918367347e-05, + "loss": 1.5644, + "step": 107600 + }, + { + "epoch": 2.28, + "learning_rate": 8.00691836734694e-05, + "loss": 1.5677, + "step": 107700 + }, + { + "epoch": 2.28, + "learning_rate": 8.004877551020409e-05, + "loss": 1.5615, + "step": 107800 + }, + { + "epoch": 2.28, + "learning_rate": 8.002836734693878e-05, + "loss": 1.5615, + "step": 107900 + }, + { + "epoch": 2.29, + "learning_rate": 8.000795918367348e-05, + "loss": 1.5653, + "step": 108000 + }, + { + "epoch": 2.29, + "learning_rate": 7.998755102040817e-05, + "loss": 1.5624, + "step": 108100 + }, + { + "epoch": 2.29, + "learning_rate": 7.996714285714287e-05, + "loss": 1.5565, + "step": 108200 + }, + { + "epoch": 2.29, + "learning_rate": 7.994673469387755e-05, + "loss": 1.5667, + "step": 108300 + }, + { + "epoch": 2.29, + "learning_rate": 7.99265306122449e-05, + "loss": 1.5662, + "step": 108400 + }, + { + "epoch": 2.3, + "learning_rate": 7.990612244897959e-05, + "loss": 1.5653, + "step": 108500 + }, + { + "epoch": 2.3, + "learning_rate": 7.988571428571429e-05, + "loss": 1.5635, + "step": 108600 + }, + { + "epoch": 2.3, + "learning_rate": 7.986530612244899e-05, + "loss": 1.5638, + "step": 108700 + }, + { + "epoch": 2.3, + "learning_rate": 7.984489795918367e-05, + "loss": 1.5647, + "step": 108800 + }, + { + "epoch": 2.3, + "learning_rate": 7.982448979591837e-05, + "loss": 1.5653, + "step": 108900 + }, + { + "epoch": 2.31, + "learning_rate": 7.980408163265305e-05, + "loss": 1.5694, + "step": 109000 + }, + { + "epoch": 2.31, + "learning_rate": 7.978367346938775e-05, + "loss": 1.5644, + "step": 109100 + }, + { + "epoch": 2.31, + "learning_rate": 7.976326530612245e-05, + "loss": 1.5638, + "step": 109200 + }, + { + "epoch": 2.31, + "learning_rate": 7.974285714285714e-05, + "loss": 1.5624, + "step": 109300 + }, + { + "epoch": 2.32, + "learning_rate": 7.972244897959183e-05, + "loss": 1.5654, + "step": 109400 + }, + { + "epoch": 2.32, + "learning_rate": 7.970204081632655e-05, + "loss": 1.5575, + "step": 109500 + }, + { + "epoch": 2.32, + "learning_rate": 7.968163265306123e-05, + "loss": 1.5626, + "step": 109600 + }, + { + "epoch": 2.32, + "learning_rate": 7.966122448979593e-05, + "loss": 1.5577, + "step": 109700 + }, + { + "epoch": 2.32, + "learning_rate": 7.964081632653063e-05, + "loss": 1.5663, + "step": 109800 + }, + { + "epoch": 2.33, + "learning_rate": 7.962040816326531e-05, + "loss": 1.5587, + "step": 109900 + }, + { + "epoch": 2.33, + "learning_rate": 7.960000000000001e-05, + "loss": 1.5641, + "step": 110000 + }, + { + "epoch": 2.33, + "learning_rate": 7.95795918367347e-05, + "loss": 1.5583, + "step": 110100 + }, + { + "epoch": 2.33, + "learning_rate": 7.95591836734694e-05, + "loss": 1.5579, + "step": 110200 + }, + { + "epoch": 2.33, + "learning_rate": 7.953877551020409e-05, + "loss": 1.5577, + "step": 110300 + }, + { + "epoch": 2.34, + "learning_rate": 7.951836734693878e-05, + "loss": 1.5526, + "step": 110400 + }, + { + "epoch": 2.34, + "learning_rate": 7.949795918367348e-05, + "loss": 1.5622, + "step": 110500 + }, + { + "epoch": 2.34, + "learning_rate": 7.947755102040817e-05, + "loss": 1.5611, + "step": 110600 + }, + { + "epoch": 2.34, + "learning_rate": 7.945734693877551e-05, + "loss": 1.5596, + "step": 110700 + }, + { + "epoch": 2.34, + "learning_rate": 7.94369387755102e-05, + "loss": 1.5589, + "step": 110800 + }, + { + "epoch": 2.35, + "learning_rate": 7.94165306122449e-05, + "loss": 1.5554, + "step": 110900 + }, + { + "epoch": 2.35, + "learning_rate": 7.93961224489796e-05, + "loss": 1.559, + "step": 111000 + }, + { + "epoch": 2.35, + "learning_rate": 7.937571428571428e-05, + "loss": 1.5522, + "step": 111100 + }, + { + "epoch": 2.35, + "learning_rate": 7.935530612244898e-05, + "loss": 1.559, + "step": 111200 + }, + { + "epoch": 2.36, + "learning_rate": 7.933489795918368e-05, + "loss": 1.5542, + "step": 111300 + }, + { + "epoch": 2.36, + "learning_rate": 7.931448979591836e-05, + "loss": 1.5536, + "step": 111400 + }, + { + "epoch": 2.36, + "learning_rate": 7.929408163265306e-05, + "loss": 1.5498, + "step": 111500 + }, + { + "epoch": 2.36, + "learning_rate": 7.927367346938776e-05, + "loss": 1.5556, + "step": 111600 + }, + { + "epoch": 2.36, + "learning_rate": 7.925326530612244e-05, + "loss": 1.5617, + "step": 111700 + }, + { + "epoch": 2.37, + "learning_rate": 7.923285714285714e-05, + "loss": 1.5517, + "step": 111800 + }, + { + "epoch": 2.37, + "learning_rate": 7.921244897959184e-05, + "loss": 1.5526, + "step": 111900 + }, + { + "epoch": 2.37, + "learning_rate": 7.919204081632653e-05, + "loss": 1.5576, + "step": 112000 + }, + { + "epoch": 2.37, + "learning_rate": 7.917163265306122e-05, + "loss": 1.5517, + "step": 112100 + }, + { + "epoch": 2.37, + "learning_rate": 7.915122448979592e-05, + "loss": 1.55, + "step": 112200 + }, + { + "epoch": 2.38, + "learning_rate": 7.913081632653062e-05, + "loss": 1.551, + "step": 112300 + }, + { + "epoch": 2.38, + "learning_rate": 7.911040816326532e-05, + "loss": 1.5561, + "step": 112400 + }, + { + "epoch": 2.38, + "learning_rate": 7.909e-05, + "loss": 1.5479, + "step": 112500 + }, + { + "epoch": 2.38, + "learning_rate": 7.90695918367347e-05, + "loss": 1.5498, + "step": 112600 + }, + { + "epoch": 2.39, + "learning_rate": 7.90491836734694e-05, + "loss": 1.5467, + "step": 112700 + }, + { + "epoch": 2.39, + "learning_rate": 7.902877551020408e-05, + "loss": 1.5532, + "step": 112800 + }, + { + "epoch": 2.39, + "learning_rate": 7.900836734693878e-05, + "loss": 1.5613, + "step": 112900 + }, + { + "epoch": 2.39, + "learning_rate": 7.898795918367348e-05, + "loss": 1.5587, + "step": 113000 + }, + { + "epoch": 2.39, + "learning_rate": 7.896755102040817e-05, + "loss": 1.5509, + "step": 113100 + }, + { + "epoch": 2.4, + "learning_rate": 7.894714285714286e-05, + "loss": 1.551, + "step": 113200 + }, + { + "epoch": 2.4, + "learning_rate": 7.89269387755102e-05, + "loss": 1.5522, + "step": 113300 + }, + { + "epoch": 2.4, + "learning_rate": 7.89065306122449e-05, + "loss": 1.5469, + "step": 113400 + }, + { + "epoch": 2.4, + "learning_rate": 7.888612244897959e-05, + "loss": 1.552, + "step": 113500 + }, + { + "epoch": 2.4, + "learning_rate": 7.886571428571429e-05, + "loss": 1.5568, + "step": 113600 + }, + { + "epoch": 2.41, + "learning_rate": 7.884530612244899e-05, + "loss": 1.552, + "step": 113700 + }, + { + "epoch": 2.41, + "learning_rate": 7.882489795918367e-05, + "loss": 1.543, + "step": 113800 + }, + { + "epoch": 2.41, + "learning_rate": 7.880448979591837e-05, + "loss": 1.5518, + "step": 113900 + }, + { + "epoch": 2.41, + "learning_rate": 7.878408163265307e-05, + "loss": 1.5574, + "step": 114000 + }, + { + "epoch": 2.41, + "learning_rate": 7.876367346938775e-05, + "loss": 1.5493, + "step": 114100 + }, + { + "epoch": 2.42, + "learning_rate": 7.874326530612245e-05, + "loss": 1.5509, + "step": 114200 + }, + { + "epoch": 2.42, + "learning_rate": 7.872285714285715e-05, + "loss": 1.5438, + "step": 114300 + }, + { + "epoch": 2.42, + "learning_rate": 7.870244897959183e-05, + "loss": 1.5488, + "step": 114400 + }, + { + "epoch": 2.42, + "learning_rate": 7.868204081632653e-05, + "loss": 1.5539, + "step": 114500 + }, + { + "epoch": 2.43, + "learning_rate": 7.866163265306123e-05, + "loss": 1.5434, + "step": 114600 + }, + { + "epoch": 2.43, + "learning_rate": 7.864122448979591e-05, + "loss": 1.5465, + "step": 114700 + }, + { + "epoch": 2.43, + "learning_rate": 7.862081632653061e-05, + "loss": 1.5467, + "step": 114800 + }, + { + "epoch": 2.43, + "learning_rate": 7.86004081632653e-05, + "loss": 1.5436, + "step": 114900 + }, + { + "epoch": 2.43, + "learning_rate": 7.858000000000001e-05, + "loss": 1.5447, + "step": 115000 + }, + { + "epoch": 2.44, + "learning_rate": 7.855959183673471e-05, + "loss": 1.5457, + "step": 115100 + }, + { + "epoch": 2.44, + "learning_rate": 7.853918367346939e-05, + "loss": 1.547, + "step": 115200 + }, + { + "epoch": 2.44, + "learning_rate": 7.851877551020409e-05, + "loss": 1.5493, + "step": 115300 + }, + { + "epoch": 2.44, + "learning_rate": 7.849857142857143e-05, + "loss": 1.5352, + "step": 115400 + }, + { + "epoch": 2.44, + "learning_rate": 7.847816326530613e-05, + "loss": 1.5444, + "step": 115500 + }, + { + "epoch": 2.45, + "learning_rate": 7.845775510204082e-05, + "loss": 1.5563, + "step": 115600 + }, + { + "epoch": 2.45, + "learning_rate": 7.843734693877551e-05, + "loss": 1.5493, + "step": 115700 + }, + { + "epoch": 2.45, + "learning_rate": 7.841693877551021e-05, + "loss": 1.5445, + "step": 115800 + }, + { + "epoch": 2.45, + "learning_rate": 7.83965306122449e-05, + "loss": 1.548, + "step": 115900 + }, + { + "epoch": 2.46, + "learning_rate": 7.83761224489796e-05, + "loss": 1.5448, + "step": 116000 + }, + { + "epoch": 2.46, + "learning_rate": 7.83557142857143e-05, + "loss": 1.5489, + "step": 116100 + }, + { + "epoch": 2.46, + "learning_rate": 7.833530612244898e-05, + "loss": 1.5479, + "step": 116200 + }, + { + "epoch": 2.46, + "learning_rate": 7.831489795918368e-05, + "loss": 1.5432, + "step": 116300 + }, + { + "epoch": 2.46, + "learning_rate": 7.829448979591838e-05, + "loss": 1.5413, + "step": 116400 + }, + { + "epoch": 2.47, + "learning_rate": 7.827408163265306e-05, + "loss": 1.5402, + "step": 116500 + }, + { + "epoch": 2.47, + "learning_rate": 7.825367346938776e-05, + "loss": 1.5461, + "step": 116600 + }, + { + "epoch": 2.47, + "learning_rate": 7.823326530612244e-05, + "loss": 1.5456, + "step": 116700 + }, + { + "epoch": 2.47, + "learning_rate": 7.821285714285714e-05, + "loss": 1.5503, + "step": 116800 + }, + { + "epoch": 2.47, + "learning_rate": 7.819244897959184e-05, + "loss": 1.5402, + "step": 116900 + }, + { + "epoch": 2.48, + "learning_rate": 7.817204081632652e-05, + "loss": 1.5477, + "step": 117000 + }, + { + "epoch": 2.48, + "learning_rate": 7.815163265306122e-05, + "loss": 1.5453, + "step": 117100 + }, + { + "epoch": 2.48, + "learning_rate": 7.813122448979592e-05, + "loss": 1.5508, + "step": 117200 + }, + { + "epoch": 2.48, + "learning_rate": 7.81108163265306e-05, + "loss": 1.5441, + "step": 117300 + }, + { + "epoch": 2.48, + "learning_rate": 7.80904081632653e-05, + "loss": 1.5413, + "step": 117400 + }, + { + "epoch": 2.49, + "learning_rate": 7.807020408163266e-05, + "loss": 1.547, + "step": 117500 + }, + { + "epoch": 2.49, + "learning_rate": 7.804979591836736e-05, + "loss": 1.5493, + "step": 117600 + }, + { + "epoch": 2.49, + "learning_rate": 7.802938775510204e-05, + "loss": 1.5385, + "step": 117700 + }, + { + "epoch": 2.49, + "learning_rate": 7.800897959183674e-05, + "loss": 1.5419, + "step": 117800 + }, + { + "epoch": 2.5, + "learning_rate": 7.798857142857144e-05, + "loss": 1.5409, + "step": 117900 + }, + { + "epoch": 2.5, + "learning_rate": 7.796816326530612e-05, + "loss": 1.5382, + "step": 118000 + }, + { + "epoch": 2.5, + "learning_rate": 7.794775510204082e-05, + "loss": 1.5337, + "step": 118100 + }, + { + "epoch": 2.5, + "learning_rate": 7.792734693877552e-05, + "loss": 1.5345, + "step": 118200 + }, + { + "epoch": 2.5, + "learning_rate": 7.79069387755102e-05, + "loss": 1.5412, + "step": 118300 + }, + { + "epoch": 2.51, + "learning_rate": 7.78865306122449e-05, + "loss": 1.5362, + "step": 118400 + }, + { + "epoch": 2.51, + "learning_rate": 7.786612244897959e-05, + "loss": 1.5429, + "step": 118500 + }, + { + "epoch": 2.51, + "learning_rate": 7.784571428571429e-05, + "loss": 1.5394, + "step": 118600 + }, + { + "epoch": 2.51, + "learning_rate": 7.782530612244899e-05, + "loss": 1.5422, + "step": 118700 + }, + { + "epoch": 2.51, + "learning_rate": 7.780489795918367e-05, + "loss": 1.5367, + "step": 118800 + }, + { + "epoch": 2.52, + "learning_rate": 7.778448979591837e-05, + "loss": 1.54, + "step": 118900 + }, + { + "epoch": 2.52, + "learning_rate": 7.776408163265307e-05, + "loss": 1.539, + "step": 119000 + }, + { + "epoch": 2.52, + "learning_rate": 7.774367346938775e-05, + "loss": 1.5392, + "step": 119100 + }, + { + "epoch": 2.52, + "learning_rate": 7.772326530612245e-05, + "loss": 1.5401, + "step": 119200 + }, + { + "epoch": 2.52, + "learning_rate": 7.770285714285715e-05, + "loss": 1.5364, + "step": 119300 + }, + { + "epoch": 2.53, + "learning_rate": 7.768244897959183e-05, + "loss": 1.5404, + "step": 119400 + }, + { + "epoch": 2.53, + "learning_rate": 7.766204081632653e-05, + "loss": 1.5374, + "step": 119500 + }, + { + "epoch": 2.53, + "learning_rate": 7.764163265306123e-05, + "loss": 1.5386, + "step": 119600 + }, + { + "epoch": 2.53, + "learning_rate": 7.762122448979591e-05, + "loss": 1.5436, + "step": 119700 + }, + { + "epoch": 2.54, + "learning_rate": 7.760081632653061e-05, + "loss": 1.5483, + "step": 119800 + }, + { + "epoch": 2.54, + "learning_rate": 7.758040816326531e-05, + "loss": 1.5424, + "step": 119900 + }, + { + "epoch": 2.54, + "learning_rate": 7.756e-05, + "loss": 1.5456, + "step": 120000 + }, + { + "epoch": 2.54, + "learning_rate": 7.75395918367347e-05, + "loss": 1.5483, + "step": 120100 + }, + { + "epoch": 2.54, + "learning_rate": 7.751918367346939e-05, + "loss": 1.5346, + "step": 120200 + }, + { + "epoch": 2.55, + "learning_rate": 7.749877551020408e-05, + "loss": 1.5387, + "step": 120300 + }, + { + "epoch": 2.55, + "learning_rate": 7.747836734693878e-05, + "loss": 1.5327, + "step": 120400 + }, + { + "epoch": 2.55, + "learning_rate": 7.745795918367347e-05, + "loss": 1.5299, + "step": 120500 + }, + { + "epoch": 2.55, + "learning_rate": 7.743755102040817e-05, + "loss": 1.5365, + "step": 120600 + }, + { + "epoch": 2.55, + "learning_rate": 7.741714285714287e-05, + "loss": 1.5332, + "step": 120700 + }, + { + "epoch": 2.56, + "learning_rate": 7.739673469387756e-05, + "loss": 1.5309, + "step": 120800 + }, + { + "epoch": 2.56, + "learning_rate": 7.737632653061225e-05, + "loss": 1.5325, + "step": 120900 + }, + { + "epoch": 2.56, + "learning_rate": 7.735591836734695e-05, + "loss": 1.5408, + "step": 121000 + }, + { + "epoch": 2.56, + "learning_rate": 7.733551020408164e-05, + "loss": 1.5385, + "step": 121100 + }, + { + "epoch": 2.57, + "learning_rate": 7.731510204081634e-05, + "loss": 1.5373, + "step": 121200 + }, + { + "epoch": 2.57, + "learning_rate": 7.729489795918368e-05, + "loss": 1.5339, + "step": 121300 + }, + { + "epoch": 2.57, + "learning_rate": 7.727448979591837e-05, + "loss": 1.5329, + "step": 121400 + }, + { + "epoch": 2.57, + "learning_rate": 7.725408163265306e-05, + "loss": 1.5338, + "step": 121500 + }, + { + "epoch": 2.57, + "learning_rate": 7.723367346938776e-05, + "loss": 1.5435, + "step": 121600 + }, + { + "epoch": 2.58, + "learning_rate": 7.721326530612246e-05, + "loss": 1.5353, + "step": 121700 + }, + { + "epoch": 2.58, + "learning_rate": 7.719285714285714e-05, + "loss": 1.5441, + "step": 121800 + }, + { + "epoch": 2.58, + "learning_rate": 7.717244897959184e-05, + "loss": 1.5382, + "step": 121900 + }, + { + "epoch": 2.58, + "learning_rate": 7.715204081632654e-05, + "loss": 1.5281, + "step": 122000 + }, + { + "epoch": 2.58, + "learning_rate": 7.713163265306122e-05, + "loss": 1.5395, + "step": 122100 + }, + { + "epoch": 2.59, + "learning_rate": 7.711122448979592e-05, + "loss": 1.5414, + "step": 122200 + }, + { + "epoch": 2.59, + "learning_rate": 7.709081632653062e-05, + "loss": 1.5403, + "step": 122300 + }, + { + "epoch": 2.59, + "learning_rate": 7.70704081632653e-05, + "loss": 1.5377, + "step": 122400 + }, + { + "epoch": 2.59, + "learning_rate": 7.705e-05, + "loss": 1.5357, + "step": 122500 + }, + { + "epoch": 2.59, + "learning_rate": 7.702959183673469e-05, + "loss": 1.5267, + "step": 122600 + }, + { + "epoch": 2.6, + "learning_rate": 7.700918367346939e-05, + "loss": 1.5313, + "step": 122700 + }, + { + "epoch": 2.6, + "learning_rate": 7.698877551020408e-05, + "loss": 1.5313, + "step": 122800 + }, + { + "epoch": 2.6, + "learning_rate": 7.696836734693877e-05, + "loss": 1.5416, + "step": 122900 + }, + { + "epoch": 2.6, + "learning_rate": 7.694795918367347e-05, + "loss": 1.5324, + "step": 123000 + }, + { + "epoch": 2.61, + "learning_rate": 7.692755102040817e-05, + "loss": 1.5337, + "step": 123100 + }, + { + "epoch": 2.61, + "learning_rate": 7.690714285714285e-05, + "loss": 1.5251, + "step": 123200 + }, + { + "epoch": 2.61, + "learning_rate": 7.688673469387756e-05, + "loss": 1.5319, + "step": 123300 + }, + { + "epoch": 2.61, + "learning_rate": 7.686632653061225e-05, + "loss": 1.5345, + "step": 123400 + }, + { + "epoch": 2.61, + "learning_rate": 7.684591836734695e-05, + "loss": 1.5258, + "step": 123500 + }, + { + "epoch": 2.62, + "learning_rate": 7.682551020408164e-05, + "loss": 1.5312, + "step": 123600 + }, + { + "epoch": 2.62, + "learning_rate": 7.680530612244898e-05, + "loss": 1.5261, + "step": 123700 + }, + { + "epoch": 2.62, + "learning_rate": 7.678489795918368e-05, + "loss": 1.5297, + "step": 123800 + }, + { + "epoch": 2.62, + "learning_rate": 7.676469387755102e-05, + "loss": 1.5295, + "step": 123900 + }, + { + "epoch": 2.62, + "learning_rate": 7.674428571428571e-05, + "loss": 1.5279, + "step": 124000 + }, + { + "epoch": 2.63, + "learning_rate": 7.672387755102041e-05, + "loss": 1.531, + "step": 124100 + }, + { + "epoch": 2.63, + "learning_rate": 7.67034693877551e-05, + "loss": 1.5365, + "step": 124200 + }, + { + "epoch": 2.63, + "learning_rate": 7.66830612244898e-05, + "loss": 1.5293, + "step": 124300 + }, + { + "epoch": 2.63, + "learning_rate": 7.66626530612245e-05, + "loss": 1.533, + "step": 124400 + }, + { + "epoch": 2.63, + "learning_rate": 7.664224489795919e-05, + "loss": 1.5199, + "step": 124500 + }, + { + "epoch": 2.64, + "learning_rate": 7.662183673469389e-05, + "loss": 1.5206, + "step": 124600 + }, + { + "epoch": 2.64, + "learning_rate": 7.660142857142858e-05, + "loss": 1.5226, + "step": 124700 + }, + { + "epoch": 2.64, + "learning_rate": 7.658102040816327e-05, + "loss": 1.5292, + "step": 124800 + }, + { + "epoch": 2.64, + "learning_rate": 7.656061224489797e-05, + "loss": 1.5311, + "step": 124900 + }, + { + "epoch": 2.65, + "learning_rate": 7.654020408163267e-05, + "loss": 1.5315, + "step": 125000 + }, + { + "epoch": 2.65, + "learning_rate": 7.651979591836735e-05, + "loss": 1.5295, + "step": 125100 + }, + { + "epoch": 2.65, + "learning_rate": 7.649938775510205e-05, + "loss": 1.5281, + "step": 125200 + }, + { + "epoch": 2.65, + "learning_rate": 7.647897959183675e-05, + "loss": 1.5298, + "step": 125300 + }, + { + "epoch": 2.65, + "learning_rate": 7.645857142857143e-05, + "loss": 1.5273, + "step": 125400 + }, + { + "epoch": 2.66, + "learning_rate": 7.643816326530613e-05, + "loss": 1.5355, + "step": 125500 + }, + { + "epoch": 2.66, + "learning_rate": 7.641775510204083e-05, + "loss": 1.5248, + "step": 125600 + }, + { + "epoch": 2.66, + "learning_rate": 7.639734693877551e-05, + "loss": 1.5235, + "step": 125700 + }, + { + "epoch": 2.66, + "learning_rate": 7.637693877551021e-05, + "loss": 1.5206, + "step": 125800 + }, + { + "epoch": 2.66, + "learning_rate": 7.635653061224491e-05, + "loss": 1.5353, + "step": 125900 + }, + { + "epoch": 2.67, + "learning_rate": 7.63361224489796e-05, + "loss": 1.5237, + "step": 126000 + }, + { + "epoch": 2.67, + "learning_rate": 7.631571428571429e-05, + "loss": 1.5284, + "step": 126100 + }, + { + "epoch": 2.67, + "learning_rate": 7.629530612244898e-05, + "loss": 1.5201, + "step": 126200 + }, + { + "epoch": 2.67, + "learning_rate": 7.627489795918368e-05, + "loss": 1.5184, + "step": 126300 + }, + { + "epoch": 2.68, + "learning_rate": 7.625448979591837e-05, + "loss": 1.5288, + "step": 126400 + }, + { + "epoch": 2.68, + "learning_rate": 7.623408163265306e-05, + "loss": 1.5185, + "step": 126500 + }, + { + "epoch": 2.68, + "learning_rate": 7.621367346938776e-05, + "loss": 1.5197, + "step": 126600 + }, + { + "epoch": 2.68, + "learning_rate": 7.619326530612246e-05, + "loss": 1.5183, + "step": 126700 + }, + { + "epoch": 2.68, + "learning_rate": 7.617285714285714e-05, + "loss": 1.526, + "step": 126800 + }, + { + "epoch": 2.69, + "learning_rate": 7.615244897959184e-05, + "loss": 1.5263, + "step": 126900 + }, + { + "epoch": 2.69, + "learning_rate": 7.613204081632654e-05, + "loss": 1.5355, + "step": 127000 + }, + { + "epoch": 2.69, + "learning_rate": 7.611163265306122e-05, + "loss": 1.5153, + "step": 127100 + }, + { + "epoch": 2.69, + "learning_rate": 7.609122448979592e-05, + "loss": 1.5238, + "step": 127200 + }, + { + "epoch": 2.69, + "learning_rate": 7.607081632653062e-05, + "loss": 1.5249, + "step": 127300 + }, + { + "epoch": 2.7, + "learning_rate": 7.60504081632653e-05, + "loss": 1.5221, + "step": 127400 + }, + { + "epoch": 2.7, + "learning_rate": 7.603e-05, + "loss": 1.5308, + "step": 127500 + }, + { + "epoch": 2.7, + "learning_rate": 7.60095918367347e-05, + "loss": 1.5278, + "step": 127600 + }, + { + "epoch": 2.7, + "learning_rate": 7.598918367346939e-05, + "loss": 1.5243, + "step": 127700 + }, + { + "epoch": 2.7, + "learning_rate": 7.596877551020408e-05, + "loss": 1.5192, + "step": 127800 + }, + { + "epoch": 2.71, + "learning_rate": 7.594836734693878e-05, + "loss": 1.5234, + "step": 127900 + }, + { + "epoch": 2.71, + "learning_rate": 7.592795918367347e-05, + "loss": 1.5179, + "step": 128000 + }, + { + "epoch": 2.71, + "learning_rate": 7.590755102040817e-05, + "loss": 1.5199, + "step": 128100 + }, + { + "epoch": 2.71, + "learning_rate": 7.588714285714285e-05, + "loss": 1.5272, + "step": 128200 + }, + { + "epoch": 2.72, + "learning_rate": 7.586673469387755e-05, + "loss": 1.5181, + "step": 128300 + }, + { + "epoch": 2.72, + "learning_rate": 7.584632653061225e-05, + "loss": 1.5181, + "step": 128400 + }, + { + "epoch": 2.72, + "learning_rate": 7.582591836734693e-05, + "loss": 1.5264, + "step": 128500 + }, + { + "epoch": 2.72, + "learning_rate": 7.580591836734694e-05, + "loss": 1.5234, + "step": 128600 + }, + { + "epoch": 2.72, + "learning_rate": 7.578551020408163e-05, + "loss": 1.5209, + "step": 128700 + }, + { + "epoch": 2.73, + "learning_rate": 7.576510204081633e-05, + "loss": 1.5243, + "step": 128800 + }, + { + "epoch": 2.73, + "learning_rate": 7.574469387755102e-05, + "loss": 1.5177, + "step": 128900 + }, + { + "epoch": 2.73, + "learning_rate": 7.572428571428571e-05, + "loss": 1.5149, + "step": 129000 + }, + { + "epoch": 2.73, + "learning_rate": 7.57038775510204e-05, + "loss": 1.5152, + "step": 129100 + }, + { + "epoch": 2.73, + "learning_rate": 7.56834693877551e-05, + "loss": 1.5168, + "step": 129200 + }, + { + "epoch": 2.74, + "learning_rate": 7.566306122448979e-05, + "loss": 1.5249, + "step": 129300 + }, + { + "epoch": 2.74, + "learning_rate": 7.564265306122449e-05, + "loss": 1.5187, + "step": 129400 + }, + { + "epoch": 2.74, + "learning_rate": 7.562224489795919e-05, + "loss": 1.5214, + "step": 129500 + }, + { + "epoch": 2.74, + "learning_rate": 7.560183673469387e-05, + "loss": 1.5098, + "step": 129600 + }, + { + "epoch": 2.74, + "learning_rate": 7.558142857142857e-05, + "loss": 1.5203, + "step": 129700 + }, + { + "epoch": 2.75, + "learning_rate": 7.556102040816327e-05, + "loss": 1.5166, + "step": 129800 + }, + { + "epoch": 2.75, + "learning_rate": 7.554061224489797e-05, + "loss": 1.5162, + "step": 129900 + }, + { + "epoch": 2.75, + "learning_rate": 7.552020408163266e-05, + "loss": 1.5211, + "step": 130000 + }, + { + "epoch": 2.75, + "learning_rate": 7.549979591836735e-05, + "loss": 1.516, + "step": 130100 + }, + { + "epoch": 2.76, + "learning_rate": 7.547938775510205e-05, + "loss": 1.5189, + "step": 130200 + }, + { + "epoch": 2.76, + "learning_rate": 7.545897959183675e-05, + "loss": 1.5171, + "step": 130300 + }, + { + "epoch": 2.76, + "learning_rate": 7.543857142857143e-05, + "loss": 1.5168, + "step": 130400 + }, + { + "epoch": 2.76, + "learning_rate": 7.541816326530613e-05, + "loss": 1.5272, + "step": 130500 + }, + { + "epoch": 2.76, + "learning_rate": 7.539775510204083e-05, + "loss": 1.5202, + "step": 130600 + }, + { + "epoch": 2.77, + "learning_rate": 7.537734693877551e-05, + "loss": 1.5196, + "step": 130700 + }, + { + "epoch": 2.77, + "learning_rate": 7.535693877551021e-05, + "loss": 1.5122, + "step": 130800 + }, + { + "epoch": 2.77, + "learning_rate": 7.533653061224491e-05, + "loss": 1.5158, + "step": 130900 + }, + { + "epoch": 2.77, + "learning_rate": 7.53161224489796e-05, + "loss": 1.5127, + "step": 131000 + }, + { + "epoch": 2.77, + "learning_rate": 7.529571428571429e-05, + "loss": 1.5193, + "step": 131100 + }, + { + "epoch": 2.78, + "learning_rate": 7.527530612244899e-05, + "loss": 1.5264, + "step": 131200 + }, + { + "epoch": 2.78, + "learning_rate": 7.525489795918368e-05, + "loss": 1.5092, + "step": 131300 + }, + { + "epoch": 2.78, + "learning_rate": 7.523448979591837e-05, + "loss": 1.5148, + "step": 131400 + }, + { + "epoch": 2.78, + "learning_rate": 7.521408163265307e-05, + "loss": 1.5172, + "step": 131500 + }, + { + "epoch": 2.79, + "learning_rate": 7.519367346938776e-05, + "loss": 1.5146, + "step": 131600 + }, + { + "epoch": 2.79, + "learning_rate": 7.517326530612246e-05, + "loss": 1.5272, + "step": 131700 + }, + { + "epoch": 2.79, + "learning_rate": 7.515285714285714e-05, + "loss": 1.5156, + "step": 131800 + }, + { + "epoch": 2.79, + "learning_rate": 7.513244897959184e-05, + "loss": 1.5165, + "step": 131900 + }, + { + "epoch": 2.79, + "learning_rate": 7.511204081632654e-05, + "loss": 1.5151, + "step": 132000 + }, + { + "epoch": 2.8, + "learning_rate": 7.509163265306122e-05, + "loss": 1.5146, + "step": 132100 + }, + { + "epoch": 2.8, + "learning_rate": 7.507122448979592e-05, + "loss": 1.5261, + "step": 132200 + }, + { + "epoch": 2.8, + "learning_rate": 7.505081632653062e-05, + "loss": 1.5172, + "step": 132300 + }, + { + "epoch": 2.8, + "learning_rate": 7.50304081632653e-05, + "loss": 1.5118, + "step": 132400 + }, + { + "epoch": 2.8, + "learning_rate": 7.501e-05, + "loss": 1.5136, + "step": 132500 + }, + { + "epoch": 2.81, + "learning_rate": 7.49895918367347e-05, + "loss": 1.5107, + "step": 132600 + }, + { + "epoch": 2.81, + "learning_rate": 7.496918367346938e-05, + "loss": 1.5172, + "step": 132700 + }, + { + "epoch": 2.81, + "learning_rate": 7.494877551020408e-05, + "loss": 1.52, + "step": 132800 + }, + { + "epoch": 2.81, + "learning_rate": 7.492836734693878e-05, + "loss": 1.5162, + "step": 132900 + }, + { + "epoch": 2.81, + "learning_rate": 7.490795918367347e-05, + "loss": 1.5069, + "step": 133000 + }, + { + "epoch": 2.82, + "learning_rate": 7.488755102040816e-05, + "loss": 1.5107, + "step": 133100 + }, + { + "epoch": 2.82, + "learning_rate": 7.486714285714286e-05, + "loss": 1.5024, + "step": 133200 + }, + { + "epoch": 2.82, + "learning_rate": 7.484673469387755e-05, + "loss": 1.5077, + "step": 133300 + }, + { + "epoch": 2.82, + "learning_rate": 7.482632653061225e-05, + "loss": 1.5104, + "step": 133400 + }, + { + "epoch": 2.83, + "learning_rate": 7.480591836734694e-05, + "loss": 1.5059, + "step": 133500 + }, + { + "epoch": 2.83, + "learning_rate": 7.478551020408163e-05, + "loss": 1.5116, + "step": 133600 + }, + { + "epoch": 2.83, + "learning_rate": 7.476510204081633e-05, + "loss": 1.5048, + "step": 133700 + }, + { + "epoch": 2.83, + "learning_rate": 7.474469387755103e-05, + "loss": 1.51, + "step": 133800 + }, + { + "epoch": 2.83, + "learning_rate": 7.472428571428571e-05, + "loss": 1.5127, + "step": 133900 + }, + { + "epoch": 2.84, + "learning_rate": 7.470387755102041e-05, + "loss": 1.5077, + "step": 134000 + }, + { + "epoch": 2.84, + "learning_rate": 7.46834693877551e-05, + "loss": 1.51, + "step": 134100 + }, + { + "epoch": 2.84, + "learning_rate": 7.466326530612245e-05, + "loss": 1.5082, + "step": 134200 + }, + { + "epoch": 2.84, + "learning_rate": 7.464285714285715e-05, + "loss": 1.5151, + "step": 134300 + }, + { + "epoch": 2.84, + "learning_rate": 7.462244897959185e-05, + "loss": 1.5116, + "step": 134400 + }, + { + "epoch": 2.85, + "learning_rate": 7.460204081632653e-05, + "loss": 1.507, + "step": 134500 + }, + { + "epoch": 2.85, + "learning_rate": 7.458163265306123e-05, + "loss": 1.5088, + "step": 134600 + }, + { + "epoch": 2.85, + "learning_rate": 7.456122448979593e-05, + "loss": 1.5147, + "step": 134700 + }, + { + "epoch": 2.85, + "learning_rate": 7.454081632653061e-05, + "loss": 1.5039, + "step": 134800 + }, + { + "epoch": 2.86, + "learning_rate": 7.452040816326531e-05, + "loss": 1.5101, + "step": 134900 + }, + { + "epoch": 2.86, + "learning_rate": 7.450000000000001e-05, + "loss": 1.5042, + "step": 135000 + }, + { + "epoch": 2.86, + "learning_rate": 7.447959183673469e-05, + "loss": 1.5117, + "step": 135100 + }, + { + "epoch": 2.86, + "learning_rate": 7.445918367346939e-05, + "loss": 1.5149, + "step": 135200 + }, + { + "epoch": 2.86, + "learning_rate": 7.443877551020409e-05, + "loss": 1.506, + "step": 135300 + }, + { + "epoch": 2.87, + "learning_rate": 7.441836734693877e-05, + "loss": 1.5112, + "step": 135400 + }, + { + "epoch": 2.87, + "learning_rate": 7.439795918367347e-05, + "loss": 1.5203, + "step": 135500 + }, + { + "epoch": 2.87, + "learning_rate": 7.437755102040817e-05, + "loss": 1.5137, + "step": 135600 + }, + { + "epoch": 2.87, + "learning_rate": 7.435714285714286e-05, + "loss": 1.5105, + "step": 135700 + }, + { + "epoch": 2.87, + "learning_rate": 7.433673469387755e-05, + "loss": 1.5049, + "step": 135800 + }, + { + "epoch": 2.88, + "learning_rate": 7.431632653061224e-05, + "loss": 1.5072, + "step": 135900 + }, + { + "epoch": 2.88, + "learning_rate": 7.429591836734694e-05, + "loss": 1.5087, + "step": 136000 + }, + { + "epoch": 2.88, + "learning_rate": 7.427551020408164e-05, + "loss": 1.5089, + "step": 136100 + }, + { + "epoch": 2.88, + "learning_rate": 7.425530612244899e-05, + "loss": 1.506, + "step": 136200 + }, + { + "epoch": 2.88, + "learning_rate": 7.423489795918368e-05, + "loss": 1.5045, + "step": 136300 + }, + { + "epoch": 2.89, + "learning_rate": 7.421448979591837e-05, + "loss": 1.5111, + "step": 136400 + }, + { + "epoch": 2.89, + "learning_rate": 7.419408163265307e-05, + "loss": 1.509, + "step": 136500 + }, + { + "epoch": 2.89, + "learning_rate": 7.417367346938776e-05, + "loss": 1.5069, + "step": 136600 + }, + { + "epoch": 2.89, + "learning_rate": 7.415326530612246e-05, + "loss": 1.513, + "step": 136700 + }, + { + "epoch": 2.9, + "learning_rate": 7.413285714285715e-05, + "loss": 1.5051, + "step": 136800 + }, + { + "epoch": 2.9, + "learning_rate": 7.411244897959184e-05, + "loss": 1.4956, + "step": 136900 + }, + { + "epoch": 2.9, + "learning_rate": 7.409204081632654e-05, + "loss": 1.5124, + "step": 137000 + }, + { + "epoch": 2.9, + "learning_rate": 7.407163265306124e-05, + "loss": 1.5031, + "step": 137100 + }, + { + "epoch": 2.9, + "learning_rate": 7.405122448979592e-05, + "loss": 1.506, + "step": 137200 + }, + { + "epoch": 2.91, + "learning_rate": 7.403081632653062e-05, + "loss": 1.5137, + "step": 137300 + }, + { + "epoch": 2.91, + "learning_rate": 7.401040816326532e-05, + "loss": 1.5057, + "step": 137400 + }, + { + "epoch": 2.91, + "learning_rate": 7.399e-05, + "loss": 1.5096, + "step": 137500 + }, + { + "epoch": 2.91, + "learning_rate": 7.39695918367347e-05, + "loss": 1.5013, + "step": 137600 + }, + { + "epoch": 2.91, + "learning_rate": 7.394918367346938e-05, + "loss": 1.4997, + "step": 137700 + }, + { + "epoch": 2.92, + "learning_rate": 7.392877551020408e-05, + "loss": 1.5044, + "step": 137800 + }, + { + "epoch": 2.92, + "learning_rate": 7.390836734693878e-05, + "loss": 1.4998, + "step": 137900 + }, + { + "epoch": 2.92, + "learning_rate": 7.388795918367347e-05, + "loss": 1.4997, + "step": 138000 + }, + { + "epoch": 2.92, + "learning_rate": 7.386755102040816e-05, + "loss": 1.5011, + "step": 138100 + }, + { + "epoch": 2.92, + "learning_rate": 7.384714285714286e-05, + "loss": 1.5059, + "step": 138200 + }, + { + "epoch": 2.93, + "learning_rate": 7.382673469387755e-05, + "loss": 1.4967, + "step": 138300 + }, + { + "epoch": 2.93, + "learning_rate": 7.380632653061225e-05, + "loss": 1.5058, + "step": 138400 + }, + { + "epoch": 2.93, + "learning_rate": 7.378591836734694e-05, + "loss": 1.507, + "step": 138500 + }, + { + "epoch": 2.93, + "learning_rate": 7.376551020408163e-05, + "loss": 1.5055, + "step": 138600 + }, + { + "epoch": 2.94, + "learning_rate": 7.374530612244898e-05, + "loss": 1.4997, + "step": 138700 + }, + { + "epoch": 2.94, + "learning_rate": 7.372489795918368e-05, + "loss": 1.5018, + "step": 138800 + }, + { + "epoch": 2.94, + "learning_rate": 7.370448979591838e-05, + "loss": 1.5025, + "step": 138900 + }, + { + "epoch": 2.94, + "learning_rate": 7.368408163265307e-05, + "loss": 1.4973, + "step": 139000 + }, + { + "epoch": 2.94, + "learning_rate": 7.366367346938776e-05, + "loss": 1.5026, + "step": 139100 + }, + { + "epoch": 2.95, + "learning_rate": 7.364326530612246e-05, + "loss": 1.4982, + "step": 139200 + }, + { + "epoch": 2.95, + "learning_rate": 7.362285714285715e-05, + "loss": 1.5012, + "step": 139300 + }, + { + "epoch": 2.95, + "learning_rate": 7.360244897959184e-05, + "loss": 1.496, + "step": 139400 + }, + { + "epoch": 2.95, + "learning_rate": 7.358204081632653e-05, + "loss": 1.4993, + "step": 139500 + }, + { + "epoch": 2.95, + "learning_rate": 7.356163265306123e-05, + "loss": 1.5052, + "step": 139600 + }, + { + "epoch": 2.96, + "learning_rate": 7.354122448979593e-05, + "loss": 1.4959, + "step": 139700 + }, + { + "epoch": 2.96, + "learning_rate": 7.352081632653061e-05, + "loss": 1.5008, + "step": 139800 + }, + { + "epoch": 2.96, + "learning_rate": 7.350040816326531e-05, + "loss": 1.5012, + "step": 139900 + }, + { + "epoch": 2.96, + "learning_rate": 7.348000000000001e-05, + "loss": 1.4916, + "step": 140000 + }, + { + "epoch": 2.97, + "learning_rate": 7.345959183673469e-05, + "loss": 1.5001, + "step": 140100 + }, + { + "epoch": 2.97, + "learning_rate": 7.343918367346939e-05, + "loss": 1.4972, + "step": 140200 + }, + { + "epoch": 2.97, + "learning_rate": 7.341877551020409e-05, + "loss": 1.508, + "step": 140300 + }, + { + "epoch": 2.97, + "learning_rate": 7.339836734693877e-05, + "loss": 1.5028, + "step": 140400 + }, + { + "epoch": 2.97, + "learning_rate": 7.337795918367347e-05, + "loss": 1.4981, + "step": 140500 + }, + { + "epoch": 2.98, + "learning_rate": 7.335755102040817e-05, + "loss": 1.5012, + "step": 140600 + }, + { + "epoch": 2.98, + "learning_rate": 7.333714285714286e-05, + "loss": 1.5039, + "step": 140700 + }, + { + "epoch": 2.98, + "learning_rate": 7.331673469387755e-05, + "loss": 1.4998, + "step": 140800 + }, + { + "epoch": 2.98, + "learning_rate": 7.329632653061225e-05, + "loss": 1.5055, + "step": 140900 + }, + { + "epoch": 2.98, + "learning_rate": 7.327591836734694e-05, + "loss": 1.5009, + "step": 141000 + }, + { + "epoch": 2.99, + "learning_rate": 7.325551020408164e-05, + "loss": 1.4966, + "step": 141100 + }, + { + "epoch": 2.99, + "learning_rate": 7.323510204081633e-05, + "loss": 1.4976, + "step": 141200 + }, + { + "epoch": 2.99, + "learning_rate": 7.321469387755102e-05, + "loss": 1.5013, + "step": 141300 + }, + { + "epoch": 2.99, + "learning_rate": 7.319428571428572e-05, + "loss": 1.5025, + "step": 141400 + }, + { + "epoch": 2.99, + "learning_rate": 7.317387755102042e-05, + "loss": 1.4963, + "step": 141500 + }, + { + "epoch": 3.0, + "learning_rate": 7.31534693877551e-05, + "loss": 1.5062, + "step": 141600 + }, + { + "epoch": 3.0, + "learning_rate": 7.31330612244898e-05, + "loss": 1.496, + "step": 141700 + }, + { + "epoch": 3.0, + "learning_rate": 7.311265306122448e-05, + "loss": 1.5063, + "step": 141800 + }, + { + "epoch": 3.0, + "learning_rate": 7.309224489795918e-05, + "loss": 1.493, + "step": 141900 + }, + { + "epoch": 3.01, + "learning_rate": 7.307183673469388e-05, + "loss": 1.4966, + "step": 142000 + }, + { + "epoch": 3.01, + "learning_rate": 7.305142857142857e-05, + "loss": 1.4851, + "step": 142100 + }, + { + "epoch": 3.01, + "learning_rate": 7.303102040816326e-05, + "loss": 1.4939, + "step": 142200 + }, + { + "epoch": 3.01, + "learning_rate": 7.301061224489796e-05, + "loss": 1.4906, + "step": 142300 + }, + { + "epoch": 3.01, + "learning_rate": 7.299020408163265e-05, + "loss": 1.483, + "step": 142400 + }, + { + "epoch": 3.02, + "learning_rate": 7.296979591836735e-05, + "loss": 1.4873, + "step": 142500 + }, + { + "epoch": 3.02, + "learning_rate": 7.294938775510204e-05, + "loss": 1.4995, + "step": 142600 + }, + { + "epoch": 3.02, + "learning_rate": 7.29291836734694e-05, + "loss": 1.4982, + "step": 142700 + }, + { + "epoch": 3.02, + "learning_rate": 7.290877551020408e-05, + "loss": 1.4879, + "step": 142800 + }, + { + "epoch": 3.02, + "learning_rate": 7.288836734693878e-05, + "loss": 1.4877, + "step": 142900 + }, + { + "epoch": 3.03, + "learning_rate": 7.286816326530612e-05, + "loss": 1.4944, + "step": 143000 + }, + { + "epoch": 3.03, + "learning_rate": 7.284775510204082e-05, + "loss": 1.4964, + "step": 143100 + }, + { + "epoch": 3.03, + "learning_rate": 7.28273469387755e-05, + "loss": 1.4956, + "step": 143200 + }, + { + "epoch": 3.03, + "learning_rate": 7.28069387755102e-05, + "loss": 1.4902, + "step": 143300 + }, + { + "epoch": 3.03, + "learning_rate": 7.27865306122449e-05, + "loss": 1.5054, + "step": 143400 + }, + { + "epoch": 3.04, + "learning_rate": 7.276612244897959e-05, + "loss": 1.4937, + "step": 143500 + }, + { + "epoch": 3.04, + "learning_rate": 7.274571428571428e-05, + "loss": 1.4873, + "step": 143600 + }, + { + "epoch": 3.04, + "learning_rate": 7.272530612244898e-05, + "loss": 1.4904, + "step": 143700 + }, + { + "epoch": 3.04, + "learning_rate": 7.270489795918368e-05, + "loss": 1.4901, + "step": 143800 + }, + { + "epoch": 3.05, + "learning_rate": 7.268448979591838e-05, + "loss": 1.4897, + "step": 143900 + }, + { + "epoch": 3.05, + "learning_rate": 7.266408163265306e-05, + "loss": 1.4886, + "step": 144000 + }, + { + "epoch": 3.05, + "learning_rate": 7.264367346938776e-05, + "loss": 1.4874, + "step": 144100 + }, + { + "epoch": 3.05, + "learning_rate": 7.262326530612246e-05, + "loss": 1.4873, + "step": 144200 + }, + { + "epoch": 3.05, + "learning_rate": 7.260285714285715e-05, + "loss": 1.4831, + "step": 144300 + }, + { + "epoch": 3.06, + "learning_rate": 7.258244897959184e-05, + "loss": 1.4915, + "step": 144400 + }, + { + "epoch": 3.06, + "learning_rate": 7.256204081632654e-05, + "loss": 1.4896, + "step": 144500 + }, + { + "epoch": 3.06, + "learning_rate": 7.254163265306123e-05, + "loss": 1.494, + "step": 144600 + }, + { + "epoch": 3.06, + "learning_rate": 7.252122448979593e-05, + "loss": 1.4907, + "step": 144700 + }, + { + "epoch": 3.06, + "learning_rate": 7.250081632653062e-05, + "loss": 1.4886, + "step": 144800 + }, + { + "epoch": 3.07, + "learning_rate": 7.248040816326531e-05, + "loss": 1.4901, + "step": 144900 + }, + { + "epoch": 3.07, + "learning_rate": 7.246000000000001e-05, + "loss": 1.4895, + "step": 145000 + }, + { + "epoch": 3.07, + "learning_rate": 7.24395918367347e-05, + "loss": 1.4902, + "step": 145100 + }, + { + "epoch": 3.07, + "learning_rate": 7.241918367346939e-05, + "loss": 1.4891, + "step": 145200 + }, + { + "epoch": 3.08, + "learning_rate": 7.239877551020409e-05, + "loss": 1.4909, + "step": 145300 + }, + { + "epoch": 3.08, + "learning_rate": 7.237836734693877e-05, + "loss": 1.4858, + "step": 145400 + }, + { + "epoch": 3.08, + "learning_rate": 7.235795918367347e-05, + "loss": 1.4814, + "step": 145500 + }, + { + "epoch": 3.08, + "learning_rate": 7.233755102040817e-05, + "loss": 1.4896, + "step": 145600 + }, + { + "epoch": 3.08, + "learning_rate": 7.231714285714286e-05, + "loss": 1.4909, + "step": 145700 + }, + { + "epoch": 3.09, + "learning_rate": 7.229673469387755e-05, + "loss": 1.4817, + "step": 145800 + }, + { + "epoch": 3.09, + "learning_rate": 7.227632653061225e-05, + "loss": 1.4996, + "step": 145900 + }, + { + "epoch": 3.09, + "learning_rate": 7.225612244897959e-05, + "loss": 1.4846, + "step": 146000 + }, + { + "epoch": 3.09, + "learning_rate": 7.223571428571428e-05, + "loss": 1.4848, + "step": 146100 + }, + { + "epoch": 3.09, + "learning_rate": 7.221530612244898e-05, + "loss": 1.4927, + "step": 146200 + }, + { + "epoch": 3.1, + "learning_rate": 7.219489795918367e-05, + "loss": 1.4827, + "step": 146300 + }, + { + "epoch": 3.1, + "learning_rate": 7.217448979591837e-05, + "loss": 1.4828, + "step": 146400 + }, + { + "epoch": 3.1, + "learning_rate": 7.215408163265307e-05, + "loss": 1.4836, + "step": 146500 + }, + { + "epoch": 3.1, + "learning_rate": 7.213367346938777e-05, + "loss": 1.492, + "step": 146600 + }, + { + "epoch": 3.1, + "learning_rate": 7.211326530612245e-05, + "loss": 1.486, + "step": 146700 + }, + { + "epoch": 3.11, + "learning_rate": 7.209285714285715e-05, + "loss": 1.4876, + "step": 146800 + }, + { + "epoch": 3.11, + "learning_rate": 7.207244897959185e-05, + "loss": 1.4951, + "step": 146900 + }, + { + "epoch": 3.11, + "learning_rate": 7.205204081632654e-05, + "loss": 1.4902, + "step": 147000 + }, + { + "epoch": 3.11, + "learning_rate": 7.203163265306123e-05, + "loss": 1.4879, + "step": 147100 + }, + { + "epoch": 3.12, + "learning_rate": 7.201122448979592e-05, + "loss": 1.4919, + "step": 147200 + }, + { + "epoch": 3.12, + "learning_rate": 7.199081632653062e-05, + "loss": 1.4878, + "step": 147300 + }, + { + "epoch": 3.12, + "learning_rate": 7.197040816326532e-05, + "loss": 1.4864, + "step": 147400 + }, + { + "epoch": 3.12, + "learning_rate": 7.195e-05, + "loss": 1.491, + "step": 147500 + }, + { + "epoch": 3.12, + "learning_rate": 7.19295918367347e-05, + "loss": 1.487, + "step": 147600 + }, + { + "epoch": 3.13, + "learning_rate": 7.19091836734694e-05, + "loss": 1.4847, + "step": 147700 + }, + { + "epoch": 3.13, + "learning_rate": 7.188877551020408e-05, + "loss": 1.4858, + "step": 147800 + }, + { + "epoch": 3.13, + "learning_rate": 7.186836734693878e-05, + "loss": 1.4811, + "step": 147900 + }, + { + "epoch": 3.13, + "learning_rate": 7.184795918367348e-05, + "loss": 1.4852, + "step": 148000 + }, + { + "epoch": 3.13, + "learning_rate": 7.182755102040816e-05, + "loss": 1.4834, + "step": 148100 + }, + { + "epoch": 3.14, + "learning_rate": 7.180714285714286e-05, + "loss": 1.4882, + "step": 148200 + }, + { + "epoch": 3.14, + "learning_rate": 7.178673469387756e-05, + "loss": 1.4876, + "step": 148300 + }, + { + "epoch": 3.14, + "learning_rate": 7.176632653061225e-05, + "loss": 1.4891, + "step": 148400 + }, + { + "epoch": 3.14, + "learning_rate": 7.174612244897959e-05, + "loss": 1.4888, + "step": 148500 + }, + { + "epoch": 3.14, + "learning_rate": 7.172571428571428e-05, + "loss": 1.4906, + "step": 148600 + }, + { + "epoch": 3.15, + "learning_rate": 7.170530612244898e-05, + "loss": 1.4777, + "step": 148700 + }, + { + "epoch": 3.15, + "learning_rate": 7.168489795918367e-05, + "loss": 1.4833, + "step": 148800 + }, + { + "epoch": 3.15, + "learning_rate": 7.166448979591837e-05, + "loss": 1.4839, + "step": 148900 + }, + { + "epoch": 3.15, + "learning_rate": 7.164408163265306e-05, + "loss": 1.497, + "step": 149000 + }, + { + "epoch": 3.16, + "learning_rate": 7.162367346938775e-05, + "loss": 1.485, + "step": 149100 + }, + { + "epoch": 3.16, + "learning_rate": 7.160326530612246e-05, + "loss": 1.4841, + "step": 149200 + }, + { + "epoch": 3.16, + "learning_rate": 7.158285714285715e-05, + "loss": 1.4745, + "step": 149300 + }, + { + "epoch": 3.16, + "learning_rate": 7.156244897959184e-05, + "loss": 1.4864, + "step": 149400 + }, + { + "epoch": 3.16, + "learning_rate": 7.154204081632654e-05, + "loss": 1.4819, + "step": 149500 + }, + { + "epoch": 3.17, + "learning_rate": 7.152163265306123e-05, + "loss": 1.4867, + "step": 149600 + }, + { + "epoch": 3.17, + "learning_rate": 7.150122448979593e-05, + "loss": 1.4801, + "step": 149700 + }, + { + "epoch": 3.17, + "learning_rate": 7.148081632653062e-05, + "loss": 1.4701, + "step": 149800 + }, + { + "epoch": 3.17, + "learning_rate": 7.146040816326531e-05, + "loss": 1.4851, + "step": 149900 + }, + { + "epoch": 3.17, + "learning_rate": 7.144000000000001e-05, + "loss": 1.4769, + "step": 150000 + }, + { + "epoch": 3.18, + "learning_rate": 7.14195918367347e-05, + "loss": 1.4796, + "step": 150100 + }, + { + "epoch": 3.18, + "learning_rate": 7.139918367346939e-05, + "loss": 1.4736, + "step": 150200 + }, + { + "epoch": 3.18, + "learning_rate": 7.137877551020409e-05, + "loss": 1.4836, + "step": 150300 + }, + { + "epoch": 3.18, + "learning_rate": 7.135836734693879e-05, + "loss": 1.4787, + "step": 150400 + }, + { + "epoch": 3.19, + "learning_rate": 7.133795918367347e-05, + "loss": 1.4834, + "step": 150500 + }, + { + "epoch": 3.19, + "learning_rate": 7.131755102040817e-05, + "loss": 1.4826, + "step": 150600 + }, + { + "epoch": 3.19, + "learning_rate": 7.129714285714287e-05, + "loss": 1.4824, + "step": 150700 + }, + { + "epoch": 3.19, + "learning_rate": 7.127673469387755e-05, + "loss": 1.4822, + "step": 150800 + }, + { + "epoch": 3.19, + "learning_rate": 7.125632653061225e-05, + "loss": 1.4784, + "step": 150900 + }, + { + "epoch": 3.2, + "learning_rate": 7.123591836734694e-05, + "loss": 1.4739, + "step": 151000 + }, + { + "epoch": 3.2, + "learning_rate": 7.121551020408163e-05, + "loss": 1.4851, + "step": 151100 + }, + { + "epoch": 3.2, + "learning_rate": 7.119510204081633e-05, + "loss": 1.475, + "step": 151200 + }, + { + "epoch": 3.2, + "learning_rate": 7.117469387755102e-05, + "loss": 1.4827, + "step": 151300 + }, + { + "epoch": 3.2, + "learning_rate": 7.115428571428572e-05, + "loss": 1.4764, + "step": 151400 + }, + { + "epoch": 3.21, + "learning_rate": 7.113387755102041e-05, + "loss": 1.4717, + "step": 151500 + }, + { + "epoch": 3.21, + "learning_rate": 7.11134693877551e-05, + "loss": 1.4854, + "step": 151600 + }, + { + "epoch": 3.21, + "learning_rate": 7.109326530612245e-05, + "loss": 1.4696, + "step": 151700 + }, + { + "epoch": 3.21, + "learning_rate": 7.107285714285714e-05, + "loss": 1.4815, + "step": 151800 + }, + { + "epoch": 3.21, + "learning_rate": 7.105244897959184e-05, + "loss": 1.4826, + "step": 151900 + }, + { + "epoch": 3.22, + "learning_rate": 7.103204081632654e-05, + "loss": 1.4785, + "step": 152000 + }, + { + "epoch": 3.22, + "learning_rate": 7.101163265306123e-05, + "loss": 1.4766, + "step": 152100 + }, + { + "epoch": 3.22, + "learning_rate": 7.099122448979593e-05, + "loss": 1.4779, + "step": 152200 + }, + { + "epoch": 3.22, + "learning_rate": 7.097081632653062e-05, + "loss": 1.4775, + "step": 152300 + }, + { + "epoch": 3.23, + "learning_rate": 7.095040816326532e-05, + "loss": 1.4779, + "step": 152400 + }, + { + "epoch": 3.23, + "learning_rate": 7.093000000000001e-05, + "loss": 1.4756, + "step": 152500 + }, + { + "epoch": 3.23, + "learning_rate": 7.09095918367347e-05, + "loss": 1.482, + "step": 152600 + }, + { + "epoch": 3.23, + "learning_rate": 7.08891836734694e-05, + "loss": 1.4791, + "step": 152700 + }, + { + "epoch": 3.23, + "learning_rate": 7.086877551020408e-05, + "loss": 1.4696, + "step": 152800 + }, + { + "epoch": 3.24, + "learning_rate": 7.084836734693878e-05, + "loss": 1.4822, + "step": 152900 + }, + { + "epoch": 3.24, + "learning_rate": 7.082795918367348e-05, + "loss": 1.4795, + "step": 153000 + }, + { + "epoch": 3.24, + "learning_rate": 7.080755102040816e-05, + "loss": 1.4753, + "step": 153100 + }, + { + "epoch": 3.24, + "learning_rate": 7.078714285714286e-05, + "loss": 1.4844, + "step": 153200 + }, + { + "epoch": 3.24, + "learning_rate": 7.076673469387756e-05, + "loss": 1.4719, + "step": 153300 + }, + { + "epoch": 3.25, + "learning_rate": 7.074632653061224e-05, + "loss": 1.4787, + "step": 153400 + }, + { + "epoch": 3.25, + "learning_rate": 7.072591836734694e-05, + "loss": 1.4747, + "step": 153500 + }, + { + "epoch": 3.25, + "learning_rate": 7.070551020408164e-05, + "loss": 1.477, + "step": 153600 + }, + { + "epoch": 3.25, + "learning_rate": 7.068510204081633e-05, + "loss": 1.4793, + "step": 153700 + }, + { + "epoch": 3.26, + "learning_rate": 7.066469387755102e-05, + "loss": 1.4716, + "step": 153800 + }, + { + "epoch": 3.26, + "learning_rate": 7.064448979591837e-05, + "loss": 1.4776, + "step": 153900 + }, + { + "epoch": 3.26, + "learning_rate": 7.062408163265306e-05, + "loss": 1.4762, + "step": 154000 + }, + { + "epoch": 3.26, + "learning_rate": 7.060367346938775e-05, + "loss": 1.4777, + "step": 154100 + }, + { + "epoch": 3.26, + "learning_rate": 7.058326530612245e-05, + "loss": 1.4844, + "step": 154200 + }, + { + "epoch": 3.27, + "learning_rate": 7.056285714285715e-05, + "loss": 1.4796, + "step": 154300 + }, + { + "epoch": 3.27, + "learning_rate": 7.054244897959183e-05, + "loss": 1.4695, + "step": 154400 + }, + { + "epoch": 3.27, + "learning_rate": 7.052204081632653e-05, + "loss": 1.4783, + "step": 154500 + }, + { + "epoch": 3.27, + "learning_rate": 7.050163265306123e-05, + "loss": 1.4821, + "step": 154600 + }, + { + "epoch": 3.27, + "learning_rate": 7.048122448979593e-05, + "loss": 1.4741, + "step": 154700 + }, + { + "epoch": 3.28, + "learning_rate": 7.046081632653062e-05, + "loss": 1.4705, + "step": 154800 + }, + { + "epoch": 3.28, + "learning_rate": 7.044040816326531e-05, + "loss": 1.4731, + "step": 154900 + }, + { + "epoch": 3.28, + "learning_rate": 7.042000000000001e-05, + "loss": 1.4795, + "step": 155000 + }, + { + "epoch": 3.28, + "learning_rate": 7.03995918367347e-05, + "loss": 1.4709, + "step": 155100 + }, + { + "epoch": 3.28, + "learning_rate": 7.037918367346939e-05, + "loss": 1.4758, + "step": 155200 + }, + { + "epoch": 3.29, + "learning_rate": 7.035877551020409e-05, + "loss": 1.4721, + "step": 155300 + }, + { + "epoch": 3.29, + "learning_rate": 7.033836734693879e-05, + "loss": 1.4812, + "step": 155400 + }, + { + "epoch": 3.29, + "learning_rate": 7.031795918367347e-05, + "loss": 1.4775, + "step": 155500 + }, + { + "epoch": 3.29, + "learning_rate": 7.029755102040817e-05, + "loss": 1.4835, + "step": 155600 + }, + { + "epoch": 3.3, + "learning_rate": 7.027714285714287e-05, + "loss": 1.4796, + "step": 155700 + }, + { + "epoch": 3.3, + "learning_rate": 7.025673469387755e-05, + "loss": 1.4798, + "step": 155800 + }, + { + "epoch": 3.3, + "learning_rate": 7.023632653061225e-05, + "loss": 1.4718, + "step": 155900 + }, + { + "epoch": 3.3, + "learning_rate": 7.021591836734695e-05, + "loss": 1.4766, + "step": 156000 + }, + { + "epoch": 3.3, + "learning_rate": 7.019571428571429e-05, + "loss": 1.4761, + "step": 156100 + }, + { + "epoch": 3.31, + "learning_rate": 7.017530612244898e-05, + "loss": 1.4791, + "step": 156200 + }, + { + "epoch": 3.31, + "learning_rate": 7.015489795918367e-05, + "loss": 1.463, + "step": 156300 + }, + { + "epoch": 3.31, + "learning_rate": 7.013448979591837e-05, + "loss": 1.4671, + "step": 156400 + }, + { + "epoch": 3.31, + "learning_rate": 7.011408163265306e-05, + "loss": 1.4708, + "step": 156500 + }, + { + "epoch": 3.31, + "learning_rate": 7.009367346938776e-05, + "loss": 1.4685, + "step": 156600 + }, + { + "epoch": 3.32, + "learning_rate": 7.007326530612245e-05, + "loss": 1.4649, + "step": 156700 + }, + { + "epoch": 3.32, + "learning_rate": 7.005285714285714e-05, + "loss": 1.4684, + "step": 156800 + }, + { + "epoch": 3.32, + "learning_rate": 7.003244897959184e-05, + "loss": 1.4714, + "step": 156900 + }, + { + "epoch": 3.32, + "learning_rate": 7.001204081632654e-05, + "loss": 1.4737, + "step": 157000 + }, + { + "epoch": 3.32, + "learning_rate": 6.999163265306122e-05, + "loss": 1.4659, + "step": 157100 + }, + { + "epoch": 3.33, + "learning_rate": 6.997122448979592e-05, + "loss": 1.4762, + "step": 157200 + }, + { + "epoch": 3.33, + "learning_rate": 6.995081632653062e-05, + "loss": 1.4687, + "step": 157300 + }, + { + "epoch": 3.33, + "learning_rate": 6.99304081632653e-05, + "loss": 1.473, + "step": 157400 + }, + { + "epoch": 3.33, + "learning_rate": 6.991000000000001e-05, + "loss": 1.4657, + "step": 157500 + }, + { + "epoch": 3.34, + "learning_rate": 6.98895918367347e-05, + "loss": 1.4718, + "step": 157600 + }, + { + "epoch": 3.34, + "learning_rate": 6.98691836734694e-05, + "loss": 1.4785, + "step": 157700 + }, + { + "epoch": 3.34, + "learning_rate": 6.98487755102041e-05, + "loss": 1.4707, + "step": 157800 + }, + { + "epoch": 3.34, + "learning_rate": 6.982836734693878e-05, + "loss": 1.4636, + "step": 157900 + }, + { + "epoch": 3.34, + "learning_rate": 6.980795918367348e-05, + "loss": 1.4707, + "step": 158000 + }, + { + "epoch": 3.35, + "learning_rate": 6.978755102040818e-05, + "loss": 1.4648, + "step": 158100 + }, + { + "epoch": 3.35, + "learning_rate": 6.976714285714286e-05, + "loss": 1.4699, + "step": 158200 + }, + { + "epoch": 3.35, + "learning_rate": 6.974673469387756e-05, + "loss": 1.4668, + "step": 158300 + }, + { + "epoch": 3.35, + "learning_rate": 6.972632653061226e-05, + "loss": 1.4699, + "step": 158400 + }, + { + "epoch": 3.35, + "learning_rate": 6.970591836734694e-05, + "loss": 1.4691, + "step": 158500 + }, + { + "epoch": 3.36, + "learning_rate": 6.968571428571428e-05, + "loss": 1.4771, + "step": 158600 + }, + { + "epoch": 3.36, + "learning_rate": 6.966530612244898e-05, + "loss": 1.4666, + "step": 158700 + }, + { + "epoch": 3.36, + "learning_rate": 6.964489795918368e-05, + "loss": 1.4655, + "step": 158800 + }, + { + "epoch": 3.36, + "learning_rate": 6.962448979591837e-05, + "loss": 1.4689, + "step": 158900 + }, + { + "epoch": 3.37, + "learning_rate": 6.960408163265306e-05, + "loss": 1.4633, + "step": 159000 + }, + { + "epoch": 3.37, + "learning_rate": 6.958367346938776e-05, + "loss": 1.4655, + "step": 159100 + }, + { + "epoch": 3.37, + "learning_rate": 6.956326530612245e-05, + "loss": 1.4622, + "step": 159200 + }, + { + "epoch": 3.37, + "learning_rate": 6.954285714285714e-05, + "loss": 1.4644, + "step": 159300 + }, + { + "epoch": 3.37, + "learning_rate": 6.952244897959184e-05, + "loss": 1.4699, + "step": 159400 + }, + { + "epoch": 3.38, + "learning_rate": 6.950204081632653e-05, + "loss": 1.4622, + "step": 159500 + }, + { + "epoch": 3.38, + "learning_rate": 6.948163265306123e-05, + "loss": 1.4687, + "step": 159600 + }, + { + "epoch": 3.38, + "learning_rate": 6.946122448979591e-05, + "loss": 1.4754, + "step": 159700 + }, + { + "epoch": 3.38, + "learning_rate": 6.944081632653061e-05, + "loss": 1.4585, + "step": 159800 + }, + { + "epoch": 3.38, + "learning_rate": 6.942040816326531e-05, + "loss": 1.4764, + "step": 159900 + }, + { + "epoch": 3.39, + "learning_rate": 6.939999999999999e-05, + "loss": 1.477, + "step": 160000 + }, + { + "epoch": 3.39, + "learning_rate": 6.937959183673469e-05, + "loss": 1.4675, + "step": 160100 + }, + { + "epoch": 3.39, + "learning_rate": 6.935918367346939e-05, + "loss": 1.4614, + "step": 160200 + }, + { + "epoch": 3.39, + "learning_rate": 6.933877551020409e-05, + "loss": 1.4692, + "step": 160300 + }, + { + "epoch": 3.39, + "learning_rate": 6.931836734693879e-05, + "loss": 1.4738, + "step": 160400 + }, + { + "epoch": 3.4, + "learning_rate": 6.929795918367347e-05, + "loss": 1.4686, + "step": 160500 + }, + { + "epoch": 3.4, + "learning_rate": 6.927755102040817e-05, + "loss": 1.4662, + "step": 160600 + }, + { + "epoch": 3.4, + "learning_rate": 6.925714285714287e-05, + "loss": 1.4678, + "step": 160700 + }, + { + "epoch": 3.4, + "learning_rate": 6.923673469387755e-05, + "loss": 1.4637, + "step": 160800 + }, + { + "epoch": 3.41, + "learning_rate": 6.921632653061225e-05, + "loss": 1.4693, + "step": 160900 + }, + { + "epoch": 3.41, + "learning_rate": 6.919591836734695e-05, + "loss": 1.4652, + "step": 161000 + }, + { + "epoch": 3.41, + "learning_rate": 6.917551020408163e-05, + "loss": 1.4715, + "step": 161100 + }, + { + "epoch": 3.41, + "learning_rate": 6.915510204081633e-05, + "loss": 1.4652, + "step": 161200 + }, + { + "epoch": 3.41, + "learning_rate": 6.913469387755103e-05, + "loss": 1.4633, + "step": 161300 + }, + { + "epoch": 3.42, + "learning_rate": 6.911428571428572e-05, + "loss": 1.4604, + "step": 161400 + }, + { + "epoch": 3.42, + "learning_rate": 6.909387755102041e-05, + "loss": 1.4643, + "step": 161500 + }, + { + "epoch": 3.42, + "learning_rate": 6.907346938775511e-05, + "loss": 1.4675, + "step": 161600 + }, + { + "epoch": 3.42, + "learning_rate": 6.90530612244898e-05, + "loss": 1.4678, + "step": 161700 + }, + { + "epoch": 3.42, + "learning_rate": 6.90326530612245e-05, + "loss": 1.4709, + "step": 161800 + }, + { + "epoch": 3.43, + "learning_rate": 6.90122448979592e-05, + "loss": 1.4652, + "step": 161900 + }, + { + "epoch": 3.43, + "learning_rate": 6.899183673469388e-05, + "loss": 1.4665, + "step": 162000 + }, + { + "epoch": 3.43, + "learning_rate": 6.897142857142858e-05, + "loss": 1.4581, + "step": 162100 + }, + { + "epoch": 3.43, + "learning_rate": 6.895102040816328e-05, + "loss": 1.4595, + "step": 162200 + }, + { + "epoch": 3.43, + "learning_rate": 6.893061224489796e-05, + "loss": 1.4611, + "step": 162300 + }, + { + "epoch": 3.44, + "learning_rate": 6.891020408163266e-05, + "loss": 1.4668, + "step": 162400 + }, + { + "epoch": 3.44, + "learning_rate": 6.888979591836736e-05, + "loss": 1.4709, + "step": 162500 + }, + { + "epoch": 3.44, + "learning_rate": 6.886938775510204e-05, + "loss": 1.4499, + "step": 162600 + }, + { + "epoch": 3.44, + "learning_rate": 6.884918367346938e-05, + "loss": 1.4748, + "step": 162700 + }, + { + "epoch": 3.45, + "learning_rate": 6.882877551020408e-05, + "loss": 1.4707, + "step": 162800 + }, + { + "epoch": 3.45, + "learning_rate": 6.880836734693878e-05, + "loss": 1.47, + "step": 162900 + }, + { + "epoch": 3.45, + "learning_rate": 6.878795918367346e-05, + "loss": 1.4619, + "step": 163000 + }, + { + "epoch": 3.45, + "learning_rate": 6.876755102040818e-05, + "loss": 1.4681, + "step": 163100 + }, + { + "epoch": 3.45, + "learning_rate": 6.874714285714286e-05, + "loss": 1.474, + "step": 163200 + }, + { + "epoch": 3.46, + "learning_rate": 6.872673469387756e-05, + "loss": 1.4628, + "step": 163300 + }, + { + "epoch": 3.46, + "learning_rate": 6.870632653061226e-05, + "loss": 1.4585, + "step": 163400 + }, + { + "epoch": 3.46, + "learning_rate": 6.868591836734694e-05, + "loss": 1.4648, + "step": 163500 + }, + { + "epoch": 3.46, + "learning_rate": 6.866551020408164e-05, + "loss": 1.4648, + "step": 163600 + }, + { + "epoch": 3.46, + "learning_rate": 6.864530612244898e-05, + "loss": 1.4679, + "step": 163700 + }, + { + "epoch": 3.47, + "learning_rate": 6.862489795918368e-05, + "loss": 1.4626, + "step": 163800 + }, + { + "epoch": 3.47, + "learning_rate": 6.860448979591836e-05, + "loss": 1.4632, + "step": 163900 + }, + { + "epoch": 3.47, + "learning_rate": 6.858408163265306e-05, + "loss": 1.4665, + "step": 164000 + }, + { + "epoch": 3.47, + "learning_rate": 6.856367346938776e-05, + "loss": 1.4637, + "step": 164100 + }, + { + "epoch": 3.48, + "learning_rate": 6.854326530612245e-05, + "loss": 1.4647, + "step": 164200 + }, + { + "epoch": 3.48, + "learning_rate": 6.852285714285714e-05, + "loss": 1.4545, + "step": 164300 + }, + { + "epoch": 3.48, + "learning_rate": 6.850244897959184e-05, + "loss": 1.468, + "step": 164400 + }, + { + "epoch": 3.48, + "learning_rate": 6.848204081632653e-05, + "loss": 1.4599, + "step": 164500 + }, + { + "epoch": 3.48, + "learning_rate": 6.846163265306123e-05, + "loss": 1.4655, + "step": 164600 + }, + { + "epoch": 3.49, + "learning_rate": 6.844122448979592e-05, + "loss": 1.46, + "step": 164700 + }, + { + "epoch": 3.49, + "learning_rate": 6.842081632653061e-05, + "loss": 1.4621, + "step": 164800 + }, + { + "epoch": 3.49, + "learning_rate": 6.840040816326531e-05, + "loss": 1.4609, + "step": 164900 + }, + { + "epoch": 3.49, + "learning_rate": 6.838e-05, + "loss": 1.4595, + "step": 165000 + }, + { + "epoch": 3.49, + "learning_rate": 6.835959183673469e-05, + "loss": 1.4637, + "step": 165100 + }, + { + "epoch": 3.5, + "learning_rate": 6.833918367346939e-05, + "loss": 1.4643, + "step": 165200 + }, + { + "epoch": 3.5, + "learning_rate": 6.831877551020407e-05, + "loss": 1.4607, + "step": 165300 + }, + { + "epoch": 3.5, + "learning_rate": 6.829836734693877e-05, + "loss": 1.4653, + "step": 165400 + }, + { + "epoch": 3.5, + "learning_rate": 6.827795918367347e-05, + "loss": 1.4641, + "step": 165500 + }, + { + "epoch": 3.5, + "learning_rate": 6.825755102040816e-05, + "loss": 1.4606, + "step": 165600 + }, + { + "epoch": 3.51, + "learning_rate": 6.823714285714285e-05, + "loss": 1.4565, + "step": 165700 + }, + { + "epoch": 3.51, + "learning_rate": 6.821673469387757e-05, + "loss": 1.4664, + "step": 165800 + }, + { + "epoch": 3.51, + "learning_rate": 6.819632653061225e-05, + "loss": 1.4644, + "step": 165900 + }, + { + "epoch": 3.51, + "learning_rate": 6.817591836734695e-05, + "loss": 1.464, + "step": 166000 + }, + { + "epoch": 3.52, + "learning_rate": 6.815551020408165e-05, + "loss": 1.4656, + "step": 166100 + }, + { + "epoch": 3.52, + "learning_rate": 6.813510204081633e-05, + "loss": 1.4568, + "step": 166200 + }, + { + "epoch": 3.52, + "learning_rate": 6.811469387755103e-05, + "loss": 1.4682, + "step": 166300 + }, + { + "epoch": 3.52, + "learning_rate": 6.809428571428572e-05, + "loss": 1.462, + "step": 166400 + }, + { + "epoch": 3.52, + "learning_rate": 6.807387755102041e-05, + "loss": 1.466, + "step": 166500 + }, + { + "epoch": 3.53, + "learning_rate": 6.805346938775511e-05, + "loss": 1.4559, + "step": 166600 + }, + { + "epoch": 3.53, + "learning_rate": 6.80330612244898e-05, + "loss": 1.4558, + "step": 166700 + }, + { + "epoch": 3.53, + "learning_rate": 6.80126530612245e-05, + "loss": 1.4545, + "step": 166800 + }, + { + "epoch": 3.53, + "learning_rate": 6.79922448979592e-05, + "loss": 1.4587, + "step": 166900 + }, + { + "epoch": 3.53, + "learning_rate": 6.797183673469388e-05, + "loss": 1.4619, + "step": 167000 + }, + { + "epoch": 3.54, + "learning_rate": 6.795142857142858e-05, + "loss": 1.4557, + "step": 167100 + }, + { + "epoch": 3.54, + "learning_rate": 6.793102040816328e-05, + "loss": 1.4665, + "step": 167200 + }, + { + "epoch": 3.54, + "learning_rate": 6.791061224489796e-05, + "loss": 1.466, + "step": 167300 + }, + { + "epoch": 3.54, + "learning_rate": 6.789020408163266e-05, + "loss": 1.4621, + "step": 167400 + }, + { + "epoch": 3.54, + "learning_rate": 6.786979591836736e-05, + "loss": 1.4557, + "step": 167500 + }, + { + "epoch": 3.55, + "learning_rate": 6.784938775510204e-05, + "loss": 1.4589, + "step": 167600 + }, + { + "epoch": 3.55, + "learning_rate": 6.782897959183674e-05, + "loss": 1.4491, + "step": 167700 + }, + { + "epoch": 3.55, + "learning_rate": 6.780857142857144e-05, + "loss": 1.4534, + "step": 167800 + }, + { + "epoch": 3.55, + "learning_rate": 6.778836734693878e-05, + "loss": 1.4545, + "step": 167900 + }, + { + "epoch": 3.56, + "learning_rate": 6.776795918367346e-05, + "loss": 1.4624, + "step": 168000 + }, + { + "epoch": 3.56, + "learning_rate": 6.774755102040816e-05, + "loss": 1.4559, + "step": 168100 + }, + { + "epoch": 3.56, + "learning_rate": 6.772714285714286e-05, + "loss": 1.459, + "step": 168200 + }, + { + "epoch": 3.56, + "learning_rate": 6.770673469387755e-05, + "loss": 1.4538, + "step": 168300 + }, + { + "epoch": 3.56, + "learning_rate": 6.768632653061224e-05, + "loss": 1.4577, + "step": 168400 + }, + { + "epoch": 3.57, + "learning_rate": 6.766591836734694e-05, + "loss": 1.4527, + "step": 168500 + }, + { + "epoch": 3.57, + "learning_rate": 6.764551020408164e-05, + "loss": 1.4545, + "step": 168600 + }, + { + "epoch": 3.57, + "learning_rate": 6.762510204081634e-05, + "loss": 1.4587, + "step": 168700 + }, + { + "epoch": 3.57, + "learning_rate": 6.760469387755102e-05, + "loss": 1.4558, + "step": 168800 + }, + { + "epoch": 3.57, + "learning_rate": 6.758428571428572e-05, + "loss": 1.4493, + "step": 168900 + }, + { + "epoch": 3.58, + "learning_rate": 6.756408163265306e-05, + "loss": 1.4554, + "step": 169000 + }, + { + "epoch": 3.58, + "learning_rate": 6.754367346938776e-05, + "loss": 1.4521, + "step": 169100 + }, + { + "epoch": 3.58, + "learning_rate": 6.752326530612245e-05, + "loss": 1.4568, + "step": 169200 + }, + { + "epoch": 3.58, + "learning_rate": 6.750285714285714e-05, + "loss": 1.4459, + "step": 169300 + }, + { + "epoch": 3.59, + "learning_rate": 6.748244897959184e-05, + "loss": 1.4529, + "step": 169400 + }, + { + "epoch": 3.59, + "learning_rate": 6.746204081632653e-05, + "loss": 1.4647, + "step": 169500 + }, + { + "epoch": 3.59, + "learning_rate": 6.744163265306123e-05, + "loss": 1.4514, + "step": 169600 + }, + { + "epoch": 3.59, + "learning_rate": 6.742122448979592e-05, + "loss": 1.4546, + "step": 169700 + }, + { + "epoch": 3.59, + "learning_rate": 6.740081632653061e-05, + "loss": 1.4522, + "step": 169800 + }, + { + "epoch": 3.6, + "learning_rate": 6.738040816326531e-05, + "loss": 1.4545, + "step": 169900 + }, + { + "epoch": 3.6, + "learning_rate": 6.736e-05, + "loss": 1.4591, + "step": 170000 + }, + { + "epoch": 3.6, + "learning_rate": 6.733959183673469e-05, + "loss": 1.4543, + "step": 170100 + }, + { + "epoch": 3.6, + "learning_rate": 6.731918367346939e-05, + "loss": 1.451, + "step": 170200 + }, + { + "epoch": 3.6, + "learning_rate": 6.729877551020409e-05, + "loss": 1.4572, + "step": 170300 + }, + { + "epoch": 3.61, + "learning_rate": 6.727836734693877e-05, + "loss": 1.4536, + "step": 170400 + }, + { + "epoch": 3.61, + "learning_rate": 6.725795918367347e-05, + "loss": 1.4485, + "step": 170500 + }, + { + "epoch": 3.61, + "learning_rate": 6.723755102040817e-05, + "loss": 1.4536, + "step": 170600 + }, + { + "epoch": 3.61, + "learning_rate": 6.721714285714285e-05, + "loss": 1.4581, + "step": 170700 + }, + { + "epoch": 3.61, + "learning_rate": 6.719673469387755e-05, + "loss": 1.4488, + "step": 170800 + }, + { + "epoch": 3.62, + "learning_rate": 6.717632653061225e-05, + "loss": 1.4555, + "step": 170900 + }, + { + "epoch": 3.62, + "learning_rate": 6.715612244897959e-05, + "loss": 1.4499, + "step": 171000 + }, + { + "epoch": 3.62, + "learning_rate": 6.713571428571429e-05, + "loss": 1.4531, + "step": 171100 + }, + { + "epoch": 3.62, + "learning_rate": 6.711530612244899e-05, + "loss": 1.4526, + "step": 171200 + }, + { + "epoch": 3.63, + "learning_rate": 6.709489795918367e-05, + "loss": 1.4559, + "step": 171300 + }, + { + "epoch": 3.63, + "learning_rate": 6.707448979591837e-05, + "loss": 1.4425, + "step": 171400 + }, + { + "epoch": 3.63, + "learning_rate": 6.705408163265307e-05, + "loss": 1.4522, + "step": 171500 + }, + { + "epoch": 3.63, + "learning_rate": 6.703367346938775e-05, + "loss": 1.4468, + "step": 171600 + }, + { + "epoch": 3.63, + "learning_rate": 6.701326530612245e-05, + "loss": 1.4513, + "step": 171700 + }, + { + "epoch": 3.64, + "learning_rate": 6.699285714285715e-05, + "loss": 1.4513, + "step": 171800 + }, + { + "epoch": 3.64, + "learning_rate": 6.697244897959184e-05, + "loss": 1.4582, + "step": 171900 + }, + { + "epoch": 3.64, + "learning_rate": 6.695204081632653e-05, + "loss": 1.4562, + "step": 172000 + }, + { + "epoch": 3.64, + "learning_rate": 6.693163265306123e-05, + "loss": 1.4456, + "step": 172100 + }, + { + "epoch": 3.64, + "learning_rate": 6.691122448979592e-05, + "loss": 1.4475, + "step": 172200 + }, + { + "epoch": 3.65, + "learning_rate": 6.689081632653062e-05, + "loss": 1.4458, + "step": 172300 + }, + { + "epoch": 3.65, + "learning_rate": 6.687040816326531e-05, + "loss": 1.444, + "step": 172400 + }, + { + "epoch": 3.65, + "learning_rate": 6.685e-05, + "loss": 1.4507, + "step": 172500 + }, + { + "epoch": 3.65, + "learning_rate": 6.68295918367347e-05, + "loss": 1.451, + "step": 172600 + }, + { + "epoch": 3.66, + "learning_rate": 6.68091836734694e-05, + "loss": 1.4528, + "step": 172700 + }, + { + "epoch": 3.66, + "learning_rate": 6.678877551020408e-05, + "loss": 1.4532, + "step": 172800 + }, + { + "epoch": 3.66, + "learning_rate": 6.676836734693878e-05, + "loss": 1.4425, + "step": 172900 + }, + { + "epoch": 3.66, + "learning_rate": 6.674795918367346e-05, + "loss": 1.4472, + "step": 173000 + }, + { + "epoch": 3.66, + "learning_rate": 6.672755102040816e-05, + "loss": 1.4475, + "step": 173100 + }, + { + "epoch": 3.67, + "learning_rate": 6.670714285714286e-05, + "loss": 1.4531, + "step": 173200 + }, + { + "epoch": 3.67, + "learning_rate": 6.668673469387754e-05, + "loss": 1.4475, + "step": 173300 + }, + { + "epoch": 3.67, + "learning_rate": 6.666632653061224e-05, + "loss": 1.4532, + "step": 173400 + }, + { + "epoch": 3.67, + "learning_rate": 6.66461224489796e-05, + "loss": 1.461, + "step": 173500 + }, + { + "epoch": 3.67, + "learning_rate": 6.66257142857143e-05, + "loss": 1.4396, + "step": 173600 + }, + { + "epoch": 3.68, + "learning_rate": 6.660530612244898e-05, + "loss": 1.4489, + "step": 173700 + }, + { + "epoch": 3.68, + "learning_rate": 6.658489795918368e-05, + "loss": 1.4457, + "step": 173800 + }, + { + "epoch": 3.68, + "learning_rate": 6.656448979591838e-05, + "loss": 1.4496, + "step": 173900 + }, + { + "epoch": 3.68, + "learning_rate": 6.654408163265306e-05, + "loss": 1.4456, + "step": 174000 + }, + { + "epoch": 3.68, + "learning_rate": 6.652367346938776e-05, + "loss": 1.4436, + "step": 174100 + }, + { + "epoch": 3.69, + "learning_rate": 6.650326530612246e-05, + "loss": 1.4414, + "step": 174200 + }, + { + "epoch": 3.69, + "learning_rate": 6.648285714285714e-05, + "loss": 1.451, + "step": 174300 + }, + { + "epoch": 3.69, + "learning_rate": 6.646244897959184e-05, + "loss": 1.4498, + "step": 174400 + }, + { + "epoch": 3.69, + "learning_rate": 6.644204081632654e-05, + "loss": 1.4499, + "step": 174500 + }, + { + "epoch": 3.7, + "learning_rate": 6.642163265306123e-05, + "loss": 1.4449, + "step": 174600 + }, + { + "epoch": 3.7, + "learning_rate": 6.640122448979592e-05, + "loss": 1.4533, + "step": 174700 + }, + { + "epoch": 3.7, + "learning_rate": 6.638081632653061e-05, + "loss": 1.4513, + "step": 174800 + }, + { + "epoch": 3.7, + "learning_rate": 6.636040816326531e-05, + "loss": 1.4592, + "step": 174900 + }, + { + "epoch": 3.7, + "learning_rate": 6.634e-05, + "loss": 1.4425, + "step": 175000 + }, + { + "epoch": 3.71, + "learning_rate": 6.631959183673469e-05, + "loss": 1.4557, + "step": 175100 + }, + { + "epoch": 3.71, + "learning_rate": 6.629918367346939e-05, + "loss": 1.4598, + "step": 175200 + }, + { + "epoch": 3.71, + "learning_rate": 6.627877551020409e-05, + "loss": 1.4538, + "step": 175300 + }, + { + "epoch": 3.71, + "learning_rate": 6.625836734693877e-05, + "loss": 1.4389, + "step": 175400 + }, + { + "epoch": 3.71, + "learning_rate": 6.623795918367347e-05, + "loss": 1.4462, + "step": 175500 + }, + { + "epoch": 3.72, + "learning_rate": 6.621755102040817e-05, + "loss": 1.446, + "step": 175600 + }, + { + "epoch": 3.72, + "learning_rate": 6.619714285714285e-05, + "loss": 1.4461, + "step": 175700 + }, + { + "epoch": 3.72, + "learning_rate": 6.617673469387755e-05, + "loss": 1.4489, + "step": 175800 + }, + { + "epoch": 3.72, + "learning_rate": 6.615632653061225e-05, + "loss": 1.4474, + "step": 175900 + }, + { + "epoch": 3.72, + "learning_rate": 6.613591836734693e-05, + "loss": 1.444, + "step": 176000 + }, + { + "epoch": 3.73, + "learning_rate": 6.611551020408163e-05, + "loss": 1.4533, + "step": 176100 + }, + { + "epoch": 3.73, + "learning_rate": 6.609530612244899e-05, + "loss": 1.4454, + "step": 176200 + }, + { + "epoch": 3.73, + "learning_rate": 6.607489795918369e-05, + "loss": 1.4361, + "step": 176300 + }, + { + "epoch": 3.73, + "learning_rate": 6.605448979591837e-05, + "loss": 1.4436, + "step": 176400 + }, + { + "epoch": 3.74, + "learning_rate": 6.603428571428571e-05, + "loss": 1.4406, + "step": 176500 + }, + { + "epoch": 3.74, + "learning_rate": 6.601387755102041e-05, + "loss": 1.4432, + "step": 176600 + }, + { + "epoch": 3.74, + "learning_rate": 6.599346938775511e-05, + "loss": 1.4469, + "step": 176700 + }, + { + "epoch": 3.74, + "learning_rate": 6.597306122448979e-05, + "loss": 1.4446, + "step": 176800 + }, + { + "epoch": 3.74, + "learning_rate": 6.595265306122449e-05, + "loss": 1.442, + "step": 176900 + }, + { + "epoch": 3.75, + "learning_rate": 6.593224489795919e-05, + "loss": 1.4429, + "step": 177000 + }, + { + "epoch": 3.75, + "learning_rate": 6.591183673469387e-05, + "loss": 1.4458, + "step": 177100 + }, + { + "epoch": 3.75, + "learning_rate": 6.589142857142857e-05, + "loss": 1.4481, + "step": 177200 + }, + { + "epoch": 3.75, + "learning_rate": 6.587102040816326e-05, + "loss": 1.4408, + "step": 177300 + }, + { + "epoch": 3.75, + "learning_rate": 6.585061224489796e-05, + "loss": 1.4464, + "step": 177400 + }, + { + "epoch": 3.76, + "learning_rate": 6.583020408163265e-05, + "loss": 1.4403, + "step": 177500 + }, + { + "epoch": 3.76, + "learning_rate": 6.580979591836734e-05, + "loss": 1.4466, + "step": 177600 + }, + { + "epoch": 3.76, + "learning_rate": 6.578938775510204e-05, + "loss": 1.4415, + "step": 177700 + }, + { + "epoch": 3.76, + "learning_rate": 6.576897959183674e-05, + "loss": 1.4466, + "step": 177800 + }, + { + "epoch": 3.77, + "learning_rate": 6.574857142857143e-05, + "loss": 1.4457, + "step": 177900 + }, + { + "epoch": 3.77, + "learning_rate": 6.572816326530613e-05, + "loss": 1.4421, + "step": 178000 + }, + { + "epoch": 3.77, + "learning_rate": 6.570775510204083e-05, + "loss": 1.4445, + "step": 178100 + }, + { + "epoch": 3.77, + "learning_rate": 6.568734693877552e-05, + "loss": 1.4483, + "step": 178200 + }, + { + "epoch": 3.77, + "learning_rate": 6.566693877551021e-05, + "loss": 1.4496, + "step": 178300 + }, + { + "epoch": 3.78, + "learning_rate": 6.56465306122449e-05, + "loss": 1.4471, + "step": 178400 + }, + { + "epoch": 3.78, + "learning_rate": 6.56261224489796e-05, + "loss": 1.4483, + "step": 178500 + }, + { + "epoch": 3.78, + "learning_rate": 6.56057142857143e-05, + "loss": 1.4459, + "step": 178600 + }, + { + "epoch": 3.78, + "learning_rate": 6.558530612244898e-05, + "loss": 1.4456, + "step": 178700 + }, + { + "epoch": 3.78, + "learning_rate": 6.556489795918368e-05, + "loss": 1.4442, + "step": 178800 + }, + { + "epoch": 3.79, + "learning_rate": 6.554448979591838e-05, + "loss": 1.4451, + "step": 178900 + }, + { + "epoch": 3.79, + "learning_rate": 6.552408163265306e-05, + "loss": 1.4377, + "step": 179000 + }, + { + "epoch": 3.79, + "learning_rate": 6.550367346938776e-05, + "loss": 1.4498, + "step": 179100 + }, + { + "epoch": 3.79, + "learning_rate": 6.548326530612246e-05, + "loss": 1.439, + "step": 179200 + }, + { + "epoch": 3.79, + "learning_rate": 6.546285714285714e-05, + "loss": 1.4383, + "step": 179300 + }, + { + "epoch": 3.8, + "learning_rate": 6.544244897959184e-05, + "loss": 1.4344, + "step": 179400 + }, + { + "epoch": 3.8, + "learning_rate": 6.542204081632654e-05, + "loss": 1.4415, + "step": 179500 + }, + { + "epoch": 3.8, + "learning_rate": 6.540163265306122e-05, + "loss": 1.443, + "step": 179600 + }, + { + "epoch": 3.8, + "learning_rate": 6.538122448979592e-05, + "loss": 1.4432, + "step": 179700 + }, + { + "epoch": 3.81, + "learning_rate": 6.536081632653062e-05, + "loss": 1.4444, + "step": 179800 + }, + { + "epoch": 3.81, + "learning_rate": 6.53404081632653e-05, + "loss": 1.4433, + "step": 179900 + }, + { + "epoch": 3.81, + "learning_rate": 6.532e-05, + "loss": 1.4456, + "step": 180000 + }, + { + "epoch": 3.81, + "learning_rate": 6.52995918367347e-05, + "loss": 1.4504, + "step": 180100 + }, + { + "epoch": 3.81, + "learning_rate": 6.527918367346939e-05, + "loss": 1.4386, + "step": 180200 + }, + { + "epoch": 3.82, + "learning_rate": 6.525877551020409e-05, + "loss": 1.4451, + "step": 180300 + }, + { + "epoch": 3.82, + "learning_rate": 6.523836734693878e-05, + "loss": 1.4381, + "step": 180400 + }, + { + "epoch": 3.82, + "learning_rate": 6.521795918367347e-05, + "loss": 1.4386, + "step": 180500 + }, + { + "epoch": 3.82, + "learning_rate": 6.519755102040817e-05, + "loss": 1.4445, + "step": 180600 + }, + { + "epoch": 3.82, + "learning_rate": 6.517734693877552e-05, + "loss": 1.4447, + "step": 180700 + }, + { + "epoch": 3.83, + "learning_rate": 6.515693877551021e-05, + "loss": 1.4458, + "step": 180800 + }, + { + "epoch": 3.83, + "learning_rate": 6.51365306122449e-05, + "loss": 1.4426, + "step": 180900 + }, + { + "epoch": 3.83, + "learning_rate": 6.51161224489796e-05, + "loss": 1.4437, + "step": 181000 + }, + { + "epoch": 3.83, + "learning_rate": 6.509571428571429e-05, + "loss": 1.436, + "step": 181100 + }, + { + "epoch": 3.83, + "learning_rate": 6.507530612244899e-05, + "loss": 1.4431, + "step": 181200 + }, + { + "epoch": 3.84, + "learning_rate": 6.505489795918369e-05, + "loss": 1.4411, + "step": 181300 + }, + { + "epoch": 3.84, + "learning_rate": 6.503448979591837e-05, + "loss": 1.4425, + "step": 181400 + }, + { + "epoch": 3.84, + "learning_rate": 6.501408163265307e-05, + "loss": 1.442, + "step": 181500 + }, + { + "epoch": 3.84, + "learning_rate": 6.499367346938777e-05, + "loss": 1.4388, + "step": 181600 + }, + { + "epoch": 3.85, + "learning_rate": 6.497326530612245e-05, + "loss": 1.4404, + "step": 181700 + }, + { + "epoch": 3.85, + "learning_rate": 6.495285714285715e-05, + "loss": 1.4486, + "step": 181800 + }, + { + "epoch": 3.85, + "learning_rate": 6.493244897959185e-05, + "loss": 1.4376, + "step": 181900 + }, + { + "epoch": 3.85, + "learning_rate": 6.491204081632653e-05, + "loss": 1.4356, + "step": 182000 + }, + { + "epoch": 3.85, + "learning_rate": 6.489163265306123e-05, + "loss": 1.4375, + "step": 182100 + }, + { + "epoch": 3.86, + "learning_rate": 6.487122448979593e-05, + "loss": 1.4396, + "step": 182200 + }, + { + "epoch": 3.86, + "learning_rate": 6.485081632653061e-05, + "loss": 1.4366, + "step": 182300 + }, + { + "epoch": 3.86, + "learning_rate": 6.483040816326531e-05, + "loss": 1.44, + "step": 182400 + }, + { + "epoch": 3.86, + "learning_rate": 6.481e-05, + "loss": 1.4448, + "step": 182500 + }, + { + "epoch": 3.86, + "learning_rate": 6.47895918367347e-05, + "loss": 1.4372, + "step": 182600 + }, + { + "epoch": 3.87, + "learning_rate": 6.47691836734694e-05, + "loss": 1.4427, + "step": 182700 + }, + { + "epoch": 3.87, + "learning_rate": 6.474877551020408e-05, + "loss": 1.4434, + "step": 182800 + }, + { + "epoch": 3.87, + "learning_rate": 6.472836734693878e-05, + "loss": 1.4332, + "step": 182900 + }, + { + "epoch": 3.87, + "learning_rate": 6.470795918367348e-05, + "loss": 1.4402, + "step": 183000 + }, + { + "epoch": 3.88, + "learning_rate": 6.468755102040816e-05, + "loss": 1.4364, + "step": 183100 + }, + { + "epoch": 3.88, + "learning_rate": 6.466714285714286e-05, + "loss": 1.4406, + "step": 183200 + }, + { + "epoch": 3.88, + "learning_rate": 6.464673469387756e-05, + "loss": 1.4308, + "step": 183300 + }, + { + "epoch": 3.88, + "learning_rate": 6.462632653061224e-05, + "loss": 1.4375, + "step": 183400 + }, + { + "epoch": 3.88, + "learning_rate": 6.460591836734694e-05, + "loss": 1.4368, + "step": 183500 + }, + { + "epoch": 3.89, + "learning_rate": 6.458551020408164e-05, + "loss": 1.4438, + "step": 183600 + }, + { + "epoch": 3.89, + "learning_rate": 6.456510204081632e-05, + "loss": 1.4479, + "step": 183700 + }, + { + "epoch": 3.89, + "learning_rate": 6.454469387755102e-05, + "loss": 1.4405, + "step": 183800 + }, + { + "epoch": 3.89, + "learning_rate": 6.452428571428572e-05, + "loss": 1.4366, + "step": 183900 + }, + { + "epoch": 3.89, + "learning_rate": 6.45038775510204e-05, + "loss": 1.4372, + "step": 184000 + }, + { + "epoch": 3.9, + "learning_rate": 6.44834693877551e-05, + "loss": 1.4367, + "step": 184100 + }, + { + "epoch": 3.9, + "learning_rate": 6.446326530612246e-05, + "loss": 1.4404, + "step": 184200 + }, + { + "epoch": 3.9, + "learning_rate": 6.444285714285714e-05, + "loss": 1.4326, + "step": 184300 + }, + { + "epoch": 3.9, + "learning_rate": 6.442244897959184e-05, + "loss": 1.4479, + "step": 184400 + }, + { + "epoch": 3.9, + "learning_rate": 6.440204081632654e-05, + "loss": 1.4353, + "step": 184500 + }, + { + "epoch": 3.91, + "learning_rate": 6.438163265306122e-05, + "loss": 1.4333, + "step": 184600 + }, + { + "epoch": 3.91, + "learning_rate": 6.436122448979592e-05, + "loss": 1.4371, + "step": 184700 + }, + { + "epoch": 3.91, + "learning_rate": 6.434081632653062e-05, + "loss": 1.4392, + "step": 184800 + }, + { + "epoch": 3.91, + "learning_rate": 6.43204081632653e-05, + "loss": 1.4349, + "step": 184900 + }, + { + "epoch": 3.92, + "learning_rate": 6.43e-05, + "loss": 1.4387, + "step": 185000 + }, + { + "epoch": 3.92, + "learning_rate": 6.42795918367347e-05, + "loss": 1.4401, + "step": 185100 + }, + { + "epoch": 3.92, + "learning_rate": 6.425918367346939e-05, + "loss": 1.4369, + "step": 185200 + }, + { + "epoch": 3.92, + "learning_rate": 6.423877551020409e-05, + "loss": 1.4377, + "step": 185300 + }, + { + "epoch": 3.92, + "learning_rate": 6.421836734693878e-05, + "loss": 1.4337, + "step": 185400 + }, + { + "epoch": 3.93, + "learning_rate": 6.419795918367347e-05, + "loss": 1.4405, + "step": 185500 + }, + { + "epoch": 3.93, + "learning_rate": 6.417755102040817e-05, + "loss": 1.4305, + "step": 185600 + }, + { + "epoch": 3.93, + "learning_rate": 6.415714285714287e-05, + "loss": 1.4364, + "step": 185700 + }, + { + "epoch": 3.93, + "learning_rate": 6.413673469387755e-05, + "loss": 1.4351, + "step": 185800 + }, + { + "epoch": 3.93, + "learning_rate": 6.411632653061225e-05, + "loss": 1.4293, + "step": 185900 + }, + { + "epoch": 3.94, + "learning_rate": 6.409591836734695e-05, + "loss": 1.4342, + "step": 186000 + }, + { + "epoch": 3.94, + "learning_rate": 6.407551020408163e-05, + "loss": 1.4455, + "step": 186100 + }, + { + "epoch": 3.94, + "learning_rate": 6.405530612244899e-05, + "loss": 1.4377, + "step": 186200 + }, + { + "epoch": 3.94, + "learning_rate": 6.403489795918368e-05, + "loss": 1.4333, + "step": 186300 + }, + { + "epoch": 3.94, + "learning_rate": 6.401448979591837e-05, + "loss": 1.4367, + "step": 186400 + }, + { + "epoch": 3.95, + "learning_rate": 6.399408163265307e-05, + "loss": 1.434, + "step": 186500 + }, + { + "epoch": 3.95, + "learning_rate": 6.397367346938777e-05, + "loss": 1.4362, + "step": 186600 + }, + { + "epoch": 3.95, + "learning_rate": 6.395326530612245e-05, + "loss": 1.4346, + "step": 186700 + }, + { + "epoch": 3.95, + "learning_rate": 6.393285714285715e-05, + "loss": 1.4307, + "step": 186800 + }, + { + "epoch": 3.96, + "learning_rate": 6.391244897959185e-05, + "loss": 1.4335, + "step": 186900 + }, + { + "epoch": 3.96, + "learning_rate": 6.389204081632653e-05, + "loss": 1.4326, + "step": 187000 + }, + { + "epoch": 3.96, + "learning_rate": 6.387163265306123e-05, + "loss": 1.4343, + "step": 187100 + }, + { + "epoch": 3.96, + "learning_rate": 6.385122448979593e-05, + "loss": 1.4377, + "step": 187200 + }, + { + "epoch": 3.96, + "learning_rate": 6.383081632653061e-05, + "loss": 1.4354, + "step": 187300 + }, + { + "epoch": 3.97, + "learning_rate": 6.381040816326531e-05, + "loss": 1.4357, + "step": 187400 + }, + { + "epoch": 3.97, + "learning_rate": 6.379000000000001e-05, + "loss": 1.4269, + "step": 187500 + }, + { + "epoch": 3.97, + "learning_rate": 6.37695918367347e-05, + "loss": 1.4449, + "step": 187600 + }, + { + "epoch": 3.97, + "learning_rate": 6.37491836734694e-05, + "loss": 1.4258, + "step": 187700 + }, + { + "epoch": 3.97, + "learning_rate": 6.372877551020409e-05, + "loss": 1.4349, + "step": 187800 + }, + { + "epoch": 3.98, + "learning_rate": 6.370836734693878e-05, + "loss": 1.4298, + "step": 187900 + }, + { + "epoch": 3.98, + "learning_rate": 6.368795918367348e-05, + "loss": 1.4251, + "step": 188000 + }, + { + "epoch": 3.98, + "learning_rate": 6.366755102040816e-05, + "loss": 1.4376, + "step": 188100 + }, + { + "epoch": 3.98, + "learning_rate": 6.364714285714286e-05, + "loss": 1.4268, + "step": 188200 + }, + { + "epoch": 3.99, + "learning_rate": 6.362673469387756e-05, + "loss": 1.4285, + "step": 188300 + }, + { + "epoch": 3.99, + "learning_rate": 6.360632653061224e-05, + "loss": 1.4305, + "step": 188400 + }, + { + "epoch": 3.99, + "learning_rate": 6.358591836734694e-05, + "loss": 1.4288, + "step": 188500 + }, + { + "epoch": 3.99, + "learning_rate": 6.356551020408164e-05, + "loss": 1.4387, + "step": 188600 + }, + { + "epoch": 3.99, + "learning_rate": 6.354510204081632e-05, + "loss": 1.4235, + "step": 188700 + }, + { + "epoch": 4.0, + "learning_rate": 6.352469387755102e-05, + "loss": 1.4333, + "step": 188800 + }, + { + "epoch": 4.0, + "learning_rate": 6.350448979591836e-05, + "loss": 1.4287, + "step": 188900 + }, + { + "epoch": 4.0, + "learning_rate": 6.348408163265307e-05, + "loss": 1.4286, + "step": 189000 + }, + { + "epoch": 4.0, + "learning_rate": 6.346367346938776e-05, + "loss": 1.4227, + "step": 189100 + }, + { + "epoch": 4.0, + "learning_rate": 6.344326530612246e-05, + "loss": 1.4155, + "step": 189200 + }, + { + "epoch": 4.01, + "learning_rate": 6.342285714285716e-05, + "loss": 1.4221, + "step": 189300 + }, + { + "epoch": 4.01, + "learning_rate": 6.340244897959184e-05, + "loss": 1.4314, + "step": 189400 + }, + { + "epoch": 4.01, + "learning_rate": 6.338204081632654e-05, + "loss": 1.4283, + "step": 189500 + }, + { + "epoch": 4.01, + "learning_rate": 6.336163265306124e-05, + "loss": 1.4354, + "step": 189600 + }, + { + "epoch": 4.01, + "learning_rate": 6.334122448979592e-05, + "loss": 1.4281, + "step": 189700 + }, + { + "epoch": 4.02, + "learning_rate": 6.332081632653062e-05, + "loss": 1.4304, + "step": 189800 + }, + { + "epoch": 4.02, + "learning_rate": 6.33004081632653e-05, + "loss": 1.4311, + "step": 189900 + }, + { + "epoch": 4.02, + "learning_rate": 6.328e-05, + "loss": 1.4265, + "step": 190000 + }, + { + "epoch": 4.02, + "learning_rate": 6.32595918367347e-05, + "loss": 1.4252, + "step": 190100 + }, + { + "epoch": 4.03, + "learning_rate": 6.323918367346939e-05, + "loss": 1.4302, + "step": 190200 + }, + { + "epoch": 4.03, + "learning_rate": 6.321877551020409e-05, + "loss": 1.4273, + "step": 190300 + }, + { + "epoch": 4.03, + "learning_rate": 6.319836734693878e-05, + "loss": 1.431, + "step": 190400 + }, + { + "epoch": 4.03, + "learning_rate": 6.317795918367347e-05, + "loss": 1.4249, + "step": 190500 + }, + { + "epoch": 4.03, + "learning_rate": 6.315755102040817e-05, + "loss": 1.4218, + "step": 190600 + }, + { + "epoch": 4.04, + "learning_rate": 6.313714285714287e-05, + "loss": 1.423, + "step": 190700 + }, + { + "epoch": 4.04, + "learning_rate": 6.311673469387755e-05, + "loss": 1.4276, + "step": 190800 + }, + { + "epoch": 4.04, + "learning_rate": 6.309632653061225e-05, + "loss": 1.4213, + "step": 190900 + }, + { + "epoch": 4.04, + "learning_rate": 6.307591836734695e-05, + "loss": 1.4147, + "step": 191000 + }, + { + "epoch": 4.04, + "learning_rate": 6.305551020408163e-05, + "loss": 1.4253, + "step": 191100 + }, + { + "epoch": 4.05, + "learning_rate": 6.303510204081633e-05, + "loss": 1.425, + "step": 191200 + }, + { + "epoch": 4.05, + "learning_rate": 6.301469387755103e-05, + "loss": 1.4224, + "step": 191300 + }, + { + "epoch": 4.05, + "learning_rate": 6.299428571428571e-05, + "loss": 1.4329, + "step": 191400 + }, + { + "epoch": 4.05, + "learning_rate": 6.297387755102041e-05, + "loss": 1.4255, + "step": 191500 + }, + { + "epoch": 4.06, + "learning_rate": 6.295346938775511e-05, + "loss": 1.4216, + "step": 191600 + }, + { + "epoch": 4.06, + "learning_rate": 6.29330612244898e-05, + "loss": 1.4159, + "step": 191700 + }, + { + "epoch": 4.06, + "learning_rate": 6.291265306122449e-05, + "loss": 1.4251, + "step": 191800 + }, + { + "epoch": 4.06, + "learning_rate": 6.289244897959185e-05, + "loss": 1.4245, + "step": 191900 + }, + { + "epoch": 4.06, + "learning_rate": 6.287204081632653e-05, + "loss": 1.4248, + "step": 192000 + }, + { + "epoch": 4.07, + "learning_rate": 6.285163265306123e-05, + "loss": 1.4188, + "step": 192100 + }, + { + "epoch": 4.07, + "learning_rate": 6.283122448979593e-05, + "loss": 1.4197, + "step": 192200 + }, + { + "epoch": 4.07, + "learning_rate": 6.281081632653061e-05, + "loss": 1.4295, + "step": 192300 + }, + { + "epoch": 4.07, + "learning_rate": 6.279040816326531e-05, + "loss": 1.426, + "step": 192400 + }, + { + "epoch": 4.07, + "learning_rate": 6.277000000000001e-05, + "loss": 1.4273, + "step": 192500 + }, + { + "epoch": 4.08, + "learning_rate": 6.27495918367347e-05, + "loss": 1.4299, + "step": 192600 + }, + { + "epoch": 4.08, + "learning_rate": 6.27291836734694e-05, + "loss": 1.4154, + "step": 192700 + }, + { + "epoch": 4.08, + "learning_rate": 6.270877551020409e-05, + "loss": 1.4249, + "step": 192800 + }, + { + "epoch": 4.08, + "learning_rate": 6.268836734693878e-05, + "loss": 1.4183, + "step": 192900 + }, + { + "epoch": 4.08, + "learning_rate": 6.266795918367348e-05, + "loss": 1.4252, + "step": 193000 + }, + { + "epoch": 4.09, + "learning_rate": 6.264755102040817e-05, + "loss": 1.4217, + "step": 193100 + }, + { + "epoch": 4.09, + "learning_rate": 6.262734693877551e-05, + "loss": 1.4244, + "step": 193200 + }, + { + "epoch": 4.09, + "learning_rate": 6.26069387755102e-05, + "loss": 1.4244, + "step": 193300 + }, + { + "epoch": 4.09, + "learning_rate": 6.25865306122449e-05, + "loss": 1.4235, + "step": 193400 + }, + { + "epoch": 4.1, + "learning_rate": 6.25661224489796e-05, + "loss": 1.4225, + "step": 193500 + }, + { + "epoch": 4.1, + "learning_rate": 6.254571428571428e-05, + "loss": 1.4244, + "step": 193600 + }, + { + "epoch": 4.1, + "learning_rate": 6.252530612244898e-05, + "loss": 1.4259, + "step": 193700 + }, + { + "epoch": 4.1, + "learning_rate": 6.250489795918368e-05, + "loss": 1.4301, + "step": 193800 + }, + { + "epoch": 4.1, + "learning_rate": 6.248448979591836e-05, + "loss": 1.4201, + "step": 193900 + }, + { + "epoch": 4.11, + "learning_rate": 6.246408163265306e-05, + "loss": 1.4182, + "step": 194000 + }, + { + "epoch": 4.11, + "learning_rate": 6.244367346938776e-05, + "loss": 1.4292, + "step": 194100 + }, + { + "epoch": 4.11, + "learning_rate": 6.242326530612244e-05, + "loss": 1.4224, + "step": 194200 + }, + { + "epoch": 4.11, + "learning_rate": 6.240285714285714e-05, + "loss": 1.4178, + "step": 194300 + }, + { + "epoch": 4.11, + "learning_rate": 6.238244897959184e-05, + "loss": 1.4167, + "step": 194400 + }, + { + "epoch": 4.12, + "learning_rate": 6.236204081632654e-05, + "loss": 1.4292, + "step": 194500 + }, + { + "epoch": 4.12, + "learning_rate": 6.234163265306124e-05, + "loss": 1.4121, + "step": 194600 + }, + { + "epoch": 4.12, + "learning_rate": 6.232122448979592e-05, + "loss": 1.4211, + "step": 194700 + }, + { + "epoch": 4.12, + "learning_rate": 6.230081632653062e-05, + "loss": 1.4181, + "step": 194800 + }, + { + "epoch": 4.12, + "learning_rate": 6.228040816326532e-05, + "loss": 1.422, + "step": 194900 + }, + { + "epoch": 4.13, + "learning_rate": 6.226e-05, + "loss": 1.4266, + "step": 195000 + }, + { + "epoch": 4.13, + "learning_rate": 6.22395918367347e-05, + "loss": 1.4206, + "step": 195100 + }, + { + "epoch": 4.13, + "learning_rate": 6.22191836734694e-05, + "loss": 1.4197, + "step": 195200 + }, + { + "epoch": 4.13, + "learning_rate": 6.219877551020409e-05, + "loss": 1.4274, + "step": 195300 + }, + { + "epoch": 4.14, + "learning_rate": 6.217836734693878e-05, + "loss": 1.4238, + "step": 195400 + }, + { + "epoch": 4.14, + "learning_rate": 6.215795918367348e-05, + "loss": 1.4234, + "step": 195500 + }, + { + "epoch": 4.14, + "learning_rate": 6.213755102040817e-05, + "loss": 1.4275, + "step": 195600 + }, + { + "epoch": 4.14, + "learning_rate": 6.211714285714287e-05, + "loss": 1.4313, + "step": 195700 + }, + { + "epoch": 4.14, + "learning_rate": 6.209673469387755e-05, + "loss": 1.4172, + "step": 195800 + }, + { + "epoch": 4.15, + "learning_rate": 6.207632653061225e-05, + "loss": 1.4196, + "step": 195900 + }, + { + "epoch": 4.15, + "learning_rate": 6.205591836734695e-05, + "loss": 1.4224, + "step": 196000 + }, + { + "epoch": 4.15, + "learning_rate": 6.203551020408163e-05, + "loss": 1.424, + "step": 196100 + }, + { + "epoch": 4.15, + "learning_rate": 6.201510204081633e-05, + "loss": 1.4144, + "step": 196200 + }, + { + "epoch": 4.15, + "learning_rate": 6.199469387755103e-05, + "loss": 1.4276, + "step": 196300 + }, + { + "epoch": 4.16, + "learning_rate": 6.197428571428571e-05, + "loss": 1.427, + "step": 196400 + }, + { + "epoch": 4.16, + "learning_rate": 6.195387755102041e-05, + "loss": 1.4167, + "step": 196500 + }, + { + "epoch": 4.16, + "learning_rate": 6.193346938775511e-05, + "loss": 1.4221, + "step": 196600 + }, + { + "epoch": 4.16, + "learning_rate": 6.19130612244898e-05, + "loss": 1.4281, + "step": 196700 + }, + { + "epoch": 4.17, + "learning_rate": 6.189265306122449e-05, + "loss": 1.4188, + "step": 196800 + }, + { + "epoch": 4.17, + "learning_rate": 6.187224489795919e-05, + "loss": 1.4206, + "step": 196900 + }, + { + "epoch": 4.17, + "learning_rate": 6.185183673469388e-05, + "loss": 1.4125, + "step": 197000 + }, + { + "epoch": 4.17, + "learning_rate": 6.183142857142857e-05, + "loss": 1.4118, + "step": 197100 + }, + { + "epoch": 4.17, + "learning_rate": 6.181122448979592e-05, + "loss": 1.4289, + "step": 197200 + }, + { + "epoch": 4.18, + "learning_rate": 6.179081632653063e-05, + "loss": 1.4164, + "step": 197300 + }, + { + "epoch": 4.18, + "learning_rate": 6.177040816326531e-05, + "loss": 1.4257, + "step": 197400 + }, + { + "epoch": 4.18, + "learning_rate": 6.175000000000001e-05, + "loss": 1.4117, + "step": 197500 + }, + { + "epoch": 4.18, + "learning_rate": 6.17295918367347e-05, + "loss": 1.4185, + "step": 197600 + }, + { + "epoch": 4.18, + "learning_rate": 6.17091836734694e-05, + "loss": 1.4199, + "step": 197700 + }, + { + "epoch": 4.19, + "learning_rate": 6.168877551020409e-05, + "loss": 1.4126, + "step": 197800 + }, + { + "epoch": 4.19, + "learning_rate": 6.166836734693878e-05, + "loss": 1.4199, + "step": 197900 + }, + { + "epoch": 4.19, + "learning_rate": 6.164795918367347e-05, + "loss": 1.4156, + "step": 198000 + }, + { + "epoch": 4.19, + "learning_rate": 6.162755102040817e-05, + "loss": 1.4157, + "step": 198100 + }, + { + "epoch": 4.19, + "learning_rate": 6.160714285714286e-05, + "loss": 1.421, + "step": 198200 + }, + { + "epoch": 4.2, + "learning_rate": 6.158673469387756e-05, + "loss": 1.4257, + "step": 198300 + }, + { + "epoch": 4.2, + "learning_rate": 6.156632653061225e-05, + "loss": 1.4168, + "step": 198400 + }, + { + "epoch": 4.2, + "learning_rate": 6.154591836734694e-05, + "loss": 1.4145, + "step": 198500 + }, + { + "epoch": 4.2, + "learning_rate": 6.152551020408164e-05, + "loss": 1.419, + "step": 198600 + }, + { + "epoch": 4.21, + "learning_rate": 6.150510204081634e-05, + "loss": 1.4159, + "step": 198700 + }, + { + "epoch": 4.21, + "learning_rate": 6.148469387755102e-05, + "loss": 1.4189, + "step": 198800 + }, + { + "epoch": 4.21, + "learning_rate": 6.146428571428572e-05, + "loss": 1.4137, + "step": 198900 + }, + { + "epoch": 4.21, + "learning_rate": 6.144387755102042e-05, + "loss": 1.4187, + "step": 199000 + }, + { + "epoch": 4.21, + "learning_rate": 6.14234693877551e-05, + "loss": 1.4223, + "step": 199100 + }, + { + "epoch": 4.22, + "learning_rate": 6.14030612244898e-05, + "loss": 1.4292, + "step": 199200 + }, + { + "epoch": 4.22, + "learning_rate": 6.13826530612245e-05, + "loss": 1.4133, + "step": 199300 + }, + { + "epoch": 4.22, + "learning_rate": 6.136224489795918e-05, + "loss": 1.4122, + "step": 199400 + }, + { + "epoch": 4.22, + "learning_rate": 6.134183673469388e-05, + "loss": 1.4174, + "step": 199500 + }, + { + "epoch": 4.22, + "learning_rate": 6.132142857142858e-05, + "loss": 1.4114, + "step": 199600 + }, + { + "epoch": 4.23, + "learning_rate": 6.130102040816327e-05, + "loss": 1.4132, + "step": 199700 + }, + { + "epoch": 4.23, + "learning_rate": 6.128061224489796e-05, + "loss": 1.4167, + "step": 199800 + }, + { + "epoch": 4.23, + "learning_rate": 6.126020408163265e-05, + "loss": 1.4112, + "step": 199900 + }, + { + "epoch": 4.23, + "learning_rate": 6.123979591836735e-05, + "loss": 1.4204, + "step": 200000 + }, + { + "epoch": 4.23, + "learning_rate": 6.121938775510205e-05, + "loss": 1.4199, + "step": 200100 + }, + { + "epoch": 4.24, + "learning_rate": 6.119897959183673e-05, + "loss": 1.4143, + "step": 200200 + }, + { + "epoch": 4.24, + "learning_rate": 6.117857142857143e-05, + "loss": 1.4089, + "step": 200300 + }, + { + "epoch": 4.24, + "learning_rate": 6.115816326530613e-05, + "loss": 1.4135, + "step": 200400 + }, + { + "epoch": 4.24, + "learning_rate": 6.113775510204081e-05, + "loss": 1.4137, + "step": 200500 + }, + { + "epoch": 4.25, + "learning_rate": 6.111734693877551e-05, + "loss": 1.4073, + "step": 200600 + }, + { + "epoch": 4.25, + "learning_rate": 6.109693877551021e-05, + "loss": 1.4108, + "step": 200700 + }, + { + "epoch": 4.25, + "learning_rate": 6.10765306122449e-05, + "loss": 1.4244, + "step": 200800 + }, + { + "epoch": 4.25, + "learning_rate": 6.105612244897959e-05, + "loss": 1.4208, + "step": 200900 + }, + { + "epoch": 4.25, + "learning_rate": 6.103571428571429e-05, + "loss": 1.4136, + "step": 201000 + }, + { + "epoch": 4.26, + "learning_rate": 6.101530612244898e-05, + "loss": 1.4216, + "step": 201100 + }, + { + "epoch": 4.26, + "learning_rate": 6.099489795918367e-05, + "loss": 1.4109, + "step": 201200 + }, + { + "epoch": 4.26, + "learning_rate": 6.0974489795918365e-05, + "loss": 1.4162, + "step": 201300 + }, + { + "epoch": 4.26, + "learning_rate": 6.0954081632653056e-05, + "loss": 1.4099, + "step": 201400 + }, + { + "epoch": 4.26, + "learning_rate": 6.0933673469387755e-05, + "loss": 1.4188, + "step": 201500 + }, + { + "epoch": 4.27, + "learning_rate": 6.0913265306122446e-05, + "loss": 1.4116, + "step": 201600 + }, + { + "epoch": 4.27, + "learning_rate": 6.089285714285714e-05, + "loss": 1.4163, + "step": 201700 + }, + { + "epoch": 4.27, + "learning_rate": 6.087244897959184e-05, + "loss": 1.4141, + "step": 201800 + }, + { + "epoch": 4.27, + "learning_rate": 6.085204081632654e-05, + "loss": 1.4135, + "step": 201900 + }, + { + "epoch": 4.28, + "learning_rate": 6.083163265306123e-05, + "loss": 1.4166, + "step": 202000 + }, + { + "epoch": 4.28, + "learning_rate": 6.0811224489795925e-05, + "loss": 1.4144, + "step": 202100 + }, + { + "epoch": 4.28, + "learning_rate": 6.079081632653062e-05, + "loss": 1.4151, + "step": 202200 + }, + { + "epoch": 4.28, + "learning_rate": 6.0770408163265315e-05, + "loss": 1.4212, + "step": 202300 + }, + { + "epoch": 4.28, + "learning_rate": 6.0750000000000006e-05, + "loss": 1.408, + "step": 202400 + }, + { + "epoch": 4.29, + "learning_rate": 6.072979591836735e-05, + "loss": 1.4142, + "step": 202500 + }, + { + "epoch": 4.29, + "learning_rate": 6.0709387755102045e-05, + "loss": 1.4018, + "step": 202600 + }, + { + "epoch": 4.29, + "learning_rate": 6.068897959183674e-05, + "loss": 1.4106, + "step": 202700 + }, + { + "epoch": 4.29, + "learning_rate": 6.066857142857143e-05, + "loss": 1.4162, + "step": 202800 + }, + { + "epoch": 4.29, + "learning_rate": 6.064816326530613e-05, + "loss": 1.4122, + "step": 202900 + }, + { + "epoch": 4.3, + "learning_rate": 6.062775510204082e-05, + "loss": 1.4079, + "step": 203000 + }, + { + "epoch": 4.3, + "learning_rate": 6.060734693877551e-05, + "loss": 1.4124, + "step": 203100 + }, + { + "epoch": 4.3, + "learning_rate": 6.05869387755102e-05, + "loss": 1.4136, + "step": 203200 + }, + { + "epoch": 4.3, + "learning_rate": 6.05665306122449e-05, + "loss": 1.4119, + "step": 203300 + }, + { + "epoch": 4.3, + "learning_rate": 6.0546326530612254e-05, + "loss": 1.415, + "step": 203400 + }, + { + "epoch": 4.31, + "learning_rate": 6.0525918367346946e-05, + "loss": 1.4218, + "step": 203500 + }, + { + "epoch": 4.31, + "learning_rate": 6.050551020408164e-05, + "loss": 1.4143, + "step": 203600 + }, + { + "epoch": 4.31, + "learning_rate": 6.0485102040816336e-05, + "loss": 1.4201, + "step": 203700 + }, + { + "epoch": 4.31, + "learning_rate": 6.046469387755103e-05, + "loss": 1.4185, + "step": 203800 + }, + { + "epoch": 4.32, + "learning_rate": 6.044428571428572e-05, + "loss": 1.4195, + "step": 203900 + }, + { + "epoch": 4.32, + "learning_rate": 6.042387755102041e-05, + "loss": 1.4096, + "step": 204000 + }, + { + "epoch": 4.32, + "learning_rate": 6.040346938775511e-05, + "loss": 1.4074, + "step": 204100 + }, + { + "epoch": 4.32, + "learning_rate": 6.03830612244898e-05, + "loss": 1.4086, + "step": 204200 + }, + { + "epoch": 4.32, + "learning_rate": 6.036265306122449e-05, + "loss": 1.4111, + "step": 204300 + }, + { + "epoch": 4.33, + "learning_rate": 6.034224489795919e-05, + "loss": 1.405, + "step": 204400 + }, + { + "epoch": 4.33, + "learning_rate": 6.032183673469388e-05, + "loss": 1.4101, + "step": 204500 + }, + { + "epoch": 4.33, + "learning_rate": 6.0301428571428574e-05, + "loss": 1.4148, + "step": 204600 + }, + { + "epoch": 4.33, + "learning_rate": 6.028102040816327e-05, + "loss": 1.4137, + "step": 204700 + }, + { + "epoch": 4.33, + "learning_rate": 6.0260612244897964e-05, + "loss": 1.4115, + "step": 204800 + }, + { + "epoch": 4.34, + "learning_rate": 6.0240204081632655e-05, + "loss": 1.4087, + "step": 204900 + }, + { + "epoch": 4.34, + "learning_rate": 6.021979591836735e-05, + "loss": 1.4085, + "step": 205000 + }, + { + "epoch": 4.34, + "learning_rate": 6.0199387755102045e-05, + "loss": 1.4082, + "step": 205100 + }, + { + "epoch": 4.34, + "learning_rate": 6.017897959183674e-05, + "loss": 1.4124, + "step": 205200 + }, + { + "epoch": 4.34, + "learning_rate": 6.015857142857143e-05, + "loss": 1.4086, + "step": 205300 + }, + { + "epoch": 4.35, + "learning_rate": 6.013816326530613e-05, + "loss": 1.4181, + "step": 205400 + }, + { + "epoch": 4.35, + "learning_rate": 6.011775510204082e-05, + "loss": 1.4173, + "step": 205500 + }, + { + "epoch": 4.35, + "learning_rate": 6.009734693877551e-05, + "loss": 1.4258, + "step": 205600 + }, + { + "epoch": 4.35, + "learning_rate": 6.007693877551021e-05, + "loss": 1.4115, + "step": 205700 + }, + { + "epoch": 4.36, + "learning_rate": 6.00565306122449e-05, + "loss": 1.4147, + "step": 205800 + }, + { + "epoch": 4.36, + "learning_rate": 6.003612244897959e-05, + "loss": 1.4112, + "step": 205900 + }, + { + "epoch": 4.36, + "learning_rate": 6.001571428571428e-05, + "loss": 1.4077, + "step": 206000 + }, + { + "epoch": 4.36, + "learning_rate": 5.999530612244898e-05, + "loss": 1.4079, + "step": 206100 + }, + { + "epoch": 4.36, + "learning_rate": 5.997489795918367e-05, + "loss": 1.4088, + "step": 206200 + }, + { + "epoch": 4.37, + "learning_rate": 5.9954489795918364e-05, + "loss": 1.4112, + "step": 206300 + }, + { + "epoch": 4.37, + "learning_rate": 5.993408163265306e-05, + "loss": 1.4124, + "step": 206400 + }, + { + "epoch": 4.37, + "learning_rate": 5.9913673469387754e-05, + "loss": 1.4074, + "step": 206500 + }, + { + "epoch": 4.37, + "learning_rate": 5.9893265306122446e-05, + "loss": 1.413, + "step": 206600 + }, + { + "epoch": 4.37, + "learning_rate": 5.9872857142857144e-05, + "loss": 1.4145, + "step": 206700 + }, + { + "epoch": 4.38, + "learning_rate": 5.9852448979591836e-05, + "loss": 1.4045, + "step": 206800 + }, + { + "epoch": 4.38, + "learning_rate": 5.983204081632653e-05, + "loss": 1.4172, + "step": 206900 + }, + { + "epoch": 4.38, + "learning_rate": 5.9811632653061226e-05, + "loss": 1.4078, + "step": 207000 + }, + { + "epoch": 4.38, + "learning_rate": 5.979122448979592e-05, + "loss": 1.4117, + "step": 207100 + }, + { + "epoch": 4.39, + "learning_rate": 5.977081632653061e-05, + "loss": 1.4072, + "step": 207200 + }, + { + "epoch": 4.39, + "learning_rate": 5.97504081632653e-05, + "loss": 1.4051, + "step": 207300 + }, + { + "epoch": 4.39, + "learning_rate": 5.9730000000000006e-05, + "loss": 1.4093, + "step": 207400 + }, + { + "epoch": 4.39, + "learning_rate": 5.9709591836734704e-05, + "loss": 1.4099, + "step": 207500 + }, + { + "epoch": 4.39, + "learning_rate": 5.9689183673469396e-05, + "loss": 1.4056, + "step": 207600 + }, + { + "epoch": 4.4, + "learning_rate": 5.966877551020409e-05, + "loss": 1.408, + "step": 207700 + }, + { + "epoch": 4.4, + "learning_rate": 5.9648367346938786e-05, + "loss": 1.4046, + "step": 207800 + }, + { + "epoch": 4.4, + "learning_rate": 5.962795918367348e-05, + "loss": 1.4112, + "step": 207900 + }, + { + "epoch": 4.4, + "learning_rate": 5.960755102040817e-05, + "loss": 1.4077, + "step": 208000 + }, + { + "epoch": 4.4, + "learning_rate": 5.958714285714286e-05, + "loss": 1.412, + "step": 208100 + }, + { + "epoch": 4.41, + "learning_rate": 5.956673469387756e-05, + "loss": 1.408, + "step": 208200 + }, + { + "epoch": 4.41, + "learning_rate": 5.954632653061225e-05, + "loss": 1.4075, + "step": 208300 + }, + { + "epoch": 4.41, + "learning_rate": 5.952591836734694e-05, + "loss": 1.404, + "step": 208400 + }, + { + "epoch": 4.41, + "learning_rate": 5.950551020408164e-05, + "loss": 1.4147, + "step": 208500 + }, + { + "epoch": 4.41, + "learning_rate": 5.948530612244898e-05, + "loss": 1.4089, + "step": 208600 + }, + { + "epoch": 4.42, + "learning_rate": 5.946489795918367e-05, + "loss": 1.417, + "step": 208700 + }, + { + "epoch": 4.42, + "learning_rate": 5.944448979591837e-05, + "loss": 1.4023, + "step": 208800 + }, + { + "epoch": 4.42, + "learning_rate": 5.942408163265306e-05, + "loss": 1.4093, + "step": 208900 + }, + { + "epoch": 4.42, + "learning_rate": 5.9403673469387754e-05, + "loss": 1.4139, + "step": 209000 + }, + { + "epoch": 4.43, + "learning_rate": 5.9383265306122446e-05, + "loss": 1.4026, + "step": 209100 + }, + { + "epoch": 4.43, + "learning_rate": 5.9362857142857144e-05, + "loss": 1.4187, + "step": 209200 + }, + { + "epoch": 4.43, + "learning_rate": 5.9342448979591836e-05, + "loss": 1.4034, + "step": 209300 + }, + { + "epoch": 4.43, + "learning_rate": 5.932204081632653e-05, + "loss": 1.4013, + "step": 209400 + }, + { + "epoch": 4.43, + "learning_rate": 5.930183673469388e-05, + "loss": 1.3987, + "step": 209500 + }, + { + "epoch": 4.44, + "learning_rate": 5.928142857142857e-05, + "loss": 1.4042, + "step": 209600 + }, + { + "epoch": 4.44, + "learning_rate": 5.926102040816327e-05, + "loss": 1.4077, + "step": 209700 + }, + { + "epoch": 4.44, + "learning_rate": 5.924061224489796e-05, + "loss": 1.4009, + "step": 209800 + }, + { + "epoch": 4.44, + "learning_rate": 5.9220204081632655e-05, + "loss": 1.4147, + "step": 209900 + }, + { + "epoch": 4.44, + "learning_rate": 5.919979591836735e-05, + "loss": 1.4082, + "step": 210000 + }, + { + "epoch": 4.45, + "learning_rate": 5.9179387755102045e-05, + "loss": 1.4001, + "step": 210100 + }, + { + "epoch": 4.45, + "learning_rate": 5.9158979591836736e-05, + "loss": 1.4195, + "step": 210200 + }, + { + "epoch": 4.45, + "learning_rate": 5.9138571428571435e-05, + "loss": 1.4051, + "step": 210300 + }, + { + "epoch": 4.45, + "learning_rate": 5.9118163265306126e-05, + "loss": 1.409, + "step": 210400 + }, + { + "epoch": 4.46, + "learning_rate": 5.909775510204082e-05, + "loss": 1.4046, + "step": 210500 + }, + { + "epoch": 4.46, + "learning_rate": 5.9077346938775516e-05, + "loss": 1.4012, + "step": 210600 + }, + { + "epoch": 4.46, + "learning_rate": 5.905693877551021e-05, + "loss": 1.4025, + "step": 210700 + }, + { + "epoch": 4.46, + "learning_rate": 5.90365306122449e-05, + "loss": 1.408, + "step": 210800 + }, + { + "epoch": 4.46, + "learning_rate": 5.901612244897959e-05, + "loss": 1.4045, + "step": 210900 + }, + { + "epoch": 4.47, + "learning_rate": 5.899571428571429e-05, + "loss": 1.4053, + "step": 211000 + }, + { + "epoch": 4.47, + "learning_rate": 5.897530612244898e-05, + "loss": 1.4055, + "step": 211100 + }, + { + "epoch": 4.47, + "learning_rate": 5.895489795918367e-05, + "loss": 1.3999, + "step": 211200 + }, + { + "epoch": 4.47, + "learning_rate": 5.893448979591837e-05, + "loss": 1.4076, + "step": 211300 + }, + { + "epoch": 4.47, + "learning_rate": 5.891408163265306e-05, + "loss": 1.3992, + "step": 211400 + }, + { + "epoch": 4.48, + "learning_rate": 5.8893673469387754e-05, + "loss": 1.4015, + "step": 211500 + }, + { + "epoch": 4.48, + "learning_rate": 5.887326530612245e-05, + "loss": 1.404, + "step": 211600 + }, + { + "epoch": 4.48, + "learning_rate": 5.8852857142857144e-05, + "loss": 1.4057, + "step": 211700 + }, + { + "epoch": 4.48, + "learning_rate": 5.8832448979591836e-05, + "loss": 1.4067, + "step": 211800 + }, + { + "epoch": 4.48, + "learning_rate": 5.881204081632653e-05, + "loss": 1.4045, + "step": 211900 + }, + { + "epoch": 4.49, + "learning_rate": 5.8791632653061226e-05, + "loss": 1.4099, + "step": 212000 + }, + { + "epoch": 4.49, + "learning_rate": 5.877122448979592e-05, + "loss": 1.405, + "step": 212100 + }, + { + "epoch": 4.49, + "learning_rate": 5.875081632653061e-05, + "loss": 1.4077, + "step": 212200 + }, + { + "epoch": 4.49, + "learning_rate": 5.873040816326531e-05, + "loss": 1.4061, + "step": 212300 + }, + { + "epoch": 4.5, + "learning_rate": 5.871e-05, + "loss": 1.4098, + "step": 212400 + }, + { + "epoch": 4.5, + "learning_rate": 5.868959183673469e-05, + "loss": 1.4043, + "step": 212500 + }, + { + "epoch": 4.5, + "learning_rate": 5.866918367346939e-05, + "loss": 1.3997, + "step": 212600 + }, + { + "epoch": 4.5, + "learning_rate": 5.864877551020408e-05, + "loss": 1.4013, + "step": 212700 + }, + { + "epoch": 4.5, + "learning_rate": 5.862836734693877e-05, + "loss": 1.401, + "step": 212800 + }, + { + "epoch": 4.51, + "learning_rate": 5.860795918367348e-05, + "loss": 1.4035, + "step": 212900 + }, + { + "epoch": 4.51, + "learning_rate": 5.858755102040817e-05, + "loss": 1.4077, + "step": 213000 + }, + { + "epoch": 4.51, + "learning_rate": 5.856714285714287e-05, + "loss": 1.411, + "step": 213100 + }, + { + "epoch": 4.51, + "learning_rate": 5.854673469387756e-05, + "loss": 1.3997, + "step": 213200 + }, + { + "epoch": 4.51, + "learning_rate": 5.852632653061225e-05, + "loss": 1.4058, + "step": 213300 + }, + { + "epoch": 4.52, + "learning_rate": 5.850591836734695e-05, + "loss": 1.4071, + "step": 213400 + }, + { + "epoch": 4.52, + "learning_rate": 5.848551020408164e-05, + "loss": 1.4006, + "step": 213500 + }, + { + "epoch": 4.52, + "learning_rate": 5.846510204081633e-05, + "loss": 1.4025, + "step": 213600 + }, + { + "epoch": 4.52, + "learning_rate": 5.844469387755103e-05, + "loss": 1.4034, + "step": 213700 + }, + { + "epoch": 4.52, + "learning_rate": 5.842428571428572e-05, + "loss": 1.3961, + "step": 213800 + }, + { + "epoch": 4.53, + "learning_rate": 5.840408163265306e-05, + "loss": 1.3938, + "step": 213900 + }, + { + "epoch": 4.53, + "learning_rate": 5.8383673469387754e-05, + "loss": 1.404, + "step": 214000 + }, + { + "epoch": 4.53, + "learning_rate": 5.836326530612245e-05, + "loss": 1.4045, + "step": 214100 + }, + { + "epoch": 4.53, + "learning_rate": 5.8342857142857144e-05, + "loss": 1.4049, + "step": 214200 + }, + { + "epoch": 4.54, + "learning_rate": 5.8322448979591835e-05, + "loss": 1.4032, + "step": 214300 + }, + { + "epoch": 4.54, + "learning_rate": 5.8302040816326534e-05, + "loss": 1.4043, + "step": 214400 + }, + { + "epoch": 4.54, + "learning_rate": 5.8281632653061225e-05, + "loss": 1.4072, + "step": 214500 + }, + { + "epoch": 4.54, + "learning_rate": 5.826122448979592e-05, + "loss": 1.3978, + "step": 214600 + }, + { + "epoch": 4.54, + "learning_rate": 5.824081632653061e-05, + "loss": 1.4076, + "step": 214700 + }, + { + "epoch": 4.55, + "learning_rate": 5.822040816326531e-05, + "loss": 1.392, + "step": 214800 + }, + { + "epoch": 4.55, + "learning_rate": 5.82e-05, + "loss": 1.3962, + "step": 214900 + }, + { + "epoch": 4.55, + "learning_rate": 5.817959183673469e-05, + "loss": 1.3993, + "step": 215000 + }, + { + "epoch": 4.55, + "learning_rate": 5.815918367346939e-05, + "loss": 1.3982, + "step": 215100 + }, + { + "epoch": 4.55, + "learning_rate": 5.813877551020408e-05, + "loss": 1.3922, + "step": 215200 + }, + { + "epoch": 4.56, + "learning_rate": 5.811836734693877e-05, + "loss": 1.397, + "step": 215300 + }, + { + "epoch": 4.56, + "learning_rate": 5.809795918367347e-05, + "loss": 1.4086, + "step": 215400 + }, + { + "epoch": 4.56, + "learning_rate": 5.807755102040816e-05, + "loss": 1.4039, + "step": 215500 + }, + { + "epoch": 4.56, + "learning_rate": 5.805714285714285e-05, + "loss": 1.3971, + "step": 215600 + }, + { + "epoch": 4.57, + "learning_rate": 5.803673469387756e-05, + "loss": 1.3937, + "step": 215700 + }, + { + "epoch": 4.57, + "learning_rate": 5.801632653061225e-05, + "loss": 1.3922, + "step": 215800 + }, + { + "epoch": 4.57, + "learning_rate": 5.799591836734695e-05, + "loss": 1.3989, + "step": 215900 + }, + { + "epoch": 4.57, + "learning_rate": 5.797551020408164e-05, + "loss": 1.4012, + "step": 216000 + }, + { + "epoch": 4.57, + "learning_rate": 5.795510204081633e-05, + "loss": 1.4098, + "step": 216100 + }, + { + "epoch": 4.58, + "learning_rate": 5.793469387755103e-05, + "loss": 1.4027, + "step": 216200 + }, + { + "epoch": 4.58, + "learning_rate": 5.791428571428572e-05, + "loss": 1.3988, + "step": 216300 + }, + { + "epoch": 4.58, + "learning_rate": 5.789387755102041e-05, + "loss": 1.4035, + "step": 216400 + }, + { + "epoch": 4.58, + "learning_rate": 5.787346938775511e-05, + "loss": 1.3945, + "step": 216500 + }, + { + "epoch": 4.58, + "learning_rate": 5.78530612244898e-05, + "loss": 1.3958, + "step": 216600 + }, + { + "epoch": 4.59, + "learning_rate": 5.7832653061224494e-05, + "loss": 1.4012, + "step": 216700 + }, + { + "epoch": 4.59, + "learning_rate": 5.7812244897959186e-05, + "loss": 1.3956, + "step": 216800 + }, + { + "epoch": 4.59, + "learning_rate": 5.7791836734693884e-05, + "loss": 1.397, + "step": 216900 + }, + { + "epoch": 4.59, + "learning_rate": 5.7771428571428576e-05, + "loss": 1.4022, + "step": 217000 + }, + { + "epoch": 4.59, + "learning_rate": 5.775102040816327e-05, + "loss": 1.3978, + "step": 217100 + }, + { + "epoch": 4.6, + "learning_rate": 5.7730612244897966e-05, + "loss": 1.3999, + "step": 217200 + }, + { + "epoch": 4.6, + "learning_rate": 5.771020408163266e-05, + "loss": 1.4066, + "step": 217300 + }, + { + "epoch": 4.6, + "learning_rate": 5.769e-05, + "loss": 1.3896, + "step": 217400 + }, + { + "epoch": 4.6, + "learning_rate": 5.7669591836734697e-05, + "loss": 1.4034, + "step": 217500 + }, + { + "epoch": 4.61, + "learning_rate": 5.764918367346939e-05, + "loss": 1.3997, + "step": 217600 + }, + { + "epoch": 4.61, + "learning_rate": 5.762877551020408e-05, + "loss": 1.3975, + "step": 217700 + }, + { + "epoch": 4.61, + "learning_rate": 5.760836734693877e-05, + "loss": 1.3996, + "step": 217800 + }, + { + "epoch": 4.61, + "learning_rate": 5.758795918367347e-05, + "loss": 1.3945, + "step": 217900 + }, + { + "epoch": 4.61, + "learning_rate": 5.756755102040816e-05, + "loss": 1.3963, + "step": 218000 + }, + { + "epoch": 4.62, + "learning_rate": 5.754714285714285e-05, + "loss": 1.3992, + "step": 218100 + }, + { + "epoch": 4.62, + "learning_rate": 5.752673469387755e-05, + "loss": 1.4032, + "step": 218200 + }, + { + "epoch": 4.62, + "learning_rate": 5.750632653061224e-05, + "loss": 1.3951, + "step": 218300 + }, + { + "epoch": 4.62, + "learning_rate": 5.748591836734695e-05, + "loss": 1.4036, + "step": 218400 + }, + { + "epoch": 4.62, + "learning_rate": 5.746551020408164e-05, + "loss": 1.3971, + "step": 218500 + }, + { + "epoch": 4.63, + "learning_rate": 5.744510204081633e-05, + "loss": 1.4007, + "step": 218600 + }, + { + "epoch": 4.63, + "learning_rate": 5.742469387755103e-05, + "loss": 1.3972, + "step": 218700 + }, + { + "epoch": 4.63, + "learning_rate": 5.740428571428572e-05, + "loss": 1.4015, + "step": 218800 + }, + { + "epoch": 4.63, + "learning_rate": 5.738387755102041e-05, + "loss": 1.4013, + "step": 218900 + }, + { + "epoch": 4.63, + "learning_rate": 5.736346938775511e-05, + "loss": 1.3963, + "step": 219000 + }, + { + "epoch": 4.64, + "learning_rate": 5.73430612244898e-05, + "loss": 1.3968, + "step": 219100 + }, + { + "epoch": 4.64, + "learning_rate": 5.7322653061224494e-05, + "loss": 1.39, + "step": 219200 + }, + { + "epoch": 4.64, + "learning_rate": 5.730244897959184e-05, + "loss": 1.3921, + "step": 219300 + }, + { + "epoch": 4.64, + "learning_rate": 5.728204081632653e-05, + "loss": 1.3942, + "step": 219400 + }, + { + "epoch": 4.65, + "learning_rate": 5.7261632653061225e-05, + "loss": 1.3987, + "step": 219500 + }, + { + "epoch": 4.65, + "learning_rate": 5.7241224489795916e-05, + "loss": 1.3988, + "step": 219600 + }, + { + "epoch": 4.65, + "learning_rate": 5.7220816326530615e-05, + "loss": 1.3955, + "step": 219700 + }, + { + "epoch": 4.65, + "learning_rate": 5.7200408163265306e-05, + "loss": 1.3946, + "step": 219800 + }, + { + "epoch": 4.65, + "learning_rate": 5.718e-05, + "loss": 1.4045, + "step": 219900 + }, + { + "epoch": 4.66, + "learning_rate": 5.7159591836734696e-05, + "loss": 1.3956, + "step": 220000 + }, + { + "epoch": 4.66, + "learning_rate": 5.713918367346939e-05, + "loss": 1.3978, + "step": 220100 + }, + { + "epoch": 4.66, + "learning_rate": 5.711877551020408e-05, + "loss": 1.3982, + "step": 220200 + }, + { + "epoch": 4.66, + "learning_rate": 5.709836734693878e-05, + "loss": 1.3949, + "step": 220300 + }, + { + "epoch": 4.66, + "learning_rate": 5.707795918367347e-05, + "loss": 1.3947, + "step": 220400 + }, + { + "epoch": 4.67, + "learning_rate": 5.705755102040816e-05, + "loss": 1.4004, + "step": 220500 + }, + { + "epoch": 4.67, + "learning_rate": 5.703714285714285e-05, + "loss": 1.4058, + "step": 220600 + }, + { + "epoch": 4.67, + "learning_rate": 5.701673469387755e-05, + "loss": 1.4008, + "step": 220700 + }, + { + "epoch": 4.67, + "learning_rate": 5.699632653061224e-05, + "loss": 1.3931, + "step": 220800 + }, + { + "epoch": 4.68, + "learning_rate": 5.6975918367346934e-05, + "loss": 1.3989, + "step": 220900 + }, + { + "epoch": 4.68, + "learning_rate": 5.695551020408163e-05, + "loss": 1.3918, + "step": 221000 + }, + { + "epoch": 4.68, + "learning_rate": 5.6935102040816324e-05, + "loss": 1.3937, + "step": 221100 + }, + { + "epoch": 4.68, + "learning_rate": 5.691469387755103e-05, + "loss": 1.3974, + "step": 221200 + }, + { + "epoch": 4.68, + "learning_rate": 5.689428571428572e-05, + "loss": 1.3933, + "step": 221300 + }, + { + "epoch": 4.69, + "learning_rate": 5.687387755102041e-05, + "loss": 1.3934, + "step": 221400 + }, + { + "epoch": 4.69, + "learning_rate": 5.685367346938776e-05, + "loss": 1.3922, + "step": 221500 + }, + { + "epoch": 4.69, + "learning_rate": 5.683326530612245e-05, + "loss": 1.3911, + "step": 221600 + }, + { + "epoch": 4.69, + "learning_rate": 5.681285714285714e-05, + "loss": 1.3908, + "step": 221700 + }, + { + "epoch": 4.69, + "learning_rate": 5.679244897959184e-05, + "loss": 1.3965, + "step": 221800 + }, + { + "epoch": 4.7, + "learning_rate": 5.677204081632653e-05, + "loss": 1.4021, + "step": 221900 + }, + { + "epoch": 4.7, + "learning_rate": 5.6751632653061225e-05, + "loss": 1.3955, + "step": 222000 + }, + { + "epoch": 4.7, + "learning_rate": 5.673122448979592e-05, + "loss": 1.3934, + "step": 222100 + }, + { + "epoch": 4.7, + "learning_rate": 5.6710816326530615e-05, + "loss": 1.3968, + "step": 222200 + }, + { + "epoch": 4.7, + "learning_rate": 5.6690408163265306e-05, + "loss": 1.3932, + "step": 222300 + }, + { + "epoch": 4.71, + "learning_rate": 5.667e-05, + "loss": 1.3976, + "step": 222400 + }, + { + "epoch": 4.71, + "learning_rate": 5.6649591836734696e-05, + "loss": 1.3959, + "step": 222500 + }, + { + "epoch": 4.71, + "learning_rate": 5.662918367346939e-05, + "loss": 1.392, + "step": 222600 + }, + { + "epoch": 4.71, + "learning_rate": 5.660877551020408e-05, + "loss": 1.3955, + "step": 222700 + }, + { + "epoch": 4.72, + "learning_rate": 5.658836734693878e-05, + "loss": 1.3874, + "step": 222800 + }, + { + "epoch": 4.72, + "learning_rate": 5.656795918367347e-05, + "loss": 1.3968, + "step": 222900 + }, + { + "epoch": 4.72, + "learning_rate": 5.654755102040816e-05, + "loss": 1.3904, + "step": 223000 + }, + { + "epoch": 4.72, + "learning_rate": 5.652714285714286e-05, + "loss": 1.3912, + "step": 223100 + }, + { + "epoch": 4.72, + "learning_rate": 5.650673469387755e-05, + "loss": 1.3996, + "step": 223200 + }, + { + "epoch": 4.73, + "learning_rate": 5.648632653061224e-05, + "loss": 1.3897, + "step": 223300 + }, + { + "epoch": 4.73, + "learning_rate": 5.6465918367346934e-05, + "loss": 1.3922, + "step": 223400 + }, + { + "epoch": 4.73, + "learning_rate": 5.644551020408163e-05, + "loss": 1.4015, + "step": 223500 + }, + { + "epoch": 4.73, + "learning_rate": 5.6425102040816324e-05, + "loss": 1.3882, + "step": 223600 + }, + { + "epoch": 4.73, + "learning_rate": 5.6404693877551016e-05, + "loss": 1.3984, + "step": 223700 + }, + { + "epoch": 4.74, + "learning_rate": 5.6384285714285714e-05, + "loss": 1.3913, + "step": 223800 + }, + { + "epoch": 4.74, + "learning_rate": 5.6363877551020405e-05, + "loss": 1.3874, + "step": 223900 + }, + { + "epoch": 4.74, + "learning_rate": 5.634346938775511e-05, + "loss": 1.393, + "step": 224000 + }, + { + "epoch": 4.74, + "learning_rate": 5.63230612244898e-05, + "loss": 1.3916, + "step": 224100 + }, + { + "epoch": 4.74, + "learning_rate": 5.63026530612245e-05, + "loss": 1.391, + "step": 224200 + }, + { + "epoch": 4.75, + "learning_rate": 5.628224489795919e-05, + "loss": 1.3869, + "step": 224300 + }, + { + "epoch": 4.75, + "learning_rate": 5.6261836734693884e-05, + "loss": 1.3942, + "step": 224400 + }, + { + "epoch": 4.75, + "learning_rate": 5.6241428571428575e-05, + "loss": 1.3863, + "step": 224500 + }, + { + "epoch": 4.75, + "learning_rate": 5.6221020408163274e-05, + "loss": 1.3916, + "step": 224600 + }, + { + "epoch": 4.76, + "learning_rate": 5.6200612244897965e-05, + "loss": 1.3891, + "step": 224700 + }, + { + "epoch": 4.76, + "learning_rate": 5.618020408163266e-05, + "loss": 1.3901, + "step": 224800 + }, + { + "epoch": 4.76, + "learning_rate": 5.6159795918367355e-05, + "loss": 1.3934, + "step": 224900 + }, + { + "epoch": 4.76, + "learning_rate": 5.613938775510205e-05, + "loss": 1.3956, + "step": 225000 + }, + { + "epoch": 4.76, + "learning_rate": 5.611897959183674e-05, + "loss": 1.3877, + "step": 225100 + }, + { + "epoch": 4.77, + "learning_rate": 5.609857142857144e-05, + "loss": 1.3958, + "step": 225200 + }, + { + "epoch": 4.77, + "learning_rate": 5.607816326530613e-05, + "loss": 1.3907, + "step": 225300 + }, + { + "epoch": 4.77, + "learning_rate": 5.605775510204082e-05, + "loss": 1.3949, + "step": 225400 + }, + { + "epoch": 4.77, + "learning_rate": 5.603734693877551e-05, + "loss": 1.3919, + "step": 225500 + }, + { + "epoch": 4.77, + "learning_rate": 5.601693877551021e-05, + "loss": 1.3983, + "step": 225600 + }, + { + "epoch": 4.78, + "learning_rate": 5.59965306122449e-05, + "loss": 1.3869, + "step": 225700 + }, + { + "epoch": 4.78, + "learning_rate": 5.597612244897959e-05, + "loss": 1.3941, + "step": 225800 + }, + { + "epoch": 4.78, + "learning_rate": 5.595571428571429e-05, + "loss": 1.3953, + "step": 225900 + }, + { + "epoch": 4.78, + "learning_rate": 5.593551020408163e-05, + "loss": 1.3968, + "step": 226000 + }, + { + "epoch": 4.79, + "learning_rate": 5.5915102040816324e-05, + "loss": 1.3905, + "step": 226100 + }, + { + "epoch": 4.79, + "learning_rate": 5.589469387755102e-05, + "loss": 1.3922, + "step": 226200 + }, + { + "epoch": 4.79, + "learning_rate": 5.5874285714285714e-05, + "loss": 1.389, + "step": 226300 + }, + { + "epoch": 4.79, + "learning_rate": 5.5853877551020405e-05, + "loss": 1.3823, + "step": 226400 + }, + { + "epoch": 4.79, + "learning_rate": 5.58334693877551e-05, + "loss": 1.3936, + "step": 226500 + }, + { + "epoch": 4.8, + "learning_rate": 5.5813061224489795e-05, + "loss": 1.3956, + "step": 226600 + }, + { + "epoch": 4.8, + "learning_rate": 5.57926530612245e-05, + "loss": 1.3901, + "step": 226700 + }, + { + "epoch": 4.8, + "learning_rate": 5.577224489795919e-05, + "loss": 1.3857, + "step": 226800 + }, + { + "epoch": 4.8, + "learning_rate": 5.5751836734693884e-05, + "loss": 1.3895, + "step": 226900 + }, + { + "epoch": 4.8, + "learning_rate": 5.573142857142858e-05, + "loss": 1.3905, + "step": 227000 + }, + { + "epoch": 4.81, + "learning_rate": 5.5711020408163273e-05, + "loss": 1.3901, + "step": 227100 + }, + { + "epoch": 4.81, + "learning_rate": 5.5690612244897965e-05, + "loss": 1.3913, + "step": 227200 + }, + { + "epoch": 4.81, + "learning_rate": 5.567020408163266e-05, + "loss": 1.3875, + "step": 227300 + }, + { + "epoch": 4.81, + "learning_rate": 5.5649795918367355e-05, + "loss": 1.382, + "step": 227400 + }, + { + "epoch": 4.81, + "learning_rate": 5.5629387755102047e-05, + "loss": 1.3959, + "step": 227500 + }, + { + "epoch": 4.82, + "learning_rate": 5.560897959183674e-05, + "loss": 1.386, + "step": 227600 + }, + { + "epoch": 4.82, + "learning_rate": 5.5588571428571437e-05, + "loss": 1.3884, + "step": 227700 + }, + { + "epoch": 4.82, + "learning_rate": 5.556816326530613e-05, + "loss": 1.3885, + "step": 227800 + }, + { + "epoch": 4.82, + "learning_rate": 5.554775510204082e-05, + "loss": 1.3945, + "step": 227900 + }, + { + "epoch": 4.83, + "learning_rate": 5.552734693877552e-05, + "loss": 1.3819, + "step": 228000 + }, + { + "epoch": 4.83, + "learning_rate": 5.550693877551021e-05, + "loss": 1.3916, + "step": 228100 + }, + { + "epoch": 4.83, + "learning_rate": 5.54865306122449e-05, + "loss": 1.3913, + "step": 228200 + }, + { + "epoch": 4.83, + "learning_rate": 5.546612244897959e-05, + "loss": 1.3866, + "step": 228300 + }, + { + "epoch": 4.83, + "learning_rate": 5.544591836734694e-05, + "loss": 1.3878, + "step": 228400 + }, + { + "epoch": 4.84, + "learning_rate": 5.542551020408163e-05, + "loss": 1.3955, + "step": 228500 + }, + { + "epoch": 4.84, + "learning_rate": 5.5405102040816324e-05, + "loss": 1.3854, + "step": 228600 + }, + { + "epoch": 4.84, + "learning_rate": 5.538469387755102e-05, + "loss": 1.3889, + "step": 228700 + }, + { + "epoch": 4.84, + "learning_rate": 5.5364285714285713e-05, + "loss": 1.3907, + "step": 228800 + }, + { + "epoch": 4.84, + "learning_rate": 5.5343877551020405e-05, + "loss": 1.3943, + "step": 228900 + }, + { + "epoch": 4.85, + "learning_rate": 5.5323469387755103e-05, + "loss": 1.3867, + "step": 229000 + }, + { + "epoch": 4.85, + "learning_rate": 5.5303061224489795e-05, + "loss": 1.3864, + "step": 229100 + }, + { + "epoch": 4.85, + "learning_rate": 5.5282653061224487e-05, + "loss": 1.3888, + "step": 229200 + }, + { + "epoch": 4.85, + "learning_rate": 5.526224489795918e-05, + "loss": 1.3842, + "step": 229300 + }, + { + "epoch": 4.86, + "learning_rate": 5.5241836734693877e-05, + "loss": 1.3886, + "step": 229400 + }, + { + "epoch": 4.86, + "learning_rate": 5.522142857142858e-05, + "loss": 1.3955, + "step": 229500 + }, + { + "epoch": 4.86, + "learning_rate": 5.520122448979592e-05, + "loss": 1.3783, + "step": 229600 + }, + { + "epoch": 4.86, + "learning_rate": 5.5180816326530614e-05, + "loss": 1.3809, + "step": 229700 + }, + { + "epoch": 4.86, + "learning_rate": 5.516040816326531e-05, + "loss": 1.386, + "step": 229800 + }, + { + "epoch": 4.87, + "learning_rate": 5.5140000000000004e-05, + "loss": 1.3852, + "step": 229900 + }, + { + "epoch": 4.87, + "learning_rate": 5.5119591836734696e-05, + "loss": 1.3826, + "step": 230000 + }, + { + "epoch": 4.87, + "learning_rate": 5.509918367346939e-05, + "loss": 1.3838, + "step": 230100 + }, + { + "epoch": 4.87, + "learning_rate": 5.5078775510204086e-05, + "loss": 1.3899, + "step": 230200 + }, + { + "epoch": 4.87, + "learning_rate": 5.505836734693878e-05, + "loss": 1.3862, + "step": 230300 + }, + { + "epoch": 4.88, + "learning_rate": 5.503795918367347e-05, + "loss": 1.388, + "step": 230400 + }, + { + "epoch": 4.88, + "learning_rate": 5.501755102040817e-05, + "loss": 1.3864, + "step": 230500 + }, + { + "epoch": 4.88, + "learning_rate": 5.499714285714286e-05, + "loss": 1.3791, + "step": 230600 + }, + { + "epoch": 4.88, + "learning_rate": 5.497673469387755e-05, + "loss": 1.3915, + "step": 230700 + }, + { + "epoch": 4.88, + "learning_rate": 5.495632653061225e-05, + "loss": 1.3864, + "step": 230800 + }, + { + "epoch": 4.89, + "learning_rate": 5.493591836734694e-05, + "loss": 1.3748, + "step": 230900 + }, + { + "epoch": 4.89, + "learning_rate": 5.491551020408163e-05, + "loss": 1.3906, + "step": 231000 + }, + { + "epoch": 4.89, + "learning_rate": 5.489510204081632e-05, + "loss": 1.3836, + "step": 231100 + }, + { + "epoch": 4.89, + "learning_rate": 5.487469387755102e-05, + "loss": 1.3837, + "step": 231200 + }, + { + "epoch": 4.9, + "learning_rate": 5.485428571428571e-05, + "loss": 1.3869, + "step": 231300 + }, + { + "epoch": 4.9, + "learning_rate": 5.4833877551020405e-05, + "loss": 1.3804, + "step": 231400 + }, + { + "epoch": 4.9, + "learning_rate": 5.48134693877551e-05, + "loss": 1.3866, + "step": 231500 + }, + { + "epoch": 4.9, + "learning_rate": 5.4793061224489795e-05, + "loss": 1.3783, + "step": 231600 + }, + { + "epoch": 4.9, + "learning_rate": 5.4772653061224486e-05, + "loss": 1.3925, + "step": 231700 + }, + { + "epoch": 4.91, + "learning_rate": 5.4752244897959185e-05, + "loss": 1.3832, + "step": 231800 + }, + { + "epoch": 4.91, + "learning_rate": 5.4731836734693876e-05, + "loss": 1.387, + "step": 231900 + }, + { + "epoch": 4.91, + "learning_rate": 5.471163265306123e-05, + "loss": 1.3928, + "step": 232000 + }, + { + "epoch": 4.91, + "learning_rate": 5.469122448979592e-05, + "loss": 1.3731, + "step": 232100 + }, + { + "epoch": 4.91, + "learning_rate": 5.4670816326530614e-05, + "loss": 1.3833, + "step": 232200 + }, + { + "epoch": 4.92, + "learning_rate": 5.465040816326531e-05, + "loss": 1.3912, + "step": 232300 + }, + { + "epoch": 4.92, + "learning_rate": 5.4630000000000004e-05, + "loss": 1.3806, + "step": 232400 + }, + { + "epoch": 4.92, + "learning_rate": 5.4609591836734695e-05, + "loss": 1.3874, + "step": 232500 + }, + { + "epoch": 4.92, + "learning_rate": 5.4589183673469394e-05, + "loss": 1.3715, + "step": 232600 + }, + { + "epoch": 4.92, + "learning_rate": 5.4568775510204085e-05, + "loss": 1.394, + "step": 232700 + }, + { + "epoch": 4.93, + "learning_rate": 5.454836734693878e-05, + "loss": 1.3899, + "step": 232800 + }, + { + "epoch": 4.93, + "learning_rate": 5.452795918367347e-05, + "loss": 1.388, + "step": 232900 + }, + { + "epoch": 4.93, + "learning_rate": 5.450755102040817e-05, + "loss": 1.3854, + "step": 233000 + }, + { + "epoch": 4.93, + "learning_rate": 5.448714285714286e-05, + "loss": 1.3906, + "step": 233100 + }, + { + "epoch": 4.94, + "learning_rate": 5.446673469387755e-05, + "loss": 1.3852, + "step": 233200 + }, + { + "epoch": 4.94, + "learning_rate": 5.444632653061225e-05, + "loss": 1.3783, + "step": 233300 + }, + { + "epoch": 4.94, + "learning_rate": 5.442591836734694e-05, + "loss": 1.3822, + "step": 233400 + }, + { + "epoch": 4.94, + "learning_rate": 5.440551020408163e-05, + "loss": 1.3801, + "step": 233500 + }, + { + "epoch": 4.94, + "learning_rate": 5.438510204081633e-05, + "loss": 1.3828, + "step": 233600 + }, + { + "epoch": 4.95, + "learning_rate": 5.436469387755102e-05, + "loss": 1.3793, + "step": 233700 + }, + { + "epoch": 4.95, + "learning_rate": 5.434428571428571e-05, + "loss": 1.3888, + "step": 233800 + }, + { + "epoch": 4.95, + "learning_rate": 5.4323877551020405e-05, + "loss": 1.3893, + "step": 233900 + }, + { + "epoch": 4.95, + "learning_rate": 5.43034693877551e-05, + "loss": 1.3863, + "step": 234000 + }, + { + "epoch": 4.95, + "learning_rate": 5.4283061224489795e-05, + "loss": 1.3819, + "step": 234100 + }, + { + "epoch": 4.96, + "learning_rate": 5.4262653061224486e-05, + "loss": 1.3797, + "step": 234200 + }, + { + "epoch": 4.96, + "learning_rate": 5.4242244897959185e-05, + "loss": 1.3859, + "step": 234300 + }, + { + "epoch": 4.96, + "learning_rate": 5.4221836734693876e-05, + "loss": 1.3799, + "step": 234400 + }, + { + "epoch": 4.96, + "learning_rate": 5.420142857142857e-05, + "loss": 1.3792, + "step": 234500 + }, + { + "epoch": 4.97, + "learning_rate": 5.4181020408163266e-05, + "loss": 1.3811, + "step": 234600 + }, + { + "epoch": 4.97, + "learning_rate": 5.416061224489796e-05, + "loss": 1.3802, + "step": 234700 + }, + { + "epoch": 4.97, + "learning_rate": 5.414020408163265e-05, + "loss": 1.3867, + "step": 234800 + }, + { + "epoch": 4.97, + "learning_rate": 5.411979591836734e-05, + "loss": 1.3771, + "step": 234900 + }, + { + "epoch": 4.97, + "learning_rate": 5.4099387755102046e-05, + "loss": 1.3859, + "step": 235000 + }, + { + "epoch": 4.98, + "learning_rate": 5.4078979591836744e-05, + "loss": 1.3873, + "step": 235100 + }, + { + "epoch": 4.98, + "learning_rate": 5.4058571428571436e-05, + "loss": 1.3838, + "step": 235200 + }, + { + "epoch": 4.98, + "learning_rate": 5.403816326530613e-05, + "loss": 1.3731, + "step": 235300 + }, + { + "epoch": 4.98, + "learning_rate": 5.4017755102040826e-05, + "loss": 1.3794, + "step": 235400 + }, + { + "epoch": 4.98, + "learning_rate": 5.399734693877552e-05, + "loss": 1.3826, + "step": 235500 + }, + { + "epoch": 4.99, + "learning_rate": 5.397693877551021e-05, + "loss": 1.3785, + "step": 235600 + }, + { + "epoch": 4.99, + "learning_rate": 5.395653061224491e-05, + "loss": 1.3761, + "step": 235700 + }, + { + "epoch": 4.99, + "learning_rate": 5.39361224489796e-05, + "loss": 1.3798, + "step": 235800 + }, + { + "epoch": 4.99, + "learning_rate": 5.391571428571429e-05, + "loss": 1.3788, + "step": 235900 + }, + { + "epoch": 4.99, + "learning_rate": 5.389530612244898e-05, + "loss": 1.3782, + "step": 236000 + }, + { + "epoch": 5.0, + "learning_rate": 5.387489795918368e-05, + "loss": 1.3784, + "step": 236100 + }, + { + "epoch": 5.0, + "learning_rate": 5.385448979591837e-05, + "loss": 1.379, + "step": 236200 + }, + { + "epoch": 5.0, + "learning_rate": 5.3834081632653064e-05, + "loss": 1.3728, + "step": 236300 + }, + { + "epoch": 5.0, + "learning_rate": 5.381367346938776e-05, + "loss": 1.3765, + "step": 236400 + }, + { + "epoch": 5.01, + "learning_rate": 5.3793265306122454e-05, + "loss": 1.3776, + "step": 236500 + }, + { + "epoch": 5.01, + "learning_rate": 5.3773061224489794e-05, + "loss": 1.3735, + "step": 236600 + }, + { + "epoch": 5.01, + "learning_rate": 5.375265306122449e-05, + "loss": 1.3798, + "step": 236700 + }, + { + "epoch": 5.01, + "learning_rate": 5.3732244897959184e-05, + "loss": 1.3693, + "step": 236800 + }, + { + "epoch": 5.01, + "learning_rate": 5.3711836734693876e-05, + "loss": 1.3738, + "step": 236900 + }, + { + "epoch": 5.02, + "learning_rate": 5.369142857142857e-05, + "loss": 1.3739, + "step": 237000 + }, + { + "epoch": 5.02, + "learning_rate": 5.3671020408163266e-05, + "loss": 1.3664, + "step": 237100 + }, + { + "epoch": 5.02, + "learning_rate": 5.365061224489796e-05, + "loss": 1.375, + "step": 237200 + }, + { + "epoch": 5.02, + "learning_rate": 5.363020408163265e-05, + "loss": 1.3725, + "step": 237300 + }, + { + "epoch": 5.02, + "learning_rate": 5.360979591836735e-05, + "loss": 1.3726, + "step": 237400 + }, + { + "epoch": 5.03, + "learning_rate": 5.358938775510204e-05, + "loss": 1.3735, + "step": 237500 + }, + { + "epoch": 5.03, + "learning_rate": 5.356897959183673e-05, + "loss": 1.3666, + "step": 237600 + }, + { + "epoch": 5.03, + "learning_rate": 5.354857142857143e-05, + "loss": 1.3782, + "step": 237700 + }, + { + "epoch": 5.03, + "learning_rate": 5.352816326530613e-05, + "loss": 1.3752, + "step": 237800 + }, + { + "epoch": 5.03, + "learning_rate": 5.3507755102040826e-05, + "loss": 1.3752, + "step": 237900 + }, + { + "epoch": 5.04, + "learning_rate": 5.348734693877552e-05, + "loss": 1.3802, + "step": 238000 + }, + { + "epoch": 5.04, + "learning_rate": 5.346693877551021e-05, + "loss": 1.3747, + "step": 238100 + }, + { + "epoch": 5.04, + "learning_rate": 5.344653061224491e-05, + "loss": 1.3736, + "step": 238200 + }, + { + "epoch": 5.04, + "learning_rate": 5.34261224489796e-05, + "loss": 1.3809, + "step": 238300 + }, + { + "epoch": 5.05, + "learning_rate": 5.340571428571429e-05, + "loss": 1.3719, + "step": 238400 + }, + { + "epoch": 5.05, + "learning_rate": 5.338530612244899e-05, + "loss": 1.384, + "step": 238500 + }, + { + "epoch": 5.05, + "learning_rate": 5.336489795918368e-05, + "loss": 1.3688, + "step": 238600 + }, + { + "epoch": 5.05, + "learning_rate": 5.334448979591837e-05, + "loss": 1.3824, + "step": 238700 + }, + { + "epoch": 5.05, + "learning_rate": 5.3324081632653063e-05, + "loss": 1.3654, + "step": 238800 + }, + { + "epoch": 5.06, + "learning_rate": 5.330367346938776e-05, + "loss": 1.3766, + "step": 238900 + }, + { + "epoch": 5.06, + "learning_rate": 5.3283265306122453e-05, + "loss": 1.3762, + "step": 239000 + }, + { + "epoch": 5.06, + "learning_rate": 5.3263061224489794e-05, + "loss": 1.3794, + "step": 239100 + }, + { + "epoch": 5.06, + "learning_rate": 5.324265306122449e-05, + "loss": 1.3763, + "step": 239200 + }, + { + "epoch": 5.06, + "learning_rate": 5.3222244897959184e-05, + "loss": 1.3725, + "step": 239300 + }, + { + "epoch": 5.07, + "learning_rate": 5.3201836734693876e-05, + "loss": 1.3763, + "step": 239400 + }, + { + "epoch": 5.07, + "learning_rate": 5.3181428571428574e-05, + "loss": 1.3667, + "step": 239500 + }, + { + "epoch": 5.07, + "learning_rate": 5.3161020408163266e-05, + "loss": 1.3746, + "step": 239600 + }, + { + "epoch": 5.07, + "learning_rate": 5.314061224489796e-05, + "loss": 1.3727, + "step": 239700 + }, + { + "epoch": 5.08, + "learning_rate": 5.312020408163265e-05, + "loss": 1.3799, + "step": 239800 + }, + { + "epoch": 5.08, + "learning_rate": 5.309979591836735e-05, + "loss": 1.3714, + "step": 239900 + }, + { + "epoch": 5.08, + "learning_rate": 5.307938775510204e-05, + "loss": 1.3683, + "step": 240000 + }, + { + "epoch": 5.08, + "learning_rate": 5.305897959183673e-05, + "loss": 1.3774, + "step": 240100 + }, + { + "epoch": 5.08, + "learning_rate": 5.303857142857143e-05, + "loss": 1.369, + "step": 240200 + }, + { + "epoch": 5.09, + "learning_rate": 5.301816326530612e-05, + "loss": 1.3806, + "step": 240300 + }, + { + "epoch": 5.09, + "learning_rate": 5.299775510204081e-05, + "loss": 1.3725, + "step": 240400 + }, + { + "epoch": 5.09, + "learning_rate": 5.297734693877551e-05, + "loss": 1.3733, + "step": 240500 + }, + { + "epoch": 5.09, + "learning_rate": 5.295693877551021e-05, + "loss": 1.383, + "step": 240600 + }, + { + "epoch": 5.09, + "learning_rate": 5.293653061224491e-05, + "loss": 1.3655, + "step": 240700 + }, + { + "epoch": 5.1, + "learning_rate": 5.29161224489796e-05, + "loss": 1.3752, + "step": 240800 + }, + { + "epoch": 5.1, + "learning_rate": 5.289571428571429e-05, + "loss": 1.3796, + "step": 240900 + }, + { + "epoch": 5.1, + "learning_rate": 5.287530612244899e-05, + "loss": 1.3732, + "step": 241000 + }, + { + "epoch": 5.1, + "learning_rate": 5.285489795918368e-05, + "loss": 1.3668, + "step": 241100 + }, + { + "epoch": 5.1, + "learning_rate": 5.283448979591837e-05, + "loss": 1.3679, + "step": 241200 + }, + { + "epoch": 5.11, + "learning_rate": 5.281408163265307e-05, + "loss": 1.376, + "step": 241300 + }, + { + "epoch": 5.11, + "learning_rate": 5.279367346938776e-05, + "loss": 1.3758, + "step": 241400 + }, + { + "epoch": 5.11, + "learning_rate": 5.277326530612245e-05, + "loss": 1.3716, + "step": 241500 + }, + { + "epoch": 5.11, + "learning_rate": 5.275285714285715e-05, + "loss": 1.3811, + "step": 241600 + }, + { + "epoch": 5.12, + "learning_rate": 5.273244897959184e-05, + "loss": 1.3784, + "step": 241700 + }, + { + "epoch": 5.12, + "learning_rate": 5.2712040816326535e-05, + "loss": 1.3682, + "step": 241800 + }, + { + "epoch": 5.12, + "learning_rate": 5.2691632653061226e-05, + "loss": 1.3745, + "step": 241900 + }, + { + "epoch": 5.12, + "learning_rate": 5.2671224489795925e-05, + "loss": 1.37, + "step": 242000 + }, + { + "epoch": 5.12, + "learning_rate": 5.2650816326530616e-05, + "loss": 1.3733, + "step": 242100 + }, + { + "epoch": 5.13, + "learning_rate": 5.263040816326531e-05, + "loss": 1.3629, + "step": 242200 + }, + { + "epoch": 5.13, + "learning_rate": 5.2610000000000006e-05, + "loss": 1.3735, + "step": 242300 + }, + { + "epoch": 5.13, + "learning_rate": 5.258979591836735e-05, + "loss": 1.368, + "step": 242400 + }, + { + "epoch": 5.13, + "learning_rate": 5.256938775510204e-05, + "loss": 1.3758, + "step": 242500 + }, + { + "epoch": 5.13, + "learning_rate": 5.254897959183673e-05, + "loss": 1.3735, + "step": 242600 + }, + { + "epoch": 5.14, + "learning_rate": 5.252857142857143e-05, + "loss": 1.3688, + "step": 242700 + }, + { + "epoch": 5.14, + "learning_rate": 5.250816326530612e-05, + "loss": 1.3777, + "step": 242800 + }, + { + "epoch": 5.14, + "learning_rate": 5.248775510204081e-05, + "loss": 1.3649, + "step": 242900 + }, + { + "epoch": 5.14, + "learning_rate": 5.246734693877551e-05, + "loss": 1.3765, + "step": 243000 + }, + { + "epoch": 5.14, + "learning_rate": 5.24469387755102e-05, + "loss": 1.3756, + "step": 243100 + }, + { + "epoch": 5.15, + "learning_rate": 5.242653061224489e-05, + "loss": 1.3668, + "step": 243200 + }, + { + "epoch": 5.15, + "learning_rate": 5.24061224489796e-05, + "loss": 1.37, + "step": 243300 + }, + { + "epoch": 5.15, + "learning_rate": 5.23857142857143e-05, + "loss": 1.3694, + "step": 243400 + }, + { + "epoch": 5.15, + "learning_rate": 5.236530612244899e-05, + "loss": 1.3737, + "step": 243500 + }, + { + "epoch": 5.16, + "learning_rate": 5.234489795918368e-05, + "loss": 1.3688, + "step": 243600 + }, + { + "epoch": 5.16, + "learning_rate": 5.232448979591837e-05, + "loss": 1.3714, + "step": 243700 + }, + { + "epoch": 5.16, + "learning_rate": 5.230408163265307e-05, + "loss": 1.3711, + "step": 243800 + }, + { + "epoch": 5.16, + "learning_rate": 5.228367346938776e-05, + "loss": 1.3686, + "step": 243900 + }, + { + "epoch": 5.16, + "learning_rate": 5.226326530612245e-05, + "loss": 1.3589, + "step": 244000 + }, + { + "epoch": 5.17, + "learning_rate": 5.224285714285715e-05, + "loss": 1.3818, + "step": 244100 + }, + { + "epoch": 5.17, + "learning_rate": 5.222244897959184e-05, + "loss": 1.3743, + "step": 244200 + }, + { + "epoch": 5.17, + "learning_rate": 5.2202040816326535e-05, + "loss": 1.3707, + "step": 244300 + }, + { + "epoch": 5.17, + "learning_rate": 5.2181836734693875e-05, + "loss": 1.3706, + "step": 244400 + }, + { + "epoch": 5.17, + "learning_rate": 5.2161428571428574e-05, + "loss": 1.3684, + "step": 244500 + }, + { + "epoch": 5.18, + "learning_rate": 5.2141020408163265e-05, + "loss": 1.3695, + "step": 244600 + }, + { + "epoch": 5.18, + "learning_rate": 5.212061224489796e-05, + "loss": 1.3676, + "step": 244700 + }, + { + "epoch": 5.18, + "learning_rate": 5.2100204081632655e-05, + "loss": 1.3762, + "step": 244800 + }, + { + "epoch": 5.18, + "learning_rate": 5.207979591836735e-05, + "loss": 1.3768, + "step": 244900 + }, + { + "epoch": 5.19, + "learning_rate": 5.205938775510204e-05, + "loss": 1.3673, + "step": 245000 + }, + { + "epoch": 5.19, + "learning_rate": 5.203897959183674e-05, + "loss": 1.3667, + "step": 245100 + }, + { + "epoch": 5.19, + "learning_rate": 5.201857142857143e-05, + "loss": 1.3749, + "step": 245200 + }, + { + "epoch": 5.19, + "learning_rate": 5.199816326530612e-05, + "loss": 1.3715, + "step": 245300 + }, + { + "epoch": 5.19, + "learning_rate": 5.197775510204081e-05, + "loss": 1.3654, + "step": 245400 + }, + { + "epoch": 5.2, + "learning_rate": 5.195734693877551e-05, + "loss": 1.372, + "step": 245500 + }, + { + "epoch": 5.2, + "learning_rate": 5.19369387755102e-05, + "loss": 1.3669, + "step": 245600 + }, + { + "epoch": 5.2, + "learning_rate": 5.191653061224489e-05, + "loss": 1.3771, + "step": 245700 + }, + { + "epoch": 5.2, + "learning_rate": 5.189612244897959e-05, + "loss": 1.3663, + "step": 245800 + }, + { + "epoch": 5.2, + "learning_rate": 5.187571428571428e-05, + "loss": 1.371, + "step": 245900 + }, + { + "epoch": 5.21, + "learning_rate": 5.1855306122448975e-05, + "loss": 1.3738, + "step": 246000 + }, + { + "epoch": 5.21, + "learning_rate": 5.183489795918368e-05, + "loss": 1.3744, + "step": 246100 + }, + { + "epoch": 5.21, + "learning_rate": 5.181448979591838e-05, + "loss": 1.3747, + "step": 246200 + }, + { + "epoch": 5.21, + "learning_rate": 5.179428571428572e-05, + "loss": 1.3614, + "step": 246300 + }, + { + "epoch": 5.21, + "learning_rate": 5.177387755102041e-05, + "loss": 1.3677, + "step": 246400 + }, + { + "epoch": 5.22, + "learning_rate": 5.17534693877551e-05, + "loss": 1.3644, + "step": 246500 + }, + { + "epoch": 5.22, + "learning_rate": 5.17330612244898e-05, + "loss": 1.3696, + "step": 246600 + }, + { + "epoch": 5.22, + "learning_rate": 5.171265306122449e-05, + "loss": 1.3718, + "step": 246700 + }, + { + "epoch": 5.22, + "learning_rate": 5.1692244897959184e-05, + "loss": 1.3666, + "step": 246800 + }, + { + "epoch": 5.23, + "learning_rate": 5.167183673469388e-05, + "loss": 1.3634, + "step": 246900 + }, + { + "epoch": 5.23, + "learning_rate": 5.1651428571428574e-05, + "loss": 1.3669, + "step": 247000 + }, + { + "epoch": 5.23, + "learning_rate": 5.1631020408163265e-05, + "loss": 1.3611, + "step": 247100 + }, + { + "epoch": 5.23, + "learning_rate": 5.1610612244897963e-05, + "loss": 1.3688, + "step": 247200 + }, + { + "epoch": 5.23, + "learning_rate": 5.1590204081632655e-05, + "loss": 1.3583, + "step": 247300 + }, + { + "epoch": 5.24, + "learning_rate": 5.156979591836735e-05, + "loss": 1.3703, + "step": 247400 + }, + { + "epoch": 5.24, + "learning_rate": 5.154938775510204e-05, + "loss": 1.374, + "step": 247500 + }, + { + "epoch": 5.24, + "learning_rate": 5.1528979591836737e-05, + "loss": 1.3702, + "step": 247600 + }, + { + "epoch": 5.24, + "learning_rate": 5.150857142857143e-05, + "loss": 1.3673, + "step": 247700 + }, + { + "epoch": 5.24, + "learning_rate": 5.148816326530612e-05, + "loss": 1.375, + "step": 247800 + }, + { + "epoch": 5.25, + "learning_rate": 5.146775510204082e-05, + "loss": 1.3668, + "step": 247900 + }, + { + "epoch": 5.25, + "learning_rate": 5.144734693877551e-05, + "loss": 1.3645, + "step": 248000 + }, + { + "epoch": 5.25, + "learning_rate": 5.14269387755102e-05, + "loss": 1.37, + "step": 248100 + }, + { + "epoch": 5.25, + "learning_rate": 5.14065306122449e-05, + "loss": 1.3684, + "step": 248200 + }, + { + "epoch": 5.26, + "learning_rate": 5.138612244897959e-05, + "loss": 1.3699, + "step": 248300 + }, + { + "epoch": 5.26, + "learning_rate": 5.136571428571428e-05, + "loss": 1.3612, + "step": 248400 + }, + { + "epoch": 5.26, + "learning_rate": 5.1345306122448974e-05, + "loss": 1.3729, + "step": 248500 + }, + { + "epoch": 5.26, + "learning_rate": 5.132489795918367e-05, + "loss": 1.3631, + "step": 248600 + }, + { + "epoch": 5.26, + "learning_rate": 5.1304489795918364e-05, + "loss": 1.3682, + "step": 248700 + }, + { + "epoch": 5.27, + "learning_rate": 5.1284081632653056e-05, + "loss": 1.3639, + "step": 248800 + }, + { + "epoch": 5.27, + "learning_rate": 5.126367346938776e-05, + "loss": 1.3699, + "step": 248900 + }, + { + "epoch": 5.27, + "learning_rate": 5.124326530612246e-05, + "loss": 1.3675, + "step": 249000 + }, + { + "epoch": 5.27, + "learning_rate": 5.122285714285715e-05, + "loss": 1.3771, + "step": 249100 + }, + { + "epoch": 5.27, + "learning_rate": 5.120244897959184e-05, + "loss": 1.3675, + "step": 249200 + }, + { + "epoch": 5.28, + "learning_rate": 5.1182040816326534e-05, + "loss": 1.3714, + "step": 249300 + }, + { + "epoch": 5.28, + "learning_rate": 5.116163265306123e-05, + "loss": 1.3658, + "step": 249400 + }, + { + "epoch": 5.28, + "learning_rate": 5.1141224489795924e-05, + "loss": 1.3634, + "step": 249500 + }, + { + "epoch": 5.28, + "learning_rate": 5.1120816326530616e-05, + "loss": 1.3612, + "step": 249600 + }, + { + "epoch": 5.28, + "learning_rate": 5.1100408163265314e-05, + "loss": 1.3638, + "step": 249700 + }, + { + "epoch": 5.29, + "learning_rate": 5.1080000000000006e-05, + "loss": 1.369, + "step": 249800 + }, + { + "epoch": 5.29, + "learning_rate": 5.10595918367347e-05, + "loss": 1.3629, + "step": 249900 + }, + { + "epoch": 5.29, + "learning_rate": 5.1039183673469396e-05, + "loss": 1.3725, + "step": 250000 + }, + { + "epoch": 5.29, + "learning_rate": 5.101877551020409e-05, + "loss": 1.3663, + "step": 250100 + }, + { + "epoch": 5.3, + "learning_rate": 5.099836734693878e-05, + "loss": 1.3668, + "step": 250200 + }, + { + "epoch": 5.3, + "learning_rate": 5.097795918367348e-05, + "loss": 1.371, + "step": 250300 + }, + { + "epoch": 5.3, + "learning_rate": 5.095755102040817e-05, + "loss": 1.3677, + "step": 250400 + }, + { + "epoch": 5.3, + "learning_rate": 5.093714285714286e-05, + "loss": 1.3704, + "step": 250500 + }, + { + "epoch": 5.3, + "learning_rate": 5.091673469387755e-05, + "loss": 1.3606, + "step": 250600 + }, + { + "epoch": 5.31, + "learning_rate": 5.089632653061225e-05, + "loss": 1.3665, + "step": 250700 + }, + { + "epoch": 5.31, + "learning_rate": 5.087591836734694e-05, + "loss": 1.3671, + "step": 250800 + }, + { + "epoch": 5.31, + "learning_rate": 5.085551020408163e-05, + "loss": 1.3599, + "step": 250900 + }, + { + "epoch": 5.31, + "learning_rate": 5.083510204081633e-05, + "loss": 1.3704, + "step": 251000 + }, + { + "epoch": 5.31, + "learning_rate": 5.081469387755102e-05, + "loss": 1.3655, + "step": 251100 + }, + { + "epoch": 5.32, + "learning_rate": 5.0794285714285715e-05, + "loss": 1.3656, + "step": 251200 + }, + { + "epoch": 5.32, + "learning_rate": 5.077387755102041e-05, + "loss": 1.3651, + "step": 251300 + }, + { + "epoch": 5.32, + "learning_rate": 5.0753469387755105e-05, + "loss": 1.3607, + "step": 251400 + }, + { + "epoch": 5.32, + "learning_rate": 5.0733265306122446e-05, + "loss": 1.3639, + "step": 251500 + }, + { + "epoch": 5.32, + "learning_rate": 5.071285714285715e-05, + "loss": 1.3607, + "step": 251600 + }, + { + "epoch": 5.33, + "learning_rate": 5.069244897959184e-05, + "loss": 1.3622, + "step": 251700 + }, + { + "epoch": 5.33, + "learning_rate": 5.067224489795919e-05, + "loss": 1.3674, + "step": 251800 + }, + { + "epoch": 5.33, + "learning_rate": 5.065183673469388e-05, + "loss": 1.3547, + "step": 251900 + }, + { + "epoch": 5.33, + "learning_rate": 5.063142857142857e-05, + "loss": 1.3675, + "step": 252000 + }, + { + "epoch": 5.34, + "learning_rate": 5.0611020408163265e-05, + "loss": 1.3667, + "step": 252100 + }, + { + "epoch": 5.34, + "learning_rate": 5.059061224489796e-05, + "loss": 1.366, + "step": 252200 + }, + { + "epoch": 5.34, + "learning_rate": 5.0570204081632655e-05, + "loss": 1.3609, + "step": 252300 + }, + { + "epoch": 5.34, + "learning_rate": 5.0549795918367346e-05, + "loss": 1.3662, + "step": 252400 + }, + { + "epoch": 5.34, + "learning_rate": 5.0529387755102045e-05, + "loss": 1.3656, + "step": 252500 + }, + { + "epoch": 5.35, + "learning_rate": 5.0508979591836736e-05, + "loss": 1.3602, + "step": 252600 + }, + { + "epoch": 5.35, + "learning_rate": 5.048857142857143e-05, + "loss": 1.3609, + "step": 252700 + }, + { + "epoch": 5.35, + "learning_rate": 5.0468163265306126e-05, + "loss": 1.37, + "step": 252800 + }, + { + "epoch": 5.35, + "learning_rate": 5.044775510204082e-05, + "loss": 1.3692, + "step": 252900 + }, + { + "epoch": 5.35, + "learning_rate": 5.042734693877551e-05, + "loss": 1.3616, + "step": 253000 + }, + { + "epoch": 5.36, + "learning_rate": 5.04069387755102e-05, + "loss": 1.3564, + "step": 253100 + }, + { + "epoch": 5.36, + "learning_rate": 5.03865306122449e-05, + "loss": 1.3671, + "step": 253200 + }, + { + "epoch": 5.36, + "learning_rate": 5.036612244897959e-05, + "loss": 1.3626, + "step": 253300 + }, + { + "epoch": 5.36, + "learning_rate": 5.034571428571428e-05, + "loss": 1.3637, + "step": 253400 + }, + { + "epoch": 5.37, + "learning_rate": 5.032530612244898e-05, + "loss": 1.3721, + "step": 253500 + }, + { + "epoch": 5.37, + "learning_rate": 5.030489795918367e-05, + "loss": 1.3671, + "step": 253600 + }, + { + "epoch": 5.37, + "learning_rate": 5.0284489795918364e-05, + "loss": 1.3607, + "step": 253700 + }, + { + "epoch": 5.37, + "learning_rate": 5.026408163265306e-05, + "loss": 1.3707, + "step": 253800 + }, + { + "epoch": 5.37, + "learning_rate": 5.0243673469387754e-05, + "loss": 1.3642, + "step": 253900 + }, + { + "epoch": 5.38, + "learning_rate": 5.0223265306122445e-05, + "loss": 1.3632, + "step": 254000 + }, + { + "epoch": 5.38, + "learning_rate": 5.020285714285714e-05, + "loss": 1.357, + "step": 254100 + }, + { + "epoch": 5.38, + "learning_rate": 5.0182448979591835e-05, + "loss": 1.3646, + "step": 254200 + }, + { + "epoch": 5.38, + "learning_rate": 5.016204081632653e-05, + "loss": 1.3602, + "step": 254300 + }, + { + "epoch": 5.38, + "learning_rate": 5.014163265306123e-05, + "loss": 1.3586, + "step": 254400 + }, + { + "epoch": 5.39, + "learning_rate": 5.0121224489795924e-05, + "loss": 1.3614, + "step": 254500 + }, + { + "epoch": 5.39, + "learning_rate": 5.010081632653062e-05, + "loss": 1.3678, + "step": 254600 + }, + { + "epoch": 5.39, + "learning_rate": 5.0080408163265314e-05, + "loss": 1.354, + "step": 254700 + }, + { + "epoch": 5.39, + "learning_rate": 5.0060000000000005e-05, + "loss": 1.3695, + "step": 254800 + }, + { + "epoch": 5.39, + "learning_rate": 5.0039591836734704e-05, + "loss": 1.3717, + "step": 254900 + }, + { + "epoch": 5.4, + "learning_rate": 5.0019183673469395e-05, + "loss": 1.3631, + "step": 255000 + }, + { + "epoch": 5.4, + "learning_rate": 4.999877551020408e-05, + "loss": 1.354, + "step": 255100 + }, + { + "epoch": 5.4, + "learning_rate": 4.997836734693878e-05, + "loss": 1.3627, + "step": 255200 + }, + { + "epoch": 5.4, + "learning_rate": 4.995795918367347e-05, + "loss": 1.3661, + "step": 255300 + }, + { + "epoch": 5.41, + "learning_rate": 4.993755102040816e-05, + "loss": 1.3643, + "step": 255400 + }, + { + "epoch": 5.41, + "learning_rate": 4.991714285714286e-05, + "loss": 1.3659, + "step": 255500 + }, + { + "epoch": 5.41, + "learning_rate": 4.989673469387756e-05, + "loss": 1.3579, + "step": 255600 + }, + { + "epoch": 5.41, + "learning_rate": 4.987632653061225e-05, + "loss": 1.3626, + "step": 255700 + }, + { + "epoch": 5.41, + "learning_rate": 4.985591836734694e-05, + "loss": 1.3663, + "step": 255800 + }, + { + "epoch": 5.42, + "learning_rate": 4.983551020408164e-05, + "loss": 1.3669, + "step": 255900 + }, + { + "epoch": 5.42, + "learning_rate": 4.981510204081633e-05, + "loss": 1.3658, + "step": 256000 + }, + { + "epoch": 5.42, + "learning_rate": 4.979469387755102e-05, + "loss": 1.3646, + "step": 256100 + }, + { + "epoch": 5.42, + "learning_rate": 4.9774285714285714e-05, + "loss": 1.3545, + "step": 256200 + }, + { + "epoch": 5.42, + "learning_rate": 4.975387755102041e-05, + "loss": 1.3558, + "step": 256300 + }, + { + "epoch": 5.43, + "learning_rate": 4.9733469387755104e-05, + "loss": 1.3715, + "step": 256400 + }, + { + "epoch": 5.43, + "learning_rate": 4.9713061224489796e-05, + "loss": 1.3543, + "step": 256500 + }, + { + "epoch": 5.43, + "learning_rate": 4.9692653061224494e-05, + "loss": 1.3574, + "step": 256600 + }, + { + "epoch": 5.43, + "learning_rate": 4.9672244897959186e-05, + "loss": 1.366, + "step": 256700 + }, + { + "epoch": 5.43, + "learning_rate": 4.965183673469388e-05, + "loss": 1.3644, + "step": 256800 + }, + { + "epoch": 5.44, + "learning_rate": 4.9631428571428576e-05, + "loss": 1.3538, + "step": 256900 + }, + { + "epoch": 5.44, + "learning_rate": 4.961102040816327e-05, + "loss": 1.3584, + "step": 257000 + }, + { + "epoch": 5.44, + "learning_rate": 4.959061224489796e-05, + "loss": 1.364, + "step": 257100 + }, + { + "epoch": 5.44, + "learning_rate": 4.957020408163266e-05, + "loss": 1.3648, + "step": 257200 + }, + { + "epoch": 5.45, + "learning_rate": 4.954979591836735e-05, + "loss": 1.3677, + "step": 257300 + }, + { + "epoch": 5.45, + "learning_rate": 4.952938775510204e-05, + "loss": 1.3627, + "step": 257400 + }, + { + "epoch": 5.45, + "learning_rate": 4.950897959183673e-05, + "loss": 1.3604, + "step": 257500 + }, + { + "epoch": 5.45, + "learning_rate": 4.948857142857143e-05, + "loss": 1.3644, + "step": 257600 + }, + { + "epoch": 5.45, + "learning_rate": 4.946816326530612e-05, + "loss": 1.3597, + "step": 257700 + }, + { + "epoch": 5.46, + "learning_rate": 4.944775510204082e-05, + "loss": 1.3584, + "step": 257800 + }, + { + "epoch": 5.46, + "learning_rate": 4.942734693877551e-05, + "loss": 1.3579, + "step": 257900 + }, + { + "epoch": 5.46, + "learning_rate": 4.940693877551021e-05, + "loss": 1.3517, + "step": 258000 + }, + { + "epoch": 5.46, + "learning_rate": 4.93865306122449e-05, + "loss": 1.3607, + "step": 258100 + }, + { + "epoch": 5.46, + "learning_rate": 4.9366122448979594e-05, + "loss": 1.361, + "step": 258200 + }, + { + "epoch": 5.47, + "learning_rate": 4.934591836734694e-05, + "loss": 1.3581, + "step": 258300 + }, + { + "epoch": 5.47, + "learning_rate": 4.932571428571429e-05, + "loss": 1.364, + "step": 258400 + }, + { + "epoch": 5.47, + "learning_rate": 4.930530612244898e-05, + "loss": 1.3659, + "step": 258500 + }, + { + "epoch": 5.47, + "learning_rate": 4.928489795918367e-05, + "loss": 1.3594, + "step": 258600 + }, + { + "epoch": 5.48, + "learning_rate": 4.926448979591837e-05, + "loss": 1.3557, + "step": 258700 + }, + { + "epoch": 5.48, + "learning_rate": 4.924408163265307e-05, + "loss": 1.3619, + "step": 258800 + }, + { + "epoch": 5.48, + "learning_rate": 4.922367346938776e-05, + "loss": 1.3588, + "step": 258900 + }, + { + "epoch": 5.48, + "learning_rate": 4.920326530612245e-05, + "loss": 1.3564, + "step": 259000 + }, + { + "epoch": 5.48, + "learning_rate": 4.918285714285715e-05, + "loss": 1.3562, + "step": 259100 + }, + { + "epoch": 5.49, + "learning_rate": 4.916244897959184e-05, + "loss": 1.3595, + "step": 259200 + }, + { + "epoch": 5.49, + "learning_rate": 4.914204081632653e-05, + "loss": 1.3622, + "step": 259300 + }, + { + "epoch": 5.49, + "learning_rate": 4.9121632653061225e-05, + "loss": 1.3632, + "step": 259400 + }, + { + "epoch": 5.49, + "learning_rate": 4.910142857142858e-05, + "loss": 1.3518, + "step": 259500 + }, + { + "epoch": 5.49, + "learning_rate": 4.908102040816327e-05, + "loss": 1.3604, + "step": 259600 + }, + { + "epoch": 5.5, + "learning_rate": 4.906061224489796e-05, + "loss": 1.3563, + "step": 259700 + }, + { + "epoch": 5.5, + "learning_rate": 4.9040204081632654e-05, + "loss": 1.3605, + "step": 259800 + }, + { + "epoch": 5.5, + "learning_rate": 4.901979591836735e-05, + "loss": 1.3585, + "step": 259900 + }, + { + "epoch": 5.5, + "learning_rate": 4.8999387755102044e-05, + "loss": 1.3552, + "step": 260000 + }, + { + "epoch": 5.5, + "learning_rate": 4.8978979591836736e-05, + "loss": 1.3663, + "step": 260100 + }, + { + "epoch": 5.51, + "learning_rate": 4.8958571428571434e-05, + "loss": 1.3552, + "step": 260200 + }, + { + "epoch": 5.51, + "learning_rate": 4.8938163265306126e-05, + "loss": 1.3525, + "step": 260300 + }, + { + "epoch": 5.51, + "learning_rate": 4.891775510204082e-05, + "loss": 1.3558, + "step": 260400 + }, + { + "epoch": 5.51, + "learning_rate": 4.8897346938775515e-05, + "loss": 1.3521, + "step": 260500 + }, + { + "epoch": 5.52, + "learning_rate": 4.887693877551021e-05, + "loss": 1.3548, + "step": 260600 + }, + { + "epoch": 5.52, + "learning_rate": 4.88565306122449e-05, + "loss": 1.3492, + "step": 260700 + }, + { + "epoch": 5.52, + "learning_rate": 4.883612244897959e-05, + "loss": 1.3522, + "step": 260800 + }, + { + "epoch": 5.52, + "learning_rate": 4.881571428571429e-05, + "loss": 1.3587, + "step": 260900 + }, + { + "epoch": 5.52, + "learning_rate": 4.879530612244898e-05, + "loss": 1.356, + "step": 261000 + }, + { + "epoch": 5.53, + "learning_rate": 4.877489795918367e-05, + "loss": 1.3501, + "step": 261100 + }, + { + "epoch": 5.53, + "learning_rate": 4.875448979591837e-05, + "loss": 1.361, + "step": 261200 + }, + { + "epoch": 5.53, + "learning_rate": 4.873408163265306e-05, + "loss": 1.3567, + "step": 261300 + }, + { + "epoch": 5.53, + "learning_rate": 4.871367346938775e-05, + "loss": 1.3509, + "step": 261400 + }, + { + "epoch": 5.53, + "learning_rate": 4.869326530612245e-05, + "loss": 1.356, + "step": 261500 + }, + { + "epoch": 5.54, + "learning_rate": 4.867285714285715e-05, + "loss": 1.3551, + "step": 261600 + }, + { + "epoch": 5.54, + "learning_rate": 4.865244897959184e-05, + "loss": 1.3602, + "step": 261700 + }, + { + "epoch": 5.54, + "learning_rate": 4.863204081632653e-05, + "loss": 1.3582, + "step": 261800 + }, + { + "epoch": 5.54, + "learning_rate": 4.861163265306123e-05, + "loss": 1.3511, + "step": 261900 + }, + { + "epoch": 5.54, + "learning_rate": 4.859122448979592e-05, + "loss": 1.3636, + "step": 262000 + }, + { + "epoch": 5.55, + "learning_rate": 4.857102040816327e-05, + "loss": 1.3549, + "step": 262100 + }, + { + "epoch": 5.55, + "learning_rate": 4.855061224489796e-05, + "loss": 1.3544, + "step": 262200 + }, + { + "epoch": 5.55, + "learning_rate": 4.853020408163266e-05, + "loss": 1.3574, + "step": 262300 + }, + { + "epoch": 5.55, + "learning_rate": 4.850979591836735e-05, + "loss": 1.3552, + "step": 262400 + }, + { + "epoch": 5.56, + "learning_rate": 4.8489387755102044e-05, + "loss": 1.358, + "step": 262500 + }, + { + "epoch": 5.56, + "learning_rate": 4.8468979591836735e-05, + "loss": 1.3516, + "step": 262600 + }, + { + "epoch": 5.56, + "learning_rate": 4.8448571428571434e-05, + "loss": 1.3533, + "step": 262700 + }, + { + "epoch": 5.56, + "learning_rate": 4.8428163265306125e-05, + "loss": 1.3445, + "step": 262800 + }, + { + "epoch": 5.56, + "learning_rate": 4.840775510204082e-05, + "loss": 1.357, + "step": 262900 + }, + { + "epoch": 5.57, + "learning_rate": 4.8387346938775515e-05, + "loss": 1.3589, + "step": 263000 + }, + { + "epoch": 5.57, + "learning_rate": 4.836693877551021e-05, + "loss": 1.3528, + "step": 263100 + }, + { + "epoch": 5.57, + "learning_rate": 4.83465306122449e-05, + "loss": 1.3559, + "step": 263200 + }, + { + "epoch": 5.57, + "learning_rate": 4.83261224489796e-05, + "loss": 1.3541, + "step": 263300 + }, + { + "epoch": 5.57, + "learning_rate": 4.830571428571429e-05, + "loss": 1.3598, + "step": 263400 + }, + { + "epoch": 5.58, + "learning_rate": 4.828530612244898e-05, + "loss": 1.3551, + "step": 263500 + }, + { + "epoch": 5.58, + "learning_rate": 4.826489795918367e-05, + "loss": 1.3555, + "step": 263600 + }, + { + "epoch": 5.58, + "learning_rate": 4.824448979591837e-05, + "loss": 1.356, + "step": 263700 + }, + { + "epoch": 5.58, + "learning_rate": 4.822408163265306e-05, + "loss": 1.3579, + "step": 263800 + }, + { + "epoch": 5.59, + "learning_rate": 4.820367346938775e-05, + "loss": 1.349, + "step": 263900 + }, + { + "epoch": 5.59, + "learning_rate": 4.818326530612245e-05, + "loss": 1.3577, + "step": 264000 + }, + { + "epoch": 5.59, + "learning_rate": 4.816285714285714e-05, + "loss": 1.3547, + "step": 264100 + }, + { + "epoch": 5.59, + "learning_rate": 4.8142448979591835e-05, + "loss": 1.3527, + "step": 264200 + }, + { + "epoch": 5.59, + "learning_rate": 4.812204081632653e-05, + "loss": 1.3547, + "step": 264300 + }, + { + "epoch": 5.6, + "learning_rate": 4.810163265306123e-05, + "loss": 1.355, + "step": 264400 + }, + { + "epoch": 5.6, + "learning_rate": 4.808122448979592e-05, + "loss": 1.3486, + "step": 264500 + }, + { + "epoch": 5.6, + "learning_rate": 4.8060816326530614e-05, + "loss": 1.3456, + "step": 264600 + }, + { + "epoch": 5.6, + "learning_rate": 4.804040816326531e-05, + "loss": 1.3508, + "step": 264700 + }, + { + "epoch": 5.6, + "learning_rate": 4.8020000000000004e-05, + "loss": 1.357, + "step": 264800 + }, + { + "epoch": 5.61, + "learning_rate": 4.7999591836734696e-05, + "loss": 1.3491, + "step": 264900 + }, + { + "epoch": 5.61, + "learning_rate": 4.7979183673469394e-05, + "loss": 1.3544, + "step": 265000 + }, + { + "epoch": 5.61, + "learning_rate": 4.7958775510204086e-05, + "loss": 1.3528, + "step": 265100 + }, + { + "epoch": 5.61, + "learning_rate": 4.793836734693878e-05, + "loss": 1.3547, + "step": 265200 + }, + { + "epoch": 5.61, + "learning_rate": 4.791795918367347e-05, + "loss": 1.3523, + "step": 265300 + }, + { + "epoch": 5.62, + "learning_rate": 4.789755102040817e-05, + "loss": 1.3545, + "step": 265400 + }, + { + "epoch": 5.62, + "learning_rate": 4.787714285714286e-05, + "loss": 1.3518, + "step": 265500 + }, + { + "epoch": 5.62, + "learning_rate": 4.785673469387755e-05, + "loss": 1.3518, + "step": 265600 + }, + { + "epoch": 5.62, + "learning_rate": 4.783632653061225e-05, + "loss": 1.3509, + "step": 265700 + }, + { + "epoch": 5.63, + "learning_rate": 4.781591836734694e-05, + "loss": 1.3615, + "step": 265800 + }, + { + "epoch": 5.63, + "learning_rate": 4.779551020408163e-05, + "loss": 1.3546, + "step": 265900 + }, + { + "epoch": 5.63, + "learning_rate": 4.777510204081633e-05, + "loss": 1.3509, + "step": 266000 + }, + { + "epoch": 5.63, + "learning_rate": 4.775469387755102e-05, + "loss": 1.3513, + "step": 266100 + }, + { + "epoch": 5.63, + "learning_rate": 4.7734285714285714e-05, + "loss": 1.3494, + "step": 266200 + }, + { + "epoch": 5.64, + "learning_rate": 4.7713877551020405e-05, + "loss": 1.349, + "step": 266300 + }, + { + "epoch": 5.64, + "learning_rate": 4.769367346938776e-05, + "loss": 1.3497, + "step": 266400 + }, + { + "epoch": 5.64, + "learning_rate": 4.767326530612245e-05, + "loss": 1.3567, + "step": 266500 + }, + { + "epoch": 5.64, + "learning_rate": 4.765285714285714e-05, + "loss": 1.3459, + "step": 266600 + }, + { + "epoch": 5.64, + "learning_rate": 4.7632448979591834e-05, + "loss": 1.3518, + "step": 266700 + }, + { + "epoch": 5.65, + "learning_rate": 4.761204081632653e-05, + "loss": 1.3449, + "step": 266800 + }, + { + "epoch": 5.65, + "learning_rate": 4.7591632653061224e-05, + "loss": 1.3547, + "step": 266900 + }, + { + "epoch": 5.65, + "learning_rate": 4.7571224489795916e-05, + "loss": 1.3518, + "step": 267000 + }, + { + "epoch": 5.65, + "learning_rate": 4.7550816326530614e-05, + "loss": 1.355, + "step": 267100 + }, + { + "epoch": 5.66, + "learning_rate": 4.753040816326531e-05, + "loss": 1.3594, + "step": 267200 + }, + { + "epoch": 5.66, + "learning_rate": 4.7510000000000004e-05, + "loss": 1.3578, + "step": 267300 + }, + { + "epoch": 5.66, + "learning_rate": 4.7489795918367345e-05, + "loss": 1.3415, + "step": 267400 + }, + { + "epoch": 5.66, + "learning_rate": 4.746938775510204e-05, + "loss": 1.3566, + "step": 267500 + }, + { + "epoch": 5.66, + "learning_rate": 4.744897959183674e-05, + "loss": 1.3475, + "step": 267600 + }, + { + "epoch": 5.67, + "learning_rate": 4.742857142857143e-05, + "loss": 1.3459, + "step": 267700 + }, + { + "epoch": 5.67, + "learning_rate": 4.7408163265306125e-05, + "loss": 1.3535, + "step": 267800 + }, + { + "epoch": 5.67, + "learning_rate": 4.738775510204082e-05, + "loss": 1.3524, + "step": 267900 + }, + { + "epoch": 5.67, + "learning_rate": 4.7367346938775515e-05, + "loss": 1.3577, + "step": 268000 + }, + { + "epoch": 5.67, + "learning_rate": 4.7346938775510206e-05, + "loss": 1.3492, + "step": 268100 + }, + { + "epoch": 5.68, + "learning_rate": 4.7326530612244905e-05, + "loss": 1.3512, + "step": 268200 + }, + { + "epoch": 5.68, + "learning_rate": 4.7306122448979596e-05, + "loss": 1.3496, + "step": 268300 + }, + { + "epoch": 5.68, + "learning_rate": 4.728571428571429e-05, + "loss": 1.3584, + "step": 268400 + }, + { + "epoch": 5.68, + "learning_rate": 4.726530612244898e-05, + "loss": 1.3619, + "step": 268500 + }, + { + "epoch": 5.68, + "learning_rate": 4.724489795918368e-05, + "loss": 1.3485, + "step": 268600 + }, + { + "epoch": 5.69, + "learning_rate": 4.722448979591837e-05, + "loss": 1.3514, + "step": 268700 + }, + { + "epoch": 5.69, + "learning_rate": 4.720408163265306e-05, + "loss": 1.3496, + "step": 268800 + }, + { + "epoch": 5.69, + "learning_rate": 4.718367346938776e-05, + "loss": 1.3482, + "step": 268900 + }, + { + "epoch": 5.69, + "learning_rate": 4.716326530612245e-05, + "loss": 1.3519, + "step": 269000 + }, + { + "epoch": 5.7, + "learning_rate": 4.714285714285714e-05, + "loss": 1.3434, + "step": 269100 + }, + { + "epoch": 5.7, + "learning_rate": 4.712244897959184e-05, + "loss": 1.3528, + "step": 269200 + }, + { + "epoch": 5.7, + "learning_rate": 4.710204081632653e-05, + "loss": 1.3501, + "step": 269300 + }, + { + "epoch": 5.7, + "learning_rate": 4.7081632653061224e-05, + "loss": 1.3531, + "step": 269400 + }, + { + "epoch": 5.7, + "learning_rate": 4.7061224489795916e-05, + "loss": 1.3506, + "step": 269500 + }, + { + "epoch": 5.71, + "learning_rate": 4.7040816326530614e-05, + "loss": 1.3509, + "step": 269600 + }, + { + "epoch": 5.71, + "learning_rate": 4.7020408163265306e-05, + "loss": 1.3494, + "step": 269700 + }, + { + "epoch": 5.71, + "learning_rate": 4.7e-05, + "loss": 1.3502, + "step": 269800 + }, + { + "epoch": 5.71, + "learning_rate": 4.6979591836734696e-05, + "loss": 1.3478, + "step": 269900 + }, + { + "epoch": 5.71, + "learning_rate": 4.6959183673469394e-05, + "loss": 1.3474, + "step": 270000 + }, + { + "epoch": 5.72, + "learning_rate": 4.6938775510204086e-05, + "loss": 1.3509, + "step": 270100 + }, + { + "epoch": 5.72, + "learning_rate": 4.691836734693878e-05, + "loss": 1.3512, + "step": 270200 + }, + { + "epoch": 5.72, + "learning_rate": 4.6897959183673475e-05, + "loss": 1.3464, + "step": 270300 + }, + { + "epoch": 5.72, + "learning_rate": 4.687755102040817e-05, + "loss": 1.3499, + "step": 270400 + }, + { + "epoch": 5.72, + "learning_rate": 4.6857346938775515e-05, + "loss": 1.3504, + "step": 270500 + }, + { + "epoch": 5.73, + "learning_rate": 4.6836938775510206e-05, + "loss": 1.3491, + "step": 270600 + }, + { + "epoch": 5.73, + "learning_rate": 4.6816530612244905e-05, + "loss": 1.3474, + "step": 270700 + }, + { + "epoch": 5.73, + "learning_rate": 4.6796122448979596e-05, + "loss": 1.3493, + "step": 270800 + }, + { + "epoch": 5.73, + "learning_rate": 4.677571428571429e-05, + "loss": 1.3453, + "step": 270900 + }, + { + "epoch": 5.74, + "learning_rate": 4.6755306122448986e-05, + "loss": 1.3475, + "step": 271000 + }, + { + "epoch": 5.74, + "learning_rate": 4.673489795918368e-05, + "loss": 1.3389, + "step": 271100 + }, + { + "epoch": 5.74, + "learning_rate": 4.671448979591837e-05, + "loss": 1.3562, + "step": 271200 + }, + { + "epoch": 5.74, + "learning_rate": 4.669408163265306e-05, + "loss": 1.347, + "step": 271300 + }, + { + "epoch": 5.74, + "learning_rate": 4.667367346938776e-05, + "loss": 1.3447, + "step": 271400 + }, + { + "epoch": 5.75, + "learning_rate": 4.665326530612245e-05, + "loss": 1.3423, + "step": 271500 + }, + { + "epoch": 5.75, + "learning_rate": 4.663285714285714e-05, + "loss": 1.3465, + "step": 271600 + }, + { + "epoch": 5.75, + "learning_rate": 4.661244897959184e-05, + "loss": 1.3532, + "step": 271700 + }, + { + "epoch": 5.75, + "learning_rate": 4.659204081632653e-05, + "loss": 1.3523, + "step": 271800 + }, + { + "epoch": 5.75, + "learning_rate": 4.6571632653061224e-05, + "loss": 1.3499, + "step": 271900 + }, + { + "epoch": 5.76, + "learning_rate": 4.655122448979592e-05, + "loss": 1.3379, + "step": 272000 + }, + { + "epoch": 5.76, + "learning_rate": 4.6530816326530614e-05, + "loss": 1.3507, + "step": 272100 + }, + { + "epoch": 5.76, + "learning_rate": 4.6510408163265305e-05, + "loss": 1.3504, + "step": 272200 + }, + { + "epoch": 5.76, + "learning_rate": 4.649e-05, + "loss": 1.3476, + "step": 272300 + }, + { + "epoch": 5.77, + "learning_rate": 4.6469591836734695e-05, + "loss": 1.3441, + "step": 272400 + }, + { + "epoch": 5.77, + "learning_rate": 4.644918367346939e-05, + "loss": 1.348, + "step": 272500 + }, + { + "epoch": 5.77, + "learning_rate": 4.642877551020408e-05, + "loss": 1.3441, + "step": 272600 + }, + { + "epoch": 5.77, + "learning_rate": 4.640836734693878e-05, + "loss": 1.3603, + "step": 272700 + }, + { + "epoch": 5.77, + "learning_rate": 4.6387959183673475e-05, + "loss": 1.3454, + "step": 272800 + }, + { + "epoch": 5.78, + "learning_rate": 4.636755102040817e-05, + "loss": 1.3501, + "step": 272900 + }, + { + "epoch": 5.78, + "learning_rate": 4.634714285714286e-05, + "loss": 1.3571, + "step": 273000 + }, + { + "epoch": 5.78, + "learning_rate": 4.632673469387756e-05, + "loss": 1.3456, + "step": 273100 + }, + { + "epoch": 5.78, + "learning_rate": 4.630632653061225e-05, + "loss": 1.3444, + "step": 273200 + }, + { + "epoch": 5.78, + "learning_rate": 4.628591836734694e-05, + "loss": 1.3509, + "step": 273300 + }, + { + "epoch": 5.79, + "learning_rate": 4.626551020408164e-05, + "loss": 1.353, + "step": 273400 + }, + { + "epoch": 5.79, + "learning_rate": 4.624510204081633e-05, + "loss": 1.3496, + "step": 273500 + }, + { + "epoch": 5.79, + "learning_rate": 4.622469387755102e-05, + "loss": 1.3433, + "step": 273600 + }, + { + "epoch": 5.79, + "learning_rate": 4.620428571428572e-05, + "loss": 1.3516, + "step": 273700 + }, + { + "epoch": 5.79, + "learning_rate": 4.618387755102041e-05, + "loss": 1.3459, + "step": 273800 + }, + { + "epoch": 5.8, + "learning_rate": 4.61634693877551e-05, + "loss": 1.3496, + "step": 273900 + }, + { + "epoch": 5.8, + "learning_rate": 4.6143061224489795e-05, + "loss": 1.3548, + "step": 274000 + }, + { + "epoch": 5.8, + "learning_rate": 4.612265306122449e-05, + "loss": 1.3448, + "step": 274100 + }, + { + "epoch": 5.8, + "learning_rate": 4.6102244897959185e-05, + "loss": 1.3405, + "step": 274200 + }, + { + "epoch": 5.81, + "learning_rate": 4.6081836734693876e-05, + "loss": 1.3427, + "step": 274300 + }, + { + "epoch": 5.81, + "learning_rate": 4.6061428571428575e-05, + "loss": 1.3477, + "step": 274400 + }, + { + "epoch": 5.81, + "learning_rate": 4.6041020408163266e-05, + "loss": 1.3471, + "step": 274500 + }, + { + "epoch": 5.81, + "learning_rate": 4.6020816326530614e-05, + "loss": 1.3434, + "step": 274600 + }, + { + "epoch": 5.81, + "learning_rate": 4.6000408163265305e-05, + "loss": 1.3432, + "step": 274700 + }, + { + "epoch": 5.82, + "learning_rate": 4.5980000000000004e-05, + "loss": 1.3433, + "step": 274800 + }, + { + "epoch": 5.82, + "learning_rate": 4.5959591836734695e-05, + "loss": 1.347, + "step": 274900 + }, + { + "epoch": 5.82, + "learning_rate": 4.593918367346939e-05, + "loss": 1.3484, + "step": 275000 + }, + { + "epoch": 5.82, + "learning_rate": 4.591877551020408e-05, + "loss": 1.3401, + "step": 275100 + }, + { + "epoch": 5.82, + "learning_rate": 4.589836734693878e-05, + "loss": 1.3389, + "step": 275200 + }, + { + "epoch": 5.83, + "learning_rate": 4.587795918367347e-05, + "loss": 1.3432, + "step": 275300 + }, + { + "epoch": 5.83, + "learning_rate": 4.585755102040817e-05, + "loss": 1.3445, + "step": 275400 + }, + { + "epoch": 5.83, + "learning_rate": 4.5837142857142865e-05, + "loss": 1.353, + "step": 275500 + }, + { + "epoch": 5.83, + "learning_rate": 4.581673469387756e-05, + "loss": 1.3429, + "step": 275600 + }, + { + "epoch": 5.83, + "learning_rate": 4.579632653061225e-05, + "loss": 1.3466, + "step": 275700 + }, + { + "epoch": 5.84, + "learning_rate": 4.577591836734694e-05, + "loss": 1.3392, + "step": 275800 + }, + { + "epoch": 5.84, + "learning_rate": 4.575551020408164e-05, + "loss": 1.3471, + "step": 275900 + }, + { + "epoch": 5.84, + "learning_rate": 4.573510204081633e-05, + "loss": 1.3481, + "step": 276000 + }, + { + "epoch": 5.84, + "learning_rate": 4.571469387755102e-05, + "loss": 1.3497, + "step": 276100 + }, + { + "epoch": 5.85, + "learning_rate": 4.569428571428572e-05, + "loss": 1.3433, + "step": 276200 + }, + { + "epoch": 5.85, + "learning_rate": 4.567387755102041e-05, + "loss": 1.3518, + "step": 276300 + }, + { + "epoch": 5.85, + "learning_rate": 4.56534693877551e-05, + "loss": 1.3487, + "step": 276400 + }, + { + "epoch": 5.85, + "learning_rate": 4.56330612244898e-05, + "loss": 1.3424, + "step": 276500 + }, + { + "epoch": 5.85, + "learning_rate": 4.561265306122449e-05, + "loss": 1.348, + "step": 276600 + }, + { + "epoch": 5.86, + "learning_rate": 4.5592244897959184e-05, + "loss": 1.3455, + "step": 276700 + }, + { + "epoch": 5.86, + "learning_rate": 4.5571836734693876e-05, + "loss": 1.35, + "step": 276800 + }, + { + "epoch": 5.86, + "learning_rate": 4.5551428571428574e-05, + "loss": 1.344, + "step": 276900 + }, + { + "epoch": 5.86, + "learning_rate": 4.5531020408163266e-05, + "loss": 1.3352, + "step": 277000 + }, + { + "epoch": 5.86, + "learning_rate": 4.5510816326530613e-05, + "loss": 1.3472, + "step": 277100 + }, + { + "epoch": 5.87, + "learning_rate": 4.5490408163265305e-05, + "loss": 1.3417, + "step": 277200 + }, + { + "epoch": 5.87, + "learning_rate": 4.5470000000000003e-05, + "loss": 1.343, + "step": 277300 + }, + { + "epoch": 5.87, + "learning_rate": 4.5449591836734695e-05, + "loss": 1.3394, + "step": 277400 + }, + { + "epoch": 5.87, + "learning_rate": 4.5429183673469387e-05, + "loss": 1.3352, + "step": 277500 + }, + { + "epoch": 5.88, + "learning_rate": 4.5408775510204085e-05, + "loss": 1.3409, + "step": 277600 + }, + { + "epoch": 5.88, + "learning_rate": 4.5388367346938777e-05, + "loss": 1.342, + "step": 277700 + }, + { + "epoch": 5.88, + "learning_rate": 4.536795918367347e-05, + "loss": 1.3487, + "step": 277800 + }, + { + "epoch": 5.88, + "learning_rate": 4.5347551020408166e-05, + "loss": 1.3347, + "step": 277900 + }, + { + "epoch": 5.88, + "learning_rate": 4.532714285714286e-05, + "loss": 1.3452, + "step": 278000 + }, + { + "epoch": 5.89, + "learning_rate": 4.530673469387755e-05, + "loss": 1.3467, + "step": 278100 + }, + { + "epoch": 5.89, + "learning_rate": 4.528632653061225e-05, + "loss": 1.3372, + "step": 278200 + }, + { + "epoch": 5.89, + "learning_rate": 4.5265918367346946e-05, + "loss": 1.345, + "step": 278300 + }, + { + "epoch": 5.89, + "learning_rate": 4.524551020408164e-05, + "loss": 1.3479, + "step": 278400 + }, + { + "epoch": 5.89, + "learning_rate": 4.522510204081633e-05, + "loss": 1.3459, + "step": 278500 + }, + { + "epoch": 5.9, + "learning_rate": 4.520469387755102e-05, + "loss": 1.3469, + "step": 278600 + }, + { + "epoch": 5.9, + "learning_rate": 4.518428571428572e-05, + "loss": 1.3439, + "step": 278700 + }, + { + "epoch": 5.9, + "learning_rate": 4.516387755102041e-05, + "loss": 1.3485, + "step": 278800 + }, + { + "epoch": 5.9, + "learning_rate": 4.51434693877551e-05, + "loss": 1.3465, + "step": 278900 + }, + { + "epoch": 5.9, + "learning_rate": 4.51230612244898e-05, + "loss": 1.3453, + "step": 279000 + }, + { + "epoch": 5.91, + "learning_rate": 4.510265306122449e-05, + "loss": 1.3403, + "step": 279100 + }, + { + "epoch": 5.91, + "learning_rate": 4.5082244897959184e-05, + "loss": 1.3401, + "step": 279200 + }, + { + "epoch": 5.91, + "learning_rate": 4.506183673469388e-05, + "loss": 1.342, + "step": 279300 + }, + { + "epoch": 5.91, + "learning_rate": 4.5041428571428574e-05, + "loss": 1.35, + "step": 279400 + }, + { + "epoch": 5.92, + "learning_rate": 4.5021020408163266e-05, + "loss": 1.3474, + "step": 279500 + }, + { + "epoch": 5.92, + "learning_rate": 4.500061224489796e-05, + "loss": 1.3487, + "step": 279600 + }, + { + "epoch": 5.92, + "learning_rate": 4.4980204081632656e-05, + "loss": 1.3438, + "step": 279700 + }, + { + "epoch": 5.92, + "learning_rate": 4.495979591836735e-05, + "loss": 1.3435, + "step": 279800 + }, + { + "epoch": 5.92, + "learning_rate": 4.493938775510204e-05, + "loss": 1.3462, + "step": 279900 + }, + { + "epoch": 5.93, + "learning_rate": 4.491897959183674e-05, + "loss": 1.3392, + "step": 280000 + }, + { + "epoch": 5.93, + "learning_rate": 4.489857142857143e-05, + "loss": 1.3455, + "step": 280100 + }, + { + "epoch": 5.93, + "learning_rate": 4.487816326530612e-05, + "loss": 1.3355, + "step": 280200 + }, + { + "epoch": 5.93, + "learning_rate": 4.485775510204082e-05, + "loss": 1.3454, + "step": 280300 + }, + { + "epoch": 5.93, + "learning_rate": 4.483734693877551e-05, + "loss": 1.3421, + "step": 280400 + }, + { + "epoch": 5.94, + "learning_rate": 4.481693877551021e-05, + "loss": 1.3418, + "step": 280500 + }, + { + "epoch": 5.94, + "learning_rate": 4.47965306122449e-05, + "loss": 1.3387, + "step": 280600 + }, + { + "epoch": 5.94, + "learning_rate": 4.477632653061225e-05, + "loss": 1.3361, + "step": 280700 + }, + { + "epoch": 5.94, + "learning_rate": 4.475591836734694e-05, + "loss": 1.3422, + "step": 280800 + }, + { + "epoch": 5.94, + "learning_rate": 4.473551020408163e-05, + "loss": 1.337, + "step": 280900 + }, + { + "epoch": 5.95, + "learning_rate": 4.471510204081633e-05, + "loss": 1.3355, + "step": 281000 + }, + { + "epoch": 5.95, + "learning_rate": 4.469469387755103e-05, + "loss": 1.3407, + "step": 281100 + }, + { + "epoch": 5.95, + "learning_rate": 4.467428571428572e-05, + "loss": 1.3399, + "step": 281200 + }, + { + "epoch": 5.95, + "learning_rate": 4.465387755102041e-05, + "loss": 1.3435, + "step": 281300 + }, + { + "epoch": 5.96, + "learning_rate": 4.46334693877551e-05, + "loss": 1.3507, + "step": 281400 + }, + { + "epoch": 5.96, + "learning_rate": 4.46130612244898e-05, + "loss": 1.344, + "step": 281500 + }, + { + "epoch": 5.96, + "learning_rate": 4.459265306122449e-05, + "loss": 1.3464, + "step": 281600 + }, + { + "epoch": 5.96, + "learning_rate": 4.4572244897959184e-05, + "loss": 1.345, + "step": 281700 + }, + { + "epoch": 5.96, + "learning_rate": 4.455183673469388e-05, + "loss": 1.3423, + "step": 281800 + }, + { + "epoch": 5.97, + "learning_rate": 4.4531428571428574e-05, + "loss": 1.3356, + "step": 281900 + }, + { + "epoch": 5.97, + "learning_rate": 4.4511020408163265e-05, + "loss": 1.3349, + "step": 282000 + }, + { + "epoch": 5.97, + "learning_rate": 4.4490612244897964e-05, + "loss": 1.3366, + "step": 282100 + }, + { + "epoch": 5.97, + "learning_rate": 4.4470204081632655e-05, + "loss": 1.3421, + "step": 282200 + }, + { + "epoch": 5.97, + "learning_rate": 4.444979591836735e-05, + "loss": 1.332, + "step": 282300 + }, + { + "epoch": 5.98, + "learning_rate": 4.4429387755102045e-05, + "loss": 1.3393, + "step": 282400 + }, + { + "epoch": 5.98, + "learning_rate": 4.440897959183674e-05, + "loss": 1.3377, + "step": 282500 + }, + { + "epoch": 5.98, + "learning_rate": 4.438857142857143e-05, + "loss": 1.3301, + "step": 282600 + }, + { + "epoch": 5.98, + "learning_rate": 4.436816326530612e-05, + "loss": 1.342, + "step": 282700 + }, + { + "epoch": 5.99, + "learning_rate": 4.434775510204082e-05, + "loss": 1.3413, + "step": 282800 + }, + { + "epoch": 5.99, + "learning_rate": 4.432734693877551e-05, + "loss": 1.3391, + "step": 282900 + }, + { + "epoch": 5.99, + "learning_rate": 4.43069387755102e-05, + "loss": 1.3439, + "step": 283000 + }, + { + "epoch": 5.99, + "learning_rate": 4.42865306122449e-05, + "loss": 1.3376, + "step": 283100 + }, + { + "epoch": 5.99, + "learning_rate": 4.42661224489796e-05, + "loss": 1.3525, + "step": 283200 + }, + { + "epoch": 6.0, + "learning_rate": 4.424571428571429e-05, + "loss": 1.3381, + "step": 283300 + }, + { + "epoch": 6.0, + "learning_rate": 4.422530612244898e-05, + "loss": 1.34, + "step": 283400 + }, + { + "epoch": 6.0, + "learning_rate": 4.420489795918368e-05, + "loss": 1.3415, + "step": 283500 + }, + { + "epoch": 6.0, + "learning_rate": 4.418448979591837e-05, + "loss": 1.3354, + "step": 283600 + }, + { + "epoch": 6.0, + "learning_rate": 4.416408163265306e-05, + "loss": 1.3298, + "step": 283700 + }, + { + "epoch": 6.01, + "learning_rate": 4.414367346938776e-05, + "loss": 1.339, + "step": 283800 + }, + { + "epoch": 6.01, + "learning_rate": 4.412326530612245e-05, + "loss": 1.3375, + "step": 283900 + }, + { + "epoch": 6.01, + "learning_rate": 4.4102857142857145e-05, + "loss": 1.3392, + "step": 284000 + }, + { + "epoch": 6.01, + "learning_rate": 4.4082448979591836e-05, + "loss": 1.3396, + "step": 284100 + }, + { + "epoch": 6.01, + "learning_rate": 4.4062040816326535e-05, + "loss": 1.3316, + "step": 284200 + }, + { + "epoch": 6.02, + "learning_rate": 4.404183673469388e-05, + "loss": 1.3385, + "step": 284300 + }, + { + "epoch": 6.02, + "learning_rate": 4.4021428571428574e-05, + "loss": 1.343, + "step": 284400 + }, + { + "epoch": 6.02, + "learning_rate": 4.4001020408163265e-05, + "loss": 1.3366, + "step": 284500 + }, + { + "epoch": 6.02, + "learning_rate": 4.3980612244897964e-05, + "loss": 1.3309, + "step": 284600 + }, + { + "epoch": 6.03, + "learning_rate": 4.3960204081632655e-05, + "loss": 1.3334, + "step": 284700 + }, + { + "epoch": 6.03, + "learning_rate": 4.393979591836735e-05, + "loss": 1.3319, + "step": 284800 + }, + { + "epoch": 6.03, + "learning_rate": 4.3919387755102045e-05, + "loss": 1.3326, + "step": 284900 + }, + { + "epoch": 6.03, + "learning_rate": 4.389897959183674e-05, + "loss": 1.3369, + "step": 285000 + }, + { + "epoch": 6.03, + "learning_rate": 4.387857142857143e-05, + "loss": 1.3434, + "step": 285100 + }, + { + "epoch": 6.04, + "learning_rate": 4.385816326530613e-05, + "loss": 1.3365, + "step": 285200 + }, + { + "epoch": 6.04, + "learning_rate": 4.383775510204082e-05, + "loss": 1.3283, + "step": 285300 + }, + { + "epoch": 6.04, + "learning_rate": 4.381734693877551e-05, + "loss": 1.3421, + "step": 285400 + }, + { + "epoch": 6.04, + "learning_rate": 4.37969387755102e-05, + "loss": 1.3341, + "step": 285500 + }, + { + "epoch": 6.04, + "learning_rate": 4.37765306122449e-05, + "loss": 1.3389, + "step": 285600 + }, + { + "epoch": 6.05, + "learning_rate": 4.375612244897959e-05, + "loss": 1.3407, + "step": 285700 + }, + { + "epoch": 6.05, + "learning_rate": 4.373571428571428e-05, + "loss": 1.3303, + "step": 285800 + }, + { + "epoch": 6.05, + "learning_rate": 4.371530612244898e-05, + "loss": 1.3381, + "step": 285900 + }, + { + "epoch": 6.05, + "learning_rate": 4.369489795918368e-05, + "loss": 1.3354, + "step": 286000 + }, + { + "epoch": 6.06, + "learning_rate": 4.367448979591837e-05, + "loss": 1.3276, + "step": 286100 + }, + { + "epoch": 6.06, + "learning_rate": 4.365408163265306e-05, + "loss": 1.3286, + "step": 286200 + }, + { + "epoch": 6.06, + "learning_rate": 4.363367346938776e-05, + "loss": 1.3329, + "step": 286300 + }, + { + "epoch": 6.06, + "learning_rate": 4.361326530612245e-05, + "loss": 1.3352, + "step": 286400 + }, + { + "epoch": 6.06, + "learning_rate": 4.3592857142857144e-05, + "loss": 1.342, + "step": 286500 + }, + { + "epoch": 6.07, + "learning_rate": 4.357244897959184e-05, + "loss": 1.3327, + "step": 286600 + }, + { + "epoch": 6.07, + "learning_rate": 4.355224489795919e-05, + "loss": 1.3269, + "step": 286700 + }, + { + "epoch": 6.07, + "learning_rate": 4.353183673469388e-05, + "loss": 1.333, + "step": 286800 + }, + { + "epoch": 6.07, + "learning_rate": 4.3511428571428574e-05, + "loss": 1.3328, + "step": 286900 + }, + { + "epoch": 6.07, + "learning_rate": 4.349102040816327e-05, + "loss": 1.3344, + "step": 287000 + }, + { + "epoch": 6.08, + "learning_rate": 4.3470612244897963e-05, + "loss": 1.3306, + "step": 287100 + }, + { + "epoch": 6.08, + "learning_rate": 4.3450204081632655e-05, + "loss": 1.3337, + "step": 287200 + }, + { + "epoch": 6.08, + "learning_rate": 4.342979591836735e-05, + "loss": 1.3351, + "step": 287300 + }, + { + "epoch": 6.08, + "learning_rate": 4.3409387755102045e-05, + "loss": 1.3411, + "step": 287400 + }, + { + "epoch": 6.08, + "learning_rate": 4.3388979591836737e-05, + "loss": 1.3384, + "step": 287500 + }, + { + "epoch": 6.09, + "learning_rate": 4.336857142857143e-05, + "loss": 1.342, + "step": 287600 + }, + { + "epoch": 6.09, + "learning_rate": 4.3348163265306127e-05, + "loss": 1.3367, + "step": 287700 + }, + { + "epoch": 6.09, + "learning_rate": 4.332775510204082e-05, + "loss": 1.3284, + "step": 287800 + }, + { + "epoch": 6.09, + "learning_rate": 4.330734693877551e-05, + "loss": 1.341, + "step": 287900 + }, + { + "epoch": 6.1, + "learning_rate": 4.328693877551021e-05, + "loss": 1.3353, + "step": 288000 + }, + { + "epoch": 6.1, + "learning_rate": 4.32665306122449e-05, + "loss": 1.3286, + "step": 288100 + }, + { + "epoch": 6.1, + "learning_rate": 4.324612244897959e-05, + "loss": 1.3329, + "step": 288200 + }, + { + "epoch": 6.1, + "learning_rate": 4.322571428571428e-05, + "loss": 1.3288, + "step": 288300 + }, + { + "epoch": 6.1, + "learning_rate": 4.320530612244898e-05, + "loss": 1.3316, + "step": 288400 + }, + { + "epoch": 6.11, + "learning_rate": 4.318489795918367e-05, + "loss": 1.3349, + "step": 288500 + }, + { + "epoch": 6.11, + "learning_rate": 4.3164489795918364e-05, + "loss": 1.3362, + "step": 288600 + }, + { + "epoch": 6.11, + "learning_rate": 4.314408163265306e-05, + "loss": 1.3388, + "step": 288700 + }, + { + "epoch": 6.11, + "learning_rate": 4.312367346938776e-05, + "loss": 1.3298, + "step": 288800 + }, + { + "epoch": 6.11, + "learning_rate": 4.310326530612245e-05, + "loss": 1.3323, + "step": 288900 + }, + { + "epoch": 6.12, + "learning_rate": 4.3082857142857144e-05, + "loss": 1.3256, + "step": 289000 + }, + { + "epoch": 6.12, + "learning_rate": 4.306244897959184e-05, + "loss": 1.3281, + "step": 289100 + }, + { + "epoch": 6.12, + "learning_rate": 4.3042040816326534e-05, + "loss": 1.3363, + "step": 289200 + }, + { + "epoch": 6.12, + "learning_rate": 4.3021632653061226e-05, + "loss": 1.3302, + "step": 289300 + }, + { + "epoch": 6.12, + "learning_rate": 4.3001224489795924e-05, + "loss": 1.3341, + "step": 289400 + }, + { + "epoch": 6.13, + "learning_rate": 4.2980816326530616e-05, + "loss": 1.329, + "step": 289500 + }, + { + "epoch": 6.13, + "learning_rate": 4.296040816326531e-05, + "loss": 1.3314, + "step": 289600 + }, + { + "epoch": 6.13, + "learning_rate": 4.2940000000000006e-05, + "loss": 1.3385, + "step": 289700 + }, + { + "epoch": 6.13, + "learning_rate": 4.29195918367347e-05, + "loss": 1.3387, + "step": 289800 + }, + { + "epoch": 6.14, + "learning_rate": 4.289918367346939e-05, + "loss": 1.3306, + "step": 289900 + }, + { + "epoch": 6.14, + "learning_rate": 4.287877551020408e-05, + "loss": 1.3336, + "step": 290000 + }, + { + "epoch": 6.14, + "learning_rate": 4.285836734693878e-05, + "loss": 1.336, + "step": 290100 + }, + { + "epoch": 6.14, + "learning_rate": 4.283795918367347e-05, + "loss": 1.3374, + "step": 290200 + }, + { + "epoch": 6.14, + "learning_rate": 4.281755102040816e-05, + "loss": 1.3331, + "step": 290300 + }, + { + "epoch": 6.15, + "learning_rate": 4.279714285714286e-05, + "loss": 1.3254, + "step": 290400 + }, + { + "epoch": 6.15, + "learning_rate": 4.277673469387755e-05, + "loss": 1.3313, + "step": 290500 + }, + { + "epoch": 6.15, + "learning_rate": 4.2756326530612243e-05, + "loss": 1.3255, + "step": 290600 + }, + { + "epoch": 6.15, + "learning_rate": 4.273591836734694e-05, + "loss": 1.3337, + "step": 290700 + }, + { + "epoch": 6.15, + "learning_rate": 4.271571428571429e-05, + "loss": 1.3215, + "step": 290800 + }, + { + "epoch": 6.16, + "learning_rate": 4.269530612244898e-05, + "loss": 1.3266, + "step": 290900 + }, + { + "epoch": 6.16, + "learning_rate": 4.267489795918367e-05, + "loss": 1.3328, + "step": 291000 + }, + { + "epoch": 6.16, + "learning_rate": 4.265448979591837e-05, + "loss": 1.3328, + "step": 291100 + }, + { + "epoch": 6.16, + "learning_rate": 4.263408163265306e-05, + "loss": 1.3267, + "step": 291200 + }, + { + "epoch": 6.17, + "learning_rate": 4.2613673469387754e-05, + "loss": 1.3329, + "step": 291300 + }, + { + "epoch": 6.17, + "learning_rate": 4.2593265306122446e-05, + "loss": 1.3281, + "step": 291400 + }, + { + "epoch": 6.17, + "learning_rate": 4.257285714285715e-05, + "loss": 1.3345, + "step": 291500 + }, + { + "epoch": 6.17, + "learning_rate": 4.255244897959184e-05, + "loss": 1.3294, + "step": 291600 + }, + { + "epoch": 6.17, + "learning_rate": 4.253224489795918e-05, + "loss": 1.3314, + "step": 291700 + }, + { + "epoch": 6.18, + "learning_rate": 4.2511836734693875e-05, + "loss": 1.3254, + "step": 291800 + }, + { + "epoch": 6.18, + "learning_rate": 4.249142857142857e-05, + "loss": 1.3333, + "step": 291900 + }, + { + "epoch": 6.18, + "learning_rate": 4.247102040816327e-05, + "loss": 1.3368, + "step": 292000 + }, + { + "epoch": 6.18, + "learning_rate": 4.245061224489796e-05, + "loss": 1.3384, + "step": 292100 + }, + { + "epoch": 6.18, + "learning_rate": 4.2430204081632655e-05, + "loss": 1.3312, + "step": 292200 + }, + { + "epoch": 6.19, + "learning_rate": 4.240979591836735e-05, + "loss": 1.336, + "step": 292300 + }, + { + "epoch": 6.19, + "learning_rate": 4.2389387755102045e-05, + "loss": 1.3343, + "step": 292400 + }, + { + "epoch": 6.19, + "learning_rate": 4.2368979591836736e-05, + "loss": 1.3281, + "step": 292500 + }, + { + "epoch": 6.19, + "learning_rate": 4.2348571428571435e-05, + "loss": 1.3312, + "step": 292600 + }, + { + "epoch": 6.19, + "learning_rate": 4.2328163265306126e-05, + "loss": 1.3395, + "step": 292700 + }, + { + "epoch": 6.2, + "learning_rate": 4.230775510204082e-05, + "loss": 1.3291, + "step": 292800 + }, + { + "epoch": 6.2, + "learning_rate": 4.2287346938775516e-05, + "loss": 1.3359, + "step": 292900 + }, + { + "epoch": 6.2, + "learning_rate": 4.226693877551021e-05, + "loss": 1.3228, + "step": 293000 + }, + { + "epoch": 6.2, + "learning_rate": 4.22465306122449e-05, + "loss": 1.3324, + "step": 293100 + }, + { + "epoch": 6.21, + "learning_rate": 4.222612244897959e-05, + "loss": 1.3299, + "step": 293200 + }, + { + "epoch": 6.21, + "learning_rate": 4.220571428571429e-05, + "loss": 1.3326, + "step": 293300 + }, + { + "epoch": 6.21, + "learning_rate": 4.218530612244898e-05, + "loss": 1.3344, + "step": 293400 + }, + { + "epoch": 6.21, + "learning_rate": 4.216489795918367e-05, + "loss": 1.324, + "step": 293500 + }, + { + "epoch": 6.21, + "learning_rate": 4.214448979591837e-05, + "loss": 1.324, + "step": 293600 + }, + { + "epoch": 6.22, + "learning_rate": 4.212408163265306e-05, + "loss": 1.3296, + "step": 293700 + }, + { + "epoch": 6.22, + "learning_rate": 4.2103673469387754e-05, + "loss": 1.3252, + "step": 293800 + }, + { + "epoch": 6.22, + "learning_rate": 4.208326530612245e-05, + "loss": 1.3327, + "step": 293900 + }, + { + "epoch": 6.22, + "learning_rate": 4.2062857142857144e-05, + "loss": 1.3246, + "step": 294000 + }, + { + "epoch": 6.22, + "learning_rate": 4.204265306122449e-05, + "loss": 1.3382, + "step": 294100 + }, + { + "epoch": 6.23, + "learning_rate": 4.202224489795918e-05, + "loss": 1.3306, + "step": 294200 + }, + { + "epoch": 6.23, + "learning_rate": 4.2001836734693875e-05, + "loss": 1.3293, + "step": 294300 + }, + { + "epoch": 6.23, + "learning_rate": 4.198163265306123e-05, + "loss": 1.3291, + "step": 294400 + }, + { + "epoch": 6.23, + "learning_rate": 4.196122448979592e-05, + "loss": 1.3355, + "step": 294500 + }, + { + "epoch": 6.23, + "learning_rate": 4.194081632653061e-05, + "loss": 1.3385, + "step": 294600 + }, + { + "epoch": 6.24, + "learning_rate": 4.1920408163265304e-05, + "loss": 1.3289, + "step": 294700 + }, + { + "epoch": 6.24, + "learning_rate": 4.19e-05, + "loss": 1.3301, + "step": 294800 + }, + { + "epoch": 6.24, + "learning_rate": 4.1879591836734694e-05, + "loss": 1.3195, + "step": 294900 + }, + { + "epoch": 6.24, + "learning_rate": 4.1859183673469385e-05, + "loss": 1.3366, + "step": 295000 + }, + { + "epoch": 6.25, + "learning_rate": 4.1838775510204084e-05, + "loss": 1.3306, + "step": 295100 + }, + { + "epoch": 6.25, + "learning_rate": 4.1818367346938775e-05, + "loss": 1.3331, + "step": 295200 + }, + { + "epoch": 6.25, + "learning_rate": 4.1797959183673473e-05, + "loss": 1.3288, + "step": 295300 + }, + { + "epoch": 6.25, + "learning_rate": 4.1777551020408165e-05, + "loss": 1.3346, + "step": 295400 + }, + { + "epoch": 6.25, + "learning_rate": 4.1757142857142863e-05, + "loss": 1.3198, + "step": 295500 + }, + { + "epoch": 6.26, + "learning_rate": 4.1736734693877555e-05, + "loss": 1.324, + "step": 295600 + }, + { + "epoch": 6.26, + "learning_rate": 4.1716326530612247e-05, + "loss": 1.328, + "step": 295700 + }, + { + "epoch": 6.26, + "learning_rate": 4.1695918367346945e-05, + "loss": 1.3288, + "step": 295800 + }, + { + "epoch": 6.26, + "learning_rate": 4.1675510204081637e-05, + "loss": 1.3293, + "step": 295900 + }, + { + "epoch": 6.26, + "learning_rate": 4.165510204081633e-05, + "loss": 1.3365, + "step": 296000 + }, + { + "epoch": 6.27, + "learning_rate": 4.1634693877551026e-05, + "loss": 1.3325, + "step": 296100 + }, + { + "epoch": 6.27, + "learning_rate": 4.161428571428572e-05, + "loss": 1.3273, + "step": 296200 + }, + { + "epoch": 6.27, + "learning_rate": 4.159387755102041e-05, + "loss": 1.3268, + "step": 296300 + }, + { + "epoch": 6.27, + "learning_rate": 4.15734693877551e-05, + "loss": 1.3318, + "step": 296400 + }, + { + "epoch": 6.28, + "learning_rate": 4.15530612244898e-05, + "loss": 1.3242, + "step": 296500 + }, + { + "epoch": 6.28, + "learning_rate": 4.153265306122449e-05, + "loss": 1.3341, + "step": 296600 + }, + { + "epoch": 6.28, + "learning_rate": 4.151224489795918e-05, + "loss": 1.3291, + "step": 296700 + }, + { + "epoch": 6.28, + "learning_rate": 4.149204081632653e-05, + "loss": 1.3285, + "step": 296800 + }, + { + "epoch": 6.28, + "learning_rate": 4.147163265306123e-05, + "loss": 1.3295, + "step": 296900 + }, + { + "epoch": 6.29, + "learning_rate": 4.145122448979592e-05, + "loss": 1.3319, + "step": 297000 + }, + { + "epoch": 6.29, + "learning_rate": 4.143081632653061e-05, + "loss": 1.332, + "step": 297100 + }, + { + "epoch": 6.29, + "learning_rate": 4.141040816326531e-05, + "loss": 1.3232, + "step": 297200 + }, + { + "epoch": 6.29, + "learning_rate": 4.139e-05, + "loss": 1.326, + "step": 297300 + }, + { + "epoch": 6.29, + "learning_rate": 4.1369591836734693e-05, + "loss": 1.3215, + "step": 297400 + }, + { + "epoch": 6.3, + "learning_rate": 4.1349183673469385e-05, + "loss": 1.3292, + "step": 297500 + }, + { + "epoch": 6.3, + "learning_rate": 4.132877551020408e-05, + "loss": 1.3246, + "step": 297600 + }, + { + "epoch": 6.3, + "learning_rate": 4.1308367346938775e-05, + "loss": 1.3297, + "step": 297700 + }, + { + "epoch": 6.3, + "learning_rate": 4.1287959183673467e-05, + "loss": 1.3299, + "step": 297800 + }, + { + "epoch": 6.3, + "learning_rate": 4.1267551020408165e-05, + "loss": 1.3294, + "step": 297900 + }, + { + "epoch": 6.31, + "learning_rate": 4.1247142857142856e-05, + "loss": 1.324, + "step": 298000 + }, + { + "epoch": 6.31, + "learning_rate": 4.1226734693877555e-05, + "loss": 1.3301, + "step": 298100 + }, + { + "epoch": 6.31, + "learning_rate": 4.1206326530612246e-05, + "loss": 1.335, + "step": 298200 + }, + { + "epoch": 6.31, + "learning_rate": 4.1185918367346945e-05, + "loss": 1.3306, + "step": 298300 + }, + { + "epoch": 6.32, + "learning_rate": 4.1165510204081636e-05, + "loss": 1.3263, + "step": 298400 + }, + { + "epoch": 6.32, + "learning_rate": 4.114510204081633e-05, + "loss": 1.3282, + "step": 298500 + }, + { + "epoch": 6.32, + "learning_rate": 4.1124693877551026e-05, + "loss": 1.3322, + "step": 298600 + }, + { + "epoch": 6.32, + "learning_rate": 4.110428571428572e-05, + "loss": 1.3278, + "step": 298700 + }, + { + "epoch": 6.32, + "learning_rate": 4.108387755102041e-05, + "loss": 1.3255, + "step": 298800 + }, + { + "epoch": 6.33, + "learning_rate": 4.106346938775511e-05, + "loss": 1.3229, + "step": 298900 + }, + { + "epoch": 6.33, + "learning_rate": 4.10430612244898e-05, + "loss": 1.3337, + "step": 299000 + }, + { + "epoch": 6.33, + "learning_rate": 4.102265306122449e-05, + "loss": 1.3301, + "step": 299100 + }, + { + "epoch": 6.33, + "learning_rate": 4.100224489795918e-05, + "loss": 1.3248, + "step": 299200 + }, + { + "epoch": 6.33, + "learning_rate": 4.098183673469388e-05, + "loss": 1.3266, + "step": 299300 + }, + { + "epoch": 6.34, + "learning_rate": 4.096142857142857e-05, + "loss": 1.3258, + "step": 299400 + }, + { + "epoch": 6.34, + "learning_rate": 4.0941020408163264e-05, + "loss": 1.3233, + "step": 299500 + }, + { + "epoch": 6.34, + "learning_rate": 4.092061224489796e-05, + "loss": 1.3349, + "step": 299600 + }, + { + "epoch": 6.34, + "learning_rate": 4.0900204081632654e-05, + "loss": 1.3269, + "step": 299700 + }, + { + "epoch": 6.34, + "learning_rate": 4.0879795918367346e-05, + "loss": 1.3183, + "step": 299800 + }, + { + "epoch": 6.35, + "learning_rate": 4.0859387755102044e-05, + "loss": 1.3233, + "step": 299900 + }, + { + "epoch": 6.35, + "learning_rate": 4.0838979591836736e-05, + "loss": 1.3278, + "step": 300000 + }, + { + "epoch": 6.35, + "learning_rate": 4.081857142857143e-05, + "loss": 1.3298, + "step": 300100 + }, + { + "epoch": 6.35, + "learning_rate": 4.079816326530612e-05, + "loss": 1.3276, + "step": 300200 + }, + { + "epoch": 6.36, + "learning_rate": 4.0777755102040824e-05, + "loss": 1.3239, + "step": 300300 + }, + { + "epoch": 6.36, + "learning_rate": 4.0757346938775515e-05, + "loss": 1.3323, + "step": 300400 + }, + { + "epoch": 6.36, + "learning_rate": 4.073693877551021e-05, + "loss": 1.329, + "step": 300500 + }, + { + "epoch": 6.36, + "learning_rate": 4.07165306122449e-05, + "loss": 1.3304, + "step": 300600 + }, + { + "epoch": 6.36, + "learning_rate": 4.06961224489796e-05, + "loss": 1.3177, + "step": 300700 + }, + { + "epoch": 6.37, + "learning_rate": 4.067571428571429e-05, + "loss": 1.3218, + "step": 300800 + }, + { + "epoch": 6.37, + "learning_rate": 4.065530612244898e-05, + "loss": 1.3271, + "step": 300900 + }, + { + "epoch": 6.37, + "learning_rate": 4.063489795918368e-05, + "loss": 1.3224, + "step": 301000 + }, + { + "epoch": 6.37, + "learning_rate": 4.0614693877551026e-05, + "loss": 1.3159, + "step": 301100 + }, + { + "epoch": 6.37, + "learning_rate": 4.059428571428572e-05, + "loss": 1.3284, + "step": 301200 + }, + { + "epoch": 6.38, + "learning_rate": 4.057387755102041e-05, + "loss": 1.3276, + "step": 301300 + }, + { + "epoch": 6.38, + "learning_rate": 4.055346938775511e-05, + "loss": 1.3288, + "step": 301400 + }, + { + "epoch": 6.38, + "learning_rate": 4.05330612244898e-05, + "loss": 1.3234, + "step": 301500 + }, + { + "epoch": 6.38, + "learning_rate": 4.051265306122449e-05, + "loss": 1.3255, + "step": 301600 + }, + { + "epoch": 6.39, + "learning_rate": 4.049224489795919e-05, + "loss": 1.3282, + "step": 301700 + }, + { + "epoch": 6.39, + "learning_rate": 4.047183673469388e-05, + "loss": 1.3234, + "step": 301800 + }, + { + "epoch": 6.39, + "learning_rate": 4.045142857142857e-05, + "loss": 1.3254, + "step": 301900 + }, + { + "epoch": 6.39, + "learning_rate": 4.0431020408163264e-05, + "loss": 1.3226, + "step": 302000 + }, + { + "epoch": 6.39, + "learning_rate": 4.041061224489796e-05, + "loss": 1.3284, + "step": 302100 + }, + { + "epoch": 6.4, + "learning_rate": 4.0390204081632654e-05, + "loss": 1.3249, + "step": 302200 + }, + { + "epoch": 6.4, + "learning_rate": 4.0369795918367345e-05, + "loss": 1.3257, + "step": 302300 + }, + { + "epoch": 6.4, + "learning_rate": 4.0349387755102044e-05, + "loss": 1.3319, + "step": 302400 + }, + { + "epoch": 6.4, + "learning_rate": 4.0328979591836735e-05, + "loss": 1.3203, + "step": 302500 + }, + { + "epoch": 6.4, + "learning_rate": 4.030857142857143e-05, + "loss": 1.3167, + "step": 302600 + }, + { + "epoch": 6.41, + "learning_rate": 4.0288163265306125e-05, + "loss": 1.3221, + "step": 302700 + }, + { + "epoch": 6.41, + "learning_rate": 4.026775510204082e-05, + "loss": 1.3205, + "step": 302800 + }, + { + "epoch": 6.41, + "learning_rate": 4.024734693877551e-05, + "loss": 1.328, + "step": 302900 + }, + { + "epoch": 6.41, + "learning_rate": 4.02269387755102e-05, + "loss": 1.3292, + "step": 303000 + }, + { + "epoch": 6.41, + "learning_rate": 4.0206530612244905e-05, + "loss": 1.3229, + "step": 303100 + }, + { + "epoch": 6.42, + "learning_rate": 4.0186326530612246e-05, + "loss": 1.3306, + "step": 303200 + }, + { + "epoch": 6.42, + "learning_rate": 4.016591836734694e-05, + "loss": 1.326, + "step": 303300 + }, + { + "epoch": 6.42, + "learning_rate": 4.014551020408163e-05, + "loss": 1.3293, + "step": 303400 + }, + { + "epoch": 6.42, + "learning_rate": 4.012510204081633e-05, + "loss": 1.3209, + "step": 303500 + }, + { + "epoch": 6.43, + "learning_rate": 4.0104693877551026e-05, + "loss": 1.3244, + "step": 303600 + }, + { + "epoch": 6.43, + "learning_rate": 4.008428571428572e-05, + "loss": 1.3213, + "step": 303700 + }, + { + "epoch": 6.43, + "learning_rate": 4.006387755102041e-05, + "loss": 1.3211, + "step": 303800 + }, + { + "epoch": 6.43, + "learning_rate": 4.004346938775511e-05, + "loss": 1.3206, + "step": 303900 + }, + { + "epoch": 6.43, + "learning_rate": 4.00230612244898e-05, + "loss": 1.3316, + "step": 304000 + }, + { + "epoch": 6.44, + "learning_rate": 4.000265306122449e-05, + "loss": 1.3205, + "step": 304100 + }, + { + "epoch": 6.44, + "learning_rate": 3.998224489795919e-05, + "loss": 1.3256, + "step": 304200 + }, + { + "epoch": 6.44, + "learning_rate": 3.996183673469388e-05, + "loss": 1.3185, + "step": 304300 + }, + { + "epoch": 6.44, + "learning_rate": 3.994142857142857e-05, + "loss": 1.3267, + "step": 304400 + }, + { + "epoch": 6.44, + "learning_rate": 3.992102040816327e-05, + "loss": 1.3304, + "step": 304500 + }, + { + "epoch": 6.45, + "learning_rate": 3.990061224489796e-05, + "loss": 1.3165, + "step": 304600 + }, + { + "epoch": 6.45, + "learning_rate": 3.9880204081632654e-05, + "loss": 1.326, + "step": 304700 + }, + { + "epoch": 6.45, + "learning_rate": 3.9859795918367345e-05, + "loss": 1.3202, + "step": 304800 + }, + { + "epoch": 6.45, + "learning_rate": 3.9839387755102044e-05, + "loss": 1.3171, + "step": 304900 + }, + { + "epoch": 6.46, + "learning_rate": 3.9818979591836735e-05, + "loss": 1.323, + "step": 305000 + }, + { + "epoch": 6.46, + "learning_rate": 3.979857142857143e-05, + "loss": 1.321, + "step": 305100 + }, + { + "epoch": 6.46, + "learning_rate": 3.9778163265306125e-05, + "loss": 1.3249, + "step": 305200 + }, + { + "epoch": 6.46, + "learning_rate": 3.975775510204082e-05, + "loss": 1.3184, + "step": 305300 + }, + { + "epoch": 6.46, + "learning_rate": 3.973734693877551e-05, + "loss": 1.3143, + "step": 305400 + }, + { + "epoch": 6.47, + "learning_rate": 3.971693877551021e-05, + "loss": 1.3207, + "step": 305500 + }, + { + "epoch": 6.47, + "learning_rate": 3.96965306122449e-05, + "loss": 1.3271, + "step": 305600 + }, + { + "epoch": 6.47, + "learning_rate": 3.967612244897959e-05, + "loss": 1.3215, + "step": 305700 + }, + { + "epoch": 6.47, + "learning_rate": 3.965571428571429e-05, + "loss": 1.3137, + "step": 305800 + }, + { + "epoch": 6.47, + "learning_rate": 3.9635306122448987e-05, + "loss": 1.3218, + "step": 305900 + }, + { + "epoch": 6.48, + "learning_rate": 3.961489795918368e-05, + "loss": 1.3197, + "step": 306000 + }, + { + "epoch": 6.48, + "learning_rate": 3.959448979591837e-05, + "loss": 1.3151, + "step": 306100 + }, + { + "epoch": 6.48, + "learning_rate": 3.957428571428571e-05, + "loss": 1.3235, + "step": 306200 + }, + { + "epoch": 6.48, + "learning_rate": 3.955387755102041e-05, + "loss": 1.3356, + "step": 306300 + }, + { + "epoch": 6.48, + "learning_rate": 3.953346938775511e-05, + "loss": 1.3146, + "step": 306400 + }, + { + "epoch": 6.49, + "learning_rate": 3.95130612244898e-05, + "loss": 1.3164, + "step": 306500 + }, + { + "epoch": 6.49, + "learning_rate": 3.94926530612245e-05, + "loss": 1.3195, + "step": 306600 + }, + { + "epoch": 6.49, + "learning_rate": 3.947224489795919e-05, + "loss": 1.3221, + "step": 306700 + }, + { + "epoch": 6.49, + "learning_rate": 3.945183673469388e-05, + "loss": 1.3194, + "step": 306800 + }, + { + "epoch": 6.5, + "learning_rate": 3.943142857142857e-05, + "loss": 1.3209, + "step": 306900 + }, + { + "epoch": 6.5, + "learning_rate": 3.941102040816327e-05, + "loss": 1.3189, + "step": 307000 + }, + { + "epoch": 6.5, + "learning_rate": 3.939061224489796e-05, + "loss": 1.3166, + "step": 307100 + }, + { + "epoch": 6.5, + "learning_rate": 3.9370204081632653e-05, + "loss": 1.3185, + "step": 307200 + }, + { + "epoch": 6.5, + "learning_rate": 3.934979591836735e-05, + "loss": 1.3212, + "step": 307300 + }, + { + "epoch": 6.51, + "learning_rate": 3.932938775510204e-05, + "loss": 1.3183, + "step": 307400 + }, + { + "epoch": 6.51, + "learning_rate": 3.9308979591836735e-05, + "loss": 1.3144, + "step": 307500 + }, + { + "epoch": 6.51, + "learning_rate": 3.928857142857143e-05, + "loss": 1.3148, + "step": 307600 + }, + { + "epoch": 6.51, + "learning_rate": 3.9268163265306125e-05, + "loss": 1.3184, + "step": 307700 + }, + { + "epoch": 6.51, + "learning_rate": 3.9247755102040816e-05, + "loss": 1.3162, + "step": 307800 + }, + { + "epoch": 6.52, + "learning_rate": 3.922734693877551e-05, + "loss": 1.3134, + "step": 307900 + }, + { + "epoch": 6.52, + "learning_rate": 3.9206938775510206e-05, + "loss": 1.3265, + "step": 308000 + }, + { + "epoch": 6.52, + "learning_rate": 3.91865306122449e-05, + "loss": 1.3164, + "step": 308100 + }, + { + "epoch": 6.52, + "learning_rate": 3.916612244897959e-05, + "loss": 1.3278, + "step": 308200 + }, + { + "epoch": 6.52, + "learning_rate": 3.914571428571429e-05, + "loss": 1.3117, + "step": 308300 + }, + { + "epoch": 6.53, + "learning_rate": 3.912530612244898e-05, + "loss": 1.3206, + "step": 308400 + }, + { + "epoch": 6.53, + "learning_rate": 3.910489795918367e-05, + "loss": 1.312, + "step": 308500 + }, + { + "epoch": 6.53, + "learning_rate": 3.908448979591837e-05, + "loss": 1.3182, + "step": 308600 + }, + { + "epoch": 6.53, + "learning_rate": 3.906408163265307e-05, + "loss": 1.3242, + "step": 308700 + }, + { + "epoch": 6.54, + "learning_rate": 3.904367346938776e-05, + "loss": 1.3133, + "step": 308800 + }, + { + "epoch": 6.54, + "learning_rate": 3.902326530612245e-05, + "loss": 1.3246, + "step": 308900 + }, + { + "epoch": 6.54, + "learning_rate": 3.900285714285715e-05, + "loss": 1.319, + "step": 309000 + }, + { + "epoch": 6.54, + "learning_rate": 3.898244897959184e-05, + "loss": 1.3172, + "step": 309100 + }, + { + "epoch": 6.54, + "learning_rate": 3.896204081632653e-05, + "loss": 1.3197, + "step": 309200 + }, + { + "epoch": 6.55, + "learning_rate": 3.8941632653061224e-05, + "loss": 1.3124, + "step": 309300 + }, + { + "epoch": 6.55, + "learning_rate": 3.892122448979592e-05, + "loss": 1.3159, + "step": 309400 + }, + { + "epoch": 6.55, + "learning_rate": 3.8900816326530614e-05, + "loss": 1.3187, + "step": 309500 + }, + { + "epoch": 6.55, + "learning_rate": 3.8880408163265306e-05, + "loss": 1.3232, + "step": 309600 + }, + { + "epoch": 6.55, + "learning_rate": 3.8860000000000004e-05, + "loss": 1.3144, + "step": 309700 + }, + { + "epoch": 6.56, + "learning_rate": 3.8839591836734696e-05, + "loss": 1.3214, + "step": 309800 + }, + { + "epoch": 6.56, + "learning_rate": 3.881918367346939e-05, + "loss": 1.3186, + "step": 309900 + }, + { + "epoch": 6.56, + "learning_rate": 3.8798775510204086e-05, + "loss": 1.3225, + "step": 310000 + }, + { + "epoch": 6.56, + "learning_rate": 3.877836734693878e-05, + "loss": 1.3161, + "step": 310100 + }, + { + "epoch": 6.57, + "learning_rate": 3.875795918367347e-05, + "loss": 1.3167, + "step": 310200 + }, + { + "epoch": 6.57, + "learning_rate": 3.873755102040817e-05, + "loss": 1.3172, + "step": 310300 + }, + { + "epoch": 6.57, + "learning_rate": 3.871714285714286e-05, + "loss": 1.3186, + "step": 310400 + }, + { + "epoch": 6.57, + "learning_rate": 3.8696938775510206e-05, + "loss": 1.3218, + "step": 310500 + }, + { + "epoch": 6.57, + "learning_rate": 3.86765306122449e-05, + "loss": 1.3192, + "step": 310600 + }, + { + "epoch": 6.58, + "learning_rate": 3.865612244897959e-05, + "loss": 1.31, + "step": 310700 + }, + { + "epoch": 6.58, + "learning_rate": 3.863571428571429e-05, + "loss": 1.3186, + "step": 310800 + }, + { + "epoch": 6.58, + "learning_rate": 3.861530612244898e-05, + "loss": 1.3096, + "step": 310900 + }, + { + "epoch": 6.58, + "learning_rate": 3.859489795918367e-05, + "loss": 1.3186, + "step": 311000 + }, + { + "epoch": 6.58, + "learning_rate": 3.857448979591837e-05, + "loss": 1.3208, + "step": 311100 + }, + { + "epoch": 6.59, + "learning_rate": 3.855408163265306e-05, + "loss": 1.3205, + "step": 311200 + }, + { + "epoch": 6.59, + "learning_rate": 3.853367346938775e-05, + "loss": 1.3187, + "step": 311300 + }, + { + "epoch": 6.59, + "learning_rate": 3.85134693877551e-05, + "loss": 1.3121, + "step": 311400 + }, + { + "epoch": 6.59, + "learning_rate": 3.84930612244898e-05, + "loss": 1.3198, + "step": 311500 + }, + { + "epoch": 6.59, + "learning_rate": 3.847265306122449e-05, + "loss": 1.319, + "step": 311600 + }, + { + "epoch": 6.6, + "learning_rate": 3.845224489795918e-05, + "loss": 1.3173, + "step": 311700 + }, + { + "epoch": 6.6, + "learning_rate": 3.843183673469388e-05, + "loss": 1.3244, + "step": 311800 + }, + { + "epoch": 6.6, + "learning_rate": 3.841142857142858e-05, + "loss": 1.315, + "step": 311900 + }, + { + "epoch": 6.6, + "learning_rate": 3.839102040816327e-05, + "loss": 1.3205, + "step": 312000 + }, + { + "epoch": 6.61, + "learning_rate": 3.837061224489796e-05, + "loss": 1.3151, + "step": 312100 + }, + { + "epoch": 6.61, + "learning_rate": 3.835020408163266e-05, + "loss": 1.3139, + "step": 312200 + }, + { + "epoch": 6.61, + "learning_rate": 3.832979591836735e-05, + "loss": 1.3142, + "step": 312300 + }, + { + "epoch": 6.61, + "learning_rate": 3.830938775510204e-05, + "loss": 1.3106, + "step": 312400 + }, + { + "epoch": 6.61, + "learning_rate": 3.8288979591836735e-05, + "loss": 1.3155, + "step": 312500 + }, + { + "epoch": 6.62, + "learning_rate": 3.826857142857143e-05, + "loss": 1.3141, + "step": 312600 + }, + { + "epoch": 6.62, + "learning_rate": 3.8248163265306125e-05, + "loss": 1.3135, + "step": 312700 + }, + { + "epoch": 6.62, + "learning_rate": 3.8227755102040816e-05, + "loss": 1.3199, + "step": 312800 + }, + { + "epoch": 6.62, + "learning_rate": 3.8207346938775514e-05, + "loss": 1.3079, + "step": 312900 + }, + { + "epoch": 6.62, + "learning_rate": 3.8186938775510206e-05, + "loss": 1.3156, + "step": 313000 + }, + { + "epoch": 6.63, + "learning_rate": 3.81665306122449e-05, + "loss": 1.3202, + "step": 313100 + }, + { + "epoch": 6.63, + "learning_rate": 3.8146122448979596e-05, + "loss": 1.3177, + "step": 313200 + }, + { + "epoch": 6.63, + "learning_rate": 3.812571428571429e-05, + "loss": 1.3097, + "step": 313300 + }, + { + "epoch": 6.63, + "learning_rate": 3.810530612244898e-05, + "loss": 1.3173, + "step": 313400 + }, + { + "epoch": 6.63, + "learning_rate": 3.808489795918367e-05, + "loss": 1.3215, + "step": 313500 + }, + { + "epoch": 6.64, + "learning_rate": 3.806448979591837e-05, + "loss": 1.3134, + "step": 313600 + }, + { + "epoch": 6.64, + "learning_rate": 3.804408163265306e-05, + "loss": 1.3152, + "step": 313700 + }, + { + "epoch": 6.64, + "learning_rate": 3.802367346938775e-05, + "loss": 1.3109, + "step": 313800 + }, + { + "epoch": 6.64, + "learning_rate": 3.800326530612245e-05, + "loss": 1.3119, + "step": 313900 + }, + { + "epoch": 6.65, + "learning_rate": 3.798285714285714e-05, + "loss": 1.3127, + "step": 314000 + }, + { + "epoch": 6.65, + "learning_rate": 3.7962448979591834e-05, + "loss": 1.3182, + "step": 314100 + }, + { + "epoch": 6.65, + "learning_rate": 3.794204081632653e-05, + "loss": 1.3185, + "step": 314200 + }, + { + "epoch": 6.65, + "learning_rate": 3.792163265306123e-05, + "loss": 1.3087, + "step": 314300 + }, + { + "epoch": 6.65, + "learning_rate": 3.790122448979592e-05, + "loss": 1.3229, + "step": 314400 + }, + { + "epoch": 6.66, + "learning_rate": 3.7880816326530614e-05, + "loss": 1.3167, + "step": 314500 + }, + { + "epoch": 6.66, + "learning_rate": 3.786040816326531e-05, + "loss": 1.3142, + "step": 314600 + }, + { + "epoch": 6.66, + "learning_rate": 3.7840000000000004e-05, + "loss": 1.3124, + "step": 314700 + }, + { + "epoch": 6.66, + "learning_rate": 3.781979591836735e-05, + "loss": 1.3226, + "step": 314800 + }, + { + "epoch": 6.66, + "learning_rate": 3.779938775510204e-05, + "loss": 1.3105, + "step": 314900 + }, + { + "epoch": 6.67, + "learning_rate": 3.777897959183674e-05, + "loss": 1.3145, + "step": 315000 + }, + { + "epoch": 6.67, + "learning_rate": 3.775857142857143e-05, + "loss": 1.3144, + "step": 315100 + }, + { + "epoch": 6.67, + "learning_rate": 3.773836734693878e-05, + "loss": 1.3147, + "step": 315200 + }, + { + "epoch": 6.67, + "learning_rate": 3.771795918367347e-05, + "loss": 1.3084, + "step": 315300 + }, + { + "epoch": 6.68, + "learning_rate": 3.769755102040817e-05, + "loss": 1.3124, + "step": 315400 + }, + { + "epoch": 6.68, + "learning_rate": 3.767714285714286e-05, + "loss": 1.3171, + "step": 315500 + }, + { + "epoch": 6.68, + "learning_rate": 3.7656734693877553e-05, + "loss": 1.3134, + "step": 315600 + }, + { + "epoch": 6.68, + "learning_rate": 3.7636326530612245e-05, + "loss": 1.3104, + "step": 315700 + }, + { + "epoch": 6.68, + "learning_rate": 3.761591836734694e-05, + "loss": 1.3171, + "step": 315800 + }, + { + "epoch": 6.69, + "learning_rate": 3.7595510204081635e-05, + "loss": 1.3114, + "step": 315900 + }, + { + "epoch": 6.69, + "learning_rate": 3.7575102040816327e-05, + "loss": 1.3132, + "step": 316000 + }, + { + "epoch": 6.69, + "learning_rate": 3.7554693877551025e-05, + "loss": 1.3143, + "step": 316100 + }, + { + "epoch": 6.69, + "learning_rate": 3.7534285714285716e-05, + "loss": 1.3085, + "step": 316200 + }, + { + "epoch": 6.69, + "learning_rate": 3.751387755102041e-05, + "loss": 1.3163, + "step": 316300 + }, + { + "epoch": 6.7, + "learning_rate": 3.7493469387755106e-05, + "loss": 1.3171, + "step": 316400 + }, + { + "epoch": 6.7, + "learning_rate": 3.74730612244898e-05, + "loss": 1.3019, + "step": 316500 + }, + { + "epoch": 6.7, + "learning_rate": 3.745265306122449e-05, + "loss": 1.3107, + "step": 316600 + }, + { + "epoch": 6.7, + "learning_rate": 3.743224489795918e-05, + "loss": 1.314, + "step": 316700 + }, + { + "epoch": 6.7, + "learning_rate": 3.741183673469388e-05, + "loss": 1.3068, + "step": 316800 + }, + { + "epoch": 6.71, + "learning_rate": 3.739142857142857e-05, + "loss": 1.3174, + "step": 316900 + }, + { + "epoch": 6.71, + "learning_rate": 3.737102040816326e-05, + "loss": 1.3094, + "step": 317000 + }, + { + "epoch": 6.71, + "learning_rate": 3.735061224489796e-05, + "loss": 1.3182, + "step": 317100 + }, + { + "epoch": 6.71, + "learning_rate": 3.733020408163265e-05, + "loss": 1.3152, + "step": 317200 + }, + { + "epoch": 6.72, + "learning_rate": 3.7309795918367344e-05, + "loss": 1.3123, + "step": 317300 + }, + { + "epoch": 6.72, + "learning_rate": 3.728938775510204e-05, + "loss": 1.3154, + "step": 317400 + }, + { + "epoch": 6.72, + "learning_rate": 3.726897959183674e-05, + "loss": 1.3166, + "step": 317500 + }, + { + "epoch": 6.72, + "learning_rate": 3.724857142857143e-05, + "loss": 1.3154, + "step": 317600 + }, + { + "epoch": 6.72, + "learning_rate": 3.7228163265306124e-05, + "loss": 1.3135, + "step": 317700 + }, + { + "epoch": 6.73, + "learning_rate": 3.720775510204082e-05, + "loss": 1.3085, + "step": 317800 + }, + { + "epoch": 6.73, + "learning_rate": 3.7187346938775514e-05, + "loss": 1.3059, + "step": 317900 + }, + { + "epoch": 6.73, + "learning_rate": 3.7166938775510206e-05, + "loss": 1.311, + "step": 318000 + }, + { + "epoch": 6.73, + "learning_rate": 3.7146530612244904e-05, + "loss": 1.3134, + "step": 318100 + }, + { + "epoch": 6.73, + "learning_rate": 3.7126122448979596e-05, + "loss": 1.311, + "step": 318200 + }, + { + "epoch": 6.74, + "learning_rate": 3.710571428571429e-05, + "loss": 1.3127, + "step": 318300 + }, + { + "epoch": 6.74, + "learning_rate": 3.708530612244898e-05, + "loss": 1.3133, + "step": 318400 + }, + { + "epoch": 6.74, + "learning_rate": 3.706489795918368e-05, + "loss": 1.3104, + "step": 318500 + }, + { + "epoch": 6.74, + "learning_rate": 3.704448979591837e-05, + "loss": 1.3058, + "step": 318600 + }, + { + "epoch": 6.74, + "learning_rate": 3.702408163265306e-05, + "loss": 1.3101, + "step": 318700 + }, + { + "epoch": 6.75, + "learning_rate": 3.700367346938776e-05, + "loss": 1.3102, + "step": 318800 + }, + { + "epoch": 6.75, + "learning_rate": 3.698326530612245e-05, + "loss": 1.3169, + "step": 318900 + }, + { + "epoch": 6.75, + "learning_rate": 3.696285714285714e-05, + "loss": 1.3097, + "step": 319000 + }, + { + "epoch": 6.75, + "learning_rate": 3.694244897959184e-05, + "loss": 1.3084, + "step": 319100 + }, + { + "epoch": 6.76, + "learning_rate": 3.692204081632653e-05, + "loss": 1.3071, + "step": 319200 + }, + { + "epoch": 6.76, + "learning_rate": 3.690163265306122e-05, + "loss": 1.3156, + "step": 319300 + }, + { + "epoch": 6.76, + "learning_rate": 3.6881224489795915e-05, + "loss": 1.3128, + "step": 319400 + }, + { + "epoch": 6.76, + "learning_rate": 3.686081632653061e-05, + "loss": 1.3094, + "step": 319500 + }, + { + "epoch": 6.76, + "learning_rate": 3.684061224489796e-05, + "loss": 1.3172, + "step": 319600 + }, + { + "epoch": 6.77, + "learning_rate": 3.682020408163265e-05, + "loss": 1.3105, + "step": 319700 + }, + { + "epoch": 6.77, + "learning_rate": 3.6799795918367344e-05, + "loss": 1.3099, + "step": 319800 + }, + { + "epoch": 6.77, + "learning_rate": 3.677938775510204e-05, + "loss": 1.3064, + "step": 319900 + }, + { + "epoch": 6.77, + "learning_rate": 3.6758979591836734e-05, + "loss": 1.3207, + "step": 320000 + }, + { + "epoch": 6.77, + "learning_rate": 3.6738571428571426e-05, + "loss": 1.3059, + "step": 320100 + }, + { + "epoch": 6.78, + "learning_rate": 3.6718163265306124e-05, + "loss": 1.3141, + "step": 320200 + }, + { + "epoch": 6.78, + "learning_rate": 3.669775510204082e-05, + "loss": 1.3138, + "step": 320300 + }, + { + "epoch": 6.78, + "learning_rate": 3.6677346938775514e-05, + "loss": 1.3121, + "step": 320400 + }, + { + "epoch": 6.78, + "learning_rate": 3.6656938775510205e-05, + "loss": 1.3124, + "step": 320500 + }, + { + "epoch": 6.79, + "learning_rate": 3.6636530612244904e-05, + "loss": 1.3116, + "step": 320600 + }, + { + "epoch": 6.79, + "learning_rate": 3.6616122448979595e-05, + "loss": 1.3113, + "step": 320700 + }, + { + "epoch": 6.79, + "learning_rate": 3.659571428571429e-05, + "loss": 1.3055, + "step": 320800 + }, + { + "epoch": 6.79, + "learning_rate": 3.6575306122448985e-05, + "loss": 1.3192, + "step": 320900 + }, + { + "epoch": 6.79, + "learning_rate": 3.655489795918368e-05, + "loss": 1.3088, + "step": 321000 + }, + { + "epoch": 6.8, + "learning_rate": 3.653448979591837e-05, + "loss": 1.3121, + "step": 321100 + }, + { + "epoch": 6.8, + "learning_rate": 3.651408163265306e-05, + "loss": 1.3061, + "step": 321200 + }, + { + "epoch": 6.8, + "learning_rate": 3.649367346938776e-05, + "loss": 1.3089, + "step": 321300 + }, + { + "epoch": 6.8, + "learning_rate": 3.647326530612245e-05, + "loss": 1.3049, + "step": 321400 + }, + { + "epoch": 6.8, + "learning_rate": 3.645285714285714e-05, + "loss": 1.3, + "step": 321500 + }, + { + "epoch": 6.81, + "learning_rate": 3.643244897959184e-05, + "loss": 1.311, + "step": 321600 + }, + { + "epoch": 6.81, + "learning_rate": 3.641204081632653e-05, + "loss": 1.3216, + "step": 321700 + }, + { + "epoch": 6.81, + "learning_rate": 3.639163265306122e-05, + "loss": 1.3031, + "step": 321800 + }, + { + "epoch": 6.81, + "learning_rate": 3.637142857142857e-05, + "loss": 1.3118, + "step": 321900 + }, + { + "epoch": 6.81, + "learning_rate": 3.635102040816327e-05, + "loss": 1.3147, + "step": 322000 + }, + { + "epoch": 6.82, + "learning_rate": 3.633061224489796e-05, + "loss": 1.3097, + "step": 322100 + }, + { + "epoch": 6.82, + "learning_rate": 3.631020408163265e-05, + "loss": 1.3123, + "step": 322200 + }, + { + "epoch": 6.82, + "learning_rate": 3.628979591836735e-05, + "loss": 1.3118, + "step": 322300 + }, + { + "epoch": 6.82, + "learning_rate": 3.626938775510204e-05, + "loss": 1.3127, + "step": 322400 + }, + { + "epoch": 6.83, + "learning_rate": 3.6248979591836734e-05, + "loss": 1.3042, + "step": 322500 + }, + { + "epoch": 6.83, + "learning_rate": 3.6228571428571425e-05, + "loss": 1.311, + "step": 322600 + }, + { + "epoch": 6.83, + "learning_rate": 3.6208163265306124e-05, + "loss": 1.3126, + "step": 322700 + }, + { + "epoch": 6.83, + "learning_rate": 3.6187755102040815e-05, + "loss": 1.3107, + "step": 322800 + }, + { + "epoch": 6.83, + "learning_rate": 3.616734693877551e-05, + "loss": 1.3204, + "step": 322900 + }, + { + "epoch": 6.84, + "learning_rate": 3.6146938775510205e-05, + "loss": 1.3122, + "step": 323000 + }, + { + "epoch": 6.84, + "learning_rate": 3.6126530612244904e-05, + "loss": 1.3043, + "step": 323100 + }, + { + "epoch": 6.84, + "learning_rate": 3.6106122448979595e-05, + "loss": 1.3094, + "step": 323200 + }, + { + "epoch": 6.84, + "learning_rate": 3.608571428571429e-05, + "loss": 1.3133, + "step": 323300 + }, + { + "epoch": 6.84, + "learning_rate": 3.6065306122448985e-05, + "loss": 1.3099, + "step": 323400 + }, + { + "epoch": 6.85, + "learning_rate": 3.604489795918368e-05, + "loss": 1.3089, + "step": 323500 + }, + { + "epoch": 6.85, + "learning_rate": 3.602448979591837e-05, + "loss": 1.3064, + "step": 323600 + }, + { + "epoch": 6.85, + "learning_rate": 3.600408163265307e-05, + "loss": 1.301, + "step": 323700 + }, + { + "epoch": 6.85, + "learning_rate": 3.598367346938776e-05, + "loss": 1.3134, + "step": 323800 + }, + { + "epoch": 6.86, + "learning_rate": 3.596326530612245e-05, + "loss": 1.3116, + "step": 323900 + }, + { + "epoch": 6.86, + "learning_rate": 3.594285714285714e-05, + "loss": 1.3117, + "step": 324000 + }, + { + "epoch": 6.86, + "learning_rate": 3.592244897959184e-05, + "loss": 1.3162, + "step": 324100 + }, + { + "epoch": 6.86, + "learning_rate": 3.590204081632653e-05, + "loss": 1.3099, + "step": 324200 + }, + { + "epoch": 6.86, + "learning_rate": 3.588163265306122e-05, + "loss": 1.315, + "step": 324300 + }, + { + "epoch": 6.87, + "learning_rate": 3.586122448979592e-05, + "loss": 1.3109, + "step": 324400 + }, + { + "epoch": 6.87, + "learning_rate": 3.584081632653061e-05, + "loss": 1.3116, + "step": 324500 + }, + { + "epoch": 6.87, + "learning_rate": 3.5820408163265304e-05, + "loss": 1.3056, + "step": 324600 + }, + { + "epoch": 6.87, + "learning_rate": 3.58e-05, + "loss": 1.3085, + "step": 324700 + }, + { + "epoch": 6.87, + "learning_rate": 3.577979591836735e-05, + "loss": 1.3095, + "step": 324800 + }, + { + "epoch": 6.88, + "learning_rate": 3.575938775510204e-05, + "loss": 1.3091, + "step": 324900 + }, + { + "epoch": 6.88, + "learning_rate": 3.5738979591836734e-05, + "loss": 1.3172, + "step": 325000 + }, + { + "epoch": 6.88, + "learning_rate": 3.571857142857143e-05, + "loss": 1.3093, + "step": 325100 + }, + { + "epoch": 6.88, + "learning_rate": 3.5698163265306124e-05, + "loss": 1.3083, + "step": 325200 + }, + { + "epoch": 6.88, + "learning_rate": 3.5677755102040815e-05, + "loss": 1.3097, + "step": 325300 + }, + { + "epoch": 6.89, + "learning_rate": 3.565734693877551e-05, + "loss": 1.3098, + "step": 325400 + }, + { + "epoch": 6.89, + "learning_rate": 3.5636938775510205e-05, + "loss": 1.2987, + "step": 325500 + }, + { + "epoch": 6.89, + "learning_rate": 3.56165306122449e-05, + "loss": 1.3139, + "step": 325600 + }, + { + "epoch": 6.89, + "learning_rate": 3.559612244897959e-05, + "loss": 1.3119, + "step": 325700 + }, + { + "epoch": 6.9, + "learning_rate": 3.557571428571429e-05, + "loss": 1.3087, + "step": 325800 + }, + { + "epoch": 6.9, + "learning_rate": 3.5555306122448985e-05, + "loss": 1.3107, + "step": 325900 + }, + { + "epoch": 6.9, + "learning_rate": 3.5534897959183677e-05, + "loss": 1.3146, + "step": 326000 + }, + { + "epoch": 6.9, + "learning_rate": 3.551448979591837e-05, + "loss": 1.3141, + "step": 326100 + }, + { + "epoch": 6.9, + "learning_rate": 3.5494081632653066e-05, + "loss": 1.3091, + "step": 326200 + }, + { + "epoch": 6.91, + "learning_rate": 3.547367346938776e-05, + "loss": 1.3104, + "step": 326300 + }, + { + "epoch": 6.91, + "learning_rate": 3.545326530612245e-05, + "loss": 1.3057, + "step": 326400 + }, + { + "epoch": 6.91, + "learning_rate": 3.543285714285715e-05, + "loss": 1.3078, + "step": 326500 + }, + { + "epoch": 6.91, + "learning_rate": 3.541244897959184e-05, + "loss": 1.3033, + "step": 326600 + }, + { + "epoch": 6.91, + "learning_rate": 3.539204081632653e-05, + "loss": 1.3005, + "step": 326700 + }, + { + "epoch": 6.92, + "learning_rate": 3.537163265306123e-05, + "loss": 1.3044, + "step": 326800 + }, + { + "epoch": 6.92, + "learning_rate": 3.535122448979592e-05, + "loss": 1.3139, + "step": 326900 + }, + { + "epoch": 6.92, + "learning_rate": 3.533081632653061e-05, + "loss": 1.3003, + "step": 327000 + }, + { + "epoch": 6.92, + "learning_rate": 3.5310408163265304e-05, + "loss": 1.2999, + "step": 327100 + }, + { + "epoch": 6.92, + "learning_rate": 3.529e-05, + "loss": 1.3091, + "step": 327200 + }, + { + "epoch": 6.93, + "learning_rate": 3.5269591836734694e-05, + "loss": 1.306, + "step": 327300 + }, + { + "epoch": 6.93, + "learning_rate": 3.5249183673469386e-05, + "loss": 1.3001, + "step": 327400 + }, + { + "epoch": 6.93, + "learning_rate": 3.5228775510204084e-05, + "loss": 1.3039, + "step": 327500 + }, + { + "epoch": 6.93, + "learning_rate": 3.5208367346938776e-05, + "loss": 1.3114, + "step": 327600 + }, + { + "epoch": 6.94, + "learning_rate": 3.518795918367347e-05, + "loss": 1.3011, + "step": 327700 + }, + { + "epoch": 6.94, + "learning_rate": 3.5167755102040815e-05, + "loss": 1.31, + "step": 327800 + }, + { + "epoch": 6.94, + "learning_rate": 3.514734693877551e-05, + "loss": 1.3121, + "step": 327900 + }, + { + "epoch": 6.94, + "learning_rate": 3.5126938775510205e-05, + "loss": 1.3061, + "step": 328000 + }, + { + "epoch": 6.94, + "learning_rate": 3.5106530612244896e-05, + "loss": 1.303, + "step": 328100 + }, + { + "epoch": 6.95, + "learning_rate": 3.5086122448979595e-05, + "loss": 1.309, + "step": 328200 + }, + { + "epoch": 6.95, + "learning_rate": 3.5065714285714286e-05, + "loss": 1.3098, + "step": 328300 + }, + { + "epoch": 6.95, + "learning_rate": 3.504530612244898e-05, + "loss": 1.3101, + "step": 328400 + }, + { + "epoch": 6.95, + "learning_rate": 3.5024897959183676e-05, + "loss": 1.3033, + "step": 328500 + }, + { + "epoch": 6.95, + "learning_rate": 3.5004489795918375e-05, + "loss": 1.308, + "step": 328600 + }, + { + "epoch": 6.96, + "learning_rate": 3.4984081632653066e-05, + "loss": 1.3056, + "step": 328700 + }, + { + "epoch": 6.96, + "learning_rate": 3.496367346938776e-05, + "loss": 1.3109, + "step": 328800 + }, + { + "epoch": 6.96, + "learning_rate": 3.494326530612245e-05, + "loss": 1.3041, + "step": 328900 + }, + { + "epoch": 6.96, + "learning_rate": 3.492285714285715e-05, + "loss": 1.3021, + "step": 329000 + }, + { + "epoch": 6.97, + "learning_rate": 3.490244897959184e-05, + "loss": 1.306, + "step": 329100 + }, + { + "epoch": 6.97, + "learning_rate": 3.488204081632653e-05, + "loss": 1.3007, + "step": 329200 + }, + { + "epoch": 6.97, + "learning_rate": 3.486163265306123e-05, + "loss": 1.3063, + "step": 329300 + }, + { + "epoch": 6.97, + "learning_rate": 3.484122448979592e-05, + "loss": 1.3068, + "step": 329400 + }, + { + "epoch": 6.97, + "learning_rate": 3.482081632653061e-05, + "loss": 1.3096, + "step": 329500 + }, + { + "epoch": 6.98, + "learning_rate": 3.480040816326531e-05, + "loss": 1.3119, + "step": 329600 + }, + { + "epoch": 6.98, + "learning_rate": 3.478e-05, + "loss": 1.3046, + "step": 329700 + }, + { + "epoch": 6.98, + "learning_rate": 3.4759591836734694e-05, + "loss": 1.3103, + "step": 329800 + }, + { + "epoch": 6.98, + "learning_rate": 3.4739183673469386e-05, + "loss": 1.3052, + "step": 329900 + }, + { + "epoch": 6.98, + "learning_rate": 3.4718775510204084e-05, + "loss": 1.3097, + "step": 330000 + }, + { + "epoch": 6.99, + "learning_rate": 3.4698367346938776e-05, + "loss": 1.3139, + "step": 330100 + }, + { + "epoch": 6.99, + "learning_rate": 3.467795918367347e-05, + "loss": 1.3062, + "step": 330200 + }, + { + "epoch": 6.99, + "learning_rate": 3.4657551020408165e-05, + "loss": 1.3089, + "step": 330300 + }, + { + "epoch": 6.99, + "learning_rate": 3.463714285714286e-05, + "loss": 1.3098, + "step": 330400 + }, + { + "epoch": 6.99, + "learning_rate": 3.4616938775510205e-05, + "loss": 1.3033, + "step": 330500 + }, + { + "epoch": 7.0, + "learning_rate": 3.4596530612244896e-05, + "loss": 1.307, + "step": 330600 + }, + { + "epoch": 7.0, + "learning_rate": 3.4576122448979595e-05, + "loss": 1.3047, + "step": 330700 + }, + { + "epoch": 7.0, + "learning_rate": 3.4555714285714286e-05, + "loss": 1.3163, + "step": 330800 + }, + { + "epoch": 7.0, + "learning_rate": 3.453530612244898e-05, + "loss": 1.3105, + "step": 330900 + }, + { + "epoch": 7.01, + "learning_rate": 3.4514897959183676e-05, + "loss": 1.3024, + "step": 331000 + }, + { + "epoch": 7.01, + "learning_rate": 3.449448979591837e-05, + "loss": 1.2983, + "step": 331100 + }, + { + "epoch": 7.01, + "learning_rate": 3.447408163265306e-05, + "loss": 1.3086, + "step": 331200 + }, + { + "epoch": 7.01, + "learning_rate": 3.445367346938776e-05, + "loss": 1.301, + "step": 331300 + }, + { + "epoch": 7.01, + "learning_rate": 3.4433265306122456e-05, + "loss": 1.3044, + "step": 331400 + }, + { + "epoch": 7.02, + "learning_rate": 3.441285714285715e-05, + "loss": 1.3011, + "step": 331500 + }, + { + "epoch": 7.02, + "learning_rate": 3.439244897959184e-05, + "loss": 1.3021, + "step": 331600 + }, + { + "epoch": 7.02, + "learning_rate": 3.437204081632653e-05, + "loss": 1.3072, + "step": 331700 + }, + { + "epoch": 7.02, + "learning_rate": 3.435163265306123e-05, + "loss": 1.2992, + "step": 331800 + }, + { + "epoch": 7.02, + "learning_rate": 3.433122448979592e-05, + "loss": 1.2948, + "step": 331900 + }, + { + "epoch": 7.03, + "learning_rate": 3.431081632653061e-05, + "loss": 1.3023, + "step": 332000 + }, + { + "epoch": 7.03, + "learning_rate": 3.429040816326531e-05, + "loss": 1.2973, + "step": 332100 + }, + { + "epoch": 7.03, + "learning_rate": 3.427e-05, + "loss": 1.308, + "step": 332200 + }, + { + "epoch": 7.03, + "learning_rate": 3.4249591836734694e-05, + "loss": 1.2983, + "step": 332300 + }, + { + "epoch": 7.03, + "learning_rate": 3.422918367346939e-05, + "loss": 1.3004, + "step": 332400 + }, + { + "epoch": 7.04, + "learning_rate": 3.4208775510204084e-05, + "loss": 1.2989, + "step": 332500 + }, + { + "epoch": 7.04, + "learning_rate": 3.4188367346938775e-05, + "loss": 1.298, + "step": 332600 + }, + { + "epoch": 7.04, + "learning_rate": 3.416795918367347e-05, + "loss": 1.2955, + "step": 332700 + }, + { + "epoch": 7.04, + "learning_rate": 3.4147551020408165e-05, + "loss": 1.3023, + "step": 332800 + }, + { + "epoch": 7.05, + "learning_rate": 3.412714285714286e-05, + "loss": 1.3, + "step": 332900 + }, + { + "epoch": 7.05, + "learning_rate": 3.410673469387755e-05, + "loss": 1.2944, + "step": 333000 + }, + { + "epoch": 7.05, + "learning_rate": 3.408632653061225e-05, + "loss": 1.3052, + "step": 333100 + }, + { + "epoch": 7.05, + "learning_rate": 3.406591836734694e-05, + "loss": 1.3006, + "step": 333200 + }, + { + "epoch": 7.05, + "learning_rate": 3.4045714285714286e-05, + "loss": 1.2956, + "step": 333300 + }, + { + "epoch": 7.06, + "learning_rate": 3.402530612244898e-05, + "loss": 1.3024, + "step": 333400 + }, + { + "epoch": 7.06, + "learning_rate": 3.4004897959183676e-05, + "loss": 1.3043, + "step": 333500 + }, + { + "epoch": 7.06, + "learning_rate": 3.398448979591837e-05, + "loss": 1.2999, + "step": 333600 + }, + { + "epoch": 7.06, + "learning_rate": 3.396408163265306e-05, + "loss": 1.3052, + "step": 333700 + }, + { + "epoch": 7.06, + "learning_rate": 3.394367346938776e-05, + "loss": 1.2954, + "step": 333800 + }, + { + "epoch": 7.07, + "learning_rate": 3.392326530612245e-05, + "loss": 1.3117, + "step": 333900 + }, + { + "epoch": 7.07, + "learning_rate": 3.390285714285714e-05, + "loss": 1.3017, + "step": 334000 + }, + { + "epoch": 7.07, + "learning_rate": 3.388244897959184e-05, + "loss": 1.3048, + "step": 334100 + }, + { + "epoch": 7.07, + "learning_rate": 3.386204081632654e-05, + "loss": 1.299, + "step": 334200 + }, + { + "epoch": 7.08, + "learning_rate": 3.384163265306123e-05, + "loss": 1.3038, + "step": 334300 + }, + { + "epoch": 7.08, + "learning_rate": 3.382122448979592e-05, + "loss": 1.3089, + "step": 334400 + }, + { + "epoch": 7.08, + "learning_rate": 3.380081632653061e-05, + "loss": 1.2962, + "step": 334500 + }, + { + "epoch": 7.08, + "learning_rate": 3.378040816326531e-05, + "loss": 1.2973, + "step": 334600 + }, + { + "epoch": 7.08, + "learning_rate": 3.376e-05, + "loss": 1.3028, + "step": 334700 + }, + { + "epoch": 7.09, + "learning_rate": 3.3739591836734694e-05, + "loss": 1.2994, + "step": 334800 + }, + { + "epoch": 7.09, + "learning_rate": 3.371918367346939e-05, + "loss": 1.2988, + "step": 334900 + }, + { + "epoch": 7.09, + "learning_rate": 3.3698775510204084e-05, + "loss": 1.301, + "step": 335000 + }, + { + "epoch": 7.09, + "learning_rate": 3.3678367346938775e-05, + "loss": 1.3051, + "step": 335100 + }, + { + "epoch": 7.09, + "learning_rate": 3.3657959183673474e-05, + "loss": 1.3061, + "step": 335200 + }, + { + "epoch": 7.1, + "learning_rate": 3.3637551020408165e-05, + "loss": 1.3036, + "step": 335300 + }, + { + "epoch": 7.1, + "learning_rate": 3.361714285714286e-05, + "loss": 1.3026, + "step": 335400 + }, + { + "epoch": 7.1, + "learning_rate": 3.3596734693877555e-05, + "loss": 1.3025, + "step": 335500 + }, + { + "epoch": 7.1, + "learning_rate": 3.357632653061225e-05, + "loss": 1.2965, + "step": 335600 + }, + { + "epoch": 7.1, + "learning_rate": 3.355591836734694e-05, + "loss": 1.2959, + "step": 335700 + }, + { + "epoch": 7.11, + "learning_rate": 3.353551020408163e-05, + "loss": 1.2977, + "step": 335800 + }, + { + "epoch": 7.11, + "learning_rate": 3.351510204081633e-05, + "loss": 1.305, + "step": 335900 + }, + { + "epoch": 7.11, + "learning_rate": 3.349469387755102e-05, + "loss": 1.3095, + "step": 336000 + }, + { + "epoch": 7.11, + "learning_rate": 3.347428571428571e-05, + "loss": 1.2972, + "step": 336100 + }, + { + "epoch": 7.12, + "learning_rate": 3.345387755102041e-05, + "loss": 1.293, + "step": 336200 + }, + { + "epoch": 7.12, + "learning_rate": 3.343346938775511e-05, + "loss": 1.3013, + "step": 336300 + }, + { + "epoch": 7.12, + "learning_rate": 3.34130612244898e-05, + "loss": 1.2942, + "step": 336400 + }, + { + "epoch": 7.12, + "learning_rate": 3.339265306122449e-05, + "loss": 1.3028, + "step": 336500 + }, + { + "epoch": 7.12, + "learning_rate": 3.337224489795919e-05, + "loss": 1.2994, + "step": 336600 + }, + { + "epoch": 7.13, + "learning_rate": 3.335183673469388e-05, + "loss": 1.2935, + "step": 336700 + }, + { + "epoch": 7.13, + "learning_rate": 3.333142857142857e-05, + "loss": 1.2984, + "step": 336800 + }, + { + "epoch": 7.13, + "learning_rate": 3.331102040816327e-05, + "loss": 1.293, + "step": 336900 + }, + { + "epoch": 7.13, + "learning_rate": 3.329061224489796e-05, + "loss": 1.2963, + "step": 337000 + }, + { + "epoch": 7.13, + "learning_rate": 3.3270204081632654e-05, + "loss": 1.2949, + "step": 337100 + }, + { + "epoch": 7.14, + "learning_rate": 3.3249795918367346e-05, + "loss": 1.3018, + "step": 337200 + }, + { + "epoch": 7.14, + "learning_rate": 3.3229387755102044e-05, + "loss": 1.301, + "step": 337300 + }, + { + "epoch": 7.14, + "learning_rate": 3.320918367346939e-05, + "loss": 1.2994, + "step": 337400 + }, + { + "epoch": 7.14, + "learning_rate": 3.318877551020408e-05, + "loss": 1.2977, + "step": 337500 + }, + { + "epoch": 7.14, + "learning_rate": 3.3168367346938775e-05, + "loss": 1.2964, + "step": 337600 + }, + { + "epoch": 7.15, + "learning_rate": 3.314795918367347e-05, + "loss": 1.3011, + "step": 337700 + }, + { + "epoch": 7.15, + "learning_rate": 3.3127551020408165e-05, + "loss": 1.2955, + "step": 337800 + }, + { + "epoch": 7.15, + "learning_rate": 3.3107142857142856e-05, + "loss": 1.3005, + "step": 337900 + }, + { + "epoch": 7.15, + "learning_rate": 3.3086734693877555e-05, + "loss": 1.2973, + "step": 338000 + }, + { + "epoch": 7.16, + "learning_rate": 3.3066326530612246e-05, + "loss": 1.2966, + "step": 338100 + }, + { + "epoch": 7.16, + "learning_rate": 3.304591836734694e-05, + "loss": 1.309, + "step": 338200 + }, + { + "epoch": 7.16, + "learning_rate": 3.3025510204081636e-05, + "loss": 1.301, + "step": 338300 + }, + { + "epoch": 7.16, + "learning_rate": 3.300510204081633e-05, + "loss": 1.2924, + "step": 338400 + }, + { + "epoch": 7.16, + "learning_rate": 3.298469387755102e-05, + "loss": 1.2964, + "step": 338500 + }, + { + "epoch": 7.17, + "learning_rate": 3.296428571428571e-05, + "loss": 1.3052, + "step": 338600 + }, + { + "epoch": 7.17, + "learning_rate": 3.294387755102041e-05, + "loss": 1.2924, + "step": 338700 + }, + { + "epoch": 7.17, + "learning_rate": 3.29234693877551e-05, + "loss": 1.3044, + "step": 338800 + }, + { + "epoch": 7.17, + "learning_rate": 3.290306122448979e-05, + "loss": 1.2986, + "step": 338900 + }, + { + "epoch": 7.17, + "learning_rate": 3.288265306122449e-05, + "loss": 1.2973, + "step": 339000 + }, + { + "epoch": 7.18, + "learning_rate": 3.286224489795919e-05, + "loss": 1.2934, + "step": 339100 + }, + { + "epoch": 7.18, + "learning_rate": 3.284183673469388e-05, + "loss": 1.2973, + "step": 339200 + }, + { + "epoch": 7.18, + "learning_rate": 3.282142857142857e-05, + "loss": 1.3004, + "step": 339300 + }, + { + "epoch": 7.18, + "learning_rate": 3.280102040816327e-05, + "loss": 1.3021, + "step": 339400 + }, + { + "epoch": 7.19, + "learning_rate": 3.278061224489796e-05, + "loss": 1.2899, + "step": 339500 + }, + { + "epoch": 7.19, + "learning_rate": 3.276040816326531e-05, + "loss": 1.2916, + "step": 339600 + }, + { + "epoch": 7.19, + "learning_rate": 3.274e-05, + "loss": 1.2915, + "step": 339700 + }, + { + "epoch": 7.19, + "learning_rate": 3.27195918367347e-05, + "loss": 1.2867, + "step": 339800 + }, + { + "epoch": 7.19, + "learning_rate": 3.269918367346939e-05, + "loss": 1.2958, + "step": 339900 + }, + { + "epoch": 7.2, + "learning_rate": 3.267877551020408e-05, + "loss": 1.2957, + "step": 340000 + }, + { + "epoch": 7.2, + "learning_rate": 3.265836734693878e-05, + "loss": 1.2919, + "step": 340100 + }, + { + "epoch": 7.2, + "learning_rate": 3.263795918367347e-05, + "loss": 1.2972, + "step": 340200 + }, + { + "epoch": 7.2, + "learning_rate": 3.2617551020408165e-05, + "loss": 1.2976, + "step": 340300 + }, + { + "epoch": 7.2, + "learning_rate": 3.2597142857142856e-05, + "loss": 1.2939, + "step": 340400 + }, + { + "epoch": 7.21, + "learning_rate": 3.2576734693877555e-05, + "loss": 1.2917, + "step": 340500 + }, + { + "epoch": 7.21, + "learning_rate": 3.2556326530612246e-05, + "loss": 1.2998, + "step": 340600 + }, + { + "epoch": 7.21, + "learning_rate": 3.253591836734694e-05, + "loss": 1.3076, + "step": 340700 + }, + { + "epoch": 7.21, + "learning_rate": 3.2515510204081636e-05, + "loss": 1.3019, + "step": 340800 + }, + { + "epoch": 7.21, + "learning_rate": 3.2495306122448984e-05, + "loss": 1.2993, + "step": 340900 + }, + { + "epoch": 7.22, + "learning_rate": 3.2474897959183675e-05, + "loss": 1.3, + "step": 341000 + }, + { + "epoch": 7.22, + "learning_rate": 3.245448979591837e-05, + "loss": 1.3019, + "step": 341100 + }, + { + "epoch": 7.22, + "learning_rate": 3.2434081632653065e-05, + "loss": 1.297, + "step": 341200 + }, + { + "epoch": 7.22, + "learning_rate": 3.241367346938776e-05, + "loss": 1.2942, + "step": 341300 + }, + { + "epoch": 7.23, + "learning_rate": 3.239326530612245e-05, + "loss": 1.3009, + "step": 341400 + }, + { + "epoch": 7.23, + "learning_rate": 3.237285714285715e-05, + "loss": 1.2914, + "step": 341500 + }, + { + "epoch": 7.23, + "learning_rate": 3.235244897959184e-05, + "loss": 1.3011, + "step": 341600 + }, + { + "epoch": 7.23, + "learning_rate": 3.233204081632653e-05, + "loss": 1.2988, + "step": 341700 + }, + { + "epoch": 7.23, + "learning_rate": 3.231163265306122e-05, + "loss": 1.2929, + "step": 341800 + }, + { + "epoch": 7.24, + "learning_rate": 3.229122448979592e-05, + "loss": 1.2994, + "step": 341900 + }, + { + "epoch": 7.24, + "learning_rate": 3.227081632653061e-05, + "loss": 1.2908, + "step": 342000 + }, + { + "epoch": 7.24, + "learning_rate": 3.22504081632653e-05, + "loss": 1.296, + "step": 342100 + }, + { + "epoch": 7.24, + "learning_rate": 3.223e-05, + "loss": 1.3053, + "step": 342200 + }, + { + "epoch": 7.24, + "learning_rate": 3.220959183673469e-05, + "loss": 1.2978, + "step": 342300 + }, + { + "epoch": 7.25, + "learning_rate": 3.218918367346939e-05, + "loss": 1.291, + "step": 342400 + }, + { + "epoch": 7.25, + "learning_rate": 3.216877551020408e-05, + "loss": 1.2957, + "step": 342500 + }, + { + "epoch": 7.25, + "learning_rate": 3.214836734693878e-05, + "loss": 1.2954, + "step": 342600 + }, + { + "epoch": 7.25, + "learning_rate": 3.212816326530612e-05, + "loss": 1.2906, + "step": 342700 + }, + { + "epoch": 7.26, + "learning_rate": 3.2107755102040814e-05, + "loss": 1.303, + "step": 342800 + }, + { + "epoch": 7.26, + "learning_rate": 3.208734693877551e-05, + "loss": 1.294, + "step": 342900 + }, + { + "epoch": 7.26, + "learning_rate": 3.206693877551021e-05, + "loss": 1.2978, + "step": 343000 + }, + { + "epoch": 7.26, + "learning_rate": 3.20465306122449e-05, + "loss": 1.3041, + "step": 343100 + }, + { + "epoch": 7.26, + "learning_rate": 3.2026122448979594e-05, + "loss": 1.2926, + "step": 343200 + }, + { + "epoch": 7.27, + "learning_rate": 3.200571428571429e-05, + "loss": 1.2953, + "step": 343300 + }, + { + "epoch": 7.27, + "learning_rate": 3.1985306122448984e-05, + "loss": 1.2961, + "step": 343400 + }, + { + "epoch": 7.27, + "learning_rate": 3.1964897959183675e-05, + "loss": 1.2883, + "step": 343500 + }, + { + "epoch": 7.27, + "learning_rate": 3.194448979591837e-05, + "loss": 1.2871, + "step": 343600 + }, + { + "epoch": 7.27, + "learning_rate": 3.1924081632653065e-05, + "loss": 1.2943, + "step": 343700 + }, + { + "epoch": 7.28, + "learning_rate": 3.190367346938776e-05, + "loss": 1.2927, + "step": 343800 + }, + { + "epoch": 7.28, + "learning_rate": 3.188326530612245e-05, + "loss": 1.296, + "step": 343900 + }, + { + "epoch": 7.28, + "learning_rate": 3.1862857142857147e-05, + "loss": 1.3018, + "step": 344000 + }, + { + "epoch": 7.28, + "learning_rate": 3.184244897959184e-05, + "loss": 1.2908, + "step": 344100 + }, + { + "epoch": 7.28, + "learning_rate": 3.182204081632653e-05, + "loss": 1.2863, + "step": 344200 + }, + { + "epoch": 7.29, + "learning_rate": 3.180163265306123e-05, + "loss": 1.2922, + "step": 344300 + }, + { + "epoch": 7.29, + "learning_rate": 3.178122448979592e-05, + "loss": 1.2941, + "step": 344400 + }, + { + "epoch": 7.29, + "learning_rate": 3.176081632653061e-05, + "loss": 1.2915, + "step": 344500 + }, + { + "epoch": 7.29, + "learning_rate": 3.17404081632653e-05, + "loss": 1.2908, + "step": 344600 + }, + { + "epoch": 7.3, + "learning_rate": 3.172e-05, + "loss": 1.2966, + "step": 344700 + }, + { + "epoch": 7.3, + "learning_rate": 3.169959183673469e-05, + "loss": 1.2973, + "step": 344800 + }, + { + "epoch": 7.3, + "learning_rate": 3.1679183673469384e-05, + "loss": 1.2879, + "step": 344900 + }, + { + "epoch": 7.3, + "learning_rate": 3.165877551020408e-05, + "loss": 1.297, + "step": 345000 + }, + { + "epoch": 7.3, + "learning_rate": 3.163836734693878e-05, + "loss": 1.2983, + "step": 345100 + }, + { + "epoch": 7.31, + "learning_rate": 3.161795918367347e-05, + "loss": 1.295, + "step": 345200 + }, + { + "epoch": 7.31, + "learning_rate": 3.1597551020408164e-05, + "loss": 1.291, + "step": 345300 + }, + { + "epoch": 7.31, + "learning_rate": 3.157714285714286e-05, + "loss": 1.2889, + "step": 345400 + }, + { + "epoch": 7.31, + "learning_rate": 3.1556734693877554e-05, + "loss": 1.2996, + "step": 345500 + }, + { + "epoch": 7.31, + "learning_rate": 3.1536326530612246e-05, + "loss": 1.2922, + "step": 345600 + }, + { + "epoch": 7.32, + "learning_rate": 3.1515918367346944e-05, + "loss": 1.2941, + "step": 345700 + }, + { + "epoch": 7.32, + "learning_rate": 3.1495510204081636e-05, + "loss": 1.2859, + "step": 345800 + }, + { + "epoch": 7.32, + "learning_rate": 3.147510204081633e-05, + "loss": 1.2844, + "step": 345900 + }, + { + "epoch": 7.32, + "learning_rate": 3.1454693877551026e-05, + "loss": 1.2953, + "step": 346000 + }, + { + "epoch": 7.32, + "learning_rate": 3.143428571428572e-05, + "loss": 1.2956, + "step": 346100 + }, + { + "epoch": 7.33, + "learning_rate": 3.141387755102041e-05, + "loss": 1.29, + "step": 346200 + }, + { + "epoch": 7.33, + "learning_rate": 3.13934693877551e-05, + "loss": 1.2898, + "step": 346300 + }, + { + "epoch": 7.33, + "learning_rate": 3.13730612244898e-05, + "loss": 1.2926, + "step": 346400 + }, + { + "epoch": 7.33, + "learning_rate": 3.135265306122449e-05, + "loss": 1.2953, + "step": 346500 + }, + { + "epoch": 7.34, + "learning_rate": 3.133224489795918e-05, + "loss": 1.2867, + "step": 346600 + }, + { + "epoch": 7.34, + "learning_rate": 3.131183673469388e-05, + "loss": 1.288, + "step": 346700 + }, + { + "epoch": 7.34, + "learning_rate": 3.129142857142857e-05, + "loss": 1.2917, + "step": 346800 + }, + { + "epoch": 7.34, + "learning_rate": 3.1271020408163264e-05, + "loss": 1.2945, + "step": 346900 + }, + { + "epoch": 7.34, + "learning_rate": 3.125061224489796e-05, + "loss": 1.2923, + "step": 347000 + }, + { + "epoch": 7.35, + "learning_rate": 3.1230204081632653e-05, + "loss": 1.2895, + "step": 347100 + }, + { + "epoch": 7.35, + "learning_rate": 3.1209795918367345e-05, + "loss": 1.2969, + "step": 347200 + }, + { + "epoch": 7.35, + "learning_rate": 3.118938775510204e-05, + "loss": 1.3009, + "step": 347300 + }, + { + "epoch": 7.35, + "learning_rate": 3.116897959183674e-05, + "loss": 1.2961, + "step": 347400 + }, + { + "epoch": 7.35, + "learning_rate": 3.114857142857143e-05, + "loss": 1.2925, + "step": 347500 + }, + { + "epoch": 7.36, + "learning_rate": 3.1128163265306125e-05, + "loss": 1.294, + "step": 347600 + }, + { + "epoch": 7.36, + "learning_rate": 3.1107755102040817e-05, + "loss": 1.2896, + "step": 347700 + }, + { + "epoch": 7.36, + "learning_rate": 3.1087346938775515e-05, + "loss": 1.2941, + "step": 347800 + }, + { + "epoch": 7.36, + "learning_rate": 3.106714285714286e-05, + "loss": 1.2905, + "step": 347900 + }, + { + "epoch": 7.37, + "learning_rate": 3.1046734693877554e-05, + "loss": 1.2898, + "step": 348000 + }, + { + "epoch": 7.37, + "learning_rate": 3.1026326530612246e-05, + "loss": 1.2947, + "step": 348100 + }, + { + "epoch": 7.37, + "learning_rate": 3.1005918367346944e-05, + "loss": 1.2925, + "step": 348200 + }, + { + "epoch": 7.37, + "learning_rate": 3.0985510204081636e-05, + "loss": 1.2983, + "step": 348300 + }, + { + "epoch": 7.37, + "learning_rate": 3.096510204081633e-05, + "loss": 1.292, + "step": 348400 + }, + { + "epoch": 7.38, + "learning_rate": 3.0944693877551026e-05, + "loss": 1.2885, + "step": 348500 + }, + { + "epoch": 7.38, + "learning_rate": 3.092428571428572e-05, + "loss": 1.3001, + "step": 348600 + }, + { + "epoch": 7.38, + "learning_rate": 3.090387755102041e-05, + "loss": 1.2919, + "step": 348700 + }, + { + "epoch": 7.38, + "learning_rate": 3.088346938775511e-05, + "loss": 1.292, + "step": 348800 + }, + { + "epoch": 7.38, + "learning_rate": 3.08630612244898e-05, + "loss": 1.2971, + "step": 348900 + }, + { + "epoch": 7.39, + "learning_rate": 3.084265306122449e-05, + "loss": 1.2983, + "step": 349000 + }, + { + "epoch": 7.39, + "learning_rate": 3.082224489795918e-05, + "loss": 1.2907, + "step": 349100 + }, + { + "epoch": 7.39, + "learning_rate": 3.080183673469388e-05, + "loss": 1.2945, + "step": 349200 + }, + { + "epoch": 7.39, + "learning_rate": 3.078142857142857e-05, + "loss": 1.2935, + "step": 349300 + }, + { + "epoch": 7.39, + "learning_rate": 3.076102040816326e-05, + "loss": 1.2901, + "step": 349400 + }, + { + "epoch": 7.4, + "learning_rate": 3.074061224489796e-05, + "loss": 1.2951, + "step": 349500 + }, + { + "epoch": 7.4, + "learning_rate": 3.072020408163265e-05, + "loss": 1.2873, + "step": 349600 + }, + { + "epoch": 7.4, + "learning_rate": 3.0699795918367345e-05, + "loss": 1.2834, + "step": 349700 + }, + { + "epoch": 7.4, + "learning_rate": 3.067938775510204e-05, + "loss": 1.293, + "step": 349800 + }, + { + "epoch": 7.41, + "learning_rate": 3.0658979591836735e-05, + "loss": 1.2874, + "step": 349900 + }, + { + "epoch": 7.41, + "learning_rate": 3.063877551020408e-05, + "loss": 1.2897, + "step": 350000 + }, + { + "epoch": 7.41, + "learning_rate": 3.0618367346938774e-05, + "loss": 1.2959, + "step": 350100 + }, + { + "epoch": 7.41, + "learning_rate": 3.059795918367347e-05, + "loss": 1.293, + "step": 350200 + }, + { + "epoch": 7.41, + "learning_rate": 3.0577551020408164e-05, + "loss": 1.2925, + "step": 350300 + }, + { + "epoch": 7.42, + "learning_rate": 3.0557142857142855e-05, + "loss": 1.285, + "step": 350400 + }, + { + "epoch": 7.42, + "learning_rate": 3.053673469387755e-05, + "loss": 1.2925, + "step": 350500 + }, + { + "epoch": 7.42, + "learning_rate": 3.0516326530612242e-05, + "loss": 1.2962, + "step": 350600 + }, + { + "epoch": 7.42, + "learning_rate": 3.0495918367346944e-05, + "loss": 1.2846, + "step": 350700 + }, + { + "epoch": 7.42, + "learning_rate": 3.0475510204081635e-05, + "loss": 1.2923, + "step": 350800 + }, + { + "epoch": 7.43, + "learning_rate": 3.045510204081633e-05, + "loss": 1.2885, + "step": 350900 + }, + { + "epoch": 7.43, + "learning_rate": 3.0434693877551025e-05, + "loss": 1.285, + "step": 351000 + }, + { + "epoch": 7.43, + "learning_rate": 3.0414285714285717e-05, + "loss": 1.2884, + "step": 351100 + }, + { + "epoch": 7.43, + "learning_rate": 3.0393877551020412e-05, + "loss": 1.2922, + "step": 351200 + }, + { + "epoch": 7.43, + "learning_rate": 3.0373469387755103e-05, + "loss": 1.2928, + "step": 351300 + }, + { + "epoch": 7.44, + "learning_rate": 3.03530612244898e-05, + "loss": 1.2952, + "step": 351400 + }, + { + "epoch": 7.44, + "learning_rate": 3.0332653061224493e-05, + "loss": 1.2903, + "step": 351500 + }, + { + "epoch": 7.44, + "learning_rate": 3.0312244897959185e-05, + "loss": 1.292, + "step": 351600 + }, + { + "epoch": 7.44, + "learning_rate": 3.029183673469388e-05, + "loss": 1.2913, + "step": 351700 + }, + { + "epoch": 7.45, + "learning_rate": 3.027142857142857e-05, + "loss": 1.2856, + "step": 351800 + }, + { + "epoch": 7.45, + "learning_rate": 3.0251020408163266e-05, + "loss": 1.2961, + "step": 351900 + }, + { + "epoch": 7.45, + "learning_rate": 3.023061224489796e-05, + "loss": 1.2861, + "step": 352000 + }, + { + "epoch": 7.45, + "learning_rate": 3.0210204081632653e-05, + "loss": 1.2824, + "step": 352100 + }, + { + "epoch": 7.45, + "learning_rate": 3.0189795918367348e-05, + "loss": 1.2957, + "step": 352200 + }, + { + "epoch": 7.46, + "learning_rate": 3.016938775510204e-05, + "loss": 1.29, + "step": 352300 + }, + { + "epoch": 7.46, + "learning_rate": 3.0148979591836735e-05, + "loss": 1.2917, + "step": 352400 + }, + { + "epoch": 7.46, + "learning_rate": 3.012857142857143e-05, + "loss": 1.295, + "step": 352500 + }, + { + "epoch": 7.46, + "learning_rate": 3.010816326530612e-05, + "loss": 1.2948, + "step": 352600 + }, + { + "epoch": 7.46, + "learning_rate": 3.0087755102040816e-05, + "loss": 1.2868, + "step": 352700 + }, + { + "epoch": 7.47, + "learning_rate": 3.0067346938775508e-05, + "loss": 1.2895, + "step": 352800 + }, + { + "epoch": 7.47, + "learning_rate": 3.0046938775510203e-05, + "loss": 1.2917, + "step": 352900 + }, + { + "epoch": 7.47, + "learning_rate": 3.00265306122449e-05, + "loss": 1.2942, + "step": 353000 + }, + { + "epoch": 7.47, + "learning_rate": 3.0006122448979596e-05, + "loss": 1.2845, + "step": 353100 + }, + { + "epoch": 7.48, + "learning_rate": 2.9985714285714288e-05, + "loss": 1.2915, + "step": 353200 + }, + { + "epoch": 7.48, + "learning_rate": 2.9965306122448983e-05, + "loss": 1.2916, + "step": 353300 + }, + { + "epoch": 7.48, + "learning_rate": 2.9944897959183678e-05, + "loss": 1.2915, + "step": 353400 + }, + { + "epoch": 7.48, + "learning_rate": 2.992448979591837e-05, + "loss": 1.2914, + "step": 353500 + }, + { + "epoch": 7.48, + "learning_rate": 2.9904081632653064e-05, + "loss": 1.2877, + "step": 353600 + }, + { + "epoch": 7.49, + "learning_rate": 2.988367346938776e-05, + "loss": 1.2855, + "step": 353700 + }, + { + "epoch": 7.49, + "learning_rate": 2.986326530612245e-05, + "loss": 1.2848, + "step": 353800 + }, + { + "epoch": 7.49, + "learning_rate": 2.9842857142857146e-05, + "loss": 1.2856, + "step": 353900 + }, + { + "epoch": 7.49, + "learning_rate": 2.9822448979591837e-05, + "loss": 1.286, + "step": 354000 + }, + { + "epoch": 7.49, + "learning_rate": 2.9802040816326532e-05, + "loss": 1.2871, + "step": 354100 + }, + { + "epoch": 7.5, + "learning_rate": 2.9781632653061227e-05, + "loss": 1.2923, + "step": 354200 + }, + { + "epoch": 7.5, + "learning_rate": 2.9761428571428575e-05, + "loss": 1.2878, + "step": 354300 + }, + { + "epoch": 7.5, + "learning_rate": 2.9741020408163266e-05, + "loss": 1.2877, + "step": 354400 + }, + { + "epoch": 7.5, + "learning_rate": 2.972061224489796e-05, + "loss": 1.2887, + "step": 354500 + }, + { + "epoch": 7.5, + "learning_rate": 2.9700204081632653e-05, + "loss": 1.2844, + "step": 354600 + }, + { + "epoch": 7.51, + "learning_rate": 2.9679795918367348e-05, + "loss": 1.2941, + "step": 354700 + }, + { + "epoch": 7.51, + "learning_rate": 2.9659387755102043e-05, + "loss": 1.2875, + "step": 354800 + }, + { + "epoch": 7.51, + "learning_rate": 2.9638979591836734e-05, + "loss": 1.2861, + "step": 354900 + }, + { + "epoch": 7.51, + "learning_rate": 2.961857142857143e-05, + "loss": 1.2888, + "step": 355000 + }, + { + "epoch": 7.52, + "learning_rate": 2.959816326530612e-05, + "loss": 1.2912, + "step": 355100 + }, + { + "epoch": 7.52, + "learning_rate": 2.9577755102040816e-05, + "loss": 1.2893, + "step": 355200 + }, + { + "epoch": 7.52, + "learning_rate": 2.9557551020408163e-05, + "loss": 1.2963, + "step": 355300 + }, + { + "epoch": 7.52, + "learning_rate": 2.953714285714286e-05, + "loss": 1.2893, + "step": 355400 + }, + { + "epoch": 7.52, + "learning_rate": 2.951673469387755e-05, + "loss": 1.2925, + "step": 355500 + }, + { + "epoch": 7.53, + "learning_rate": 2.9496326530612245e-05, + "loss": 1.2886, + "step": 355600 + }, + { + "epoch": 7.53, + "learning_rate": 2.947591836734694e-05, + "loss": 1.2839, + "step": 355700 + }, + { + "epoch": 7.53, + "learning_rate": 2.945551020408163e-05, + "loss": 1.2912, + "step": 355800 + }, + { + "epoch": 7.53, + "learning_rate": 2.9435102040816327e-05, + "loss": 1.2853, + "step": 355900 + }, + { + "epoch": 7.53, + "learning_rate": 2.9414693877551018e-05, + "loss": 1.2883, + "step": 356000 + }, + { + "epoch": 7.54, + "learning_rate": 2.9394285714285713e-05, + "loss": 1.2924, + "step": 356100 + }, + { + "epoch": 7.54, + "learning_rate": 2.937387755102041e-05, + "loss": 1.2887, + "step": 356200 + }, + { + "epoch": 7.54, + "learning_rate": 2.9353469387755106e-05, + "loss": 1.2871, + "step": 356300 + }, + { + "epoch": 7.54, + "learning_rate": 2.9333061224489798e-05, + "loss": 1.2898, + "step": 356400 + }, + { + "epoch": 7.54, + "learning_rate": 2.9312653061224493e-05, + "loss": 1.2974, + "step": 356500 + }, + { + "epoch": 7.55, + "learning_rate": 2.9292244897959188e-05, + "loss": 1.2853, + "step": 356600 + }, + { + "epoch": 7.55, + "learning_rate": 2.927183673469388e-05, + "loss": 1.2917, + "step": 356700 + }, + { + "epoch": 7.55, + "learning_rate": 2.9251428571428575e-05, + "loss": 1.2844, + "step": 356800 + }, + { + "epoch": 7.55, + "learning_rate": 2.9231020408163266e-05, + "loss": 1.2825, + "step": 356900 + }, + { + "epoch": 7.56, + "learning_rate": 2.921061224489796e-05, + "loss": 1.2787, + "step": 357000 + }, + { + "epoch": 7.56, + "learning_rate": 2.9190204081632656e-05, + "loss": 1.2816, + "step": 357100 + }, + { + "epoch": 7.56, + "learning_rate": 2.9169795918367348e-05, + "loss": 1.2835, + "step": 357200 + }, + { + "epoch": 7.56, + "learning_rate": 2.9149387755102043e-05, + "loss": 1.288, + "step": 357300 + }, + { + "epoch": 7.56, + "learning_rate": 2.9128979591836734e-05, + "loss": 1.2838, + "step": 357400 + }, + { + "epoch": 7.57, + "learning_rate": 2.910857142857143e-05, + "loss": 1.2791, + "step": 357500 + }, + { + "epoch": 7.57, + "learning_rate": 2.9088163265306124e-05, + "loss": 1.2919, + "step": 357600 + }, + { + "epoch": 7.57, + "learning_rate": 2.9067755102040816e-05, + "loss": 1.2882, + "step": 357700 + }, + { + "epoch": 7.57, + "learning_rate": 2.904734693877551e-05, + "loss": 1.2906, + "step": 357800 + }, + { + "epoch": 7.57, + "learning_rate": 2.9026938775510206e-05, + "loss": 1.2883, + "step": 357900 + }, + { + "epoch": 7.58, + "learning_rate": 2.9006530612244897e-05, + "loss": 1.2857, + "step": 358000 + }, + { + "epoch": 7.58, + "learning_rate": 2.8986122448979592e-05, + "loss": 1.2797, + "step": 358100 + }, + { + "epoch": 7.58, + "learning_rate": 2.8965714285714284e-05, + "loss": 1.2811, + "step": 358200 + }, + { + "epoch": 7.58, + "learning_rate": 2.894551020408163e-05, + "loss": 1.2908, + "step": 358300 + }, + { + "epoch": 7.59, + "learning_rate": 2.8925102040816326e-05, + "loss": 1.2872, + "step": 358400 + }, + { + "epoch": 7.59, + "learning_rate": 2.890469387755102e-05, + "loss": 1.2767, + "step": 358500 + }, + { + "epoch": 7.59, + "learning_rate": 2.8884285714285713e-05, + "loss": 1.2941, + "step": 358600 + }, + { + "epoch": 7.59, + "learning_rate": 2.8863877551020408e-05, + "loss": 1.2854, + "step": 358700 + }, + { + "epoch": 7.59, + "learning_rate": 2.88434693877551e-05, + "loss": 1.2763, + "step": 358800 + }, + { + "epoch": 7.6, + "learning_rate": 2.8823061224489794e-05, + "loss": 1.2837, + "step": 358900 + }, + { + "epoch": 7.6, + "learning_rate": 2.8802653061224493e-05, + "loss": 1.278, + "step": 359000 + }, + { + "epoch": 7.6, + "learning_rate": 2.8782448979591837e-05, + "loss": 1.2906, + "step": 359100 + }, + { + "epoch": 7.6, + "learning_rate": 2.876204081632653e-05, + "loss": 1.2836, + "step": 359200 + }, + { + "epoch": 7.6, + "learning_rate": 2.8741632653061224e-05, + "loss": 1.2908, + "step": 359300 + }, + { + "epoch": 7.61, + "learning_rate": 2.872122448979592e-05, + "loss": 1.2887, + "step": 359400 + }, + { + "epoch": 7.61, + "learning_rate": 2.8700816326530617e-05, + "loss": 1.2855, + "step": 359500 + }, + { + "epoch": 7.61, + "learning_rate": 2.868040816326531e-05, + "loss": 1.2855, + "step": 359600 + }, + { + "epoch": 7.61, + "learning_rate": 2.8660000000000003e-05, + "loss": 1.2858, + "step": 359700 + }, + { + "epoch": 7.61, + "learning_rate": 2.86395918367347e-05, + "loss": 1.284, + "step": 359800 + }, + { + "epoch": 7.62, + "learning_rate": 2.861918367346939e-05, + "loss": 1.2785, + "step": 359900 + }, + { + "epoch": 7.62, + "learning_rate": 2.8598775510204085e-05, + "loss": 1.2803, + "step": 360000 + }, + { + "epoch": 7.62, + "learning_rate": 2.8578367346938777e-05, + "loss": 1.2821, + "step": 360100 + }, + { + "epoch": 7.62, + "learning_rate": 2.855795918367347e-05, + "loss": 1.2841, + "step": 360200 + }, + { + "epoch": 7.63, + "learning_rate": 2.8537551020408166e-05, + "loss": 1.28, + "step": 360300 + }, + { + "epoch": 7.63, + "learning_rate": 2.8517142857142858e-05, + "loss": 1.2831, + "step": 360400 + }, + { + "epoch": 7.63, + "learning_rate": 2.8496734693877553e-05, + "loss": 1.287, + "step": 360500 + }, + { + "epoch": 7.63, + "learning_rate": 2.8476326530612245e-05, + "loss": 1.2871, + "step": 360600 + }, + { + "epoch": 7.63, + "learning_rate": 2.845591836734694e-05, + "loss": 1.2758, + "step": 360700 + }, + { + "epoch": 7.64, + "learning_rate": 2.8435510204081635e-05, + "loss": 1.2892, + "step": 360800 + }, + { + "epoch": 7.64, + "learning_rate": 2.8415102040816326e-05, + "loss": 1.2854, + "step": 360900 + }, + { + "epoch": 7.64, + "learning_rate": 2.839469387755102e-05, + "loss": 1.2857, + "step": 361000 + }, + { + "epoch": 7.64, + "learning_rate": 2.8374285714285713e-05, + "loss": 1.2808, + "step": 361100 + }, + { + "epoch": 7.64, + "learning_rate": 2.8353877551020408e-05, + "loss": 1.2867, + "step": 361200 + }, + { + "epoch": 7.65, + "learning_rate": 2.8333469387755103e-05, + "loss": 1.2836, + "step": 361300 + }, + { + "epoch": 7.65, + "learning_rate": 2.8313061224489794e-05, + "loss": 1.29, + "step": 361400 + }, + { + "epoch": 7.65, + "learning_rate": 2.829265306122449e-05, + "loss": 1.28, + "step": 361500 + }, + { + "epoch": 7.65, + "learning_rate": 2.8272244897959184e-05, + "loss": 1.2846, + "step": 361600 + }, + { + "epoch": 7.66, + "learning_rate": 2.8251836734693876e-05, + "loss": 1.283, + "step": 361700 + }, + { + "epoch": 7.66, + "learning_rate": 2.8231428571428574e-05, + "loss": 1.2807, + "step": 361800 + }, + { + "epoch": 7.66, + "learning_rate": 2.821102040816327e-05, + "loss": 1.2994, + "step": 361900 + }, + { + "epoch": 7.66, + "learning_rate": 2.8190612244897964e-05, + "loss": 1.2895, + "step": 362000 + }, + { + "epoch": 7.66, + "learning_rate": 2.8170204081632656e-05, + "loss": 1.2841, + "step": 362100 + }, + { + "epoch": 7.67, + "learning_rate": 2.814979591836735e-05, + "loss": 1.2795, + "step": 362200 + }, + { + "epoch": 7.67, + "learning_rate": 2.8129387755102042e-05, + "loss": 1.2858, + "step": 362300 + }, + { + "epoch": 7.67, + "learning_rate": 2.8108979591836737e-05, + "loss": 1.2847, + "step": 362400 + }, + { + "epoch": 7.67, + "learning_rate": 2.8088571428571432e-05, + "loss": 1.2844, + "step": 362500 + }, + { + "epoch": 7.67, + "learning_rate": 2.8068163265306124e-05, + "loss": 1.2881, + "step": 362600 + }, + { + "epoch": 7.68, + "learning_rate": 2.804775510204082e-05, + "loss": 1.2865, + "step": 362700 + }, + { + "epoch": 7.68, + "learning_rate": 2.802734693877551e-05, + "loss": 1.2924, + "step": 362800 + }, + { + "epoch": 7.68, + "learning_rate": 2.8006938775510205e-05, + "loss": 1.2798, + "step": 362900 + }, + { + "epoch": 7.68, + "learning_rate": 2.79865306122449e-05, + "loss": 1.2857, + "step": 363000 + }, + { + "epoch": 7.68, + "learning_rate": 2.7966122448979592e-05, + "loss": 1.2842, + "step": 363100 + }, + { + "epoch": 7.69, + "learning_rate": 2.7945714285714287e-05, + "loss": 1.2828, + "step": 363200 + }, + { + "epoch": 7.69, + "learning_rate": 2.792530612244898e-05, + "loss": 1.2823, + "step": 363300 + }, + { + "epoch": 7.69, + "learning_rate": 2.7904897959183673e-05, + "loss": 1.2853, + "step": 363400 + }, + { + "epoch": 7.69, + "learning_rate": 2.788448979591837e-05, + "loss": 1.2882, + "step": 363500 + }, + { + "epoch": 7.7, + "learning_rate": 2.786408163265306e-05, + "loss": 1.2871, + "step": 363600 + }, + { + "epoch": 7.7, + "learning_rate": 2.7843673469387755e-05, + "loss": 1.2749, + "step": 363700 + }, + { + "epoch": 7.7, + "learning_rate": 2.7823265306122446e-05, + "loss": 1.2753, + "step": 363800 + }, + { + "epoch": 7.7, + "learning_rate": 2.780285714285714e-05, + "loss": 1.277, + "step": 363900 + }, + { + "epoch": 7.7, + "learning_rate": 2.778244897959184e-05, + "loss": 1.2835, + "step": 364000 + }, + { + "epoch": 7.71, + "learning_rate": 2.7762040816326535e-05, + "loss": 1.2791, + "step": 364100 + }, + { + "epoch": 7.71, + "learning_rate": 2.774163265306123e-05, + "loss": 1.287, + "step": 364200 + }, + { + "epoch": 7.71, + "learning_rate": 2.772122448979592e-05, + "loss": 1.2695, + "step": 364300 + }, + { + "epoch": 7.71, + "learning_rate": 2.7700816326530616e-05, + "loss": 1.2803, + "step": 364400 + }, + { + "epoch": 7.71, + "learning_rate": 2.7680408163265308e-05, + "loss": 1.2806, + "step": 364500 + }, + { + "epoch": 7.72, + "learning_rate": 2.7660000000000003e-05, + "loss": 1.2846, + "step": 364600 + }, + { + "epoch": 7.72, + "learning_rate": 2.7639591836734698e-05, + "loss": 1.2824, + "step": 364700 + }, + { + "epoch": 7.72, + "learning_rate": 2.761918367346939e-05, + "loss": 1.2764, + "step": 364800 + }, + { + "epoch": 7.72, + "learning_rate": 2.7598775510204084e-05, + "loss": 1.2811, + "step": 364900 + }, + { + "epoch": 7.72, + "learning_rate": 2.7578367346938776e-05, + "loss": 1.2876, + "step": 365000 + }, + { + "epoch": 7.73, + "learning_rate": 2.755795918367347e-05, + "loss": 1.2791, + "step": 365100 + }, + { + "epoch": 7.73, + "learning_rate": 2.7537551020408166e-05, + "loss": 1.2867, + "step": 365200 + }, + { + "epoch": 7.73, + "learning_rate": 2.7517142857142857e-05, + "loss": 1.2803, + "step": 365300 + }, + { + "epoch": 7.73, + "learning_rate": 2.7496734693877552e-05, + "loss": 1.2844, + "step": 365400 + }, + { + "epoch": 7.74, + "learning_rate": 2.74765306122449e-05, + "loss": 1.287, + "step": 365500 + }, + { + "epoch": 7.74, + "learning_rate": 2.745612244897959e-05, + "loss": 1.2741, + "step": 365600 + }, + { + "epoch": 7.74, + "learning_rate": 2.7435714285714287e-05, + "loss": 1.2776, + "step": 365700 + }, + { + "epoch": 7.74, + "learning_rate": 2.7415510204081634e-05, + "loss": 1.2867, + "step": 365800 + }, + { + "epoch": 7.74, + "learning_rate": 2.739510204081633e-05, + "loss": 1.2806, + "step": 365900 + }, + { + "epoch": 7.75, + "learning_rate": 2.737469387755102e-05, + "loss": 1.2823, + "step": 366000 + }, + { + "epoch": 7.75, + "learning_rate": 2.7354285714285716e-05, + "loss": 1.2802, + "step": 366100 + }, + { + "epoch": 7.75, + "learning_rate": 2.733387755102041e-05, + "loss": 1.2765, + "step": 366200 + }, + { + "epoch": 7.75, + "learning_rate": 2.7313469387755102e-05, + "loss": 1.2814, + "step": 366300 + }, + { + "epoch": 7.75, + "learning_rate": 2.7293061224489797e-05, + "loss": 1.2769, + "step": 366400 + }, + { + "epoch": 7.76, + "learning_rate": 2.7272857142857145e-05, + "loss": 1.2855, + "step": 366500 + }, + { + "epoch": 7.76, + "learning_rate": 2.7252448979591836e-05, + "loss": 1.2811, + "step": 366600 + }, + { + "epoch": 7.76, + "learning_rate": 2.723204081632653e-05, + "loss": 1.2833, + "step": 366700 + }, + { + "epoch": 7.76, + "learning_rate": 2.7211632653061226e-05, + "loss": 1.2842, + "step": 366800 + }, + { + "epoch": 7.77, + "learning_rate": 2.7191224489795918e-05, + "loss": 1.2856, + "step": 366900 + }, + { + "epoch": 7.77, + "learning_rate": 2.7170816326530613e-05, + "loss": 1.2788, + "step": 367000 + }, + { + "epoch": 7.77, + "learning_rate": 2.7150408163265308e-05, + "loss": 1.276, + "step": 367100 + }, + { + "epoch": 7.77, + "learning_rate": 2.713e-05, + "loss": 1.2867, + "step": 367200 + }, + { + "epoch": 7.77, + "learning_rate": 2.7109591836734694e-05, + "loss": 1.2834, + "step": 367300 + }, + { + "epoch": 7.78, + "learning_rate": 2.7089183673469386e-05, + "loss": 1.2718, + "step": 367400 + }, + { + "epoch": 7.78, + "learning_rate": 2.706877551020408e-05, + "loss": 1.2791, + "step": 367500 + }, + { + "epoch": 7.78, + "learning_rate": 2.7048367346938776e-05, + "loss": 1.2779, + "step": 367600 + }, + { + "epoch": 7.78, + "learning_rate": 2.7027959183673468e-05, + "loss": 1.2858, + "step": 367700 + }, + { + "epoch": 7.78, + "learning_rate": 2.7007551020408166e-05, + "loss": 1.286, + "step": 367800 + }, + { + "epoch": 7.79, + "learning_rate": 2.698714285714286e-05, + "loss": 1.2821, + "step": 367900 + }, + { + "epoch": 7.79, + "learning_rate": 2.6966734693877556e-05, + "loss": 1.2819, + "step": 368000 + }, + { + "epoch": 7.79, + "learning_rate": 2.6946326530612247e-05, + "loss": 1.2838, + "step": 368100 + }, + { + "epoch": 7.79, + "learning_rate": 2.6925918367346942e-05, + "loss": 1.2796, + "step": 368200 + }, + { + "epoch": 7.79, + "learning_rate": 2.6905510204081634e-05, + "loss": 1.2813, + "step": 368300 + }, + { + "epoch": 7.8, + "learning_rate": 2.688510204081633e-05, + "loss": 1.2715, + "step": 368400 + }, + { + "epoch": 7.8, + "learning_rate": 2.6864693877551024e-05, + "loss": 1.2788, + "step": 368500 + }, + { + "epoch": 7.8, + "learning_rate": 2.6844285714285715e-05, + "loss": 1.2862, + "step": 368600 + }, + { + "epoch": 7.8, + "learning_rate": 2.682387755102041e-05, + "loss": 1.2743, + "step": 368700 + }, + { + "epoch": 7.81, + "learning_rate": 2.6803469387755102e-05, + "loss": 1.28, + "step": 368800 + }, + { + "epoch": 7.81, + "learning_rate": 2.6783061224489797e-05, + "loss": 1.2773, + "step": 368900 + }, + { + "epoch": 7.81, + "learning_rate": 2.6762653061224492e-05, + "loss": 1.2807, + "step": 369000 + }, + { + "epoch": 7.81, + "learning_rate": 2.6742244897959184e-05, + "loss": 1.2764, + "step": 369100 + }, + { + "epoch": 7.81, + "learning_rate": 2.672183673469388e-05, + "loss": 1.2843, + "step": 369200 + }, + { + "epoch": 7.82, + "learning_rate": 2.670142857142857e-05, + "loss": 1.2796, + "step": 369300 + }, + { + "epoch": 7.82, + "learning_rate": 2.6681020408163265e-05, + "loss": 1.2856, + "step": 369400 + }, + { + "epoch": 7.82, + "learning_rate": 2.666061224489796e-05, + "loss": 1.2846, + "step": 369500 + }, + { + "epoch": 7.82, + "learning_rate": 2.664020408163265e-05, + "loss": 1.282, + "step": 369600 + }, + { + "epoch": 7.82, + "learning_rate": 2.6619795918367347e-05, + "loss": 1.2746, + "step": 369700 + }, + { + "epoch": 7.83, + "learning_rate": 2.6599387755102038e-05, + "loss": 1.2804, + "step": 369800 + }, + { + "epoch": 7.83, + "learning_rate": 2.6578979591836733e-05, + "loss": 1.2804, + "step": 369900 + }, + { + "epoch": 7.83, + "learning_rate": 2.6558571428571428e-05, + "loss": 1.2805, + "step": 370000 + }, + { + "epoch": 7.83, + "learning_rate": 2.6538163265306127e-05, + "loss": 1.2806, + "step": 370100 + }, + { + "epoch": 7.83, + "learning_rate": 2.651775510204082e-05, + "loss": 1.2758, + "step": 370200 + }, + { + "epoch": 7.84, + "learning_rate": 2.6497346938775513e-05, + "loss": 1.2783, + "step": 370300 + }, + { + "epoch": 7.84, + "learning_rate": 2.6476938775510208e-05, + "loss": 1.2883, + "step": 370400 + }, + { + "epoch": 7.84, + "learning_rate": 2.64565306122449e-05, + "loss": 1.2812, + "step": 370500 + }, + { + "epoch": 7.84, + "learning_rate": 2.6436122448979595e-05, + "loss": 1.2784, + "step": 370600 + }, + { + "epoch": 7.85, + "learning_rate": 2.641571428571429e-05, + "loss": 1.278, + "step": 370700 + }, + { + "epoch": 7.85, + "learning_rate": 2.639530612244898e-05, + "loss": 1.2798, + "step": 370800 + }, + { + "epoch": 7.85, + "learning_rate": 2.6374897959183676e-05, + "loss": 1.2777, + "step": 370900 + }, + { + "epoch": 7.85, + "learning_rate": 2.6354489795918368e-05, + "loss": 1.2721, + "step": 371000 + }, + { + "epoch": 7.85, + "learning_rate": 2.6334081632653063e-05, + "loss": 1.2754, + "step": 371100 + }, + { + "epoch": 7.86, + "learning_rate": 2.6313673469387758e-05, + "loss": 1.2762, + "step": 371200 + }, + { + "epoch": 7.86, + "learning_rate": 2.629326530612245e-05, + "loss": 1.2827, + "step": 371300 + }, + { + "epoch": 7.86, + "learning_rate": 2.6273061224489797e-05, + "loss": 1.2791, + "step": 371400 + }, + { + "epoch": 7.86, + "learning_rate": 2.6252653061224492e-05, + "loss": 1.277, + "step": 371500 + }, + { + "epoch": 7.86, + "learning_rate": 2.6232244897959183e-05, + "loss": 1.28, + "step": 371600 + }, + { + "epoch": 7.87, + "learning_rate": 2.621183673469388e-05, + "loss": 1.2834, + "step": 371700 + }, + { + "epoch": 7.87, + "learning_rate": 2.6191428571428573e-05, + "loss": 1.2721, + "step": 371800 + }, + { + "epoch": 7.87, + "learning_rate": 2.6171020408163265e-05, + "loss": 1.2729, + "step": 371900 + }, + { + "epoch": 7.87, + "learning_rate": 2.615061224489796e-05, + "loss": 1.2694, + "step": 372000 + }, + { + "epoch": 7.88, + "learning_rate": 2.6130204081632655e-05, + "loss": 1.2768, + "step": 372100 + }, + { + "epoch": 7.88, + "learning_rate": 2.6109795918367346e-05, + "loss": 1.277, + "step": 372200 + }, + { + "epoch": 7.88, + "learning_rate": 2.608938775510204e-05, + "loss": 1.2818, + "step": 372300 + }, + { + "epoch": 7.88, + "learning_rate": 2.6068979591836733e-05, + "loss": 1.2887, + "step": 372400 + }, + { + "epoch": 7.88, + "learning_rate": 2.6048571428571428e-05, + "loss": 1.2802, + "step": 372500 + }, + { + "epoch": 7.89, + "learning_rate": 2.6028163265306123e-05, + "loss": 1.2816, + "step": 372600 + }, + { + "epoch": 7.89, + "learning_rate": 2.6007755102040815e-05, + "loss": 1.2844, + "step": 372700 + }, + { + "epoch": 7.89, + "learning_rate": 2.5987346938775513e-05, + "loss": 1.2803, + "step": 372800 + }, + { + "epoch": 7.89, + "learning_rate": 2.5966938775510208e-05, + "loss": 1.2753, + "step": 372900 + }, + { + "epoch": 7.89, + "learning_rate": 2.5946530612244903e-05, + "loss": 1.2785, + "step": 373000 + }, + { + "epoch": 7.9, + "learning_rate": 2.5926122448979594e-05, + "loss": 1.2811, + "step": 373100 + }, + { + "epoch": 7.9, + "learning_rate": 2.590571428571429e-05, + "loss": 1.2787, + "step": 373200 + }, + { + "epoch": 7.9, + "learning_rate": 2.588530612244898e-05, + "loss": 1.2811, + "step": 373300 + }, + { + "epoch": 7.9, + "learning_rate": 2.5864897959183676e-05, + "loss": 1.2761, + "step": 373400 + }, + { + "epoch": 7.9, + "learning_rate": 2.584448979591837e-05, + "loss": 1.2775, + "step": 373500 + }, + { + "epoch": 7.91, + "learning_rate": 2.5824081632653062e-05, + "loss": 1.2855, + "step": 373600 + }, + { + "epoch": 7.91, + "learning_rate": 2.5803673469387757e-05, + "loss": 1.2823, + "step": 373700 + }, + { + "epoch": 7.91, + "learning_rate": 2.578326530612245e-05, + "loss": 1.2706, + "step": 373800 + }, + { + "epoch": 7.91, + "learning_rate": 2.5762857142857144e-05, + "loss": 1.2717, + "step": 373900 + }, + { + "epoch": 7.92, + "learning_rate": 2.574244897959184e-05, + "loss": 1.2783, + "step": 374000 + }, + { + "epoch": 7.92, + "learning_rate": 2.572204081632653e-05, + "loss": 1.2737, + "step": 374100 + }, + { + "epoch": 7.92, + "learning_rate": 2.5701632653061226e-05, + "loss": 1.2832, + "step": 374200 + }, + { + "epoch": 7.92, + "learning_rate": 2.5681224489795917e-05, + "loss": 1.2828, + "step": 374300 + }, + { + "epoch": 7.92, + "learning_rate": 2.5660816326530612e-05, + "loss": 1.2812, + "step": 374400 + }, + { + "epoch": 7.93, + "learning_rate": 2.5640408163265307e-05, + "loss": 1.2722, + "step": 374500 + }, + { + "epoch": 7.93, + "learning_rate": 2.562e-05, + "loss": 1.2758, + "step": 374600 + }, + { + "epoch": 7.93, + "learning_rate": 2.5599591836734694e-05, + "loss": 1.2754, + "step": 374700 + }, + { + "epoch": 7.93, + "learning_rate": 2.5579183673469385e-05, + "loss": 1.2757, + "step": 374800 + }, + { + "epoch": 7.93, + "learning_rate": 2.555877551020408e-05, + "loss": 1.284, + "step": 374900 + }, + { + "epoch": 7.94, + "learning_rate": 2.5538367346938775e-05, + "loss": 1.2697, + "step": 375000 + }, + { + "epoch": 7.94, + "learning_rate": 2.5517959183673474e-05, + "loss": 1.2736, + "step": 375100 + }, + { + "epoch": 7.94, + "learning_rate": 2.5497755102040814e-05, + "loss": 1.2727, + "step": 375200 + }, + { + "epoch": 7.94, + "learning_rate": 2.547734693877551e-05, + "loss": 1.2769, + "step": 375300 + }, + { + "epoch": 7.94, + "learning_rate": 2.5456938775510204e-05, + "loss": 1.2802, + "step": 375400 + }, + { + "epoch": 7.95, + "learning_rate": 2.5436530612244896e-05, + "loss": 1.2828, + "step": 375500 + }, + { + "epoch": 7.95, + "learning_rate": 2.5416122448979594e-05, + "loss": 1.2714, + "step": 375600 + }, + { + "epoch": 7.95, + "learning_rate": 2.539571428571429e-05, + "loss": 1.2751, + "step": 375700 + }, + { + "epoch": 7.95, + "learning_rate": 2.5375306122448984e-05, + "loss": 1.2703, + "step": 375800 + }, + { + "epoch": 7.96, + "learning_rate": 2.5354897959183676e-05, + "loss": 1.2795, + "step": 375900 + }, + { + "epoch": 7.96, + "learning_rate": 2.533448979591837e-05, + "loss": 1.2723, + "step": 376000 + }, + { + "epoch": 7.96, + "learning_rate": 2.5314285714285718e-05, + "loss": 1.2766, + "step": 376100 + }, + { + "epoch": 7.96, + "learning_rate": 2.5293877551020413e-05, + "loss": 1.273, + "step": 376200 + }, + { + "epoch": 7.96, + "learning_rate": 2.5273469387755105e-05, + "loss": 1.2848, + "step": 376300 + }, + { + "epoch": 7.97, + "learning_rate": 2.52530612244898e-05, + "loss": 1.2706, + "step": 376400 + }, + { + "epoch": 7.97, + "learning_rate": 2.523265306122449e-05, + "loss": 1.2772, + "step": 376500 + }, + { + "epoch": 7.97, + "learning_rate": 2.5212244897959186e-05, + "loss": 1.2788, + "step": 376600 + }, + { + "epoch": 7.97, + "learning_rate": 2.519183673469388e-05, + "loss": 1.2772, + "step": 376700 + }, + { + "epoch": 7.97, + "learning_rate": 2.5171428571428573e-05, + "loss": 1.2746, + "step": 376800 + }, + { + "epoch": 7.98, + "learning_rate": 2.5151020408163268e-05, + "loss": 1.2796, + "step": 376900 + }, + { + "epoch": 7.98, + "learning_rate": 2.513061224489796e-05, + "loss": 1.2695, + "step": 377000 + }, + { + "epoch": 7.98, + "learning_rate": 2.5110204081632654e-05, + "loss": 1.2724, + "step": 377100 + }, + { + "epoch": 7.98, + "learning_rate": 2.508979591836735e-05, + "loss": 1.2707, + "step": 377200 + }, + { + "epoch": 7.99, + "learning_rate": 2.506938775510204e-05, + "loss": 1.2725, + "step": 377300 + }, + { + "epoch": 7.99, + "learning_rate": 2.5048979591836736e-05, + "loss": 1.2809, + "step": 377400 + }, + { + "epoch": 7.99, + "learning_rate": 2.5028571428571428e-05, + "loss": 1.2766, + "step": 377500 + }, + { + "epoch": 7.99, + "learning_rate": 2.5008163265306123e-05, + "loss": 1.2769, + "step": 377600 + }, + { + "epoch": 7.99, + "learning_rate": 2.4987755102040818e-05, + "loss": 1.2773, + "step": 377700 + }, + { + "epoch": 8.0, + "learning_rate": 2.4967346938775512e-05, + "loss": 1.2796, + "step": 377800 + }, + { + "epoch": 8.0, + "learning_rate": 2.4946938775510207e-05, + "loss": 1.2747, + "step": 377900 + }, + { + "epoch": 8.0, + "learning_rate": 2.49265306122449e-05, + "loss": 1.2734, + "step": 378000 + }, + { + "epoch": 8.0, + "learning_rate": 2.4906122448979594e-05, + "loss": 1.2695, + "step": 378100 + }, + { + "epoch": 8.0, + "learning_rate": 2.4885714285714286e-05, + "loss": 1.273, + "step": 378200 + }, + { + "epoch": 8.01, + "learning_rate": 2.486530612244898e-05, + "loss": 1.2782, + "step": 378300 + }, + { + "epoch": 8.01, + "learning_rate": 2.4844897959183676e-05, + "loss": 1.2723, + "step": 378400 + }, + { + "epoch": 8.01, + "learning_rate": 2.4824489795918367e-05, + "loss": 1.2732, + "step": 378500 + }, + { + "epoch": 8.01, + "learning_rate": 2.4804081632653062e-05, + "loss": 1.2817, + "step": 378600 + }, + { + "epoch": 8.01, + "learning_rate": 2.4783673469387754e-05, + "loss": 1.274, + "step": 378700 + }, + { + "epoch": 8.02, + "learning_rate": 2.476326530612245e-05, + "loss": 1.2809, + "step": 378800 + }, + { + "epoch": 8.02, + "learning_rate": 2.4742857142857147e-05, + "loss": 1.2733, + "step": 378900 + }, + { + "epoch": 8.02, + "learning_rate": 2.472244897959184e-05, + "loss": 1.2722, + "step": 379000 + }, + { + "epoch": 8.02, + "learning_rate": 2.4702040816326534e-05, + "loss": 1.2782, + "step": 379100 + }, + { + "epoch": 8.03, + "learning_rate": 2.4681632653061225e-05, + "loss": 1.2677, + "step": 379200 + }, + { + "epoch": 8.03, + "learning_rate": 2.466122448979592e-05, + "loss": 1.2856, + "step": 379300 + }, + { + "epoch": 8.03, + "learning_rate": 2.4640816326530615e-05, + "loss": 1.2705, + "step": 379400 + }, + { + "epoch": 8.03, + "learning_rate": 2.4620408163265307e-05, + "loss": 1.2663, + "step": 379500 + }, + { + "epoch": 8.03, + "learning_rate": 2.46e-05, + "loss": 1.2757, + "step": 379600 + }, + { + "epoch": 8.04, + "learning_rate": 2.4579591836734693e-05, + "loss": 1.2639, + "step": 379700 + }, + { + "epoch": 8.04, + "learning_rate": 2.4559183673469388e-05, + "loss": 1.2714, + "step": 379800 + }, + { + "epoch": 8.04, + "learning_rate": 2.4538775510204083e-05, + "loss": 1.2703, + "step": 379900 + }, + { + "epoch": 8.04, + "learning_rate": 2.4518367346938775e-05, + "loss": 1.2746, + "step": 380000 + }, + { + "epoch": 8.04, + "learning_rate": 2.4497959183673473e-05, + "loss": 1.2744, + "step": 380100 + }, + { + "epoch": 8.05, + "learning_rate": 2.4477551020408165e-05, + "loss": 1.2748, + "step": 380200 + }, + { + "epoch": 8.05, + "learning_rate": 2.445714285714286e-05, + "loss": 1.2734, + "step": 380300 + }, + { + "epoch": 8.05, + "learning_rate": 2.443673469387755e-05, + "loss": 1.2663, + "step": 380400 + }, + { + "epoch": 8.05, + "learning_rate": 2.4416326530612246e-05, + "loss": 1.2649, + "step": 380500 + }, + { + "epoch": 8.06, + "learning_rate": 2.439591836734694e-05, + "loss": 1.2684, + "step": 380600 + }, + { + "epoch": 8.06, + "learning_rate": 2.4375510204081633e-05, + "loss": 1.2642, + "step": 380700 + }, + { + "epoch": 8.06, + "learning_rate": 2.4355102040816328e-05, + "loss": 1.2774, + "step": 380800 + }, + { + "epoch": 8.06, + "learning_rate": 2.433469387755102e-05, + "loss": 1.2657, + "step": 380900 + }, + { + "epoch": 8.06, + "learning_rate": 2.4314285714285714e-05, + "loss": 1.2709, + "step": 381000 + }, + { + "epoch": 8.07, + "learning_rate": 2.429387755102041e-05, + "loss": 1.2718, + "step": 381100 + }, + { + "epoch": 8.07, + "learning_rate": 2.4273469387755104e-05, + "loss": 1.2666, + "step": 381200 + }, + { + "epoch": 8.07, + "learning_rate": 2.42530612244898e-05, + "loss": 1.2714, + "step": 381300 + }, + { + "epoch": 8.07, + "learning_rate": 2.4232857142857143e-05, + "loss": 1.2711, + "step": 381400 + }, + { + "epoch": 8.07, + "learning_rate": 2.421244897959184e-05, + "loss": 1.2745, + "step": 381500 + }, + { + "epoch": 8.08, + "learning_rate": 2.419204081632653e-05, + "loss": 1.2711, + "step": 381600 + }, + { + "epoch": 8.08, + "learning_rate": 2.417163265306123e-05, + "loss": 1.2681, + "step": 381700 + }, + { + "epoch": 8.08, + "learning_rate": 2.415122448979592e-05, + "loss": 1.2698, + "step": 381800 + }, + { + "epoch": 8.08, + "learning_rate": 2.4130816326530615e-05, + "loss": 1.2719, + "step": 381900 + }, + { + "epoch": 8.08, + "learning_rate": 2.4110408163265306e-05, + "loss": 1.2721, + "step": 382000 + }, + { + "epoch": 8.09, + "learning_rate": 2.409e-05, + "loss": 1.2688, + "step": 382100 + }, + { + "epoch": 8.09, + "learning_rate": 2.4069591836734696e-05, + "loss": 1.2674, + "step": 382200 + }, + { + "epoch": 8.09, + "learning_rate": 2.4049183673469388e-05, + "loss": 1.267, + "step": 382300 + }, + { + "epoch": 8.09, + "learning_rate": 2.4028775510204083e-05, + "loss": 1.2732, + "step": 382400 + }, + { + "epoch": 8.1, + "learning_rate": 2.4008367346938775e-05, + "loss": 1.2744, + "step": 382500 + }, + { + "epoch": 8.1, + "learning_rate": 2.398795918367347e-05, + "loss": 1.271, + "step": 382600 + }, + { + "epoch": 8.1, + "learning_rate": 2.3967551020408164e-05, + "loss": 1.2749, + "step": 382700 + }, + { + "epoch": 8.1, + "learning_rate": 2.394714285714286e-05, + "loss": 1.2704, + "step": 382800 + }, + { + "epoch": 8.1, + "learning_rate": 2.3926734693877554e-05, + "loss": 1.2655, + "step": 382900 + }, + { + "epoch": 8.11, + "learning_rate": 2.3906326530612246e-05, + "loss": 1.2684, + "step": 383000 + }, + { + "epoch": 8.11, + "learning_rate": 2.388591836734694e-05, + "loss": 1.2765, + "step": 383100 + }, + { + "epoch": 8.11, + "learning_rate": 2.3865510204081633e-05, + "loss": 1.2726, + "step": 383200 + }, + { + "epoch": 8.11, + "learning_rate": 2.3845102040816328e-05, + "loss": 1.2724, + "step": 383300 + }, + { + "epoch": 8.11, + "learning_rate": 2.3824693877551023e-05, + "loss": 1.2608, + "step": 383400 + }, + { + "epoch": 8.12, + "learning_rate": 2.3804285714285714e-05, + "loss": 1.2764, + "step": 383500 + }, + { + "epoch": 8.12, + "learning_rate": 2.378387755102041e-05, + "loss": 1.2758, + "step": 383600 + }, + { + "epoch": 8.12, + "learning_rate": 2.37634693877551e-05, + "loss": 1.2716, + "step": 383700 + }, + { + "epoch": 8.12, + "learning_rate": 2.3743061224489796e-05, + "loss": 1.2664, + "step": 383800 + }, + { + "epoch": 8.12, + "learning_rate": 2.372265306122449e-05, + "loss": 1.2686, + "step": 383900 + }, + { + "epoch": 8.13, + "learning_rate": 2.3702244897959186e-05, + "loss": 1.2711, + "step": 384000 + }, + { + "epoch": 8.13, + "learning_rate": 2.368183673469388e-05, + "loss": 1.2676, + "step": 384100 + }, + { + "epoch": 8.13, + "learning_rate": 2.3661428571428572e-05, + "loss": 1.2675, + "step": 384200 + }, + { + "epoch": 8.13, + "learning_rate": 2.3641020408163267e-05, + "loss": 1.2674, + "step": 384300 + }, + { + "epoch": 8.14, + "learning_rate": 2.3620612244897962e-05, + "loss": 1.2728, + "step": 384400 + }, + { + "epoch": 8.14, + "learning_rate": 2.3600204081632654e-05, + "loss": 1.2676, + "step": 384500 + }, + { + "epoch": 8.14, + "learning_rate": 2.357979591836735e-05, + "loss": 1.2728, + "step": 384600 + }, + { + "epoch": 8.14, + "learning_rate": 2.355938775510204e-05, + "loss": 1.2663, + "step": 384700 + }, + { + "epoch": 8.14, + "learning_rate": 2.3538979591836735e-05, + "loss": 1.2724, + "step": 384800 + }, + { + "epoch": 8.15, + "learning_rate": 2.351857142857143e-05, + "loss": 1.2703, + "step": 384900 + }, + { + "epoch": 8.15, + "learning_rate": 2.3498163265306122e-05, + "loss": 1.2651, + "step": 385000 + }, + { + "epoch": 8.15, + "learning_rate": 2.347775510204082e-05, + "loss": 1.2631, + "step": 385100 + }, + { + "epoch": 8.15, + "learning_rate": 2.345734693877551e-05, + "loss": 1.2661, + "step": 385200 + }, + { + "epoch": 8.15, + "learning_rate": 2.3436938775510207e-05, + "loss": 1.27, + "step": 385300 + }, + { + "epoch": 8.16, + "learning_rate": 2.3416530612244898e-05, + "loss": 1.2687, + "step": 385400 + }, + { + "epoch": 8.16, + "learning_rate": 2.3396122448979593e-05, + "loss": 1.2703, + "step": 385500 + }, + { + "epoch": 8.16, + "learning_rate": 2.337591836734694e-05, + "loss": 1.27, + "step": 385600 + }, + { + "epoch": 8.16, + "learning_rate": 2.3355510204081636e-05, + "loss": 1.2641, + "step": 385700 + }, + { + "epoch": 8.17, + "learning_rate": 2.3335102040816327e-05, + "loss": 1.2655, + "step": 385800 + }, + { + "epoch": 8.17, + "learning_rate": 2.3314693877551022e-05, + "loss": 1.2691, + "step": 385900 + }, + { + "epoch": 8.17, + "learning_rate": 2.3294285714285717e-05, + "loss": 1.2696, + "step": 386000 + }, + { + "epoch": 8.17, + "learning_rate": 2.327387755102041e-05, + "loss": 1.2634, + "step": 386100 + }, + { + "epoch": 8.17, + "learning_rate": 2.3253469387755104e-05, + "loss": 1.2582, + "step": 386200 + }, + { + "epoch": 8.18, + "learning_rate": 2.3233061224489795e-05, + "loss": 1.2701, + "step": 386300 + }, + { + "epoch": 8.18, + "learning_rate": 2.3212857142857143e-05, + "loss": 1.2726, + "step": 386400 + }, + { + "epoch": 8.18, + "learning_rate": 2.3192448979591838e-05, + "loss": 1.2661, + "step": 386500 + }, + { + "epoch": 8.18, + "learning_rate": 2.3172244897959186e-05, + "loss": 1.2686, + "step": 386600 + }, + { + "epoch": 8.18, + "learning_rate": 2.315183673469388e-05, + "loss": 1.2653, + "step": 386700 + }, + { + "epoch": 8.19, + "learning_rate": 2.3131428571428572e-05, + "loss": 1.2711, + "step": 386800 + }, + { + "epoch": 8.19, + "learning_rate": 2.3111020408163267e-05, + "loss": 1.2652, + "step": 386900 + }, + { + "epoch": 8.19, + "learning_rate": 2.3090612244897962e-05, + "loss": 1.2709, + "step": 387000 + }, + { + "epoch": 8.19, + "learning_rate": 2.3070204081632654e-05, + "loss": 1.2628, + "step": 387100 + }, + { + "epoch": 8.19, + "learning_rate": 2.304979591836735e-05, + "loss": 1.2654, + "step": 387200 + }, + { + "epoch": 8.2, + "learning_rate": 2.302938775510204e-05, + "loss": 1.2686, + "step": 387300 + }, + { + "epoch": 8.2, + "learning_rate": 2.3008979591836735e-05, + "loss": 1.2716, + "step": 387400 + }, + { + "epoch": 8.2, + "learning_rate": 2.298857142857143e-05, + "loss": 1.2732, + "step": 387500 + }, + { + "epoch": 8.2, + "learning_rate": 2.2968163265306122e-05, + "loss": 1.2631, + "step": 387600 + }, + { + "epoch": 8.21, + "learning_rate": 2.294775510204082e-05, + "loss": 1.2714, + "step": 387700 + }, + { + "epoch": 8.21, + "learning_rate": 2.292734693877551e-05, + "loss": 1.2724, + "step": 387800 + }, + { + "epoch": 8.21, + "learning_rate": 2.2906938775510207e-05, + "loss": 1.2706, + "step": 387900 + }, + { + "epoch": 8.21, + "learning_rate": 2.2886530612244898e-05, + "loss": 1.2693, + "step": 388000 + }, + { + "epoch": 8.21, + "learning_rate": 2.2866122448979593e-05, + "loss": 1.2707, + "step": 388100 + }, + { + "epoch": 8.22, + "learning_rate": 2.2845714285714288e-05, + "loss": 1.2726, + "step": 388200 + }, + { + "epoch": 8.22, + "learning_rate": 2.282530612244898e-05, + "loss": 1.2703, + "step": 388300 + }, + { + "epoch": 8.22, + "learning_rate": 2.2804897959183675e-05, + "loss": 1.2675, + "step": 388400 + }, + { + "epoch": 8.22, + "learning_rate": 2.2784489795918366e-05, + "loss": 1.2677, + "step": 388500 + }, + { + "epoch": 8.22, + "learning_rate": 2.276408163265306e-05, + "loss": 1.273, + "step": 388600 + }, + { + "epoch": 8.23, + "learning_rate": 2.2743673469387756e-05, + "loss": 1.2677, + "step": 388700 + }, + { + "epoch": 8.23, + "learning_rate": 2.2723265306122448e-05, + "loss": 1.2606, + "step": 388800 + }, + { + "epoch": 8.23, + "learning_rate": 2.2702857142857146e-05, + "loss": 1.2746, + "step": 388900 + }, + { + "epoch": 8.23, + "learning_rate": 2.2682448979591838e-05, + "loss": 1.2675, + "step": 389000 + }, + { + "epoch": 8.23, + "learning_rate": 2.2662040816326533e-05, + "loss": 1.2648, + "step": 389100 + }, + { + "epoch": 8.24, + "learning_rate": 2.2641632653061228e-05, + "loss": 1.2704, + "step": 389200 + }, + { + "epoch": 8.24, + "learning_rate": 2.262122448979592e-05, + "loss": 1.2692, + "step": 389300 + }, + { + "epoch": 8.24, + "learning_rate": 2.2600816326530614e-05, + "loss": 1.2691, + "step": 389400 + }, + { + "epoch": 8.24, + "learning_rate": 2.2580408163265306e-05, + "loss": 1.2689, + "step": 389500 + }, + { + "epoch": 8.25, + "learning_rate": 2.256e-05, + "loss": 1.2681, + "step": 389600 + }, + { + "epoch": 8.25, + "learning_rate": 2.2539591836734696e-05, + "loss": 1.261, + "step": 389700 + }, + { + "epoch": 8.25, + "learning_rate": 2.2519183673469387e-05, + "loss": 1.2676, + "step": 389800 + }, + { + "epoch": 8.25, + "learning_rate": 2.2498775510204082e-05, + "loss": 1.2589, + "step": 389900 + }, + { + "epoch": 8.25, + "learning_rate": 2.2478367346938777e-05, + "loss": 1.2675, + "step": 390000 + }, + { + "epoch": 8.26, + "learning_rate": 2.2457959183673472e-05, + "loss": 1.2669, + "step": 390100 + }, + { + "epoch": 8.26, + "learning_rate": 2.2437551020408164e-05, + "loss": 1.265, + "step": 390200 + }, + { + "epoch": 8.26, + "learning_rate": 2.241714285714286e-05, + "loss": 1.2673, + "step": 390300 + }, + { + "epoch": 8.26, + "learning_rate": 2.2396734693877554e-05, + "loss": 1.2676, + "step": 390400 + }, + { + "epoch": 8.26, + "learning_rate": 2.2376326530612245e-05, + "loss": 1.2659, + "step": 390500 + }, + { + "epoch": 8.27, + "learning_rate": 2.2356122448979593e-05, + "loss": 1.2664, + "step": 390600 + }, + { + "epoch": 8.27, + "learning_rate": 2.2335714285714288e-05, + "loss": 1.2708, + "step": 390700 + }, + { + "epoch": 8.27, + "learning_rate": 2.231530612244898e-05, + "loss": 1.2633, + "step": 390800 + }, + { + "epoch": 8.27, + "learning_rate": 2.2294897959183675e-05, + "loss": 1.2667, + "step": 390900 + }, + { + "epoch": 8.28, + "learning_rate": 2.227448979591837e-05, + "loss": 1.2632, + "step": 391000 + }, + { + "epoch": 8.28, + "learning_rate": 2.225408163265306e-05, + "loss": 1.2655, + "step": 391100 + }, + { + "epoch": 8.28, + "learning_rate": 2.2233673469387756e-05, + "loss": 1.2609, + "step": 391200 + }, + { + "epoch": 8.28, + "learning_rate": 2.221326530612245e-05, + "loss": 1.2611, + "step": 391300 + }, + { + "epoch": 8.28, + "learning_rate": 2.2192857142857143e-05, + "loss": 1.2707, + "step": 391400 + }, + { + "epoch": 8.29, + "learning_rate": 2.2172448979591838e-05, + "loss": 1.2668, + "step": 391500 + }, + { + "epoch": 8.29, + "learning_rate": 2.2152040816326533e-05, + "loss": 1.2655, + "step": 391600 + }, + { + "epoch": 8.29, + "learning_rate": 2.2131632653061228e-05, + "loss": 1.264, + "step": 391700 + }, + { + "epoch": 8.29, + "learning_rate": 2.211122448979592e-05, + "loss": 1.2739, + "step": 391800 + }, + { + "epoch": 8.29, + "learning_rate": 2.2090816326530614e-05, + "loss": 1.2578, + "step": 391900 + }, + { + "epoch": 8.3, + "learning_rate": 2.207040816326531e-05, + "loss": 1.2735, + "step": 392000 + }, + { + "epoch": 8.3, + "learning_rate": 2.205e-05, + "loss": 1.263, + "step": 392100 + }, + { + "epoch": 8.3, + "learning_rate": 2.2029591836734696e-05, + "loss": 1.2637, + "step": 392200 + }, + { + "epoch": 8.3, + "learning_rate": 2.2009183673469387e-05, + "loss": 1.2771, + "step": 392300 + }, + { + "epoch": 8.3, + "learning_rate": 2.1988775510204082e-05, + "loss": 1.2645, + "step": 392400 + }, + { + "epoch": 8.31, + "learning_rate": 2.1968367346938777e-05, + "loss": 1.2677, + "step": 392500 + }, + { + "epoch": 8.31, + "learning_rate": 2.194795918367347e-05, + "loss": 1.2612, + "step": 392600 + }, + { + "epoch": 8.31, + "learning_rate": 2.1927551020408164e-05, + "loss": 1.2616, + "step": 392700 + }, + { + "epoch": 8.31, + "learning_rate": 2.190714285714286e-05, + "loss": 1.2549, + "step": 392800 + }, + { + "epoch": 8.32, + "learning_rate": 2.1886734693877554e-05, + "loss": 1.2601, + "step": 392900 + }, + { + "epoch": 8.32, + "learning_rate": 2.1866326530612245e-05, + "loss": 1.2708, + "step": 393000 + }, + { + "epoch": 8.32, + "learning_rate": 2.184591836734694e-05, + "loss": 1.2736, + "step": 393100 + }, + { + "epoch": 8.32, + "learning_rate": 2.1825510204081635e-05, + "loss": 1.2712, + "step": 393200 + }, + { + "epoch": 8.32, + "learning_rate": 2.1805102040816327e-05, + "loss": 1.2668, + "step": 393300 + }, + { + "epoch": 8.33, + "learning_rate": 2.1784693877551022e-05, + "loss": 1.2512, + "step": 393400 + }, + { + "epoch": 8.33, + "learning_rate": 2.1764285714285713e-05, + "loss": 1.2725, + "step": 393500 + }, + { + "epoch": 8.33, + "learning_rate": 2.1743877551020408e-05, + "loss": 1.2645, + "step": 393600 + }, + { + "epoch": 8.33, + "learning_rate": 2.1723469387755103e-05, + "loss": 1.2671, + "step": 393700 + }, + { + "epoch": 8.33, + "learning_rate": 2.1703061224489795e-05, + "loss": 1.2671, + "step": 393800 + }, + { + "epoch": 8.34, + "learning_rate": 2.1682653061224493e-05, + "loss": 1.2667, + "step": 393900 + }, + { + "epoch": 8.34, + "learning_rate": 2.1662244897959185e-05, + "loss": 1.2628, + "step": 394000 + }, + { + "epoch": 8.34, + "learning_rate": 2.1642040816326532e-05, + "loss": 1.2645, + "step": 394100 + }, + { + "epoch": 8.34, + "learning_rate": 2.1621632653061224e-05, + "loss": 1.2584, + "step": 394200 + }, + { + "epoch": 8.34, + "learning_rate": 2.160122448979592e-05, + "loss": 1.2632, + "step": 394300 + }, + { + "epoch": 8.35, + "learning_rate": 2.1580816326530614e-05, + "loss": 1.2646, + "step": 394400 + }, + { + "epoch": 8.35, + "learning_rate": 2.156040816326531e-05, + "loss": 1.2643, + "step": 394500 + }, + { + "epoch": 8.35, + "learning_rate": 2.154e-05, + "loss": 1.2643, + "step": 394600 + }, + { + "epoch": 8.35, + "learning_rate": 2.1519591836734695e-05, + "loss": 1.2687, + "step": 394700 + }, + { + "epoch": 8.36, + "learning_rate": 2.149918367346939e-05, + "loss": 1.2673, + "step": 394800 + }, + { + "epoch": 8.36, + "learning_rate": 2.1478775510204082e-05, + "loss": 1.2607, + "step": 394900 + }, + { + "epoch": 8.36, + "learning_rate": 2.1458367346938777e-05, + "loss": 1.2668, + "step": 395000 + }, + { + "epoch": 8.36, + "learning_rate": 2.143795918367347e-05, + "loss": 1.2635, + "step": 395100 + }, + { + "epoch": 8.36, + "learning_rate": 2.1417551020408163e-05, + "loss": 1.2705, + "step": 395200 + }, + { + "epoch": 8.37, + "learning_rate": 2.139714285714286e-05, + "loss": 1.2689, + "step": 395300 + }, + { + "epoch": 8.37, + "learning_rate": 2.137673469387755e-05, + "loss": 1.2594, + "step": 395400 + }, + { + "epoch": 8.37, + "learning_rate": 2.135632653061225e-05, + "loss": 1.2576, + "step": 395500 + }, + { + "epoch": 8.37, + "learning_rate": 2.133591836734694e-05, + "loss": 1.2606, + "step": 395600 + }, + { + "epoch": 8.37, + "learning_rate": 2.1315510204081635e-05, + "loss": 1.2633, + "step": 395700 + }, + { + "epoch": 8.38, + "learning_rate": 2.1295102040816327e-05, + "loss": 1.257, + "step": 395800 + }, + { + "epoch": 8.38, + "learning_rate": 2.127469387755102e-05, + "loss": 1.2553, + "step": 395900 + }, + { + "epoch": 8.38, + "learning_rate": 2.1254285714285716e-05, + "loss": 1.2625, + "step": 396000 + }, + { + "epoch": 8.38, + "learning_rate": 2.1233877551020408e-05, + "loss": 1.2609, + "step": 396100 + }, + { + "epoch": 8.39, + "learning_rate": 2.1213673469387756e-05, + "loss": 1.2607, + "step": 396200 + }, + { + "epoch": 8.39, + "learning_rate": 2.119326530612245e-05, + "loss": 1.2657, + "step": 396300 + }, + { + "epoch": 8.39, + "learning_rate": 2.1172857142857146e-05, + "loss": 1.2626, + "step": 396400 + }, + { + "epoch": 8.39, + "learning_rate": 2.1152448979591837e-05, + "loss": 1.27, + "step": 396500 + }, + { + "epoch": 8.39, + "learning_rate": 2.1132040816326532e-05, + "loss": 1.263, + "step": 396600 + }, + { + "epoch": 8.4, + "learning_rate": 2.1111632653061224e-05, + "loss": 1.2595, + "step": 396700 + }, + { + "epoch": 8.4, + "learning_rate": 2.109122448979592e-05, + "loss": 1.2583, + "step": 396800 + }, + { + "epoch": 8.4, + "learning_rate": 2.1070816326530614e-05, + "loss": 1.2562, + "step": 396900 + }, + { + "epoch": 8.4, + "learning_rate": 2.1050408163265305e-05, + "loss": 1.2682, + "step": 397000 + }, + { + "epoch": 8.4, + "learning_rate": 2.103e-05, + "loss": 1.2611, + "step": 397100 + }, + { + "epoch": 8.41, + "learning_rate": 2.1009591836734695e-05, + "loss": 1.2685, + "step": 397200 + }, + { + "epoch": 8.41, + "learning_rate": 2.098918367346939e-05, + "loss": 1.2673, + "step": 397300 + }, + { + "epoch": 8.41, + "learning_rate": 2.0968775510204082e-05, + "loss": 1.2659, + "step": 397400 + }, + { + "epoch": 8.41, + "learning_rate": 2.0948367346938777e-05, + "loss": 1.2589, + "step": 397500 + }, + { + "epoch": 8.41, + "learning_rate": 2.0927959183673472e-05, + "loss": 1.2624, + "step": 397600 + }, + { + "epoch": 8.42, + "learning_rate": 2.0907551020408163e-05, + "loss": 1.2723, + "step": 397700 + }, + { + "epoch": 8.42, + "learning_rate": 2.0887142857142858e-05, + "loss": 1.258, + "step": 397800 + }, + { + "epoch": 8.42, + "learning_rate": 2.086673469387755e-05, + "loss": 1.2599, + "step": 397900 + }, + { + "epoch": 8.42, + "learning_rate": 2.0846326530612245e-05, + "loss": 1.2705, + "step": 398000 + }, + { + "epoch": 8.43, + "learning_rate": 2.082591836734694e-05, + "loss": 1.2635, + "step": 398100 + }, + { + "epoch": 8.43, + "learning_rate": 2.080551020408163e-05, + "loss": 1.2548, + "step": 398200 + }, + { + "epoch": 8.43, + "learning_rate": 2.078510204081633e-05, + "loss": 1.2636, + "step": 398300 + }, + { + "epoch": 8.43, + "learning_rate": 2.076469387755102e-05, + "loss": 1.2629, + "step": 398400 + }, + { + "epoch": 8.43, + "learning_rate": 2.0744285714285716e-05, + "loss": 1.2608, + "step": 398500 + }, + { + "epoch": 8.44, + "learning_rate": 2.072387755102041e-05, + "loss": 1.2602, + "step": 398600 + }, + { + "epoch": 8.44, + "learning_rate": 2.0703673469387755e-05, + "loss": 1.2627, + "step": 398700 + }, + { + "epoch": 8.44, + "learning_rate": 2.068326530612245e-05, + "loss": 1.2626, + "step": 398800 + }, + { + "epoch": 8.44, + "learning_rate": 2.0662857142857145e-05, + "loss": 1.2648, + "step": 398900 + }, + { + "epoch": 8.44, + "learning_rate": 2.0642448979591837e-05, + "loss": 1.2617, + "step": 399000 + }, + { + "epoch": 8.45, + "learning_rate": 2.0622040816326532e-05, + "loss": 1.2639, + "step": 399100 + }, + { + "epoch": 8.45, + "learning_rate": 2.0601632653061227e-05, + "loss": 1.2627, + "step": 399200 + }, + { + "epoch": 8.45, + "learning_rate": 2.058122448979592e-05, + "loss": 1.26, + "step": 399300 + }, + { + "epoch": 8.45, + "learning_rate": 2.0560816326530613e-05, + "loss": 1.262, + "step": 399400 + }, + { + "epoch": 8.46, + "learning_rate": 2.0540408163265305e-05, + "loss": 1.2644, + "step": 399500 + }, + { + "epoch": 8.46, + "learning_rate": 2.052e-05, + "loss": 1.2592, + "step": 399600 + }, + { + "epoch": 8.46, + "learning_rate": 2.0499591836734695e-05, + "loss": 1.2655, + "step": 399700 + }, + { + "epoch": 8.46, + "learning_rate": 2.0479183673469387e-05, + "loss": 1.2594, + "step": 399800 + }, + { + "epoch": 8.46, + "learning_rate": 2.045877551020408e-05, + "loss": 1.2693, + "step": 399900 + }, + { + "epoch": 8.47, + "learning_rate": 2.0438367346938777e-05, + "loss": 1.258, + "step": 400000 + }, + { + "epoch": 8.47, + "learning_rate": 2.041795918367347e-05, + "loss": 1.2611, + "step": 400100 + }, + { + "epoch": 8.47, + "learning_rate": 2.0397551020408166e-05, + "loss": 1.2568, + "step": 400200 + }, + { + "epoch": 8.47, + "learning_rate": 2.0377142857142858e-05, + "loss": 1.2601, + "step": 400300 + }, + { + "epoch": 8.47, + "learning_rate": 2.0356734693877553e-05, + "loss": 1.2633, + "step": 400400 + }, + { + "epoch": 8.48, + "learning_rate": 2.0336326530612245e-05, + "loss": 1.264, + "step": 400500 + }, + { + "epoch": 8.48, + "learning_rate": 2.031591836734694e-05, + "loss": 1.259, + "step": 400600 + }, + { + "epoch": 8.48, + "learning_rate": 2.0295510204081635e-05, + "loss": 1.2678, + "step": 400700 + }, + { + "epoch": 8.48, + "learning_rate": 2.0275102040816326e-05, + "loss": 1.2579, + "step": 400800 + }, + { + "epoch": 8.48, + "learning_rate": 2.025469387755102e-05, + "loss": 1.2642, + "step": 400900 + }, + { + "epoch": 8.49, + "learning_rate": 2.0234285714285713e-05, + "loss": 1.2625, + "step": 401000 + }, + { + "epoch": 8.49, + "learning_rate": 2.021408163265306e-05, + "loss": 1.2733, + "step": 401100 + }, + { + "epoch": 8.49, + "learning_rate": 2.0193673469387755e-05, + "loss": 1.2567, + "step": 401200 + }, + { + "epoch": 8.49, + "learning_rate": 2.017326530612245e-05, + "loss": 1.2723, + "step": 401300 + }, + { + "epoch": 8.5, + "learning_rate": 2.0152857142857142e-05, + "loss": 1.2624, + "step": 401400 + }, + { + "epoch": 8.5, + "learning_rate": 2.0132448979591837e-05, + "loss": 1.2674, + "step": 401500 + }, + { + "epoch": 8.5, + "learning_rate": 2.0112040816326532e-05, + "loss": 1.259, + "step": 401600 + }, + { + "epoch": 8.5, + "learning_rate": 2.0091632653061227e-05, + "loss": 1.2631, + "step": 401700 + }, + { + "epoch": 8.5, + "learning_rate": 2.0071224489795922e-05, + "loss": 1.2579, + "step": 401800 + }, + { + "epoch": 8.51, + "learning_rate": 2.0051020408163266e-05, + "loss": 1.2579, + "step": 401900 + }, + { + "epoch": 8.51, + "learning_rate": 2.0030612244897957e-05, + "loss": 1.268, + "step": 402000 + }, + { + "epoch": 8.51, + "learning_rate": 2.0010204081632656e-05, + "loss": 1.2664, + "step": 402100 + }, + { + "epoch": 8.51, + "learning_rate": 1.9989795918367347e-05, + "loss": 1.2609, + "step": 402200 + }, + { + "epoch": 8.51, + "learning_rate": 1.9969387755102042e-05, + "loss": 1.2514, + "step": 402300 + }, + { + "epoch": 8.52, + "learning_rate": 1.9948979591836737e-05, + "loss": 1.2603, + "step": 402400 + }, + { + "epoch": 8.52, + "learning_rate": 1.992857142857143e-05, + "loss": 1.2527, + "step": 402500 + }, + { + "epoch": 8.52, + "learning_rate": 1.9908163265306124e-05, + "loss": 1.2599, + "step": 402600 + }, + { + "epoch": 8.52, + "learning_rate": 1.9887755102040816e-05, + "loss": 1.2546, + "step": 402700 + }, + { + "epoch": 8.52, + "learning_rate": 1.986734693877551e-05, + "loss": 1.2524, + "step": 402800 + }, + { + "epoch": 8.53, + "learning_rate": 1.9846938775510205e-05, + "loss": 1.263, + "step": 402900 + }, + { + "epoch": 8.53, + "learning_rate": 1.9826530612244897e-05, + "loss": 1.2614, + "step": 403000 + }, + { + "epoch": 8.53, + "learning_rate": 1.9806122448979592e-05, + "loss": 1.2598, + "step": 403100 + }, + { + "epoch": 8.53, + "learning_rate": 1.9785714285714287e-05, + "loss": 1.2624, + "step": 403200 + }, + { + "epoch": 8.54, + "learning_rate": 1.9765306122448982e-05, + "loss": 1.2539, + "step": 403300 + }, + { + "epoch": 8.54, + "learning_rate": 1.9744897959183677e-05, + "loss": 1.2634, + "step": 403400 + }, + { + "epoch": 8.54, + "learning_rate": 1.972448979591837e-05, + "loss": 1.261, + "step": 403500 + }, + { + "epoch": 8.54, + "learning_rate": 1.9704081632653063e-05, + "loss": 1.2612, + "step": 403600 + }, + { + "epoch": 8.54, + "learning_rate": 1.9683673469387755e-05, + "loss": 1.2622, + "step": 403700 + }, + { + "epoch": 8.55, + "learning_rate": 1.966326530612245e-05, + "loss": 1.2593, + "step": 403800 + }, + { + "epoch": 8.55, + "learning_rate": 1.9642857142857145e-05, + "loss": 1.259, + "step": 403900 + }, + { + "epoch": 8.55, + "learning_rate": 1.9622448979591837e-05, + "loss": 1.2549, + "step": 404000 + }, + { + "epoch": 8.55, + "learning_rate": 1.960204081632653e-05, + "loss": 1.2541, + "step": 404100 + }, + { + "epoch": 8.55, + "learning_rate": 1.9581632653061223e-05, + "loss": 1.2608, + "step": 404200 + }, + { + "epoch": 8.56, + "learning_rate": 1.9561224489795918e-05, + "loss": 1.2674, + "step": 404300 + }, + { + "epoch": 8.56, + "learning_rate": 1.9540816326530613e-05, + "loss": 1.2603, + "step": 404400 + }, + { + "epoch": 8.56, + "learning_rate": 1.9520408163265308e-05, + "loss": 1.2555, + "step": 404500 + }, + { + "epoch": 8.56, + "learning_rate": 1.9500000000000003e-05, + "loss": 1.2582, + "step": 404600 + }, + { + "epoch": 8.57, + "learning_rate": 1.9479591836734695e-05, + "loss": 1.2605, + "step": 404700 + }, + { + "epoch": 8.57, + "learning_rate": 1.945918367346939e-05, + "loss": 1.26, + "step": 404800 + }, + { + "epoch": 8.57, + "learning_rate": 1.943877551020408e-05, + "loss": 1.2607, + "step": 404900 + }, + { + "epoch": 8.57, + "learning_rate": 1.9418367346938776e-05, + "loss": 1.256, + "step": 405000 + }, + { + "epoch": 8.57, + "learning_rate": 1.939795918367347e-05, + "loss": 1.2628, + "step": 405100 + }, + { + "epoch": 8.58, + "learning_rate": 1.9377551020408163e-05, + "loss": 1.2578, + "step": 405200 + }, + { + "epoch": 8.58, + "learning_rate": 1.9357142857142858e-05, + "loss": 1.2588, + "step": 405300 + }, + { + "epoch": 8.58, + "learning_rate": 1.933673469387755e-05, + "loss": 1.2634, + "step": 405400 + }, + { + "epoch": 8.58, + "learning_rate": 1.9316326530612248e-05, + "loss": 1.2607, + "step": 405500 + }, + { + "epoch": 8.58, + "learning_rate": 1.929591836734694e-05, + "loss": 1.255, + "step": 405600 + }, + { + "epoch": 8.59, + "learning_rate": 1.9275510204081634e-05, + "loss": 1.2534, + "step": 405700 + }, + { + "epoch": 8.59, + "learning_rate": 1.925510204081633e-05, + "loss": 1.2635, + "step": 405800 + }, + { + "epoch": 8.59, + "learning_rate": 1.923469387755102e-05, + "loss": 1.2602, + "step": 405900 + }, + { + "epoch": 8.59, + "learning_rate": 1.9214285714285716e-05, + "loss": 1.2696, + "step": 406000 + }, + { + "epoch": 8.59, + "learning_rate": 1.9193877551020407e-05, + "loss": 1.2571, + "step": 406100 + }, + { + "epoch": 8.6, + "learning_rate": 1.9173469387755102e-05, + "loss": 1.2476, + "step": 406200 + }, + { + "epoch": 8.6, + "learning_rate": 1.9153061224489797e-05, + "loss": 1.2572, + "step": 406300 + }, + { + "epoch": 8.6, + "learning_rate": 1.913265306122449e-05, + "loss": 1.2598, + "step": 406400 + }, + { + "epoch": 8.6, + "learning_rate": 1.9112244897959184e-05, + "loss": 1.2498, + "step": 406500 + }, + { + "epoch": 8.61, + "learning_rate": 1.909183673469388e-05, + "loss": 1.2602, + "step": 406600 + }, + { + "epoch": 8.61, + "learning_rate": 1.9071428571428574e-05, + "loss": 1.2536, + "step": 406700 + }, + { + "epoch": 8.61, + "learning_rate": 1.905102040816327e-05, + "loss": 1.2625, + "step": 406800 + }, + { + "epoch": 8.61, + "learning_rate": 1.903061224489796e-05, + "loss": 1.2586, + "step": 406900 + }, + { + "epoch": 8.61, + "learning_rate": 1.9010204081632655e-05, + "loss": 1.2504, + "step": 407000 + }, + { + "epoch": 8.62, + "learning_rate": 1.8989795918367347e-05, + "loss": 1.2601, + "step": 407100 + }, + { + "epoch": 8.62, + "learning_rate": 1.8969387755102042e-05, + "loss": 1.258, + "step": 407200 + }, + { + "epoch": 8.62, + "learning_rate": 1.8948979591836737e-05, + "loss": 1.259, + "step": 407300 + }, + { + "epoch": 8.62, + "learning_rate": 1.892857142857143e-05, + "loss": 1.2561, + "step": 407400 + }, + { + "epoch": 8.62, + "learning_rate": 1.8908163265306123e-05, + "loss": 1.2664, + "step": 407500 + }, + { + "epoch": 8.63, + "learning_rate": 1.8887755102040815e-05, + "loss": 1.263, + "step": 407600 + }, + { + "epoch": 8.63, + "learning_rate": 1.886734693877551e-05, + "loss": 1.2656, + "step": 407700 + }, + { + "epoch": 8.63, + "learning_rate": 1.8846938775510205e-05, + "loss": 1.2585, + "step": 407800 + }, + { + "epoch": 8.63, + "learning_rate": 1.8826734693877552e-05, + "loss": 1.261, + "step": 407900 + }, + { + "epoch": 8.63, + "learning_rate": 1.8806326530612244e-05, + "loss": 1.2562, + "step": 408000 + }, + { + "epoch": 8.64, + "learning_rate": 1.878591836734694e-05, + "loss": 1.2523, + "step": 408100 + }, + { + "epoch": 8.64, + "learning_rate": 1.876551020408163e-05, + "loss": 1.2603, + "step": 408200 + }, + { + "epoch": 8.64, + "learning_rate": 1.874510204081633e-05, + "loss": 1.261, + "step": 408300 + }, + { + "epoch": 8.64, + "learning_rate": 1.8724693877551024e-05, + "loss": 1.2556, + "step": 408400 + }, + { + "epoch": 8.65, + "learning_rate": 1.8704285714285716e-05, + "loss": 1.2585, + "step": 408500 + }, + { + "epoch": 8.65, + "learning_rate": 1.868387755102041e-05, + "loss": 1.2595, + "step": 408600 + }, + { + "epoch": 8.65, + "learning_rate": 1.8663673469387755e-05, + "loss": 1.2546, + "step": 408700 + }, + { + "epoch": 8.65, + "learning_rate": 1.864326530612245e-05, + "loss": 1.2516, + "step": 408800 + }, + { + "epoch": 8.65, + "learning_rate": 1.8622857142857145e-05, + "loss": 1.2588, + "step": 408900 + }, + { + "epoch": 8.66, + "learning_rate": 1.860244897959184e-05, + "loss": 1.2629, + "step": 409000 + }, + { + "epoch": 8.66, + "learning_rate": 1.858204081632653e-05, + "loss": 1.2602, + "step": 409100 + }, + { + "epoch": 8.66, + "learning_rate": 1.8561632653061226e-05, + "loss": 1.2556, + "step": 409200 + }, + { + "epoch": 8.66, + "learning_rate": 1.8541224489795918e-05, + "loss": 1.2625, + "step": 409300 + }, + { + "epoch": 8.66, + "learning_rate": 1.8520816326530613e-05, + "loss": 1.2564, + "step": 409400 + }, + { + "epoch": 8.67, + "learning_rate": 1.8500408163265308e-05, + "loss": 1.2562, + "step": 409500 + }, + { + "epoch": 8.67, + "learning_rate": 1.848e-05, + "loss": 1.2538, + "step": 409600 + }, + { + "epoch": 8.67, + "learning_rate": 1.8459591836734694e-05, + "loss": 1.2593, + "step": 409700 + }, + { + "epoch": 8.67, + "learning_rate": 1.8439183673469386e-05, + "loss": 1.2531, + "step": 409800 + }, + { + "epoch": 8.68, + "learning_rate": 1.8418775510204084e-05, + "loss": 1.2618, + "step": 409900 + }, + { + "epoch": 8.68, + "learning_rate": 1.8398367346938776e-05, + "loss": 1.255, + "step": 410000 + }, + { + "epoch": 8.68, + "learning_rate": 1.837795918367347e-05, + "loss": 1.255, + "step": 410100 + }, + { + "epoch": 8.68, + "learning_rate": 1.8357551020408166e-05, + "loss": 1.2565, + "step": 410200 + }, + { + "epoch": 8.68, + "learning_rate": 1.8337142857142857e-05, + "loss": 1.25, + "step": 410300 + }, + { + "epoch": 8.69, + "learning_rate": 1.8316734693877552e-05, + "loss": 1.2487, + "step": 410400 + }, + { + "epoch": 8.69, + "learning_rate": 1.8296326530612247e-05, + "loss": 1.2644, + "step": 410500 + }, + { + "epoch": 8.69, + "learning_rate": 1.827591836734694e-05, + "loss": 1.2544, + "step": 410600 + }, + { + "epoch": 8.69, + "learning_rate": 1.8255510204081634e-05, + "loss": 1.2551, + "step": 410700 + }, + { + "epoch": 8.69, + "learning_rate": 1.8235102040816325e-05, + "loss": 1.2503, + "step": 410800 + }, + { + "epoch": 8.7, + "learning_rate": 1.821469387755102e-05, + "loss": 1.2652, + "step": 410900 + }, + { + "epoch": 8.7, + "learning_rate": 1.8194285714285715e-05, + "loss": 1.2552, + "step": 411000 + }, + { + "epoch": 8.7, + "learning_rate": 1.817387755102041e-05, + "loss": 1.2582, + "step": 411100 + }, + { + "epoch": 8.7, + "learning_rate": 1.8153469387755105e-05, + "loss": 1.2576, + "step": 411200 + }, + { + "epoch": 8.7, + "learning_rate": 1.8133061224489797e-05, + "loss": 1.2581, + "step": 411300 + }, + { + "epoch": 8.71, + "learning_rate": 1.8112653061224492e-05, + "loss": 1.2441, + "step": 411400 + }, + { + "epoch": 8.71, + "learning_rate": 1.8092244897959183e-05, + "loss": 1.2618, + "step": 411500 + }, + { + "epoch": 8.71, + "learning_rate": 1.807183673469388e-05, + "loss": 1.2513, + "step": 411600 + }, + { + "epoch": 8.71, + "learning_rate": 1.8051428571428573e-05, + "loss": 1.2618, + "step": 411700 + }, + { + "epoch": 8.72, + "learning_rate": 1.8031020408163265e-05, + "loss": 1.2569, + "step": 411800 + }, + { + "epoch": 8.72, + "learning_rate": 1.801061224489796e-05, + "loss": 1.2483, + "step": 411900 + }, + { + "epoch": 8.72, + "learning_rate": 1.799020408163265e-05, + "loss": 1.2642, + "step": 412000 + }, + { + "epoch": 8.72, + "learning_rate": 1.7969795918367346e-05, + "loss": 1.2622, + "step": 412100 + }, + { + "epoch": 8.72, + "learning_rate": 1.794938775510204e-05, + "loss": 1.2524, + "step": 412200 + }, + { + "epoch": 8.73, + "learning_rate": 1.7928979591836736e-05, + "loss": 1.2497, + "step": 412300 + }, + { + "epoch": 8.73, + "learning_rate": 1.790857142857143e-05, + "loss": 1.2536, + "step": 412400 + }, + { + "epoch": 8.73, + "learning_rate": 1.7888163265306123e-05, + "loss": 1.2561, + "step": 412500 + }, + { + "epoch": 8.73, + "learning_rate": 1.7867755102040818e-05, + "loss": 1.2552, + "step": 412600 + }, + { + "epoch": 8.73, + "learning_rate": 1.7847551020408165e-05, + "loss": 1.2535, + "step": 412700 + }, + { + "epoch": 8.74, + "learning_rate": 1.782714285714286e-05, + "loss": 1.2486, + "step": 412800 + }, + { + "epoch": 8.74, + "learning_rate": 1.7806734693877552e-05, + "loss": 1.2536, + "step": 412900 + }, + { + "epoch": 8.74, + "learning_rate": 1.7786326530612247e-05, + "loss": 1.2563, + "step": 413000 + }, + { + "epoch": 8.74, + "learning_rate": 1.776591836734694e-05, + "loss": 1.2471, + "step": 413100 + }, + { + "epoch": 8.74, + "learning_rate": 1.7745510204081634e-05, + "loss": 1.2577, + "step": 413200 + }, + { + "epoch": 8.75, + "learning_rate": 1.772530612244898e-05, + "loss": 1.2535, + "step": 413300 + }, + { + "epoch": 8.75, + "learning_rate": 1.7704897959183676e-05, + "loss": 1.247, + "step": 413400 + }, + { + "epoch": 8.75, + "learning_rate": 1.7684489795918368e-05, + "loss": 1.2554, + "step": 413500 + }, + { + "epoch": 8.75, + "learning_rate": 1.7664081632653063e-05, + "loss": 1.2545, + "step": 413600 + }, + { + "epoch": 8.76, + "learning_rate": 1.7643673469387754e-05, + "loss": 1.26, + "step": 413700 + }, + { + "epoch": 8.76, + "learning_rate": 1.762326530612245e-05, + "loss": 1.2521, + "step": 413800 + }, + { + "epoch": 8.76, + "learning_rate": 1.7602857142857144e-05, + "loss": 1.2505, + "step": 413900 + }, + { + "epoch": 8.76, + "learning_rate": 1.7582448979591836e-05, + "loss": 1.2529, + "step": 414000 + }, + { + "epoch": 8.76, + "learning_rate": 1.756204081632653e-05, + "loss": 1.2488, + "step": 414100 + }, + { + "epoch": 8.77, + "learning_rate": 1.7541632653061226e-05, + "loss": 1.2591, + "step": 414200 + }, + { + "epoch": 8.77, + "learning_rate": 1.752122448979592e-05, + "loss": 1.2518, + "step": 414300 + }, + { + "epoch": 8.77, + "learning_rate": 1.7500816326530616e-05, + "loss": 1.2535, + "step": 414400 + }, + { + "epoch": 8.77, + "learning_rate": 1.7480408163265307e-05, + "loss": 1.2596, + "step": 414500 + }, + { + "epoch": 8.77, + "learning_rate": 1.7460000000000002e-05, + "loss": 1.2469, + "step": 414600 + }, + { + "epoch": 8.78, + "learning_rate": 1.7439591836734694e-05, + "loss": 1.257, + "step": 414700 + }, + { + "epoch": 8.78, + "learning_rate": 1.741918367346939e-05, + "loss": 1.2585, + "step": 414800 + }, + { + "epoch": 8.78, + "learning_rate": 1.7398775510204084e-05, + "loss": 1.2557, + "step": 414900 + }, + { + "epoch": 8.78, + "learning_rate": 1.7378367346938775e-05, + "loss": 1.2614, + "step": 415000 + }, + { + "epoch": 8.79, + "learning_rate": 1.735795918367347e-05, + "loss": 1.259, + "step": 415100 + }, + { + "epoch": 8.79, + "learning_rate": 1.7337551020408162e-05, + "loss": 1.2583, + "step": 415200 + }, + { + "epoch": 8.79, + "learning_rate": 1.731734693877551e-05, + "loss": 1.25, + "step": 415300 + }, + { + "epoch": 8.79, + "learning_rate": 1.7296938775510204e-05, + "loss": 1.2558, + "step": 415400 + }, + { + "epoch": 8.79, + "learning_rate": 1.72765306122449e-05, + "loss": 1.2593, + "step": 415500 + }, + { + "epoch": 8.8, + "learning_rate": 1.725612244897959e-05, + "loss": 1.2559, + "step": 415600 + }, + { + "epoch": 8.8, + "learning_rate": 1.7235714285714286e-05, + "loss": 1.2592, + "step": 415700 + }, + { + "epoch": 8.8, + "learning_rate": 1.7215306122448978e-05, + "loss": 1.2569, + "step": 415800 + }, + { + "epoch": 8.8, + "learning_rate": 1.7194897959183676e-05, + "loss": 1.2492, + "step": 415900 + }, + { + "epoch": 8.8, + "learning_rate": 1.717448979591837e-05, + "loss": 1.2555, + "step": 416000 + }, + { + "epoch": 8.81, + "learning_rate": 1.7154081632653062e-05, + "loss": 1.26, + "step": 416100 + }, + { + "epoch": 8.81, + "learning_rate": 1.7133673469387757e-05, + "loss": 1.2585, + "step": 416200 + }, + { + "epoch": 8.81, + "learning_rate": 1.711326530612245e-05, + "loss": 1.251, + "step": 416300 + }, + { + "epoch": 8.81, + "learning_rate": 1.7092857142857144e-05, + "loss": 1.2587, + "step": 416400 + }, + { + "epoch": 8.81, + "learning_rate": 1.707244897959184e-05, + "loss": 1.2496, + "step": 416500 + }, + { + "epoch": 8.82, + "learning_rate": 1.705204081632653e-05, + "loss": 1.2498, + "step": 416600 + }, + { + "epoch": 8.82, + "learning_rate": 1.7031632653061226e-05, + "loss": 1.2545, + "step": 416700 + }, + { + "epoch": 8.82, + "learning_rate": 1.7011224489795917e-05, + "loss": 1.2533, + "step": 416800 + }, + { + "epoch": 8.82, + "learning_rate": 1.6990816326530612e-05, + "loss": 1.2556, + "step": 416900 + }, + { + "epoch": 8.83, + "learning_rate": 1.6970408163265307e-05, + "loss": 1.2513, + "step": 417000 + }, + { + "epoch": 8.83, + "learning_rate": 1.6950000000000002e-05, + "loss": 1.2494, + "step": 417100 + }, + { + "epoch": 8.83, + "learning_rate": 1.6929591836734697e-05, + "loss": 1.257, + "step": 417200 + }, + { + "epoch": 8.83, + "learning_rate": 1.690918367346939e-05, + "loss": 1.2473, + "step": 417300 + }, + { + "epoch": 8.83, + "learning_rate": 1.6888775510204084e-05, + "loss": 1.2577, + "step": 417400 + }, + { + "epoch": 8.84, + "learning_rate": 1.6868367346938775e-05, + "loss": 1.251, + "step": 417500 + }, + { + "epoch": 8.84, + "learning_rate": 1.684795918367347e-05, + "loss": 1.2515, + "step": 417600 + }, + { + "epoch": 8.84, + "learning_rate": 1.6827551020408165e-05, + "loss": 1.2551, + "step": 417700 + }, + { + "epoch": 8.84, + "learning_rate": 1.6807142857142857e-05, + "loss": 1.2595, + "step": 417800 + }, + { + "epoch": 8.84, + "learning_rate": 1.678673469387755e-05, + "loss": 1.2507, + "step": 417900 + }, + { + "epoch": 8.85, + "learning_rate": 1.6766326530612243e-05, + "loss": 1.2466, + "step": 418000 + }, + { + "epoch": 8.85, + "learning_rate": 1.6745918367346938e-05, + "loss": 1.2532, + "step": 418100 + }, + { + "epoch": 8.85, + "learning_rate": 1.6725714285714286e-05, + "loss": 1.241, + "step": 418200 + }, + { + "epoch": 8.85, + "learning_rate": 1.670530612244898e-05, + "loss": 1.2578, + "step": 418300 + }, + { + "epoch": 8.86, + "learning_rate": 1.6684897959183672e-05, + "loss": 1.2522, + "step": 418400 + }, + { + "epoch": 8.86, + "learning_rate": 1.6664489795918367e-05, + "loss": 1.2526, + "step": 418500 + }, + { + "epoch": 8.86, + "learning_rate": 1.6644081632653062e-05, + "loss": 1.2487, + "step": 418600 + }, + { + "epoch": 8.86, + "learning_rate": 1.6623673469387757e-05, + "loss": 1.2462, + "step": 418700 + }, + { + "epoch": 8.86, + "learning_rate": 1.6603265306122452e-05, + "loss": 1.2541, + "step": 418800 + }, + { + "epoch": 8.87, + "learning_rate": 1.6582857142857144e-05, + "loss": 1.2481, + "step": 418900 + }, + { + "epoch": 8.87, + "learning_rate": 1.656244897959184e-05, + "loss": 1.2549, + "step": 419000 + }, + { + "epoch": 8.87, + "learning_rate": 1.654204081632653e-05, + "loss": 1.252, + "step": 419100 + }, + { + "epoch": 8.87, + "learning_rate": 1.6521632653061225e-05, + "loss": 1.2552, + "step": 419200 + }, + { + "epoch": 8.87, + "learning_rate": 1.650122448979592e-05, + "loss": 1.2527, + "step": 419300 + }, + { + "epoch": 8.88, + "learning_rate": 1.6480816326530612e-05, + "loss": 1.2525, + "step": 419400 + }, + { + "epoch": 8.88, + "learning_rate": 1.6460408163265307e-05, + "loss": 1.2543, + "step": 419500 + }, + { + "epoch": 8.88, + "learning_rate": 1.644e-05, + "loss": 1.2489, + "step": 419600 + }, + { + "epoch": 8.88, + "learning_rate": 1.6419591836734693e-05, + "loss": 1.2588, + "step": 419700 + }, + { + "epoch": 8.88, + "learning_rate": 1.639918367346939e-05, + "loss": 1.2491, + "step": 419800 + }, + { + "epoch": 8.89, + "learning_rate": 1.6378775510204083e-05, + "loss": 1.2531, + "step": 419900 + }, + { + "epoch": 8.89, + "learning_rate": 1.635836734693878e-05, + "loss": 1.2512, + "step": 420000 + }, + { + "epoch": 8.89, + "learning_rate": 1.633795918367347e-05, + "loss": 1.2617, + "step": 420100 + }, + { + "epoch": 8.89, + "learning_rate": 1.6317551020408165e-05, + "loss": 1.252, + "step": 420200 + }, + { + "epoch": 8.9, + "learning_rate": 1.6297142857142856e-05, + "loss": 1.2526, + "step": 420300 + }, + { + "epoch": 8.9, + "learning_rate": 1.627673469387755e-05, + "loss": 1.2501, + "step": 420400 + }, + { + "epoch": 8.9, + "learning_rate": 1.6256326530612246e-05, + "loss": 1.25, + "step": 420500 + }, + { + "epoch": 8.9, + "learning_rate": 1.6235918367346938e-05, + "loss": 1.2583, + "step": 420600 + }, + { + "epoch": 8.9, + "learning_rate": 1.6215510204081633e-05, + "loss": 1.2578, + "step": 420700 + }, + { + "epoch": 8.91, + "learning_rate": 1.6195102040816325e-05, + "loss": 1.2493, + "step": 420800 + }, + { + "epoch": 8.91, + "learning_rate": 1.617469387755102e-05, + "loss": 1.2547, + "step": 420900 + }, + { + "epoch": 8.91, + "learning_rate": 1.6154285714285718e-05, + "loss": 1.2509, + "step": 421000 + }, + { + "epoch": 8.91, + "learning_rate": 1.613387755102041e-05, + "loss": 1.2547, + "step": 421100 + }, + { + "epoch": 8.91, + "learning_rate": 1.6113469387755104e-05, + "loss": 1.2496, + "step": 421200 + }, + { + "epoch": 8.92, + "learning_rate": 1.6093061224489796e-05, + "loss": 1.2533, + "step": 421300 + }, + { + "epoch": 8.92, + "learning_rate": 1.607265306122449e-05, + "loss": 1.2485, + "step": 421400 + }, + { + "epoch": 8.92, + "learning_rate": 1.6052244897959186e-05, + "loss": 1.246, + "step": 421500 + }, + { + "epoch": 8.92, + "learning_rate": 1.6031836734693878e-05, + "loss": 1.2499, + "step": 421600 + }, + { + "epoch": 8.92, + "learning_rate": 1.6011428571428573e-05, + "loss": 1.2493, + "step": 421700 + }, + { + "epoch": 8.93, + "learning_rate": 1.5991020408163264e-05, + "loss": 1.2488, + "step": 421800 + }, + { + "epoch": 8.93, + "learning_rate": 1.597061224489796e-05, + "loss": 1.2498, + "step": 421900 + }, + { + "epoch": 8.93, + "learning_rate": 1.5950204081632654e-05, + "loss": 1.2426, + "step": 422000 + }, + { + "epoch": 8.93, + "learning_rate": 1.592979591836735e-05, + "loss": 1.2524, + "step": 422100 + }, + { + "epoch": 8.94, + "learning_rate": 1.5909387755102044e-05, + "loss": 1.2535, + "step": 422200 + }, + { + "epoch": 8.94, + "learning_rate": 1.5889183673469388e-05, + "loss": 1.2581, + "step": 422300 + }, + { + "epoch": 8.94, + "learning_rate": 1.586877551020408e-05, + "loss": 1.252, + "step": 422400 + }, + { + "epoch": 8.94, + "learning_rate": 1.5848367346938775e-05, + "loss": 1.2556, + "step": 422500 + }, + { + "epoch": 8.94, + "learning_rate": 1.582795918367347e-05, + "loss": 1.252, + "step": 422600 + }, + { + "epoch": 8.95, + "learning_rate": 1.5807551020408165e-05, + "loss": 1.2431, + "step": 422700 + }, + { + "epoch": 8.95, + "learning_rate": 1.578714285714286e-05, + "loss": 1.2529, + "step": 422800 + }, + { + "epoch": 8.95, + "learning_rate": 1.576673469387755e-05, + "loss": 1.2541, + "step": 422900 + }, + { + "epoch": 8.95, + "learning_rate": 1.57465306122449e-05, + "loss": 1.2466, + "step": 423000 + }, + { + "epoch": 8.95, + "learning_rate": 1.5726122448979594e-05, + "loss": 1.252, + "step": 423100 + }, + { + "epoch": 8.96, + "learning_rate": 1.570571428571429e-05, + "loss": 1.2509, + "step": 423200 + }, + { + "epoch": 8.96, + "learning_rate": 1.568530612244898e-05, + "loss": 1.2527, + "step": 423300 + }, + { + "epoch": 8.96, + "learning_rate": 1.5664897959183675e-05, + "loss": 1.2534, + "step": 423400 + }, + { + "epoch": 8.96, + "learning_rate": 1.5644489795918367e-05, + "loss": 1.2526, + "step": 423500 + }, + { + "epoch": 8.97, + "learning_rate": 1.5624081632653062e-05, + "loss": 1.2479, + "step": 423600 + }, + { + "epoch": 8.97, + "learning_rate": 1.5603673469387757e-05, + "loss": 1.2484, + "step": 423700 + }, + { + "epoch": 8.97, + "learning_rate": 1.558326530612245e-05, + "loss": 1.2543, + "step": 423800 + }, + { + "epoch": 8.97, + "learning_rate": 1.5562857142857143e-05, + "loss": 1.2522, + "step": 423900 + }, + { + "epoch": 8.97, + "learning_rate": 1.5542448979591835e-05, + "loss": 1.2415, + "step": 424000 + }, + { + "epoch": 8.98, + "learning_rate": 1.552204081632653e-05, + "loss": 1.2497, + "step": 424100 + }, + { + "epoch": 8.98, + "learning_rate": 1.5501632653061225e-05, + "loss": 1.2466, + "step": 424200 + }, + { + "epoch": 8.98, + "learning_rate": 1.548122448979592e-05, + "loss": 1.2482, + "step": 424300 + }, + { + "epoch": 8.98, + "learning_rate": 1.5460816326530615e-05, + "loss": 1.2536, + "step": 424400 + }, + { + "epoch": 8.98, + "learning_rate": 1.5440408163265306e-05, + "loss": 1.254, + "step": 424500 + }, + { + "epoch": 8.99, + "learning_rate": 1.542e-05, + "loss": 1.2451, + "step": 424600 + }, + { + "epoch": 8.99, + "learning_rate": 1.5399591836734696e-05, + "loss": 1.2566, + "step": 424700 + }, + { + "epoch": 8.99, + "learning_rate": 1.5379183673469388e-05, + "loss": 1.2447, + "step": 424800 + }, + { + "epoch": 8.99, + "learning_rate": 1.5358775510204083e-05, + "loss": 1.2508, + "step": 424900 + }, + { + "epoch": 8.99, + "learning_rate": 1.5338367346938775e-05, + "loss": 1.2528, + "step": 425000 + }, + { + "epoch": 9.0, + "learning_rate": 1.531795918367347e-05, + "loss": 1.2482, + "step": 425100 + }, + { + "epoch": 9.0, + "learning_rate": 1.5297551020408164e-05, + "loss": 1.2503, + "step": 425200 + }, + { + "epoch": 9.0, + "learning_rate": 1.5277142857142856e-05, + "loss": 1.2447, + "step": 425300 + }, + { + "epoch": 9.0, + "learning_rate": 1.5256734693877553e-05, + "loss": 1.2446, + "step": 425400 + }, + { + "epoch": 9.01, + "learning_rate": 1.5236326530612246e-05, + "loss": 1.2468, + "step": 425500 + }, + { + "epoch": 9.01, + "learning_rate": 1.5215918367346941e-05, + "loss": 1.2499, + "step": 425600 + }, + { + "epoch": 9.01, + "learning_rate": 1.5195510204081634e-05, + "loss": 1.24, + "step": 425700 + }, + { + "epoch": 9.01, + "learning_rate": 1.5175102040816328e-05, + "loss": 1.2441, + "step": 425800 + }, + { + "epoch": 9.01, + "learning_rate": 1.515469387755102e-05, + "loss": 1.2437, + "step": 425900 + }, + { + "epoch": 9.02, + "learning_rate": 1.5134285714285714e-05, + "loss": 1.2485, + "step": 426000 + }, + { + "epoch": 9.02, + "learning_rate": 1.5113877551020409e-05, + "loss": 1.2501, + "step": 426100 + }, + { + "epoch": 9.02, + "learning_rate": 1.5093469387755102e-05, + "loss": 1.249, + "step": 426200 + }, + { + "epoch": 9.02, + "learning_rate": 1.5073061224489796e-05, + "loss": 1.2437, + "step": 426300 + }, + { + "epoch": 9.02, + "learning_rate": 1.5052653061224489e-05, + "loss": 1.244, + "step": 426400 + }, + { + "epoch": 9.03, + "learning_rate": 1.5032244897959186e-05, + "loss": 1.2462, + "step": 426500 + }, + { + "epoch": 9.03, + "learning_rate": 1.5011836734693879e-05, + "loss": 1.2543, + "step": 426600 + }, + { + "epoch": 9.03, + "learning_rate": 1.4991428571428572e-05, + "loss": 1.2522, + "step": 426700 + }, + { + "epoch": 9.03, + "learning_rate": 1.4971020408163267e-05, + "loss": 1.2431, + "step": 426800 + }, + { + "epoch": 9.03, + "learning_rate": 1.495061224489796e-05, + "loss": 1.2455, + "step": 426900 + }, + { + "epoch": 9.04, + "learning_rate": 1.4930408163265308e-05, + "loss": 1.2441, + "step": 427000 + }, + { + "epoch": 9.04, + "learning_rate": 1.4910000000000001e-05, + "loss": 1.2413, + "step": 427100 + }, + { + "epoch": 9.04, + "learning_rate": 1.4889591836734696e-05, + "loss": 1.2441, + "step": 427200 + }, + { + "epoch": 9.04, + "learning_rate": 1.486918367346939e-05, + "loss": 1.2464, + "step": 427300 + }, + { + "epoch": 9.05, + "learning_rate": 1.4848775510204083e-05, + "loss": 1.2408, + "step": 427400 + }, + { + "epoch": 9.05, + "learning_rate": 1.4828367346938776e-05, + "loss": 1.2466, + "step": 427500 + }, + { + "epoch": 9.05, + "learning_rate": 1.480795918367347e-05, + "loss": 1.2478, + "step": 427600 + }, + { + "epoch": 9.05, + "learning_rate": 1.4787551020408164e-05, + "loss": 1.2459, + "step": 427700 + }, + { + "epoch": 9.05, + "learning_rate": 1.4767142857142858e-05, + "loss": 1.2411, + "step": 427800 + }, + { + "epoch": 9.06, + "learning_rate": 1.4746734693877551e-05, + "loss": 1.2473, + "step": 427900 + }, + { + "epoch": 9.06, + "learning_rate": 1.4726326530612244e-05, + "loss": 1.2414, + "step": 428000 + }, + { + "epoch": 9.06, + "learning_rate": 1.470591836734694e-05, + "loss": 1.2479, + "step": 428100 + }, + { + "epoch": 9.06, + "learning_rate": 1.4685510204081634e-05, + "loss": 1.2459, + "step": 428200 + }, + { + "epoch": 9.06, + "learning_rate": 1.4665102040816327e-05, + "loss": 1.2477, + "step": 428300 + }, + { + "epoch": 9.07, + "learning_rate": 1.4644693877551022e-05, + "loss": 1.2413, + "step": 428400 + }, + { + "epoch": 9.07, + "learning_rate": 1.4624285714285716e-05, + "loss": 1.2492, + "step": 428500 + }, + { + "epoch": 9.07, + "learning_rate": 1.4603877551020409e-05, + "loss": 1.2411, + "step": 428600 + }, + { + "epoch": 9.07, + "learning_rate": 1.4583469387755102e-05, + "loss": 1.2534, + "step": 428700 + }, + { + "epoch": 9.08, + "learning_rate": 1.4563061224489795e-05, + "loss": 1.2448, + "step": 428800 + }, + { + "epoch": 9.08, + "learning_rate": 1.454265306122449e-05, + "loss": 1.2504, + "step": 428900 + }, + { + "epoch": 9.08, + "learning_rate": 1.4522244897959184e-05, + "loss": 1.2511, + "step": 429000 + }, + { + "epoch": 9.08, + "learning_rate": 1.4501836734693877e-05, + "loss": 1.2464, + "step": 429100 + }, + { + "epoch": 9.08, + "learning_rate": 1.448142857142857e-05, + "loss": 1.2422, + "step": 429200 + }, + { + "epoch": 9.09, + "learning_rate": 1.446122448979592e-05, + "loss": 1.2493, + "step": 429300 + }, + { + "epoch": 9.09, + "learning_rate": 1.4440816326530613e-05, + "loss": 1.2444, + "step": 429400 + }, + { + "epoch": 9.09, + "learning_rate": 1.4420408163265306e-05, + "loss": 1.2438, + "step": 429500 + }, + { + "epoch": 9.09, + "learning_rate": 1.44e-05, + "loss": 1.244, + "step": 429600 + }, + { + "epoch": 9.09, + "learning_rate": 1.4379591836734693e-05, + "loss": 1.2438, + "step": 429700 + }, + { + "epoch": 9.1, + "learning_rate": 1.435918367346939e-05, + "loss": 1.2469, + "step": 429800 + }, + { + "epoch": 9.1, + "learning_rate": 1.4338775510204083e-05, + "loss": 1.2465, + "step": 429900 + }, + { + "epoch": 9.1, + "learning_rate": 1.4318367346938778e-05, + "loss": 1.2478, + "step": 430000 + }, + { + "epoch": 9.1, + "learning_rate": 1.429795918367347e-05, + "loss": 1.2451, + "step": 430100 + }, + { + "epoch": 9.1, + "learning_rate": 1.4277551020408164e-05, + "loss": 1.2459, + "step": 430200 + }, + { + "epoch": 9.11, + "learning_rate": 1.4257142857142857e-05, + "loss": 1.2414, + "step": 430300 + }, + { + "epoch": 9.11, + "learning_rate": 1.423673469387755e-05, + "loss": 1.2511, + "step": 430400 + }, + { + "epoch": 9.11, + "learning_rate": 1.4216326530612246e-05, + "loss": 1.2425, + "step": 430500 + }, + { + "epoch": 9.11, + "learning_rate": 1.4195918367346939e-05, + "loss": 1.2427, + "step": 430600 + }, + { + "epoch": 9.12, + "learning_rate": 1.4175510204081632e-05, + "loss": 1.2444, + "step": 430700 + }, + { + "epoch": 9.12, + "learning_rate": 1.4155102040816325e-05, + "loss": 1.2515, + "step": 430800 + }, + { + "epoch": 9.12, + "learning_rate": 1.4134693877551022e-05, + "loss": 1.2433, + "step": 430900 + }, + { + "epoch": 9.12, + "learning_rate": 1.4114285714285715e-05, + "loss": 1.2427, + "step": 431000 + }, + { + "epoch": 9.12, + "learning_rate": 1.409387755102041e-05, + "loss": 1.2457, + "step": 431100 + }, + { + "epoch": 9.13, + "learning_rate": 1.4073469387755104e-05, + "loss": 1.2503, + "step": 431200 + }, + { + "epoch": 9.13, + "learning_rate": 1.4053265306122448e-05, + "loss": 1.2489, + "step": 431300 + }, + { + "epoch": 9.13, + "learning_rate": 1.4032857142857145e-05, + "loss": 1.2492, + "step": 431400 + }, + { + "epoch": 9.13, + "learning_rate": 1.4012448979591838e-05, + "loss": 1.2427, + "step": 431500 + }, + { + "epoch": 9.13, + "learning_rate": 1.3992040816326533e-05, + "loss": 1.2501, + "step": 431600 + }, + { + "epoch": 9.14, + "learning_rate": 1.3971632653061226e-05, + "loss": 1.2509, + "step": 431700 + }, + { + "epoch": 9.14, + "learning_rate": 1.395122448979592e-05, + "loss": 1.2448, + "step": 431800 + }, + { + "epoch": 9.14, + "learning_rate": 1.3930816326530613e-05, + "loss": 1.2509, + "step": 431900 + }, + { + "epoch": 9.14, + "learning_rate": 1.3910408163265306e-05, + "loss": 1.2475, + "step": 432000 + }, + { + "epoch": 9.14, + "learning_rate": 1.389e-05, + "loss": 1.2423, + "step": 432100 + }, + { + "epoch": 9.15, + "learning_rate": 1.3869591836734694e-05, + "loss": 1.2425, + "step": 432200 + }, + { + "epoch": 9.15, + "learning_rate": 1.3849183673469387e-05, + "loss": 1.2463, + "step": 432300 + }, + { + "epoch": 9.15, + "learning_rate": 1.382877551020408e-05, + "loss": 1.2475, + "step": 432400 + }, + { + "epoch": 9.15, + "learning_rate": 1.3808367346938777e-05, + "loss": 1.2456, + "step": 432500 + }, + { + "epoch": 9.16, + "learning_rate": 1.378795918367347e-05, + "loss": 1.2465, + "step": 432600 + }, + { + "epoch": 9.16, + "learning_rate": 1.3767551020408166e-05, + "loss": 1.241, + "step": 432700 + }, + { + "epoch": 9.16, + "learning_rate": 1.3747142857142859e-05, + "loss": 1.2522, + "step": 432800 + }, + { + "epoch": 9.16, + "learning_rate": 1.3726734693877552e-05, + "loss": 1.2393, + "step": 432900 + }, + { + "epoch": 9.16, + "learning_rate": 1.3706326530612245e-05, + "loss": 1.2474, + "step": 433000 + }, + { + "epoch": 9.17, + "learning_rate": 1.3685918367346939e-05, + "loss": 1.2451, + "step": 433100 + }, + { + "epoch": 9.17, + "learning_rate": 1.3665510204081634e-05, + "loss": 1.2423, + "step": 433200 + }, + { + "epoch": 9.17, + "learning_rate": 1.3645306122448981e-05, + "loss": 1.2449, + "step": 433300 + }, + { + "epoch": 9.17, + "learning_rate": 1.3624897959183675e-05, + "loss": 1.2491, + "step": 433400 + }, + { + "epoch": 9.17, + "learning_rate": 1.3604489795918368e-05, + "loss": 1.2475, + "step": 433500 + }, + { + "epoch": 9.18, + "learning_rate": 1.3584081632653061e-05, + "loss": 1.246, + "step": 433600 + }, + { + "epoch": 9.18, + "learning_rate": 1.3563673469387756e-05, + "loss": 1.2427, + "step": 433700 + }, + { + "epoch": 9.18, + "learning_rate": 1.354326530612245e-05, + "loss": 1.2478, + "step": 433800 + }, + { + "epoch": 9.18, + "learning_rate": 1.3522857142857143e-05, + "loss": 1.2471, + "step": 433900 + }, + { + "epoch": 9.19, + "learning_rate": 1.3502448979591836e-05, + "loss": 1.2414, + "step": 434000 + }, + { + "epoch": 9.19, + "learning_rate": 1.348204081632653e-05, + "loss": 1.239, + "step": 434100 + }, + { + "epoch": 9.19, + "learning_rate": 1.3461632653061226e-05, + "loss": 1.2398, + "step": 434200 + }, + { + "epoch": 9.19, + "learning_rate": 1.344122448979592e-05, + "loss": 1.247, + "step": 434300 + }, + { + "epoch": 9.19, + "learning_rate": 1.3420816326530614e-05, + "loss": 1.2393, + "step": 434400 + }, + { + "epoch": 9.2, + "learning_rate": 1.3400408163265307e-05, + "loss": 1.2441, + "step": 434500 + }, + { + "epoch": 9.2, + "learning_rate": 1.338e-05, + "loss": 1.2365, + "step": 434600 + }, + { + "epoch": 9.2, + "learning_rate": 1.3359591836734694e-05, + "loss": 1.2406, + "step": 434700 + }, + { + "epoch": 9.2, + "learning_rate": 1.3339183673469389e-05, + "loss": 1.2336, + "step": 434800 + }, + { + "epoch": 9.2, + "learning_rate": 1.3318775510204082e-05, + "loss": 1.2418, + "step": 434900 + }, + { + "epoch": 9.21, + "learning_rate": 1.3298367346938775e-05, + "loss": 1.2416, + "step": 435000 + }, + { + "epoch": 9.21, + "learning_rate": 1.3277959183673469e-05, + "loss": 1.2439, + "step": 435100 + }, + { + "epoch": 9.21, + "learning_rate": 1.3257551020408162e-05, + "loss": 1.245, + "step": 435200 + }, + { + "epoch": 9.21, + "learning_rate": 1.3237142857142859e-05, + "loss": 1.2494, + "step": 435300 + }, + { + "epoch": 9.21, + "learning_rate": 1.3216938775510205e-05, + "loss": 1.239, + "step": 435400 + }, + { + "epoch": 9.22, + "learning_rate": 1.3196530612244898e-05, + "loss": 1.2371, + "step": 435500 + }, + { + "epoch": 9.22, + "learning_rate": 1.3176122448979591e-05, + "loss": 1.2432, + "step": 435600 + }, + { + "epoch": 9.22, + "learning_rate": 1.3155714285714284e-05, + "loss": 1.2382, + "step": 435700 + }, + { + "epoch": 9.22, + "learning_rate": 1.3135306122448981e-05, + "loss": 1.2415, + "step": 435800 + }, + { + "epoch": 9.23, + "learning_rate": 1.3114897959183674e-05, + "loss": 1.2455, + "step": 435900 + }, + { + "epoch": 9.23, + "learning_rate": 1.309448979591837e-05, + "loss": 1.2492, + "step": 436000 + }, + { + "epoch": 9.23, + "learning_rate": 1.3074081632653063e-05, + "loss": 1.2541, + "step": 436100 + }, + { + "epoch": 9.23, + "learning_rate": 1.3053673469387756e-05, + "loss": 1.2342, + "step": 436200 + }, + { + "epoch": 9.23, + "learning_rate": 1.3033265306122449e-05, + "loss": 1.2483, + "step": 436300 + }, + { + "epoch": 9.24, + "learning_rate": 1.3012857142857144e-05, + "loss": 1.2407, + "step": 436400 + }, + { + "epoch": 9.24, + "learning_rate": 1.2992448979591837e-05, + "loss": 1.2419, + "step": 436500 + }, + { + "epoch": 9.24, + "learning_rate": 1.297204081632653e-05, + "loss": 1.2389, + "step": 436600 + }, + { + "epoch": 9.24, + "learning_rate": 1.2951632653061224e-05, + "loss": 1.2445, + "step": 436700 + }, + { + "epoch": 9.24, + "learning_rate": 1.2931224489795917e-05, + "loss": 1.2438, + "step": 436800 + }, + { + "epoch": 9.25, + "learning_rate": 1.2910816326530614e-05, + "loss": 1.2435, + "step": 436900 + }, + { + "epoch": 9.25, + "learning_rate": 1.2890408163265307e-05, + "loss": 1.2434, + "step": 437000 + }, + { + "epoch": 9.25, + "learning_rate": 1.2870000000000002e-05, + "loss": 1.2368, + "step": 437100 + }, + { + "epoch": 9.25, + "learning_rate": 1.2849591836734695e-05, + "loss": 1.2445, + "step": 437200 + }, + { + "epoch": 9.26, + "learning_rate": 1.2829183673469389e-05, + "loss": 1.2442, + "step": 437300 + }, + { + "epoch": 9.26, + "learning_rate": 1.2808775510204082e-05, + "loss": 1.2458, + "step": 437400 + }, + { + "epoch": 9.26, + "learning_rate": 1.278857142857143e-05, + "loss": 1.247, + "step": 437500 + }, + { + "epoch": 9.26, + "learning_rate": 1.2768163265306125e-05, + "loss": 1.2343, + "step": 437600 + }, + { + "epoch": 9.26, + "learning_rate": 1.2747755102040818e-05, + "loss": 1.2412, + "step": 437700 + }, + { + "epoch": 9.27, + "learning_rate": 1.2727346938775511e-05, + "loss": 1.2437, + "step": 437800 + }, + { + "epoch": 9.27, + "learning_rate": 1.2706938775510204e-05, + "loss": 1.2473, + "step": 437900 + }, + { + "epoch": 9.27, + "learning_rate": 1.2686530612244898e-05, + "loss": 1.2436, + "step": 438000 + }, + { + "epoch": 9.27, + "learning_rate": 1.2666122448979593e-05, + "loss": 1.2375, + "step": 438100 + }, + { + "epoch": 9.27, + "learning_rate": 1.2645714285714286e-05, + "loss": 1.25, + "step": 438200 + }, + { + "epoch": 9.28, + "learning_rate": 1.262530612244898e-05, + "loss": 1.2376, + "step": 438300 + }, + { + "epoch": 9.28, + "learning_rate": 1.2604897959183672e-05, + "loss": 1.2371, + "step": 438400 + }, + { + "epoch": 9.28, + "learning_rate": 1.2584489795918367e-05, + "loss": 1.248, + "step": 438500 + }, + { + "epoch": 9.28, + "learning_rate": 1.2564081632653062e-05, + "loss": 1.242, + "step": 438600 + }, + { + "epoch": 9.28, + "learning_rate": 1.2543673469387757e-05, + "loss": 1.2369, + "step": 438700 + }, + { + "epoch": 9.29, + "learning_rate": 1.252326530612245e-05, + "loss": 1.2444, + "step": 438800 + }, + { + "epoch": 9.29, + "learning_rate": 1.2502857142857144e-05, + "loss": 1.2433, + "step": 438900 + }, + { + "epoch": 9.29, + "learning_rate": 1.2482448979591837e-05, + "loss": 1.2451, + "step": 439000 + }, + { + "epoch": 9.29, + "learning_rate": 1.246204081632653e-05, + "loss": 1.243, + "step": 439100 + }, + { + "epoch": 9.3, + "learning_rate": 1.2441632653061225e-05, + "loss": 1.2411, + "step": 439200 + }, + { + "epoch": 9.3, + "learning_rate": 1.2421224489795919e-05, + "loss": 1.236, + "step": 439300 + }, + { + "epoch": 9.3, + "learning_rate": 1.2400816326530614e-05, + "loss": 1.2418, + "step": 439400 + }, + { + "epoch": 9.3, + "learning_rate": 1.2380408163265307e-05, + "loss": 1.2419, + "step": 439500 + }, + { + "epoch": 9.3, + "learning_rate": 1.2360204081632653e-05, + "loss": 1.2371, + "step": 439600 + }, + { + "epoch": 9.31, + "learning_rate": 1.2339795918367348e-05, + "loss": 1.249, + "step": 439700 + }, + { + "epoch": 9.31, + "learning_rate": 1.2319591836734694e-05, + "loss": 1.25, + "step": 439800 + }, + { + "epoch": 9.31, + "learning_rate": 1.2299183673469389e-05, + "loss": 1.2408, + "step": 439900 + }, + { + "epoch": 9.31, + "learning_rate": 1.2278775510204082e-05, + "loss": 1.2405, + "step": 440000 + }, + { + "epoch": 9.31, + "learning_rate": 1.2258367346938775e-05, + "loss": 1.239, + "step": 440100 + }, + { + "epoch": 9.32, + "learning_rate": 1.223795918367347e-05, + "loss": 1.2454, + "step": 440200 + }, + { + "epoch": 9.32, + "learning_rate": 1.2217551020408163e-05, + "loss": 1.2353, + "step": 440300 + }, + { + "epoch": 9.32, + "learning_rate": 1.2197142857142858e-05, + "loss": 1.2421, + "step": 440400 + }, + { + "epoch": 9.32, + "learning_rate": 1.2176734693877552e-05, + "loss": 1.2458, + "step": 440500 + }, + { + "epoch": 9.32, + "learning_rate": 1.2156326530612245e-05, + "loss": 1.2406, + "step": 440600 + }, + { + "epoch": 9.33, + "learning_rate": 1.213591836734694e-05, + "loss": 1.2481, + "step": 440700 + }, + { + "epoch": 9.33, + "learning_rate": 1.2115510204081633e-05, + "loss": 1.2404, + "step": 440800 + }, + { + "epoch": 9.33, + "learning_rate": 1.2095102040816327e-05, + "loss": 1.2408, + "step": 440900 + }, + { + "epoch": 9.33, + "learning_rate": 1.2074693877551022e-05, + "loss": 1.2416, + "step": 441000 + }, + { + "epoch": 9.34, + "learning_rate": 1.2054285714285715e-05, + "loss": 1.2391, + "step": 441100 + }, + { + "epoch": 9.34, + "learning_rate": 1.2033877551020408e-05, + "loss": 1.2511, + "step": 441200 + }, + { + "epoch": 9.34, + "learning_rate": 1.2013469387755103e-05, + "loss": 1.2394, + "step": 441300 + }, + { + "epoch": 9.34, + "learning_rate": 1.1993061224489796e-05, + "loss": 1.2472, + "step": 441400 + }, + { + "epoch": 9.34, + "learning_rate": 1.197265306122449e-05, + "loss": 1.2344, + "step": 441500 + }, + { + "epoch": 9.35, + "learning_rate": 1.1952244897959185e-05, + "loss": 1.2471, + "step": 441600 + }, + { + "epoch": 9.35, + "learning_rate": 1.1931836734693878e-05, + "loss": 1.2389, + "step": 441700 + }, + { + "epoch": 9.35, + "learning_rate": 1.1911428571428573e-05, + "loss": 1.2424, + "step": 441800 + }, + { + "epoch": 9.35, + "learning_rate": 1.1891020408163266e-05, + "loss": 1.2401, + "step": 441900 + }, + { + "epoch": 9.35, + "learning_rate": 1.187061224489796e-05, + "loss": 1.2402, + "step": 442000 + }, + { + "epoch": 9.36, + "learning_rate": 1.1850204081632654e-05, + "loss": 1.239, + "step": 442100 + }, + { + "epoch": 9.36, + "learning_rate": 1.1829795918367348e-05, + "loss": 1.2472, + "step": 442200 + }, + { + "epoch": 9.36, + "learning_rate": 1.1809387755102041e-05, + "loss": 1.2464, + "step": 442300 + }, + { + "epoch": 9.36, + "learning_rate": 1.1788979591836736e-05, + "loss": 1.2418, + "step": 442400 + }, + { + "epoch": 9.37, + "learning_rate": 1.176857142857143e-05, + "loss": 1.2398, + "step": 442500 + }, + { + "epoch": 9.37, + "learning_rate": 1.1748163265306122e-05, + "loss": 1.2416, + "step": 442600 + }, + { + "epoch": 9.37, + "learning_rate": 1.1727755102040817e-05, + "loss": 1.2363, + "step": 442700 + }, + { + "epoch": 9.37, + "learning_rate": 1.170734693877551e-05, + "loss": 1.2457, + "step": 442800 + }, + { + "epoch": 9.37, + "learning_rate": 1.1686938775510204e-05, + "loss": 1.2488, + "step": 442900 + }, + { + "epoch": 9.38, + "learning_rate": 1.1666530612244899e-05, + "loss": 1.2418, + "step": 443000 + }, + { + "epoch": 9.38, + "learning_rate": 1.1646122448979592e-05, + "loss": 1.2437, + "step": 443100 + }, + { + "epoch": 9.38, + "learning_rate": 1.1625714285714285e-05, + "loss": 1.2398, + "step": 443200 + }, + { + "epoch": 9.38, + "learning_rate": 1.160530612244898e-05, + "loss": 1.2464, + "step": 443300 + }, + { + "epoch": 9.38, + "learning_rate": 1.1584897959183674e-05, + "loss": 1.2376, + "step": 443400 + }, + { + "epoch": 9.39, + "learning_rate": 1.1564489795918369e-05, + "loss": 1.247, + "step": 443500 + }, + { + "epoch": 9.39, + "learning_rate": 1.1544081632653062e-05, + "loss": 1.2382, + "step": 443600 + }, + { + "epoch": 9.39, + "learning_rate": 1.1523673469387755e-05, + "loss": 1.2397, + "step": 443700 + }, + { + "epoch": 9.39, + "learning_rate": 1.150326530612245e-05, + "loss": 1.2359, + "step": 443800 + }, + { + "epoch": 9.39, + "learning_rate": 1.1483061224489796e-05, + "loss": 1.2326, + "step": 443900 + }, + { + "epoch": 9.4, + "learning_rate": 1.1462653061224491e-05, + "loss": 1.2398, + "step": 444000 + }, + { + "epoch": 9.4, + "learning_rate": 1.1442244897959184e-05, + "loss": 1.243, + "step": 444100 + }, + { + "epoch": 9.4, + "learning_rate": 1.1421836734693878e-05, + "loss": 1.2349, + "step": 444200 + }, + { + "epoch": 9.4, + "learning_rate": 1.1401428571428573e-05, + "loss": 1.2441, + "step": 444300 + }, + { + "epoch": 9.41, + "learning_rate": 1.1381020408163266e-05, + "loss": 1.2426, + "step": 444400 + }, + { + "epoch": 9.41, + "learning_rate": 1.136061224489796e-05, + "loss": 1.2412, + "step": 444500 + }, + { + "epoch": 9.41, + "learning_rate": 1.1340204081632654e-05, + "loss": 1.2404, + "step": 444600 + }, + { + "epoch": 9.41, + "learning_rate": 1.1319795918367347e-05, + "loss": 1.2443, + "step": 444700 + }, + { + "epoch": 9.41, + "learning_rate": 1.129938775510204e-05, + "loss": 1.2444, + "step": 444800 + }, + { + "epoch": 9.42, + "learning_rate": 1.1278979591836736e-05, + "loss": 1.2364, + "step": 444900 + }, + { + "epoch": 9.42, + "learning_rate": 1.1258571428571429e-05, + "loss": 1.2353, + "step": 445000 + }, + { + "epoch": 9.42, + "learning_rate": 1.1238163265306122e-05, + "loss": 1.236, + "step": 445100 + }, + { + "epoch": 9.42, + "learning_rate": 1.1217755102040817e-05, + "loss": 1.2363, + "step": 445200 + }, + { + "epoch": 9.42, + "learning_rate": 1.119734693877551e-05, + "loss": 1.2415, + "step": 445300 + }, + { + "epoch": 9.43, + "learning_rate": 1.1176938775510204e-05, + "loss": 1.2381, + "step": 445400 + }, + { + "epoch": 9.43, + "learning_rate": 1.1156530612244899e-05, + "loss": 1.2474, + "step": 445500 + }, + { + "epoch": 9.43, + "learning_rate": 1.1136122448979592e-05, + "loss": 1.249, + "step": 445600 + }, + { + "epoch": 9.43, + "learning_rate": 1.1115714285714287e-05, + "loss": 1.2396, + "step": 445700 + }, + { + "epoch": 9.43, + "learning_rate": 1.109530612244898e-05, + "loss": 1.2368, + "step": 445800 + }, + { + "epoch": 9.44, + "learning_rate": 1.1074897959183674e-05, + "loss": 1.2354, + "step": 445900 + }, + { + "epoch": 9.44, + "learning_rate": 1.1054489795918369e-05, + "loss": 1.2342, + "step": 446000 + }, + { + "epoch": 9.44, + "learning_rate": 1.1034081632653062e-05, + "loss": 1.2404, + "step": 446100 + }, + { + "epoch": 9.44, + "learning_rate": 1.1013673469387755e-05, + "loss": 1.2393, + "step": 446200 + }, + { + "epoch": 9.45, + "learning_rate": 1.0993469387755103e-05, + "loss": 1.2364, + "step": 446300 + }, + { + "epoch": 9.45, + "learning_rate": 1.0973265306122449e-05, + "loss": 1.234, + "step": 446400 + }, + { + "epoch": 9.45, + "learning_rate": 1.0952857142857144e-05, + "loss": 1.2337, + "step": 446500 + }, + { + "epoch": 9.45, + "learning_rate": 1.0932448979591837e-05, + "loss": 1.2395, + "step": 446600 + }, + { + "epoch": 9.45, + "learning_rate": 1.0912040816326532e-05, + "loss": 1.2408, + "step": 446700 + }, + { + "epoch": 9.46, + "learning_rate": 1.0891632653061225e-05, + "loss": 1.2434, + "step": 446800 + }, + { + "epoch": 9.46, + "learning_rate": 1.0871224489795918e-05, + "loss": 1.2354, + "step": 446900 + }, + { + "epoch": 9.46, + "learning_rate": 1.0850816326530613e-05, + "loss": 1.2333, + "step": 447000 + }, + { + "epoch": 9.46, + "learning_rate": 1.0830408163265307e-05, + "loss": 1.2321, + "step": 447100 + }, + { + "epoch": 9.46, + "learning_rate": 1.081e-05, + "loss": 1.2326, + "step": 447200 + }, + { + "epoch": 9.47, + "learning_rate": 1.0789591836734695e-05, + "loss": 1.2303, + "step": 447300 + }, + { + "epoch": 9.47, + "learning_rate": 1.0769183673469388e-05, + "loss": 1.2372, + "step": 447400 + }, + { + "epoch": 9.47, + "learning_rate": 1.0748775510204081e-05, + "loss": 1.2471, + "step": 447500 + }, + { + "epoch": 9.47, + "learning_rate": 1.0728367346938776e-05, + "loss": 1.237, + "step": 447600 + }, + { + "epoch": 9.48, + "learning_rate": 1.070795918367347e-05, + "loss": 1.2402, + "step": 447700 + }, + { + "epoch": 9.48, + "learning_rate": 1.0687551020408165e-05, + "loss": 1.2388, + "step": 447800 + }, + { + "epoch": 9.48, + "learning_rate": 1.0667142857142858e-05, + "loss": 1.2264, + "step": 447900 + }, + { + "epoch": 9.48, + "learning_rate": 1.0646734693877551e-05, + "loss": 1.239, + "step": 448000 + }, + { + "epoch": 9.48, + "learning_rate": 1.0626326530612244e-05, + "loss": 1.2327, + "step": 448100 + }, + { + "epoch": 9.49, + "learning_rate": 1.060591836734694e-05, + "loss": 1.2407, + "step": 448200 + }, + { + "epoch": 9.49, + "learning_rate": 1.0585510204081633e-05, + "loss": 1.2407, + "step": 448300 + }, + { + "epoch": 9.49, + "learning_rate": 1.0565102040816328e-05, + "loss": 1.2353, + "step": 448400 + }, + { + "epoch": 9.49, + "learning_rate": 1.0544693877551021e-05, + "loss": 1.2456, + "step": 448500 + }, + { + "epoch": 9.49, + "learning_rate": 1.0524285714285714e-05, + "loss": 1.2371, + "step": 448600 + }, + { + "epoch": 9.5, + "learning_rate": 1.050387755102041e-05, + "loss": 1.2392, + "step": 448700 + }, + { + "epoch": 9.5, + "learning_rate": 1.0483469387755102e-05, + "loss": 1.2384, + "step": 448800 + }, + { + "epoch": 9.5, + "learning_rate": 1.0463061224489797e-05, + "loss": 1.2366, + "step": 448900 + }, + { + "epoch": 9.5, + "learning_rate": 1.044265306122449e-05, + "loss": 1.2367, + "step": 449000 + }, + { + "epoch": 9.5, + "learning_rate": 1.0422244897959184e-05, + "loss": 1.2388, + "step": 449100 + }, + { + "epoch": 9.51, + "learning_rate": 1.0401836734693877e-05, + "loss": 1.2501, + "step": 449200 + }, + { + "epoch": 9.51, + "learning_rate": 1.0381428571428572e-05, + "loss": 1.2356, + "step": 449300 + }, + { + "epoch": 9.51, + "learning_rate": 1.0361020408163266e-05, + "loss": 1.2407, + "step": 449400 + }, + { + "epoch": 9.51, + "learning_rate": 1.034061224489796e-05, + "loss": 1.2405, + "step": 449500 + }, + { + "epoch": 9.52, + "learning_rate": 1.0320204081632654e-05, + "loss": 1.2381, + "step": 449600 + }, + { + "epoch": 9.52, + "learning_rate": 1.0299795918367347e-05, + "loss": 1.2382, + "step": 449700 + }, + { + "epoch": 9.52, + "learning_rate": 1.027938775510204e-05, + "loss": 1.2388, + "step": 449800 + }, + { + "epoch": 9.52, + "learning_rate": 1.0258979591836735e-05, + "loss": 1.2367, + "step": 449900 + }, + { + "epoch": 9.52, + "learning_rate": 1.0238571428571429e-05, + "loss": 1.2336, + "step": 450000 + }, + { + "epoch": 9.53, + "learning_rate": 1.0218163265306124e-05, + "loss": 1.239, + "step": 450100 + }, + { + "epoch": 9.53, + "learning_rate": 1.0197755102040817e-05, + "loss": 1.2423, + "step": 450200 + }, + { + "epoch": 9.53, + "learning_rate": 1.017734693877551e-05, + "loss": 1.2413, + "step": 450300 + }, + { + "epoch": 9.53, + "learning_rate": 1.0157142857142858e-05, + "loss": 1.2273, + "step": 450400 + }, + { + "epoch": 9.53, + "learning_rate": 1.0136734693877551e-05, + "loss": 1.2404, + "step": 450500 + }, + { + "epoch": 9.54, + "learning_rate": 1.0116326530612246e-05, + "loss": 1.2397, + "step": 450600 + }, + { + "epoch": 9.54, + "learning_rate": 1.009591836734694e-05, + "loss": 1.2398, + "step": 450700 + }, + { + "epoch": 9.54, + "learning_rate": 1.0075510204081632e-05, + "loss": 1.237, + "step": 450800 + }, + { + "epoch": 9.54, + "learning_rate": 1.0055102040816327e-05, + "loss": 1.238, + "step": 450900 + }, + { + "epoch": 9.54, + "learning_rate": 1.003469387755102e-05, + "loss": 1.2335, + "step": 451000 + }, + { + "epoch": 9.55, + "learning_rate": 1.0014285714285716e-05, + "loss": 1.2384, + "step": 451100 + }, + { + "epoch": 9.55, + "learning_rate": 9.993877551020409e-06, + "loss": 1.2306, + "step": 451200 + }, + { + "epoch": 9.55, + "learning_rate": 9.973469387755102e-06, + "loss": 1.2378, + "step": 451300 + }, + { + "epoch": 9.55, + "learning_rate": 9.953061224489796e-06, + "loss": 1.2386, + "step": 451400 + }, + { + "epoch": 9.56, + "learning_rate": 9.93265306122449e-06, + "loss": 1.2336, + "step": 451500 + }, + { + "epoch": 9.56, + "learning_rate": 9.912244897959184e-06, + "loss": 1.2373, + "step": 451600 + }, + { + "epoch": 9.56, + "learning_rate": 9.891836734693879e-06, + "loss": 1.2421, + "step": 451700 + }, + { + "epoch": 9.56, + "learning_rate": 9.871428571428572e-06, + "loss": 1.249, + "step": 451800 + }, + { + "epoch": 9.56, + "learning_rate": 9.851020408163265e-06, + "loss": 1.2417, + "step": 451900 + }, + { + "epoch": 9.57, + "learning_rate": 9.830612244897959e-06, + "loss": 1.232, + "step": 452000 + }, + { + "epoch": 9.57, + "learning_rate": 9.810204081632654e-06, + "loss": 1.2359, + "step": 452100 + }, + { + "epoch": 9.57, + "learning_rate": 9.789795918367347e-06, + "loss": 1.2358, + "step": 452200 + }, + { + "epoch": 9.57, + "learning_rate": 9.769387755102042e-06, + "loss": 1.236, + "step": 452300 + }, + { + "epoch": 9.57, + "learning_rate": 9.748979591836735e-06, + "loss": 1.2347, + "step": 452400 + }, + { + "epoch": 9.58, + "learning_rate": 9.728571428571428e-06, + "loss": 1.2311, + "step": 452500 + }, + { + "epoch": 9.58, + "learning_rate": 9.708163265306123e-06, + "loss": 1.2371, + "step": 452600 + }, + { + "epoch": 9.58, + "learning_rate": 9.687959183673471e-06, + "loss": 1.2427, + "step": 452700 + }, + { + "epoch": 9.58, + "learning_rate": 9.667551020408164e-06, + "loss": 1.2357, + "step": 452800 + }, + { + "epoch": 9.59, + "learning_rate": 9.647142857142857e-06, + "loss": 1.2333, + "step": 452900 + }, + { + "epoch": 9.59, + "learning_rate": 9.62673469387755e-06, + "loss": 1.2403, + "step": 453000 + }, + { + "epoch": 9.59, + "learning_rate": 9.606326530612246e-06, + "loss": 1.2368, + "step": 453100 + }, + { + "epoch": 9.59, + "learning_rate": 9.585918367346939e-06, + "loss": 1.2322, + "step": 453200 + }, + { + "epoch": 9.59, + "learning_rate": 9.565510204081634e-06, + "loss": 1.2443, + "step": 453300 + }, + { + "epoch": 9.6, + "learning_rate": 9.545102040816327e-06, + "loss": 1.2406, + "step": 453400 + }, + { + "epoch": 9.6, + "learning_rate": 9.52469387755102e-06, + "loss": 1.2337, + "step": 453500 + }, + { + "epoch": 9.6, + "learning_rate": 9.504285714285714e-06, + "loss": 1.2353, + "step": 453600 + }, + { + "epoch": 9.6, + "learning_rate": 9.483877551020409e-06, + "loss": 1.2402, + "step": 453700 + }, + { + "epoch": 9.6, + "learning_rate": 9.463469387755102e-06, + "loss": 1.2325, + "step": 453800 + }, + { + "epoch": 9.61, + "learning_rate": 9.443061224489797e-06, + "loss": 1.2299, + "step": 453900 + }, + { + "epoch": 9.61, + "learning_rate": 9.42265306122449e-06, + "loss": 1.2384, + "step": 454000 + }, + { + "epoch": 9.61, + "learning_rate": 9.402244897959184e-06, + "loss": 1.2388, + "step": 454100 + }, + { + "epoch": 9.61, + "learning_rate": 9.381836734693877e-06, + "loss": 1.234, + "step": 454200 + }, + { + "epoch": 9.61, + "learning_rate": 9.361428571428572e-06, + "loss": 1.2338, + "step": 454300 + }, + { + "epoch": 9.62, + "learning_rate": 9.341020408163267e-06, + "loss": 1.2374, + "step": 454400 + }, + { + "epoch": 9.62, + "learning_rate": 9.32061224489796e-06, + "loss": 1.2315, + "step": 454500 + }, + { + "epoch": 9.62, + "learning_rate": 9.300204081632653e-06, + "loss": 1.2373, + "step": 454600 + }, + { + "epoch": 9.62, + "learning_rate": 9.28e-06, + "loss": 1.2415, + "step": 454700 + }, + { + "epoch": 9.63, + "learning_rate": 9.259591836734694e-06, + "loss": 1.2385, + "step": 454800 + }, + { + "epoch": 9.63, + "learning_rate": 9.23918367346939e-06, + "loss": 1.2395, + "step": 454900 + }, + { + "epoch": 9.63, + "learning_rate": 9.218775510204082e-06, + "loss": 1.2405, + "step": 455000 + }, + { + "epoch": 9.63, + "learning_rate": 9.198367346938776e-06, + "loss": 1.232, + "step": 455100 + }, + { + "epoch": 9.63, + "learning_rate": 9.177959183673469e-06, + "loss": 1.2298, + "step": 455200 + }, + { + "epoch": 9.64, + "learning_rate": 9.157551020408164e-06, + "loss": 1.231, + "step": 455300 + }, + { + "epoch": 9.64, + "learning_rate": 9.137142857142857e-06, + "loss": 1.2352, + "step": 455400 + }, + { + "epoch": 9.64, + "learning_rate": 9.116734693877552e-06, + "loss": 1.2358, + "step": 455500 + }, + { + "epoch": 9.64, + "learning_rate": 9.096326530612246e-06, + "loss": 1.2365, + "step": 455600 + }, + { + "epoch": 9.64, + "learning_rate": 9.075918367346939e-06, + "loss": 1.2348, + "step": 455700 + }, + { + "epoch": 9.65, + "learning_rate": 9.055510204081632e-06, + "loss": 1.2391, + "step": 455800 + }, + { + "epoch": 9.65, + "learning_rate": 9.035102040816327e-06, + "loss": 1.2354, + "step": 455900 + }, + { + "epoch": 9.65, + "learning_rate": 9.014693877551022e-06, + "loss": 1.2323, + "step": 456000 + }, + { + "epoch": 9.65, + "learning_rate": 8.994285714285715e-06, + "loss": 1.231, + "step": 456100 + }, + { + "epoch": 9.66, + "learning_rate": 8.973877551020409e-06, + "loss": 1.2351, + "step": 456200 + }, + { + "epoch": 9.66, + "learning_rate": 8.953469387755102e-06, + "loss": 1.2354, + "step": 456300 + }, + { + "epoch": 9.66, + "learning_rate": 8.933061224489795e-06, + "loss": 1.2342, + "step": 456400 + }, + { + "epoch": 9.66, + "learning_rate": 8.91265306122449e-06, + "loss": 1.2367, + "step": 456500 + }, + { + "epoch": 9.66, + "learning_rate": 8.892244897959185e-06, + "loss": 1.2297, + "step": 456600 + }, + { + "epoch": 9.67, + "learning_rate": 8.871836734693878e-06, + "loss": 1.2352, + "step": 456700 + }, + { + "epoch": 9.67, + "learning_rate": 8.851632653061224e-06, + "loss": 1.2217, + "step": 456800 + }, + { + "epoch": 9.67, + "learning_rate": 8.831224489795918e-06, + "loss": 1.2358, + "step": 456900 + }, + { + "epoch": 9.67, + "learning_rate": 8.810816326530612e-06, + "loss": 1.2382, + "step": 457000 + }, + { + "epoch": 9.67, + "learning_rate": 8.790408163265307e-06, + "loss": 1.234, + "step": 457100 + }, + { + "epoch": 9.68, + "learning_rate": 8.77e-06, + "loss": 1.2403, + "step": 457200 + }, + { + "epoch": 9.68, + "learning_rate": 8.749591836734694e-06, + "loss": 1.2419, + "step": 457300 + }, + { + "epoch": 9.68, + "learning_rate": 8.729183673469387e-06, + "loss": 1.2362, + "step": 457400 + }, + { + "epoch": 9.68, + "learning_rate": 8.708775510204082e-06, + "loss": 1.2348, + "step": 457500 + }, + { + "epoch": 9.68, + "learning_rate": 8.688367346938776e-06, + "loss": 1.2304, + "step": 457600 + }, + { + "epoch": 9.69, + "learning_rate": 8.66795918367347e-06, + "loss": 1.2397, + "step": 457700 + }, + { + "epoch": 9.69, + "learning_rate": 8.647551020408164e-06, + "loss": 1.2366, + "step": 457800 + }, + { + "epoch": 9.69, + "learning_rate": 8.627142857142857e-06, + "loss": 1.2333, + "step": 457900 + }, + { + "epoch": 9.69, + "learning_rate": 8.60673469387755e-06, + "loss": 1.2369, + "step": 458000 + }, + { + "epoch": 9.7, + "learning_rate": 8.586326530612245e-06, + "loss": 1.2423, + "step": 458100 + }, + { + "epoch": 9.7, + "learning_rate": 8.56591836734694e-06, + "loss": 1.2342, + "step": 458200 + }, + { + "epoch": 9.7, + "learning_rate": 8.545510204081634e-06, + "loss": 1.2385, + "step": 458300 + }, + { + "epoch": 9.7, + "learning_rate": 8.525102040816327e-06, + "loss": 1.2378, + "step": 458400 + }, + { + "epoch": 9.7, + "learning_rate": 8.50469387755102e-06, + "loss": 1.2301, + "step": 458500 + }, + { + "epoch": 9.71, + "learning_rate": 8.484285714285713e-06, + "loss": 1.2421, + "step": 458600 + }, + { + "epoch": 9.71, + "learning_rate": 8.463877551020408e-06, + "loss": 1.2405, + "step": 458700 + }, + { + "epoch": 9.71, + "learning_rate": 8.443673469387756e-06, + "loss": 1.2233, + "step": 458800 + }, + { + "epoch": 9.71, + "learning_rate": 8.42326530612245e-06, + "loss": 1.2338, + "step": 458900 + }, + { + "epoch": 9.71, + "learning_rate": 8.402857142857143e-06, + "loss": 1.2351, + "step": 459000 + }, + { + "epoch": 9.72, + "learning_rate": 8.382448979591836e-06, + "loss": 1.2339, + "step": 459100 + }, + { + "epoch": 9.72, + "learning_rate": 8.36204081632653e-06, + "loss": 1.2354, + "step": 459200 + }, + { + "epoch": 9.72, + "learning_rate": 8.341632653061226e-06, + "loss": 1.2351, + "step": 459300 + }, + { + "epoch": 9.72, + "learning_rate": 8.321224489795919e-06, + "loss": 1.2375, + "step": 459400 + }, + { + "epoch": 9.72, + "learning_rate": 8.300816326530612e-06, + "loss": 1.2342, + "step": 459500 + }, + { + "epoch": 9.73, + "learning_rate": 8.280408163265306e-06, + "loss": 1.2346, + "step": 459600 + }, + { + "epoch": 9.73, + "learning_rate": 8.26e-06, + "loss": 1.2345, + "step": 459700 + }, + { + "epoch": 9.73, + "learning_rate": 8.239591836734696e-06, + "loss": 1.235, + "step": 459800 + }, + { + "epoch": 9.73, + "learning_rate": 8.219183673469389e-06, + "loss": 1.2278, + "step": 459900 + }, + { + "epoch": 9.74, + "learning_rate": 8.198775510204082e-06, + "loss": 1.2402, + "step": 460000 + }, + { + "epoch": 9.74, + "learning_rate": 8.178367346938775e-06, + "loss": 1.236, + "step": 460100 + }, + { + "epoch": 9.74, + "learning_rate": 8.157959183673469e-06, + "loss": 1.2323, + "step": 460200 + }, + { + "epoch": 9.74, + "learning_rate": 8.137551020408164e-06, + "loss": 1.2335, + "step": 460300 + }, + { + "epoch": 9.74, + "learning_rate": 8.117346938775511e-06, + "loss": 1.2294, + "step": 460400 + }, + { + "epoch": 9.75, + "learning_rate": 8.096938775510204e-06, + "loss": 1.2386, + "step": 460500 + }, + { + "epoch": 9.75, + "learning_rate": 8.076530612244898e-06, + "loss": 1.2357, + "step": 460600 + }, + { + "epoch": 9.75, + "learning_rate": 8.056122448979591e-06, + "loss": 1.2268, + "step": 460700 + }, + { + "epoch": 9.75, + "learning_rate": 8.035714285714286e-06, + "loss": 1.2276, + "step": 460800 + }, + { + "epoch": 9.75, + "learning_rate": 8.015306122448981e-06, + "loss": 1.2283, + "step": 460900 + }, + { + "epoch": 9.76, + "learning_rate": 7.994897959183674e-06, + "loss": 1.2358, + "step": 461000 + }, + { + "epoch": 9.76, + "learning_rate": 7.974489795918368e-06, + "loss": 1.2315, + "step": 461100 + }, + { + "epoch": 9.76, + "learning_rate": 7.95408163265306e-06, + "loss": 1.2306, + "step": 461200 + }, + { + "epoch": 9.76, + "learning_rate": 7.933673469387754e-06, + "loss": 1.2313, + "step": 461300 + }, + { + "epoch": 9.77, + "learning_rate": 7.913265306122449e-06, + "loss": 1.2294, + "step": 461400 + }, + { + "epoch": 9.77, + "learning_rate": 7.892857142857144e-06, + "loss": 1.2403, + "step": 461500 + }, + { + "epoch": 9.77, + "learning_rate": 7.872448979591837e-06, + "loss": 1.2268, + "step": 461600 + }, + { + "epoch": 9.77, + "learning_rate": 7.85204081632653e-06, + "loss": 1.2377, + "step": 461700 + }, + { + "epoch": 9.77, + "learning_rate": 7.831632653061224e-06, + "loss": 1.2258, + "step": 461800 + }, + { + "epoch": 9.78, + "learning_rate": 7.811224489795919e-06, + "loss": 1.2371, + "step": 461900 + }, + { + "epoch": 9.78, + "learning_rate": 7.790816326530614e-06, + "loss": 1.2308, + "step": 462000 + }, + { + "epoch": 9.78, + "learning_rate": 7.770408163265307e-06, + "loss": 1.2333, + "step": 462100 + }, + { + "epoch": 9.78, + "learning_rate": 7.75e-06, + "loss": 1.2338, + "step": 462200 + }, + { + "epoch": 9.78, + "learning_rate": 7.729591836734694e-06, + "loss": 1.232, + "step": 462300 + }, + { + "epoch": 9.79, + "learning_rate": 7.709183673469387e-06, + "loss": 1.2401, + "step": 462400 + }, + { + "epoch": 9.79, + "learning_rate": 7.688775510204082e-06, + "loss": 1.2345, + "step": 462500 + }, + { + "epoch": 9.79, + "learning_rate": 7.668367346938777e-06, + "loss": 1.2323, + "step": 462600 + }, + { + "epoch": 9.79, + "learning_rate": 7.64795918367347e-06, + "loss": 1.2383, + "step": 462700 + }, + { + "epoch": 9.79, + "learning_rate": 7.627551020408163e-06, + "loss": 1.2403, + "step": 462800 + }, + { + "epoch": 9.8, + "learning_rate": 7.6071428571428575e-06, + "loss": 1.2365, + "step": 462900 + }, + { + "epoch": 9.8, + "learning_rate": 7.586734693877551e-06, + "loss": 1.2308, + "step": 463000 + }, + { + "epoch": 9.8, + "learning_rate": 7.566326530612246e-06, + "loss": 1.2328, + "step": 463100 + }, + { + "epoch": 9.8, + "learning_rate": 7.545918367346939e-06, + "loss": 1.2273, + "step": 463200 + }, + { + "epoch": 9.81, + "learning_rate": 7.525510204081633e-06, + "loss": 1.2312, + "step": 463300 + }, + { + "epoch": 9.81, + "learning_rate": 7.5051020408163264e-06, + "loss": 1.2311, + "step": 463400 + }, + { + "epoch": 9.81, + "learning_rate": 7.4846938775510206e-06, + "loss": 1.2328, + "step": 463500 + }, + { + "epoch": 9.81, + "learning_rate": 7.4642857142857155e-06, + "loss": 1.2314, + "step": 463600 + }, + { + "epoch": 9.81, + "learning_rate": 7.443877551020409e-06, + "loss": 1.2309, + "step": 463700 + }, + { + "epoch": 9.82, + "learning_rate": 7.423469387755103e-06, + "loss": 1.2321, + "step": 463800 + }, + { + "epoch": 9.82, + "learning_rate": 7.403061224489796e-06, + "loss": 1.2434, + "step": 463900 + }, + { + "epoch": 9.82, + "learning_rate": 7.3826530612244895e-06, + "loss": 1.2362, + "step": 464000 + }, + { + "epoch": 9.82, + "learning_rate": 7.362244897959184e-06, + "loss": 1.2339, + "step": 464100 + }, + { + "epoch": 9.82, + "learning_rate": 7.341836734693879e-06, + "loss": 1.2285, + "step": 464200 + }, + { + "epoch": 9.83, + "learning_rate": 7.321428571428572e-06, + "loss": 1.2407, + "step": 464300 + }, + { + "epoch": 9.83, + "learning_rate": 7.301020408163266e-06, + "loss": 1.2298, + "step": 464400 + }, + { + "epoch": 9.83, + "learning_rate": 7.280816326530612e-06, + "loss": 1.2265, + "step": 464500 + }, + { + "epoch": 9.83, + "learning_rate": 7.260408163265306e-06, + "loss": 1.2275, + "step": 464600 + }, + { + "epoch": 9.83, + "learning_rate": 7.240000000000001e-06, + "loss": 1.2379, + "step": 464700 + }, + { + "epoch": 9.84, + "learning_rate": 7.219591836734694e-06, + "loss": 1.2394, + "step": 464800 + }, + { + "epoch": 9.84, + "learning_rate": 7.199183673469388e-06, + "loss": 1.2284, + "step": 464900 + }, + { + "epoch": 9.84, + "learning_rate": 7.178775510204082e-06, + "loss": 1.234, + "step": 465000 + }, + { + "epoch": 9.84, + "learning_rate": 7.158367346938776e-06, + "loss": 1.2377, + "step": 465100 + }, + { + "epoch": 9.85, + "learning_rate": 7.137959183673469e-06, + "loss": 1.223, + "step": 465200 + }, + { + "epoch": 9.85, + "learning_rate": 7.117551020408164e-06, + "loss": 1.2339, + "step": 465300 + }, + { + "epoch": 9.85, + "learning_rate": 7.097142857142858e-06, + "loss": 1.228, + "step": 465400 + }, + { + "epoch": 9.85, + "learning_rate": 7.076938775510204e-06, + "loss": 1.2262, + "step": 465500 + }, + { + "epoch": 9.85, + "learning_rate": 7.056530612244898e-06, + "loss": 1.2399, + "step": 465600 + }, + { + "epoch": 9.86, + "learning_rate": 7.0361224489795915e-06, + "loss": 1.2311, + "step": 465700 + }, + { + "epoch": 9.86, + "learning_rate": 7.0157142857142864e-06, + "loss": 1.2337, + "step": 465800 + }, + { + "epoch": 9.86, + "learning_rate": 6.9953061224489806e-06, + "loss": 1.2323, + "step": 465900 + }, + { + "epoch": 9.86, + "learning_rate": 6.974897959183674e-06, + "loss": 1.2335, + "step": 466000 + }, + { + "epoch": 9.86, + "learning_rate": 6.954489795918367e-06, + "loss": 1.2329, + "step": 466100 + }, + { + "epoch": 9.87, + "learning_rate": 6.934081632653061e-06, + "loss": 1.2296, + "step": 466200 + }, + { + "epoch": 9.87, + "learning_rate": 6.913673469387756e-06, + "loss": 1.2313, + "step": 466300 + }, + { + "epoch": 9.87, + "learning_rate": 6.8932653061224495e-06, + "loss": 1.2323, + "step": 466400 + }, + { + "epoch": 9.87, + "learning_rate": 6.872857142857144e-06, + "loss": 1.2332, + "step": 466500 + }, + { + "epoch": 9.88, + "learning_rate": 6.852448979591837e-06, + "loss": 1.2308, + "step": 466600 + }, + { + "epoch": 9.88, + "learning_rate": 6.832040816326531e-06, + "loss": 1.2311, + "step": 466700 + }, + { + "epoch": 9.88, + "learning_rate": 6.811632653061224e-06, + "loss": 1.2272, + "step": 466800 + }, + { + "epoch": 9.88, + "learning_rate": 6.791224489795919e-06, + "loss": 1.2278, + "step": 466900 + }, + { + "epoch": 9.88, + "learning_rate": 6.7708163265306125e-06, + "loss": 1.2325, + "step": 467000 + }, + { + "epoch": 9.89, + "learning_rate": 6.750408163265307e-06, + "loss": 1.2324, + "step": 467100 + }, + { + "epoch": 9.89, + "learning_rate": 6.73e-06, + "loss": 1.2384, + "step": 467200 + }, + { + "epoch": 9.89, + "learning_rate": 6.709591836734694e-06, + "loss": 1.23, + "step": 467300 + }, + { + "epoch": 9.89, + "learning_rate": 6.689183673469387e-06, + "loss": 1.2333, + "step": 467400 + }, + { + "epoch": 9.89, + "learning_rate": 6.668775510204082e-06, + "loss": 1.2248, + "step": 467500 + }, + { + "epoch": 9.9, + "learning_rate": 6.6483673469387764e-06, + "loss": 1.2297, + "step": 467600 + }, + { + "epoch": 9.9, + "learning_rate": 6.62795918367347e-06, + "loss": 1.235, + "step": 467700 + }, + { + "epoch": 9.9, + "learning_rate": 6.607551020408163e-06, + "loss": 1.2267, + "step": 467800 + }, + { + "epoch": 9.9, + "learning_rate": 6.587142857142857e-06, + "loss": 1.231, + "step": 467900 + }, + { + "epoch": 9.9, + "learning_rate": 6.566734693877552e-06, + "loss": 1.2336, + "step": 468000 + }, + { + "epoch": 9.91, + "learning_rate": 6.546326530612245e-06, + "loss": 1.2284, + "step": 468100 + }, + { + "epoch": 9.91, + "learning_rate": 6.5259183673469395e-06, + "loss": 1.2295, + "step": 468200 + }, + { + "epoch": 9.91, + "learning_rate": 6.505510204081633e-06, + "loss": 1.2237, + "step": 468300 + }, + { + "epoch": 9.91, + "learning_rate": 6.485102040816327e-06, + "loss": 1.228, + "step": 468400 + }, + { + "epoch": 9.92, + "learning_rate": 6.46469387755102e-06, + "loss": 1.231, + "step": 468500 + }, + { + "epoch": 9.92, + "learning_rate": 6.444285714285715e-06, + "loss": 1.2329, + "step": 468600 + }, + { + "epoch": 9.92, + "learning_rate": 6.423877551020408e-06, + "loss": 1.229, + "step": 468700 + }, + { + "epoch": 9.92, + "learning_rate": 6.4034693877551025e-06, + "loss": 1.2292, + "step": 468800 + }, + { + "epoch": 9.92, + "learning_rate": 6.383061224489796e-06, + "loss": 1.2316, + "step": 468900 + }, + { + "epoch": 9.93, + "learning_rate": 6.36265306122449e-06, + "loss": 1.23, + "step": 469000 + }, + { + "epoch": 9.93, + "learning_rate": 6.342244897959183e-06, + "loss": 1.232, + "step": 469100 + }, + { + "epoch": 9.93, + "learning_rate": 6.321836734693878e-06, + "loss": 1.2288, + "step": 469200 + }, + { + "epoch": 9.93, + "learning_rate": 6.301428571428572e-06, + "loss": 1.234, + "step": 469300 + }, + { + "epoch": 9.93, + "learning_rate": 6.281224489795918e-06, + "loss": 1.2364, + "step": 469400 + }, + { + "epoch": 9.94, + "learning_rate": 6.260816326530612e-06, + "loss": 1.2225, + "step": 469500 + }, + { + "epoch": 9.94, + "learning_rate": 6.2404081632653065e-06, + "loss": 1.2374, + "step": 469600 + }, + { + "epoch": 9.94, + "learning_rate": 6.22e-06, + "loss": 1.2283, + "step": 469700 + }, + { + "epoch": 9.94, + "learning_rate": 6.199591836734695e-06, + "loss": 1.2271, + "step": 469800 + }, + { + "epoch": 9.94, + "learning_rate": 6.179183673469388e-06, + "loss": 1.2299, + "step": 469900 + }, + { + "epoch": 9.95, + "learning_rate": 6.158775510204082e-06, + "loss": 1.2249, + "step": 470000 + }, + { + "epoch": 9.95, + "learning_rate": 6.138367346938776e-06, + "loss": 1.2313, + "step": 470100 + }, + { + "epoch": 9.95, + "learning_rate": 6.1179591836734695e-06, + "loss": 1.2344, + "step": 470200 + }, + { + "epoch": 9.95, + "learning_rate": 6.097551020408164e-06, + "loss": 1.2297, + "step": 470300 + }, + { + "epoch": 9.96, + "learning_rate": 6.077142857142858e-06, + "loss": 1.2277, + "step": 470400 + }, + { + "epoch": 9.96, + "learning_rate": 6.056734693877551e-06, + "loss": 1.2318, + "step": 470500 + }, + { + "epoch": 9.96, + "learning_rate": 6.036326530612245e-06, + "loss": 1.2318, + "step": 470600 + }, + { + "epoch": 9.96, + "learning_rate": 6.015918367346939e-06, + "loss": 1.2237, + "step": 470700 + }, + { + "epoch": 9.96, + "learning_rate": 5.9955102040816326e-06, + "loss": 1.2307, + "step": 470800 + }, + { + "epoch": 9.97, + "learning_rate": 5.975102040816327e-06, + "loss": 1.2302, + "step": 470900 + }, + { + "epoch": 9.97, + "learning_rate": 5.954693877551021e-06, + "loss": 1.2313, + "step": 471000 + }, + { + "epoch": 9.97, + "learning_rate": 5.934285714285714e-06, + "loss": 1.2205, + "step": 471100 + }, + { + "epoch": 9.97, + "learning_rate": 5.913877551020409e-06, + "loss": 1.2286, + "step": 471200 + }, + { + "epoch": 9.97, + "learning_rate": 5.893469387755102e-06, + "loss": 1.2247, + "step": 471300 + }, + { + "epoch": 9.98, + "learning_rate": 5.873061224489796e-06, + "loss": 1.2393, + "step": 471400 + }, + { + "epoch": 9.98, + "learning_rate": 5.852653061224491e-06, + "loss": 1.2316, + "step": 471500 + }, + { + "epoch": 9.98, + "learning_rate": 5.832244897959184e-06, + "loss": 1.2247, + "step": 471600 + }, + { + "epoch": 9.98, + "learning_rate": 5.811836734693878e-06, + "loss": 1.2227, + "step": 471700 + }, + { + "epoch": 9.99, + "learning_rate": 5.791428571428572e-06, + "loss": 1.2309, + "step": 471800 + }, + { + "epoch": 9.99, + "learning_rate": 5.771020408163265e-06, + "loss": 1.2262, + "step": 471900 + }, + { + "epoch": 9.99, + "learning_rate": 5.7506122448979595e-06, + "loss": 1.2302, + "step": 472000 + }, + { + "epoch": 9.99, + "learning_rate": 5.730204081632654e-06, + "loss": 1.2347, + "step": 472100 + }, + { + "epoch": 9.99, + "learning_rate": 5.709795918367347e-06, + "loss": 1.2315, + "step": 472200 + }, + { + "epoch": 10.0, + "learning_rate": 5.689387755102041e-06, + "loss": 1.2299, + "step": 472300 + }, + { + "epoch": 10.0, + "learning_rate": 5.668979591836735e-06, + "loss": 1.2335, + "step": 472400 + }, + { + "epoch": 10.0, + "learning_rate": 5.6485714285714285e-06, + "loss": 1.2314, + "step": 472500 + }, + { + "epoch": 10.0, + "learning_rate": 5.628163265306123e-06, + "loss": 1.2308, + "step": 472600 + }, + { + "epoch": 10.0, + "learning_rate": 5.607755102040817e-06, + "loss": 1.2312, + "step": 472700 + }, + { + "epoch": 10.01, + "learning_rate": 5.58734693877551e-06, + "loss": 1.2307, + "step": 472800 + }, + { + "epoch": 10.01, + "learning_rate": 5.566938775510205e-06, + "loss": 1.2278, + "step": 472900 + }, + { + "epoch": 10.01, + "learning_rate": 5.546530612244898e-06, + "loss": 1.2265, + "step": 473000 + }, + { + "epoch": 10.01, + "learning_rate": 5.526122448979592e-06, + "loss": 1.2263, + "step": 473100 + }, + { + "epoch": 10.01, + "learning_rate": 5.5057142857142865e-06, + "loss": 1.2235, + "step": 473200 + }, + { + "epoch": 10.02, + "learning_rate": 5.485510204081633e-06, + "loss": 1.2288, + "step": 473300 + }, + { + "epoch": 10.02, + "learning_rate": 5.465102040816327e-06, + "loss": 1.2196, + "step": 473400 + }, + { + "epoch": 10.02, + "learning_rate": 5.444693877551021e-06, + "loss": 1.2312, + "step": 473500 + }, + { + "epoch": 10.02, + "learning_rate": 5.424285714285715e-06, + "loss": 1.2283, + "step": 473600 + }, + { + "epoch": 10.03, + "learning_rate": 5.403877551020409e-06, + "loss": 1.2309, + "step": 473700 + }, + { + "epoch": 10.03, + "learning_rate": 5.383469387755102e-06, + "loss": 1.2182, + "step": 473800 + }, + { + "epoch": 10.03, + "learning_rate": 5.363061224489796e-06, + "loss": 1.2239, + "step": 473900 + }, + { + "epoch": 10.03, + "learning_rate": 5.34265306122449e-06, + "loss": 1.2318, + "step": 474000 + }, + { + "epoch": 10.03, + "learning_rate": 5.322244897959184e-06, + "loss": 1.2261, + "step": 474100 + }, + { + "epoch": 10.04, + "learning_rate": 5.301836734693878e-06, + "loss": 1.2312, + "step": 474200 + }, + { + "epoch": 10.04, + "learning_rate": 5.281428571428572e-06, + "loss": 1.2369, + "step": 474300 + }, + { + "epoch": 10.04, + "learning_rate": 5.261020408163265e-06, + "loss": 1.2279, + "step": 474400 + }, + { + "epoch": 10.04, + "learning_rate": 5.240612244897959e-06, + "loss": 1.2224, + "step": 474500 + }, + { + "epoch": 10.04, + "learning_rate": 5.2202040816326535e-06, + "loss": 1.231, + "step": 474600 + }, + { + "epoch": 10.05, + "learning_rate": 5.199795918367347e-06, + "loss": 1.2239, + "step": 474700 + }, + { + "epoch": 10.05, + "learning_rate": 5.179387755102041e-06, + "loss": 1.2269, + "step": 474800 + }, + { + "epoch": 10.05, + "learning_rate": 5.158979591836735e-06, + "loss": 1.2187, + "step": 474900 + }, + { + "epoch": 10.05, + "learning_rate": 5.138571428571429e-06, + "loss": 1.2271, + "step": 475000 + }, + { + "epoch": 10.06, + "learning_rate": 5.118163265306123e-06, + "loss": 1.2337, + "step": 475100 + }, + { + "epoch": 10.06, + "learning_rate": 5.0977551020408165e-06, + "loss": 1.229, + "step": 475200 + }, + { + "epoch": 10.06, + "learning_rate": 5.077346938775511e-06, + "loss": 1.2337, + "step": 475300 + }, + { + "epoch": 10.06, + "learning_rate": 5.056938775510205e-06, + "loss": 1.2321, + "step": 475400 + }, + { + "epoch": 10.06, + "learning_rate": 5.036530612244898e-06, + "loss": 1.228, + "step": 475500 + }, + { + "epoch": 10.07, + "learning_rate": 5.016122448979592e-06, + "loss": 1.2311, + "step": 475600 + }, + { + "epoch": 10.07, + "learning_rate": 4.995714285714286e-06, + "loss": 1.2198, + "step": 475700 + }, + { + "epoch": 10.07, + "learning_rate": 4.9753061224489796e-06, + "loss": 1.229, + "step": 475800 + }, + { + "epoch": 10.07, + "learning_rate": 4.954897959183674e-06, + "loss": 1.2195, + "step": 475900 + }, + { + "epoch": 10.07, + "learning_rate": 4.934489795918368e-06, + "loss": 1.2255, + "step": 476000 + }, + { + "epoch": 10.08, + "learning_rate": 4.914081632653061e-06, + "loss": 1.2266, + "step": 476100 + }, + { + "epoch": 10.08, + "learning_rate": 4.893673469387755e-06, + "loss": 1.225, + "step": 476200 + }, + { + "epoch": 10.08, + "learning_rate": 4.873265306122449e-06, + "loss": 1.2273, + "step": 476300 + }, + { + "epoch": 10.08, + "learning_rate": 4.852857142857143e-06, + "loss": 1.2315, + "step": 476400 + }, + { + "epoch": 10.08, + "learning_rate": 4.832448979591837e-06, + "loss": 1.2277, + "step": 476500 + }, + { + "epoch": 10.09, + "learning_rate": 4.812040816326531e-06, + "loss": 1.2338, + "step": 476600 + }, + { + "epoch": 10.09, + "learning_rate": 4.791632653061225e-06, + "loss": 1.2265, + "step": 476700 + }, + { + "epoch": 10.09, + "learning_rate": 4.771224489795919e-06, + "loss": 1.2267, + "step": 476800 + }, + { + "epoch": 10.09, + "learning_rate": 4.750816326530612e-06, + "loss": 1.2231, + "step": 476900 + }, + { + "epoch": 10.1, + "learning_rate": 4.7304081632653065e-06, + "loss": 1.2203, + "step": 477000 + }, + { + "epoch": 10.1, + "learning_rate": 4.710000000000001e-06, + "loss": 1.2236, + "step": 477100 + }, + { + "epoch": 10.1, + "learning_rate": 4.689591836734694e-06, + "loss": 1.2229, + "step": 477200 + }, + { + "epoch": 10.1, + "learning_rate": 4.669183673469388e-06, + "loss": 1.2329, + "step": 477300 + }, + { + "epoch": 10.1, + "learning_rate": 4.648979591836735e-06, + "loss": 1.2221, + "step": 477400 + }, + { + "epoch": 10.11, + "learning_rate": 4.628571428571429e-06, + "loss": 1.2226, + "step": 477500 + }, + { + "epoch": 10.11, + "learning_rate": 4.608163265306123e-06, + "loss": 1.2282, + "step": 477600 + }, + { + "epoch": 10.11, + "learning_rate": 4.587755102040816e-06, + "loss": 1.2237, + "step": 477700 + }, + { + "epoch": 10.11, + "learning_rate": 4.5673469387755104e-06, + "loss": 1.2311, + "step": 477800 + }, + { + "epoch": 10.11, + "learning_rate": 4.5469387755102046e-06, + "loss": 1.2274, + "step": 477900 + }, + { + "epoch": 10.12, + "learning_rate": 4.526530612244898e-06, + "loss": 1.2282, + "step": 478000 + }, + { + "epoch": 10.12, + "learning_rate": 4.506122448979592e-06, + "loss": 1.2281, + "step": 478100 + }, + { + "epoch": 10.12, + "learning_rate": 4.485714285714286e-06, + "loss": 1.2275, + "step": 478200 + }, + { + "epoch": 10.12, + "learning_rate": 4.465306122448979e-06, + "loss": 1.22, + "step": 478300 + }, + { + "epoch": 10.12, + "learning_rate": 4.4448979591836735e-06, + "loss": 1.2232, + "step": 478400 + }, + { + "epoch": 10.13, + "learning_rate": 4.424489795918368e-06, + "loss": 1.2264, + "step": 478500 + }, + { + "epoch": 10.13, + "learning_rate": 4.404081632653062e-06, + "loss": 1.2218, + "step": 478600 + }, + { + "epoch": 10.13, + "learning_rate": 4.383673469387755e-06, + "loss": 1.2193, + "step": 478700 + }, + { + "epoch": 10.13, + "learning_rate": 4.363265306122449e-06, + "loss": 1.2281, + "step": 478800 + }, + { + "epoch": 10.14, + "learning_rate": 4.342857142857143e-06, + "loss": 1.2275, + "step": 478900 + }, + { + "epoch": 10.14, + "learning_rate": 4.322448979591837e-06, + "loss": 1.2196, + "step": 479000 + }, + { + "epoch": 10.14, + "learning_rate": 4.302040816326531e-06, + "loss": 1.2273, + "step": 479100 + }, + { + "epoch": 10.14, + "learning_rate": 4.281632653061225e-06, + "loss": 1.2242, + "step": 479200 + }, + { + "epoch": 10.14, + "learning_rate": 4.261224489795919e-06, + "loss": 1.2295, + "step": 479300 + }, + { + "epoch": 10.15, + "learning_rate": 4.240816326530612e-06, + "loss": 1.2229, + "step": 479400 + }, + { + "epoch": 10.15, + "learning_rate": 4.22061224489796e-06, + "loss": 1.2248, + "step": 479500 + }, + { + "epoch": 10.15, + "learning_rate": 4.200204081632653e-06, + "loss": 1.2287, + "step": 479600 + }, + { + "epoch": 10.15, + "learning_rate": 4.179795918367347e-06, + "loss": 1.228, + "step": 479700 + }, + { + "epoch": 10.15, + "learning_rate": 4.159387755102041e-06, + "loss": 1.2262, + "step": 479800 + }, + { + "epoch": 10.16, + "learning_rate": 4.138979591836735e-06, + "loss": 1.2226, + "step": 479900 + }, + { + "epoch": 10.16, + "learning_rate": 4.118571428571429e-06, + "loss": 1.2266, + "step": 480000 + }, + { + "epoch": 10.16, + "learning_rate": 4.098163265306123e-06, + "loss": 1.2271, + "step": 480100 + }, + { + "epoch": 10.16, + "learning_rate": 4.077755102040817e-06, + "loss": 1.2317, + "step": 480200 + }, + { + "epoch": 10.17, + "learning_rate": 4.05734693877551e-06, + "loss": 1.2212, + "step": 480300 + }, + { + "epoch": 10.17, + "learning_rate": 4.036938775510204e-06, + "loss": 1.2195, + "step": 480400 + }, + { + "epoch": 10.17, + "learning_rate": 4.0165306122448985e-06, + "loss": 1.2338, + "step": 480500 + }, + { + "epoch": 10.17, + "learning_rate": 3.996122448979592e-06, + "loss": 1.2159, + "step": 480600 + }, + { + "epoch": 10.17, + "learning_rate": 3.975714285714286e-06, + "loss": 1.2264, + "step": 480700 + }, + { + "epoch": 10.18, + "learning_rate": 3.95530612244898e-06, + "loss": 1.229, + "step": 480800 + }, + { + "epoch": 10.18, + "learning_rate": 3.934897959183673e-06, + "loss": 1.2207, + "step": 480900 + }, + { + "epoch": 10.18, + "learning_rate": 3.914489795918367e-06, + "loss": 1.2257, + "step": 481000 + }, + { + "epoch": 10.18, + "learning_rate": 3.8940816326530615e-06, + "loss": 1.2255, + "step": 481100 + }, + { + "epoch": 10.18, + "learning_rate": 3.873673469387756e-06, + "loss": 1.2307, + "step": 481200 + }, + { + "epoch": 10.19, + "learning_rate": 3.853265306122449e-06, + "loss": 1.2227, + "step": 481300 + }, + { + "epoch": 10.19, + "learning_rate": 3.832857142857143e-06, + "loss": 1.2256, + "step": 481400 + }, + { + "epoch": 10.19, + "learning_rate": 3.812448979591837e-06, + "loss": 1.2249, + "step": 481500 + }, + { + "epoch": 10.19, + "learning_rate": 3.792244897959184e-06, + "loss": 1.2172, + "step": 481600 + }, + { + "epoch": 10.19, + "learning_rate": 3.771836734693878e-06, + "loss": 1.222, + "step": 481700 + }, + { + "epoch": 10.2, + "learning_rate": 3.7514285714285718e-06, + "loss": 1.2157, + "step": 481800 + }, + { + "epoch": 10.2, + "learning_rate": 3.7310204081632655e-06, + "loss": 1.237, + "step": 481900 + }, + { + "epoch": 10.2, + "learning_rate": 3.7106122448979596e-06, + "loss": 1.2277, + "step": 482000 + }, + { + "epoch": 10.2, + "learning_rate": 3.6902040816326533e-06, + "loss": 1.2335, + "step": 482100 + }, + { + "epoch": 10.21, + "learning_rate": 3.669795918367347e-06, + "loss": 1.2252, + "step": 482200 + }, + { + "epoch": 10.21, + "learning_rate": 3.649591836734694e-06, + "loss": 1.2236, + "step": 482300 + }, + { + "epoch": 10.21, + "learning_rate": 3.629183673469388e-06, + "loss": 1.2247, + "step": 482400 + }, + { + "epoch": 10.21, + "learning_rate": 3.608775510204082e-06, + "loss": 1.2235, + "step": 482500 + }, + { + "epoch": 10.21, + "learning_rate": 3.5883673469387757e-06, + "loss": 1.2295, + "step": 482600 + }, + { + "epoch": 10.22, + "learning_rate": 3.5679591836734694e-06, + "loss": 1.2266, + "step": 482700 + }, + { + "epoch": 10.22, + "learning_rate": 3.5475510204081635e-06, + "loss": 1.2223, + "step": 482800 + }, + { + "epoch": 10.22, + "learning_rate": 3.527142857142857e-06, + "loss": 1.2246, + "step": 482900 + }, + { + "epoch": 10.22, + "learning_rate": 3.506734693877551e-06, + "loss": 1.2246, + "step": 483000 + }, + { + "epoch": 10.22, + "learning_rate": 3.4863265306122454e-06, + "loss": 1.2276, + "step": 483100 + }, + { + "epoch": 10.23, + "learning_rate": 3.4659183673469387e-06, + "loss": 1.2216, + "step": 483200 + }, + { + "epoch": 10.23, + "learning_rate": 3.4455102040816324e-06, + "loss": 1.2202, + "step": 483300 + }, + { + "epoch": 10.23, + "learning_rate": 3.425102040816327e-06, + "loss": 1.2219, + "step": 483400 + }, + { + "epoch": 10.23, + "learning_rate": 3.4046938775510207e-06, + "loss": 1.2339, + "step": 483500 + }, + { + "epoch": 10.23, + "learning_rate": 3.384285714285714e-06, + "loss": 1.2185, + "step": 483600 + }, + { + "epoch": 10.24, + "learning_rate": 3.3638775510204085e-06, + "loss": 1.2231, + "step": 483700 + }, + { + "epoch": 10.24, + "learning_rate": 3.343469387755102e-06, + "loss": 1.2306, + "step": 483800 + }, + { + "epoch": 10.24, + "learning_rate": 3.323061224489796e-06, + "loss": 1.2254, + "step": 483900 + }, + { + "epoch": 10.24, + "learning_rate": 3.30265306122449e-06, + "loss": 1.2276, + "step": 484000 + }, + { + "epoch": 10.25, + "learning_rate": 3.2822448979591837e-06, + "loss": 1.2308, + "step": 484100 + }, + { + "epoch": 10.25, + "learning_rate": 3.261836734693878e-06, + "loss": 1.2268, + "step": 484200 + }, + { + "epoch": 10.25, + "learning_rate": 3.2414285714285716e-06, + "loss": 1.2258, + "step": 484300 + }, + { + "epoch": 10.25, + "learning_rate": 3.2212244897959183e-06, + "loss": 1.2246, + "step": 484400 + }, + { + "epoch": 10.25, + "learning_rate": 3.2008163265306124e-06, + "loss": 1.2265, + "step": 484500 + }, + { + "epoch": 10.26, + "learning_rate": 3.180408163265306e-06, + "loss": 1.2245, + "step": 484600 + }, + { + "epoch": 10.26, + "learning_rate": 3.1600000000000007e-06, + "loss": 1.2169, + "step": 484700 + }, + { + "epoch": 10.26, + "learning_rate": 3.139591836734694e-06, + "loss": 1.2282, + "step": 484800 + }, + { + "epoch": 10.26, + "learning_rate": 3.119183673469388e-06, + "loss": 1.2285, + "step": 484900 + }, + { + "epoch": 10.26, + "learning_rate": 3.0987755102040818e-06, + "loss": 1.223, + "step": 485000 + }, + { + "epoch": 10.27, + "learning_rate": 3.078367346938776e-06, + "loss": 1.2171, + "step": 485100 + }, + { + "epoch": 10.27, + "learning_rate": 3.0579591836734696e-06, + "loss": 1.2261, + "step": 485200 + }, + { + "epoch": 10.27, + "learning_rate": 3.0375510204081633e-06, + "loss": 1.2176, + "step": 485300 + }, + { + "epoch": 10.27, + "learning_rate": 3.0171428571428574e-06, + "loss": 1.2157, + "step": 485400 + }, + { + "epoch": 10.28, + "learning_rate": 2.996734693877551e-06, + "loss": 1.2245, + "step": 485500 + }, + { + "epoch": 10.28, + "learning_rate": 2.976326530612245e-06, + "loss": 1.2222, + "step": 485600 + }, + { + "epoch": 10.28, + "learning_rate": 2.955918367346939e-06, + "loss": 1.2213, + "step": 485700 + }, + { + "epoch": 10.28, + "learning_rate": 2.9355102040816326e-06, + "loss": 1.219, + "step": 485800 + }, + { + "epoch": 10.28, + "learning_rate": 2.9151020408163268e-06, + "loss": 1.222, + "step": 485900 + }, + { + "epoch": 10.29, + "learning_rate": 2.8946938775510205e-06, + "loss": 1.2206, + "step": 486000 + }, + { + "epoch": 10.29, + "learning_rate": 2.8742857142857146e-06, + "loss": 1.2267, + "step": 486100 + }, + { + "epoch": 10.29, + "learning_rate": 2.8538775510204083e-06, + "loss": 1.2256, + "step": 486200 + }, + { + "epoch": 10.29, + "learning_rate": 2.833469387755102e-06, + "loss": 1.226, + "step": 486300 + }, + { + "epoch": 10.29, + "learning_rate": 2.813061224489796e-06, + "loss": 1.2251, + "step": 486400 + }, + { + "epoch": 10.3, + "learning_rate": 2.79265306122449e-06, + "loss": 1.2263, + "step": 486500 + }, + { + "epoch": 10.3, + "learning_rate": 2.772244897959184e-06, + "loss": 1.2272, + "step": 486600 + }, + { + "epoch": 10.3, + "learning_rate": 2.7518367346938777e-06, + "loss": 1.2217, + "step": 486700 + }, + { + "epoch": 10.3, + "learning_rate": 2.7314285714285718e-06, + "loss": 1.2232, + "step": 486800 + }, + { + "epoch": 10.3, + "learning_rate": 2.7110204081632655e-06, + "loss": 1.2339, + "step": 486900 + }, + { + "epoch": 10.31, + "learning_rate": 2.690612244897959e-06, + "loss": 1.2232, + "step": 487000 + }, + { + "epoch": 10.31, + "learning_rate": 2.6702040816326533e-06, + "loss": 1.2222, + "step": 487100 + }, + { + "epoch": 10.31, + "learning_rate": 2.649795918367347e-06, + "loss": 1.2229, + "step": 487200 + }, + { + "epoch": 10.31, + "learning_rate": 2.6293877551020407e-06, + "loss": 1.2266, + "step": 487300 + }, + { + "epoch": 10.32, + "learning_rate": 2.608979591836735e-06, + "loss": 1.2234, + "step": 487400 + }, + { + "epoch": 10.32, + "learning_rate": 2.5885714285714285e-06, + "loss": 1.2215, + "step": 487500 + }, + { + "epoch": 10.32, + "learning_rate": 2.5681632653061227e-06, + "loss": 1.2245, + "step": 487600 + }, + { + "epoch": 10.32, + "learning_rate": 2.5477551020408164e-06, + "loss": 1.2196, + "step": 487700 + }, + { + "epoch": 10.32, + "learning_rate": 2.5273469387755105e-06, + "loss": 1.2243, + "step": 487800 + }, + { + "epoch": 10.33, + "learning_rate": 2.506938775510204e-06, + "loss": 1.2196, + "step": 487900 + }, + { + "epoch": 10.33, + "learning_rate": 2.486530612244898e-06, + "loss": 1.2181, + "step": 488000 + }, + { + "epoch": 10.33, + "learning_rate": 2.466122448979592e-06, + "loss": 1.2224, + "step": 488100 + }, + { + "epoch": 10.33, + "learning_rate": 2.4457142857142857e-06, + "loss": 1.2226, + "step": 488200 + }, + { + "epoch": 10.33, + "learning_rate": 2.42530612244898e-06, + "loss": 1.2301, + "step": 488300 + }, + { + "epoch": 10.34, + "learning_rate": 2.4048979591836735e-06, + "loss": 1.2223, + "step": 488400 + }, + { + "epoch": 10.34, + "learning_rate": 2.3844897959183677e-06, + "loss": 1.2235, + "step": 488500 + }, + { + "epoch": 10.34, + "learning_rate": 2.3642857142857144e-06, + "loss": 1.2188, + "step": 488600 + }, + { + "epoch": 10.34, + "learning_rate": 2.3438775510204085e-06, + "loss": 1.2206, + "step": 488700 + }, + { + "epoch": 10.34, + "learning_rate": 2.3234693877551022e-06, + "loss": 1.2249, + "step": 488800 + }, + { + "epoch": 10.35, + "learning_rate": 2.303061224489796e-06, + "loss": 1.2195, + "step": 488900 + }, + { + "epoch": 10.35, + "learning_rate": 2.28265306122449e-06, + "loss": 1.2179, + "step": 489000 + }, + { + "epoch": 10.35, + "learning_rate": 2.2622448979591838e-06, + "loss": 1.2227, + "step": 489100 + }, + { + "epoch": 10.35, + "learning_rate": 2.2418367346938775e-06, + "loss": 1.2185, + "step": 489200 + }, + { + "epoch": 10.36, + "learning_rate": 2.2214285714285716e-06, + "loss": 1.2242, + "step": 489300 + }, + { + "epoch": 10.36, + "learning_rate": 2.2010204081632657e-06, + "loss": 1.2216, + "step": 489400 + }, + { + "epoch": 10.36, + "learning_rate": 2.180612244897959e-06, + "loss": 1.2285, + "step": 489500 + }, + { + "epoch": 10.36, + "learning_rate": 2.160204081632653e-06, + "loss": 1.2212, + "step": 489600 + }, + { + "epoch": 10.36, + "learning_rate": 2.1397959183673472e-06, + "loss": 1.2219, + "step": 489700 + }, + { + "epoch": 10.37, + "learning_rate": 2.119387755102041e-06, + "loss": 1.2212, + "step": 489800 + }, + { + "epoch": 10.37, + "learning_rate": 2.0989795918367346e-06, + "loss": 1.2282, + "step": 489900 + }, + { + "epoch": 10.37, + "learning_rate": 2.0785714285714288e-06, + "loss": 1.2213, + "step": 490000 + }, + { + "epoch": 10.37, + "learning_rate": 2.0581632653061225e-06, + "loss": 1.2181, + "step": 490100 + }, + { + "epoch": 10.37, + "learning_rate": 2.037755102040816e-06, + "loss": 1.2207, + "step": 490200 + }, + { + "epoch": 10.38, + "learning_rate": 2.0173469387755103e-06, + "loss": 1.2176, + "step": 490300 + }, + { + "epoch": 10.38, + "learning_rate": 1.9969387755102044e-06, + "loss": 1.2252, + "step": 490400 + }, + { + "epoch": 10.38, + "learning_rate": 1.9765306122448977e-06, + "loss": 1.2247, + "step": 490500 + }, + { + "epoch": 10.38, + "learning_rate": 1.9563265306122453e-06, + "loss": 1.2245, + "step": 490600 + }, + { + "epoch": 10.39, + "learning_rate": 1.9359183673469385e-06, + "loss": 1.2242, + "step": 490700 + }, + { + "epoch": 10.39, + "learning_rate": 1.9155102040816327e-06, + "loss": 1.2224, + "step": 490800 + }, + { + "epoch": 10.39, + "learning_rate": 1.8951020408163268e-06, + "loss": 1.2253, + "step": 490900 + }, + { + "epoch": 10.39, + "learning_rate": 1.8746938775510203e-06, + "loss": 1.2197, + "step": 491000 + }, + { + "epoch": 10.39, + "learning_rate": 1.8542857142857144e-06, + "loss": 1.2264, + "step": 491100 + }, + { + "epoch": 10.4, + "learning_rate": 1.8338775510204083e-06, + "loss": 1.2218, + "step": 491200 + }, + { + "epoch": 10.4, + "learning_rate": 1.8134693877551022e-06, + "loss": 1.2184, + "step": 491300 + }, + { + "epoch": 10.4, + "learning_rate": 1.793061224489796e-06, + "loss": 1.2283, + "step": 491400 + }, + { + "epoch": 10.4, + "learning_rate": 1.7726530612244899e-06, + "loss": 1.2203, + "step": 491500 + }, + { + "epoch": 10.4, + "learning_rate": 1.7522448979591838e-06, + "loss": 1.2184, + "step": 491600 + }, + { + "epoch": 10.41, + "learning_rate": 1.7318367346938775e-06, + "loss": 1.2223, + "step": 491700 + }, + { + "epoch": 10.41, + "learning_rate": 1.7114285714285714e-06, + "loss": 1.2258, + "step": 491800 + }, + { + "epoch": 10.41, + "learning_rate": 1.6910204081632655e-06, + "loss": 1.2317, + "step": 491900 + }, + { + "epoch": 10.41, + "learning_rate": 1.6706122448979594e-06, + "loss": 1.2195, + "step": 492000 + }, + { + "epoch": 10.41, + "learning_rate": 1.6502040816326531e-06, + "loss": 1.2249, + "step": 492100 + }, + { + "epoch": 10.42, + "learning_rate": 1.629795918367347e-06, + "loss": 1.2281, + "step": 492200 + }, + { + "epoch": 10.42, + "learning_rate": 1.609387755102041e-06, + "loss": 1.2272, + "step": 492300 + }, + { + "epoch": 10.42, + "learning_rate": 1.5889795918367346e-06, + "loss": 1.2195, + "step": 492400 + }, + { + "epoch": 10.42, + "learning_rate": 1.5685714285714286e-06, + "loss": 1.2191, + "step": 492500 + }, + { + "epoch": 10.43, + "learning_rate": 1.5481632653061227e-06, + "loss": 1.2325, + "step": 492600 + }, + { + "epoch": 10.43, + "learning_rate": 1.5277551020408164e-06, + "loss": 1.2208, + "step": 492700 + }, + { + "epoch": 10.43, + "learning_rate": 1.5075510204081633e-06, + "loss": 1.2266, + "step": 492800 + }, + { + "epoch": 10.43, + "learning_rate": 1.4871428571428572e-06, + "loss": 1.2201, + "step": 492900 + }, + { + "epoch": 10.43, + "learning_rate": 1.4667346938775512e-06, + "loss": 1.2213, + "step": 493000 + }, + { + "epoch": 10.44, + "learning_rate": 1.446326530612245e-06, + "loss": 1.2284, + "step": 493100 + }, + { + "epoch": 10.44, + "learning_rate": 1.426122448979592e-06, + "loss": 1.2221, + "step": 493200 + }, + { + "epoch": 10.44, + "learning_rate": 1.4057142857142857e-06, + "loss": 1.2205, + "step": 493300 + }, + { + "epoch": 10.44, + "learning_rate": 1.3853061224489796e-06, + "loss": 1.219, + "step": 493400 + }, + { + "epoch": 10.44, + "learning_rate": 1.3648979591836735e-06, + "loss": 1.2211, + "step": 493500 + }, + { + "epoch": 10.45, + "learning_rate": 1.3444897959183675e-06, + "loss": 1.2197, + "step": 493600 + }, + { + "epoch": 10.45, + "learning_rate": 1.3240816326530614e-06, + "loss": 1.2257, + "step": 493700 + }, + { + "epoch": 10.45, + "learning_rate": 1.303673469387755e-06, + "loss": 1.2174, + "step": 493800 + }, + { + "epoch": 10.45, + "learning_rate": 1.283265306122449e-06, + "loss": 1.218, + "step": 493900 + }, + { + "epoch": 10.46, + "learning_rate": 1.262857142857143e-06, + "loss": 1.2164, + "step": 494000 + }, + { + "epoch": 10.46, + "learning_rate": 1.2424489795918368e-06, + "loss": 1.2246, + "step": 494100 + }, + { + "epoch": 10.46, + "learning_rate": 1.2220408163265307e-06, + "loss": 1.2148, + "step": 494200 + }, + { + "epoch": 10.46, + "learning_rate": 1.2016326530612244e-06, + "loss": 1.2194, + "step": 494300 + }, + { + "epoch": 10.46, + "learning_rate": 1.1812244897959185e-06, + "loss": 1.2263, + "step": 494400 + }, + { + "epoch": 10.47, + "learning_rate": 1.1608163265306123e-06, + "loss": 1.2212, + "step": 494500 + }, + { + "epoch": 10.47, + "learning_rate": 1.1404081632653062e-06, + "loss": 1.2207, + "step": 494600 + }, + { + "epoch": 10.47, + "learning_rate": 1.12e-06, + "loss": 1.2134, + "step": 494700 + }, + { + "epoch": 10.47, + "learning_rate": 1.0995918367346938e-06, + "loss": 1.2241, + "step": 494800 + }, + { + "epoch": 10.47, + "learning_rate": 1.079183673469388e-06, + "loss": 1.2178, + "step": 494900 + }, + { + "epoch": 10.48, + "learning_rate": 1.0587755102040816e-06, + "loss": 1.2188, + "step": 495000 + }, + { + "epoch": 10.48, + "learning_rate": 1.0383673469387755e-06, + "loss": 1.2221, + "step": 495100 + }, + { + "epoch": 10.48, + "learning_rate": 1.0179591836734694e-06, + "loss": 1.2234, + "step": 495200 + }, + { + "epoch": 10.48, + "learning_rate": 9.975510204081633e-07, + "loss": 1.226, + "step": 495300 + }, + { + "epoch": 10.48, + "learning_rate": 9.771428571428573e-07, + "loss": 1.2343, + "step": 495400 + }, + { + "epoch": 10.49, + "learning_rate": 9.56734693877551e-07, + "loss": 1.2268, + "step": 495500 + }, + { + "epoch": 10.49, + "learning_rate": 9.36326530612245e-07, + "loss": 1.2226, + "step": 495600 + }, + { + "epoch": 10.49, + "learning_rate": 9.159183673469388e-07, + "loss": 1.2204, + "step": 495700 + }, + { + "epoch": 10.49, + "learning_rate": 8.955102040816328e-07, + "loss": 1.2191, + "step": 495800 + }, + { + "epoch": 10.5, + "learning_rate": 8.751020408163266e-07, + "loss": 1.2223, + "step": 495900 + }, + { + "epoch": 10.5, + "learning_rate": 8.546938775510204e-07, + "loss": 1.2241, + "step": 496000 + }, + { + "epoch": 10.5, + "learning_rate": 8.342857142857143e-07, + "loss": 1.2242, + "step": 496100 + }, + { + "epoch": 10.5, + "learning_rate": 8.138775510204081e-07, + "loss": 1.2202, + "step": 496200 + }, + { + "epoch": 10.5, + "learning_rate": 7.934693877551022e-07, + "loss": 1.2285, + "step": 496300 + }, + { + "epoch": 10.51, + "learning_rate": 7.73061224489796e-07, + "loss": 1.2218, + "step": 496400 + }, + { + "epoch": 10.51, + "learning_rate": 7.526530612244899e-07, + "loss": 1.2196, + "step": 496500 + }, + { + "epoch": 10.51, + "learning_rate": 7.322448979591837e-07, + "loss": 1.227, + "step": 496600 + }, + { + "epoch": 10.51, + "learning_rate": 7.118367346938776e-07, + "loss": 1.2142, + "step": 496700 + }, + { + "epoch": 10.51, + "learning_rate": 6.914285714285714e-07, + "loss": 1.225, + "step": 496800 + }, + { + "epoch": 10.52, + "learning_rate": 6.710204081632653e-07, + "loss": 1.2231, + "step": 496900 + }, + { + "epoch": 10.52, + "learning_rate": 6.506122448979592e-07, + "loss": 1.2213, + "step": 497000 + }, + { + "epoch": 10.52, + "learning_rate": 6.302040816326531e-07, + "loss": 1.2222, + "step": 497100 + }, + { + "epoch": 10.52, + "learning_rate": 6.100000000000001e-07, + "loss": 1.2253, + "step": 497200 + }, + { + "epoch": 10.52, + "learning_rate": 5.895918367346939e-07, + "loss": 1.2174, + "step": 497300 + }, + { + "epoch": 10.53, + "learning_rate": 5.691836734693878e-07, + "loss": 1.2149, + "step": 497400 + }, + { + "epoch": 10.53, + "learning_rate": 5.487755102040816e-07, + "loss": 1.2161, + "step": 497500 + }, + { + "epoch": 10.53, + "learning_rate": 5.283673469387755e-07, + "loss": 1.2249, + "step": 497600 + }, + { + "epoch": 10.53, + "learning_rate": 5.079591836734694e-07, + "loss": 1.2213, + "step": 497700 + }, + { + "epoch": 10.54, + "learning_rate": 4.875510204081632e-07, + "loss": 1.2238, + "step": 497800 + }, + { + "epoch": 10.54, + "learning_rate": 4.6714285714285716e-07, + "loss": 1.2297, + "step": 497900 + }, + { + "epoch": 10.54, + "learning_rate": 4.469387755102041e-07, + "loss": 1.2267, + "step": 498000 + }, + { + "epoch": 10.54, + "learning_rate": 4.2653061224489797e-07, + "loss": 1.2237, + "step": 498100 + }, + { + "epoch": 10.54, + "learning_rate": 4.061224489795919e-07, + "loss": 1.2292, + "step": 498200 + }, + { + "epoch": 10.55, + "learning_rate": 3.8571428571428574e-07, + "loss": 1.2176, + "step": 498300 + }, + { + "epoch": 10.55, + "learning_rate": 3.653061224489796e-07, + "loss": 1.223, + "step": 498400 + }, + { + "epoch": 10.55, + "learning_rate": 3.4489795918367346e-07, + "loss": 1.2245, + "step": 498500 + }, + { + "epoch": 10.55, + "learning_rate": 3.244897959183674e-07, + "loss": 1.2216, + "step": 498600 + }, + { + "epoch": 10.55, + "learning_rate": 3.0408163265306124e-07, + "loss": 1.2207, + "step": 498700 + }, + { + "epoch": 10.56, + "learning_rate": 2.836734693877551e-07, + "loss": 1.223, + "step": 498800 + }, + { + "epoch": 10.56, + "learning_rate": 2.63265306122449e-07, + "loss": 1.2292, + "step": 498900 + }, + { + "epoch": 10.56, + "learning_rate": 2.4285714285714287e-07, + "loss": 1.2224, + "step": 499000 + }, + { + "epoch": 10.56, + "learning_rate": 2.2244897959183673e-07, + "loss": 1.2235, + "step": 499100 + }, + { + "epoch": 10.57, + "learning_rate": 2.0204081632653064e-07, + "loss": 1.2154, + "step": 499200 + }, + { + "epoch": 10.57, + "learning_rate": 1.816326530612245e-07, + "loss": 1.2203, + "step": 499300 + }, + { + "epoch": 10.57, + "learning_rate": 1.6122448979591836e-07, + "loss": 1.2276, + "step": 499400 + }, + { + "epoch": 10.57, + "learning_rate": 1.4081632653061225e-07, + "loss": 1.2178, + "step": 499500 + }, + { + "epoch": 10.57, + "learning_rate": 1.2040816326530614e-07, + "loss": 1.2197, + "step": 499600 + }, + { + "epoch": 10.58, + "learning_rate": 1.0000000000000001e-07, + "loss": 1.2206, + "step": 499700 + }, + { + "epoch": 10.58, + "learning_rate": 7.959183673469388e-08, + "loss": 1.2273, + "step": 499800 + }, + { + "epoch": 10.58, + "learning_rate": 5.9183673469387755e-08, + "loss": 1.2239, + "step": 499900 + }, + { + "epoch": 10.58, + "learning_rate": 3.8775510204081635e-08, + "loss": 1.2166, + "step": 500000 + } + ], + "max_steps": 500000, + "num_train_epochs": 11, + "total_flos": 1.6845286741848883e+19, + "trial_name": null, + "trial_params": null +}