diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,13439 +1,6701 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 10.0, - "global_step": 2230, + "epoch": 9.995515695067265, + "global_step": 1110, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.0, - "learning_rate": 6.000000000000001e-08, - "loss": 8.2054, + "epoch": 0.01, + "learning_rate": 0.0, + "loss": 10.1809, "step": 1 }, { - "epoch": 0.01, - "learning_rate": 6.000000000000001e-08, - "loss": 8.1219, + "epoch": 0.02, + "learning_rate": 6e-07, + "loss": 10.3677, "step": 2 }, { - "epoch": 0.01, - "learning_rate": 6.000000000000001e-08, - "loss": 8.2333, + "epoch": 0.03, + "learning_rate": 6e-07, + "loss": 10.32, "step": 3 }, { - "epoch": 0.02, - "learning_rate": 1.2000000000000002e-07, - "loss": 8.1853, + "epoch": 0.04, + "learning_rate": 1.2e-06, + "loss": 10.153, "step": 4 }, { - "epoch": 0.02, - "learning_rate": 1.8e-07, - "loss": 8.1226, + "epoch": 0.04, + "learning_rate": 1.8e-06, + "loss": 10.1314, "step": 5 }, { - "epoch": 0.03, - "learning_rate": 2.4000000000000003e-07, - "loss": 8.0944, + "epoch": 0.05, + "learning_rate": 2.4e-06, + "loss": 9.8055, "step": 6 }, { - "epoch": 0.03, - "learning_rate": 3.0000000000000004e-07, - "loss": 8.1681, + "epoch": 0.06, + "learning_rate": 2.9999999999999997e-06, + "loss": 9.6007, "step": 7 }, { - "epoch": 0.04, - "learning_rate": 3.6e-07, - "loss": 8.1575, + "epoch": 0.07, + "learning_rate": 3.6e-06, + "loss": 9.0158, "step": 8 }, { - "epoch": 0.04, - "learning_rate": 4.2e-07, - "loss": 8.184, + "epoch": 0.08, + "learning_rate": 4.2e-06, + "loss": 8.4618, "step": 9 }, { - "epoch": 0.04, - "learning_rate": 4.800000000000001e-07, - "loss": 8.1123, + "epoch": 0.09, + "learning_rate": 4.8e-06, + "loss": 8.2283, "step": 10 }, { - "epoch": 0.05, - "learning_rate": 5.4e-07, - "loss": 8.0231, + "epoch": 0.1, + "learning_rate": 5.399999999999999e-06, + "loss": 7.7538, "step": 11 }, { - "epoch": 0.05, - "learning_rate": 6.000000000000001e-07, - "loss": 7.9512, + "epoch": 0.11, + "learning_rate": 5.999999999999999e-06, + "loss": 7.5517, "step": 12 }, { - "epoch": 0.06, - "learning_rate": 6.599999999999999e-07, - "loss": 7.9844, + "epoch": 0.12, + "learning_rate": 6.599999999999999e-06, + "loss": 7.3315, "step": 13 }, { - "epoch": 0.06, - "learning_rate": 7.2e-07, - "loss": 8.0196, + "epoch": 0.13, + "learning_rate": 7.2e-06, + "loss": 7.1076, "step": 14 }, { - "epoch": 0.07, - "learning_rate": 7.799999999999999e-07, - "loss": 7.7862, + "epoch": 0.13, + "learning_rate": 7.799999999999998e-06, + "loss": 6.8569, "step": 15 }, { - "epoch": 0.07, - "learning_rate": 8.4e-07, - "loss": 7.7522, + "epoch": 0.14, + "learning_rate": 8.4e-06, + "loss": 6.7689, "step": 16 }, { - "epoch": 0.08, - "learning_rate": 9e-07, - "loss": 7.7857, + "epoch": 0.15, + "learning_rate": 8.999999999999999e-06, + "loss": 6.5553, "step": 17 }, { - "epoch": 0.08, - "learning_rate": 9.600000000000001e-07, - "loss": 7.7129, + "epoch": 0.16, + "learning_rate": 9.6e-06, + "loss": 6.3901, "step": 18 }, { - "epoch": 0.09, - "learning_rate": 1.0200000000000002e-06, - "loss": 7.6604, + "epoch": 0.17, + "learning_rate": 1.02e-05, + "loss": 6.3045, "step": 19 }, { - "epoch": 0.09, - "learning_rate": 1.08e-06, - "loss": 7.5476, + "epoch": 0.18, + "learning_rate": 1.0799999999999998e-05, + "loss": 6.1671, "step": 20 }, { - "epoch": 0.09, - "learning_rate": 1.14e-06, - "loss": 7.4493, + "epoch": 0.19, + "learning_rate": 1.14e-05, + "loss": 5.9445, "step": 21 }, { - "epoch": 0.1, - "learning_rate": 1.2000000000000002e-06, - "loss": 7.4508, + "epoch": 0.2, + "learning_rate": 1.1999999999999999e-05, + "loss": 5.8393, "step": 22 }, { - "epoch": 0.1, - "learning_rate": 1.26e-06, - "loss": 7.3061, + "epoch": 0.21, + "learning_rate": 1.26e-05, + "loss": 5.8426, "step": 23 }, { - "epoch": 0.11, - "learning_rate": 1.3199999999999999e-06, - "loss": 7.3454, + "epoch": 0.22, + "learning_rate": 1.3199999999999997e-05, + "loss": 5.6518, "step": 24 }, { - "epoch": 0.11, - "learning_rate": 1.38e-06, - "loss": 7.252, + "epoch": 0.22, + "learning_rate": 1.3799999999999998e-05, + "loss": 5.7463, "step": 25 }, { - "epoch": 0.12, - "learning_rate": 1.44e-06, - "loss": 7.1289, + "epoch": 0.23, + "learning_rate": 1.44e-05, + "loss": 5.6185, "step": 26 }, { - "epoch": 0.12, - "learning_rate": 1.5e-06, - "loss": 7.0522, + "epoch": 0.24, + "learning_rate": 1.4999999999999999e-05, + "loss": 5.5114, "step": 27 }, { - "epoch": 0.13, - "learning_rate": 1.5599999999999999e-06, - "loss": 6.962, + "epoch": 0.25, + "learning_rate": 1.5599999999999996e-05, + "loss": 5.3849, "step": 28 }, { - "epoch": 0.13, - "learning_rate": 1.5599999999999999e-06, - "loss": 6.8158, + "epoch": 0.26, + "learning_rate": 1.6199999999999997e-05, + "loss": 5.3192, "step": 29 }, { - "epoch": 0.13, - "learning_rate": 1.62e-06, - "loss": 6.9474, + "epoch": 0.27, + "learning_rate": 1.68e-05, + "loss": 5.2528, "step": 30 }, { - "epoch": 0.14, - "learning_rate": 1.68e-06, - "loss": 6.8897, + "epoch": 0.28, + "learning_rate": 1.74e-05, + "loss": 5.1951, "step": 31 }, { - "epoch": 0.14, - "learning_rate": 1.74e-06, - "loss": 6.821, + "epoch": 0.29, + "learning_rate": 1.7999999999999997e-05, + "loss": 5.1437, "step": 32 }, { - "epoch": 0.15, - "learning_rate": 1.8e-06, - "loss": 6.7075, + "epoch": 0.3, + "learning_rate": 1.8599999999999998e-05, + "loss": 5.1332, "step": 33 }, { - "epoch": 0.15, - "learning_rate": 1.86e-06, - "loss": 6.7231, + "epoch": 0.3, + "learning_rate": 1.92e-05, + "loss": 5.082, "step": 34 }, { - "epoch": 0.16, - "learning_rate": 1.9200000000000003e-06, - "loss": 6.617, + "epoch": 0.31, + "learning_rate": 1.98e-05, + "loss": 4.9941, "step": 35 }, { - "epoch": 0.16, - "learning_rate": 1.98e-06, - "loss": 6.5733, + "epoch": 0.32, + "learning_rate": 2.04e-05, + "loss": 4.977, "step": 36 }, { - "epoch": 0.17, - "learning_rate": 2.0400000000000004e-06, - "loss": 6.6763, + "epoch": 0.33, + "learning_rate": 2.1e-05, + "loss": 4.9351, "step": 37 }, { - "epoch": 0.17, - "learning_rate": 2.1000000000000002e-06, - "loss": 6.4522, + "epoch": 0.34, + "learning_rate": 2.1599999999999996e-05, + "loss": 4.9614, "step": 38 }, { - "epoch": 0.17, - "learning_rate": 2.16e-06, - "loss": 6.4858, + "epoch": 0.35, + "learning_rate": 2.2199999999999998e-05, + "loss": 4.8991, "step": 39 }, { - "epoch": 0.18, - "learning_rate": 2.22e-06, - "loss": 6.4352, + "epoch": 0.36, + "learning_rate": 2.28e-05, + "loss": 4.9255, "step": 40 }, { - "epoch": 0.18, - "learning_rate": 2.28e-06, - "loss": 6.3867, + "epoch": 0.37, + "learning_rate": 2.34e-05, + "loss": 4.8665, "step": 41 }, { - "epoch": 0.19, - "learning_rate": 2.34e-06, - "loss": 6.3603, + "epoch": 0.38, + "learning_rate": 2.3999999999999997e-05, + "loss": 4.8518, "step": 42 }, { - "epoch": 0.19, - "learning_rate": 2.4000000000000003e-06, - "loss": 6.2443, + "epoch": 0.39, + "learning_rate": 2.4599999999999998e-05, + "loss": 4.7914, "step": 43 }, { - "epoch": 0.2, - "learning_rate": 2.46e-06, - "loss": 6.2912, + "epoch": 0.39, + "learning_rate": 2.52e-05, + "loss": 4.8202, "step": 44 }, { - "epoch": 0.2, - "learning_rate": 2.52e-06, - "loss": 6.2332, + "epoch": 0.4, + "learning_rate": 2.5799999999999997e-05, + "loss": 4.9285, "step": 45 }, { - "epoch": 0.21, - "learning_rate": 2.58e-06, - "loss": 6.3944, + "epoch": 0.41, + "learning_rate": 2.6399999999999995e-05, + "loss": 4.8156, "step": 46 }, { - "epoch": 0.21, - "learning_rate": 2.6399999999999997e-06, - "loss": 6.2816, + "epoch": 0.42, + "learning_rate": 2.6999999999999996e-05, + "loss": 4.8383, "step": 47 }, { - "epoch": 0.22, - "learning_rate": 2.7e-06, - "loss": 6.185, + "epoch": 0.43, + "learning_rate": 2.7599999999999997e-05, + "loss": 4.8539, "step": 48 }, { - "epoch": 0.22, - "learning_rate": 2.76e-06, - "loss": 6.337, + "epoch": 0.44, + "learning_rate": 2.8199999999999998e-05, + "loss": 4.9313, "step": 49 }, { - "epoch": 0.22, - "learning_rate": 2.82e-06, - "loss": 6.1375, + "epoch": 0.45, + "learning_rate": 2.88e-05, + "loss": 4.9483, "step": 50 }, { - "epoch": 0.23, - "learning_rate": 2.88e-06, - "loss": 5.8827, + "epoch": 0.46, + "learning_rate": 2.94e-05, + "loss": 5.2754, "step": 51 }, { - "epoch": 0.23, - "learning_rate": 2.9400000000000002e-06, - "loss": 5.725, + "epoch": 0.47, + "learning_rate": 2.9999999999999997e-05, + "loss": 5.3344, "step": 52 }, { - "epoch": 0.24, - "learning_rate": 3e-06, - "loss": 5.6994, + "epoch": 0.48, + "learning_rate": 3.06e-05, + "loss": 5.2162, "step": 53 }, { - "epoch": 0.24, - "learning_rate": 3.06e-06, - "loss": 5.6824, + "epoch": 0.48, + "learning_rate": 3.119999999999999e-05, + "loss": 5.0826, "step": 54 }, { - "epoch": 0.25, - "learning_rate": 3.1199999999999998e-06, - "loss": 5.6554, + "epoch": 0.49, + "learning_rate": 3.1799999999999994e-05, + "loss": 4.8626, "step": 55 }, { - "epoch": 0.25, - "learning_rate": 3.18e-06, - "loss": 5.5958, + "epoch": 0.5, + "learning_rate": 3.2399999999999995e-05, + "loss": 4.8319, "step": 56 }, { - "epoch": 0.26, - "learning_rate": 3.24e-06, - "loss": 5.5991, + "epoch": 0.51, + "learning_rate": 3.2999999999999996e-05, + "loss": 4.771, "step": 57 }, { - "epoch": 0.26, - "learning_rate": 3.3e-06, - "loss": 5.5462, + "epoch": 0.52, + "learning_rate": 3.36e-05, + "loss": 4.6895, "step": 58 }, { - "epoch": 0.26, - "learning_rate": 3.36e-06, - "loss": 5.494, + "epoch": 0.53, + "learning_rate": 3.42e-05, + "loss": 4.693, "step": 59 }, { - "epoch": 0.27, - "learning_rate": 3.4200000000000003e-06, - "loss": 5.4829, + "epoch": 0.54, + "learning_rate": 3.48e-05, + "loss": 4.7241, "step": 60 }, { - "epoch": 0.27, - "learning_rate": 3.48e-06, - "loss": 5.4825, + "epoch": 0.55, + "learning_rate": 3.539999999999999e-05, + "loss": 4.7604, "step": 61 }, { - "epoch": 0.28, - "learning_rate": 3.54e-06, - "loss": 5.3116, + "epoch": 0.56, + "learning_rate": 3.5999999999999994e-05, + "loss": 4.6755, "step": 62 }, { - "epoch": 0.28, - "learning_rate": 3.6e-06, - "loss": 5.4564, + "epoch": 0.57, + "learning_rate": 3.6599999999999995e-05, + "loss": 4.6489, "step": 63 }, { - "epoch": 0.29, - "learning_rate": 3.66e-06, - "loss": 5.374, + "epoch": 0.57, + "learning_rate": 3.7199999999999996e-05, + "loss": 4.658, "step": 64 }, { - "epoch": 0.29, - "learning_rate": 3.72e-06, - "loss": 5.311, + "epoch": 0.58, + "learning_rate": 3.78e-05, + "loss": 4.6951, "step": 65 }, { - "epoch": 0.3, - "learning_rate": 3.7800000000000002e-06, - "loss": 5.2815, + "epoch": 0.59, + "learning_rate": 3.84e-05, + "loss": 4.5903, "step": 66 }, { - "epoch": 0.3, - "learning_rate": 3.8400000000000005e-06, - "loss": 5.2217, + "epoch": 0.6, + "learning_rate": 3.9e-05, + "loss": 4.6142, "step": 67 }, { - "epoch": 0.3, - "learning_rate": 3.9e-06, - "loss": 5.2679, + "epoch": 0.61, + "learning_rate": 3.96e-05, + "loss": 4.6912, "step": 68 }, { - "epoch": 0.31, - "learning_rate": 3.96e-06, - "loss": 5.2664, + "epoch": 0.62, + "learning_rate": 4.02e-05, + "loss": 4.6049, "step": 69 }, { - "epoch": 0.31, - "learning_rate": 4.0200000000000005e-06, - "loss": 5.186, + "epoch": 0.63, + "learning_rate": 4.08e-05, + "loss": 4.5636, "step": 70 }, { - "epoch": 0.32, - "learning_rate": 4.080000000000001e-06, - "loss": 5.1406, + "epoch": 0.64, + "learning_rate": 4.14e-05, + "loss": 4.6753, "step": 71 }, { - "epoch": 0.32, - "learning_rate": 4.14e-06, - "loss": 5.1835, + "epoch": 0.65, + "learning_rate": 4.2e-05, + "loss": 4.7513, "step": 72 }, { - "epoch": 0.33, - "learning_rate": 4.2000000000000004e-06, - "loss": 5.0411, + "epoch": 0.65, + "learning_rate": 4.259999999999999e-05, + "loss": 4.655, "step": 73 }, { - "epoch": 0.33, - "learning_rate": 4.26e-06, - "loss": 5.0951, + "epoch": 0.66, + "learning_rate": 4.319999999999999e-05, + "loss": 4.6118, "step": 74 }, { - "epoch": 0.34, - "learning_rate": 4.32e-06, - "loss": 5.1969, + "epoch": 0.67, + "learning_rate": 4.3799999999999994e-05, + "loss": 4.5642, "step": 75 }, { - "epoch": 0.34, - "learning_rate": 4.3799999999999996e-06, - "loss": 5.1237, + "epoch": 0.68, + "learning_rate": 4.4399999999999995e-05, + "loss": 5.0646, "step": 76 }, { - "epoch": 0.35, - "learning_rate": 4.44e-06, - "loss": 4.9818, + "epoch": 0.69, + "learning_rate": 4.4999999999999996e-05, + "loss": 5.1318, "step": 77 }, { - "epoch": 0.35, - "learning_rate": 4.5e-06, - "loss": 5.0479, + "epoch": 0.7, + "learning_rate": 4.56e-05, + "loss": 5.1167, "step": 78 }, { - "epoch": 0.35, - "learning_rate": 4.56e-06, - "loss": 5.0285, + "epoch": 0.71, + "learning_rate": 4.62e-05, + "loss": 5.0646, "step": 79 }, { - "epoch": 0.36, - "learning_rate": 4.62e-06, - "loss": 5.0754, + "epoch": 0.72, + "learning_rate": 4.68e-05, + "loss": 4.8794, "step": 80 }, { - "epoch": 0.36, - "learning_rate": 4.68e-06, - "loss": 4.9368, + "epoch": 0.73, + "learning_rate": 4.7399999999999993e-05, + "loss": 4.6735, "step": 81 }, { - "epoch": 0.37, - "learning_rate": 4.74e-06, - "loss": 4.9667, + "epoch": 0.74, + "learning_rate": 4.7999999999999994e-05, + "loss": 4.6389, "step": 82 }, { - "epoch": 0.37, - "learning_rate": 4.800000000000001e-06, - "loss": 4.9748, + "epoch": 0.74, + "learning_rate": 4.8599999999999995e-05, + "loss": 4.6545, "step": 83 }, { - "epoch": 0.38, - "learning_rate": 4.86e-06, - "loss": 4.9711, + "epoch": 0.75, + "learning_rate": 4.9199999999999997e-05, + "loss": 4.6443, "step": 84 }, { - "epoch": 0.38, - "learning_rate": 4.92e-06, - "loss": 5.035, + "epoch": 0.76, + "learning_rate": 4.98e-05, + "loss": 4.6087, "step": 85 }, { - "epoch": 0.39, - "learning_rate": 4.980000000000001e-06, - "loss": 4.8273, + "epoch": 0.77, + "learning_rate": 5.04e-05, + "loss": 4.5696, "step": 86 }, { - "epoch": 0.39, - "learning_rate": 5.04e-06, - "loss": 4.965, + "epoch": 0.78, + "learning_rate": 5.1e-05, + "loss": 4.5662, "step": 87 }, { - "epoch": 0.39, - "learning_rate": 5.1e-06, - "loss": 4.9021, + "epoch": 0.79, + "learning_rate": 5.1599999999999994e-05, + "loss": 4.6608, "step": 88 }, { - "epoch": 0.4, - "learning_rate": 5.16e-06, - "loss": 4.9831, + "epoch": 0.8, + "learning_rate": 5.2199999999999995e-05, + "loss": 4.5441, "step": 89 }, { - "epoch": 0.4, - "learning_rate": 5.22e-06, - "loss": 5.0348, + "epoch": 0.81, + "learning_rate": 5.279999999999999e-05, + "loss": 4.6029, "step": 90 }, { - "epoch": 0.41, - "learning_rate": 5.279999999999999e-06, - "loss": 4.9893, + "epoch": 0.82, + "learning_rate": 5.339999999999999e-05, + "loss": 4.4904, "step": 91 }, { - "epoch": 0.41, - "learning_rate": 5.34e-06, - "loss": 4.9375, + "epoch": 0.83, + "learning_rate": 5.399999999999999e-05, + "loss": 4.5875, "step": 92 }, { - "epoch": 0.42, - "learning_rate": 5.4e-06, - "loss": 4.9366, + "epoch": 0.83, + "learning_rate": 5.459999999999999e-05, + "loss": 4.5483, "step": 93 }, { - "epoch": 0.42, - "learning_rate": 5.46e-06, - "loss": 4.9483, + "epoch": 0.84, + "learning_rate": 5.519999999999999e-05, + "loss": 4.5813, "step": 94 }, { - "epoch": 0.43, - "learning_rate": 5.52e-06, - "loss": 4.9207, + "epoch": 0.85, + "learning_rate": 5.5799999999999994e-05, + "loss": 4.5633, "step": 95 }, { - "epoch": 0.43, - "learning_rate": 5.58e-06, - "loss": 4.9721, + "epoch": 0.86, + "learning_rate": 5.6399999999999995e-05, + "loss": 4.5124, "step": 96 }, { - "epoch": 0.43, - "learning_rate": 5.64e-06, - "loss": 5.0164, + "epoch": 0.87, + "learning_rate": 5.6999999999999996e-05, + "loss": 4.5139, "step": 97 }, { - "epoch": 0.44, - "learning_rate": 5.7000000000000005e-06, - "loss": 5.1602, + "epoch": 0.88, + "learning_rate": 5.76e-05, + "loss": 4.5894, "step": 98 }, { - "epoch": 0.44, - "learning_rate": 5.76e-06, - "loss": 4.9884, + "epoch": 0.89, + "learning_rate": 5.82e-05, + "loss": 4.5597, "step": 99 }, { - "epoch": 0.45, - "learning_rate": 5.82e-06, - "loss": 4.9934, + "epoch": 0.9, + "learning_rate": 5.88e-05, + "loss": 4.492, "step": 100 }, { - "epoch": 0.45, - "learning_rate": 5.8800000000000005e-06, - "loss": 5.4184, + "epoch": 0.91, + "learning_rate": 5.94e-05, + "loss": 5.1473, "step": 101 }, { - "epoch": 0.46, - "learning_rate": 5.940000000000001e-06, - "loss": 5.4165, + "epoch": 0.91, + "learning_rate": 5.9999999999999995e-05, + "loss": 5.2295, "step": 102 }, { - "epoch": 0.46, - "learning_rate": 6e-06, - "loss": 5.4173, + "epoch": 0.92, + "learning_rate": 6.0599999999999996e-05, + "loss": 5.1966, "step": 103 }, { - "epoch": 0.47, - "learning_rate": 6.0600000000000004e-06, - "loss": 5.3571, + "epoch": 0.93, + "learning_rate": 6.12e-05, + "loss": 4.9692, "step": 104 }, { - "epoch": 0.47, - "learning_rate": 6.12e-06, - "loss": 5.3026, + "epoch": 0.94, + "learning_rate": 6.18e-05, + "loss": 4.8494, "step": 105 }, { - "epoch": 0.48, - "learning_rate": 6.18e-06, - "loss": 5.2382, + "epoch": 0.95, + "learning_rate": 6.239999999999999e-05, + "loss": 4.6873, "step": 106 }, { - "epoch": 0.48, - "learning_rate": 6.2399999999999995e-06, - "loss": 5.0287, + "epoch": 0.96, + "learning_rate": 6.299999999999999e-05, + "loss": 4.639, "step": 107 }, { - "epoch": 0.48, - "learning_rate": 6.3e-06, - "loss": 5.0118, + "epoch": 0.97, + "learning_rate": 6.359999999999999e-05, + "loss": 4.5857, "step": 108 }, { - "epoch": 0.49, - "learning_rate": 6.36e-06, - "loss": 4.952, + "epoch": 0.98, + "learning_rate": 6.419999999999999e-05, + "loss": 4.5445, "step": 109 }, { - "epoch": 0.49, - "learning_rate": 6.42e-06, - "loss": 4.9043, + "epoch": 0.99, + "learning_rate": 6.479999999999999e-05, + "loss": 4.6775, "step": 110 }, { - "epoch": 0.5, - "learning_rate": 6.48e-06, - "loss": 4.8084, + "epoch": 1.0, + "learning_rate": 6.539999999999999e-05, + "loss": 4.7204, "step": 111 }, { - "epoch": 0.5, - "learning_rate": 6.54e-06, - "loss": 4.8808, + "epoch": 1.01, + "learning_rate": 6.599999999999999e-05, + "loss": 6.9572, "step": 112 }, { - "epoch": 0.51, - "learning_rate": 6.6e-06, - "loss": 4.8721, + "epoch": 1.02, + "learning_rate": 6.659999999999999e-05, + "loss": 4.5871, "step": 113 }, { - "epoch": 0.51, - "learning_rate": 6.660000000000001e-06, - "loss": 4.7812, + "epoch": 1.03, + "learning_rate": 6.72e-05, + "loss": 4.4989, "step": 114 }, { - "epoch": 0.52, - "learning_rate": 6.72e-06, - "loss": 4.8802, + "epoch": 1.04, + "learning_rate": 6.78e-05, + "loss": 4.4903, "step": 115 }, { - "epoch": 0.52, - "learning_rate": 6.78e-06, - "loss": 4.8304, + "epoch": 1.04, + "learning_rate": 6.84e-05, + "loss": 4.4851, "step": 116 }, { - "epoch": 0.52, - "learning_rate": 6.840000000000001e-06, - "loss": 4.88, + "epoch": 1.05, + "learning_rate": 6.9e-05, + "loss": 4.4768, "step": 117 }, { - "epoch": 0.53, - "learning_rate": 6.900000000000001e-06, - "loss": 4.8054, + "epoch": 1.06, + "learning_rate": 6.96e-05, + "loss": 4.4474, "step": 118 }, { - "epoch": 0.53, - "learning_rate": 6.96e-06, - "loss": 4.7917, + "epoch": 1.07, + "learning_rate": 7.02e-05, + "loss": 4.4565, "step": 119 }, { - "epoch": 0.54, - "learning_rate": 7.0200000000000006e-06, - "loss": 4.8489, + "epoch": 1.08, + "learning_rate": 7.079999999999999e-05, + "loss": 4.4612, "step": 120 }, { - "epoch": 0.54, - "learning_rate": 7.08e-06, - "loss": 4.7469, + "epoch": 1.09, + "learning_rate": 7.139999999999999e-05, + "loss": 4.4651, "step": 121 }, { - "epoch": 0.55, - "learning_rate": 7.14e-06, - "loss": 4.7927, + "epoch": 1.1, + "learning_rate": 7.199999999999999e-05, + "loss": 4.3432, "step": 122 }, { - "epoch": 0.55, - "learning_rate": 7.2e-06, - "loss": 4.7484, + "epoch": 1.11, + "learning_rate": 7.259999999999999e-05, + "loss": 4.3871, "step": 123 }, { - "epoch": 0.56, - "learning_rate": 7.26e-06, - "loss": 4.7096, + "epoch": 1.12, + "learning_rate": 7.319999999999999e-05, + "loss": 4.3813, "step": 124 }, { - "epoch": 0.56, - "learning_rate": 7.32e-06, - "loss": 4.8926, + "epoch": 1.13, + "learning_rate": 7.379999999999999e-05, + "loss": 4.3116, "step": 125 }, { - "epoch": 0.57, - "learning_rate": 7.3800000000000005e-06, - "loss": 4.763, + "epoch": 1.13, + "learning_rate": 7.439999999999999e-05, + "loss": 4.2469, "step": 126 }, { - "epoch": 0.57, - "learning_rate": 7.44e-06, - "loss": 4.82, + "epoch": 1.14, + "learning_rate": 7.5e-05, + "loss": 4.2718, "step": 127 }, { - "epoch": 0.57, - "learning_rate": 7.5e-06, - "loss": 4.8304, + "epoch": 1.15, + "learning_rate": 7.56e-05, + "loss": 4.3074, "step": 128 }, { - "epoch": 0.58, - "learning_rate": 7.5600000000000005e-06, - "loss": 4.8098, + "epoch": 1.16, + "learning_rate": 7.62e-05, + "loss": 4.3431, "step": 129 }, { - "epoch": 0.58, - "learning_rate": 7.62e-06, - "loss": 4.8215, + "epoch": 1.17, + "learning_rate": 7.68e-05, + "loss": 4.3321, "step": 130 }, { - "epoch": 0.59, - "learning_rate": 7.680000000000001e-06, - "loss": 4.8154, + "epoch": 1.18, + "learning_rate": 7.74e-05, + "loss": 4.2847, "step": 131 }, { - "epoch": 0.59, - "learning_rate": 7.74e-06, - "loss": 4.6749, + "epoch": 1.19, + "learning_rate": 7.8e-05, + "loss": 4.3293, "step": 132 }, { - "epoch": 0.6, - "learning_rate": 7.8e-06, - "loss": 4.6698, + "epoch": 1.2, + "learning_rate": 7.86e-05, + "loss": 4.3492, "step": 133 }, { - "epoch": 0.6, - "learning_rate": 7.860000000000001e-06, - "loss": 4.7338, + "epoch": 1.21, + "learning_rate": 7.92e-05, + "loss": 4.2798, "step": 134 }, { - "epoch": 0.61, - "learning_rate": 7.92e-06, - "loss": 4.7836, + "epoch": 1.22, + "learning_rate": 7.98e-05, + "loss": 4.3546, "step": 135 }, { - "epoch": 0.61, - "learning_rate": 7.98e-06, - "loss": 4.7491, + "epoch": 1.22, + "learning_rate": 8.04e-05, + "loss": 4.235, "step": 136 }, { - "epoch": 0.61, - "learning_rate": 8.040000000000001e-06, - "loss": 4.8325, + "epoch": 1.23, + "learning_rate": 8.1e-05, + "loss": 5.5645, "step": 137 }, { - "epoch": 0.62, - "learning_rate": 8.1e-06, - "loss": 4.7691, + "epoch": 1.24, + "learning_rate": 8.16e-05, + "loss": 5.6934, "step": 138 }, { - "epoch": 0.62, - "learning_rate": 8.160000000000001e-06, - "loss": 4.6887, + "epoch": 1.25, + "learning_rate": 8.22e-05, + "loss": 5.5843, "step": 139 }, { - "epoch": 0.63, - "learning_rate": 8.220000000000001e-06, - "loss": 4.8142, + "epoch": 1.26, + "learning_rate": 8.28e-05, + "loss": 5.3033, "step": 140 }, { - "epoch": 0.63, - "learning_rate": 8.28e-06, - "loss": 4.7964, + "epoch": 1.27, + "learning_rate": 8.34e-05, + "loss": 4.9826, "step": 141 }, { - "epoch": 0.64, - "learning_rate": 8.340000000000001e-06, - "loss": 4.816, + "epoch": 1.28, + "learning_rate": 8.4e-05, + "loss": 4.7511, "step": 142 }, { - "epoch": 0.64, - "learning_rate": 8.400000000000001e-06, - "loss": 4.9627, + "epoch": 1.29, + "learning_rate": 8.459999999999998e-05, + "loss": 4.5923, "step": 143 }, { - "epoch": 0.65, - "learning_rate": 8.459999999999999e-06, - "loss": 4.884, + "epoch": 1.3, + "learning_rate": 8.519999999999998e-05, + "loss": 4.4865, "step": 144 }, { - "epoch": 0.65, - "learning_rate": 8.52e-06, - "loss": 4.7096, + "epoch": 1.3, + "learning_rate": 8.579999999999998e-05, + "loss": 4.4707, "step": 145 }, { - "epoch": 0.65, - "learning_rate": 8.58e-06, - "loss": 4.7363, + "epoch": 1.31, + "learning_rate": 8.639999999999999e-05, + "loss": 4.4407, "step": 146 }, { - "epoch": 0.66, - "learning_rate": 8.64e-06, - "loss": 4.7037, + "epoch": 1.32, + "learning_rate": 8.699999999999999e-05, + "loss": 4.4704, "step": 147 }, { - "epoch": 0.66, - "learning_rate": 8.7e-06, - "loss": 5.0372, + "epoch": 1.33, + "learning_rate": 8.759999999999999e-05, + "loss": 4.3955, "step": 148 }, { - "epoch": 0.67, - "learning_rate": 8.759999999999999e-06, - "loss": 4.8017, + "epoch": 1.34, + "learning_rate": 8.819999999999999e-05, + "loss": 4.4305, "step": 149 }, { - "epoch": 0.67, - "learning_rate": 8.82e-06, - "loss": 4.9157, + "epoch": 1.35, + "learning_rate": 8.879999999999999e-05, + "loss": 4.4178, "step": 150 }, { - "epoch": 0.68, - "learning_rate": 8.88e-06, - "loss": 5.2338, + "epoch": 1.36, + "learning_rate": 8.939999999999999e-05, + "loss": 4.3738, "step": 151 }, { - "epoch": 0.68, - "learning_rate": 8.939999999999999e-06, - "loss": 5.2328, + "epoch": 1.37, + "learning_rate": 8.999999999999999e-05, + "loss": 4.345, "step": 152 }, { - "epoch": 0.69, - "learning_rate": 9e-06, - "loss": 5.2131, + "epoch": 1.38, + "learning_rate": 9.059999999999999e-05, + "loss": 4.3189, "step": 153 }, { - "epoch": 0.69, - "learning_rate": 9.06e-06, - "loss": 5.0913, + "epoch": 1.39, + "learning_rate": 9.12e-05, + "loss": 4.3548, "step": 154 }, { - "epoch": 0.7, - "learning_rate": 9.12e-06, - "loss": 5.1419, + "epoch": 1.39, + "learning_rate": 9.18e-05, + "loss": 4.3476, "step": 155 }, { - "epoch": 0.7, - "learning_rate": 9.18e-06, - "loss": 5.0874, + "epoch": 1.4, + "learning_rate": 9.24e-05, + "loss": 4.3373, "step": 156 }, { - "epoch": 0.7, - "learning_rate": 9.24e-06, - "loss": 4.9494, + "epoch": 1.41, + "learning_rate": 9.3e-05, + "loss": 4.3655, "step": 157 }, { - "epoch": 0.71, - "learning_rate": 9.3e-06, - "loss": 4.8881, + "epoch": 1.42, + "learning_rate": 9.36e-05, + "loss": 4.3406, "step": 158 }, { - "epoch": 0.71, - "learning_rate": 9.36e-06, - "loss": 4.7511, + "epoch": 1.43, + "learning_rate": 9.419999999999999e-05, + "loss": 4.2992, "step": 159 }, { - "epoch": 0.72, - "learning_rate": 9.42e-06, - "loss": 4.81, + "epoch": 1.44, + "learning_rate": 9.479999999999999e-05, + "loss": 4.3091, "step": 160 }, { - "epoch": 0.72, - "learning_rate": 9.48e-06, - "loss": 4.6441, + "epoch": 1.45, + "learning_rate": 9.539999999999999e-05, + "loss": 4.2523, "step": 161 }, { - "epoch": 0.73, - "learning_rate": 9.54e-06, - "loss": 4.6864, + "epoch": 1.46, + "learning_rate": 9.599999999999999e-05, + "loss": 5.3727, "step": 162 }, { - "epoch": 0.73, - "learning_rate": 9.600000000000001e-06, - "loss": 4.7303, + "epoch": 1.47, + "learning_rate": 9.659999999999999e-05, + "loss": 5.5254, "step": 163 }, { - "epoch": 0.74, - "learning_rate": 9.66e-06, - "loss": 4.7054, + "epoch": 1.48, + "learning_rate": 9.719999999999999e-05, + "loss": 5.3926, "step": 164 }, { - "epoch": 0.74, - "learning_rate": 9.72e-06, - "loss": 4.8041, + "epoch": 1.48, + "learning_rate": 9.779999999999999e-05, + "loss": 5.1976, "step": 165 }, { - "epoch": 0.74, - "learning_rate": 9.780000000000001e-06, - "loss": 4.7566, + "epoch": 1.49, + "learning_rate": 9.839999999999999e-05, + "loss": 4.84, "step": 166 }, { - "epoch": 0.75, - "learning_rate": 9.84e-06, - "loss": 4.7286, + "epoch": 1.5, + "learning_rate": 9.9e-05, + "loss": 4.6971, "step": 167 }, { - "epoch": 0.75, - "learning_rate": 9.9e-06, - "loss": 4.7454, + "epoch": 1.51, + "learning_rate": 9.96e-05, + "loss": 4.6292, "step": 168 }, { - "epoch": 0.76, - "learning_rate": 9.960000000000001e-06, - "loss": 4.5812, + "epoch": 1.52, + "learning_rate": 0.0001002, + "loss": 4.5307, "step": 169 }, { - "epoch": 0.76, - "learning_rate": 1.002e-05, - "loss": 4.802, + "epoch": 1.53, + "learning_rate": 0.0001008, + "loss": 4.4417, "step": 170 }, { - "epoch": 0.77, - "learning_rate": 1.008e-05, - "loss": 4.724, + "epoch": 1.54, + "learning_rate": 0.0001014, + "loss": 4.471, "step": 171 }, { - "epoch": 0.77, - "learning_rate": 1.0140000000000001e-05, - "loss": 4.5918, + "epoch": 1.55, + "learning_rate": 0.000102, + "loss": 4.4859, "step": 172 }, { - "epoch": 0.78, - "learning_rate": 1.02e-05, - "loss": 4.6252, + "epoch": 1.56, + "learning_rate": 0.0001026, + "loss": 4.4296, "step": 173 }, { - "epoch": 0.78, - "learning_rate": 1.0260000000000002e-05, - "loss": 4.646, + "epoch": 1.57, + "learning_rate": 0.00010319999999999999, + "loss": 4.415, "step": 174 }, { - "epoch": 0.78, - "learning_rate": 1.032e-05, - "loss": 4.7106, + "epoch": 1.57, + "learning_rate": 0.00010379999999999999, + "loss": 4.346, "step": 175 }, { - "epoch": 0.79, - "learning_rate": 1.0379999999999999e-05, - "loss": 4.6334, + "epoch": 1.58, + "learning_rate": 0.00010439999999999999, + "loss": 4.3523, "step": 176 }, { - "epoch": 0.79, - "learning_rate": 1.044e-05, - "loss": 4.6837, + "epoch": 1.59, + "learning_rate": 0.00010499999999999999, + "loss": 4.2742, "step": 177 }, { - "epoch": 0.8, - "learning_rate": 1.05e-05, - "loss": 4.6227, + "epoch": 1.6, + "learning_rate": 0.00010559999999999998, + "loss": 4.3161, "step": 178 }, { - "epoch": 0.8, - "learning_rate": 1.0559999999999999e-05, - "loss": 4.6745, + "epoch": 1.61, + "learning_rate": 0.00010619999999999998, + "loss": 4.3355, "step": 179 }, { - "epoch": 0.81, - "learning_rate": 1.062e-05, - "loss": 4.6251, + "epoch": 1.62, + "learning_rate": 0.00010679999999999998, + "loss": 4.2709, "step": 180 }, { - "epoch": 0.81, - "learning_rate": 1.068e-05, - "loss": 4.7493, + "epoch": 1.63, + "learning_rate": 0.00010739999999999998, + "loss": 4.2814, "step": 181 }, { - "epoch": 0.82, - "learning_rate": 1.074e-05, - "loss": 4.5725, + "epoch": 1.64, + "learning_rate": 0.00010799999999999998, + "loss": 4.282, "step": 182 }, { - "epoch": 0.82, - "learning_rate": 1.08e-05, - "loss": 4.7591, + "epoch": 1.65, + "learning_rate": 0.00010859999999999998, + "loss": 4.2875, "step": 183 }, { - "epoch": 0.83, - "learning_rate": 1.086e-05, - "loss": 4.6551, + "epoch": 1.65, + "learning_rate": 0.00010919999999999998, + "loss": 4.3036, "step": 184 }, { - "epoch": 0.83, - "learning_rate": 1.092e-05, - "loss": 4.7288, + "epoch": 1.66, + "learning_rate": 0.00010979999999999999, + "loss": 4.2712, "step": 185 }, { - "epoch": 0.83, - "learning_rate": 1.098e-05, - "loss": 4.6177, + "epoch": 1.67, + "learning_rate": 0.00011039999999999999, + "loss": 4.0503, "step": 186 }, { - "epoch": 0.84, - "learning_rate": 1.104e-05, - "loss": 4.6272, + "epoch": 1.68, + "learning_rate": 0.00011099999999999999, + "loss": 5.436, "step": 187 }, { - "epoch": 0.84, - "learning_rate": 1.11e-05, - "loss": 4.7299, + "epoch": 1.69, + "learning_rate": 0.00011159999999999999, + "loss": 5.7089, "step": 188 }, { - "epoch": 0.85, - "learning_rate": 1.116e-05, - "loss": 4.6558, + "epoch": 1.7, + "learning_rate": 0.00011219999999999999, + "loss": 5.6178, "step": 189 }, { - "epoch": 0.85, - "learning_rate": 1.1220000000000001e-05, - "loss": 4.6845, + "epoch": 1.71, + "learning_rate": 0.00011279999999999999, + "loss": 5.3762, "step": 190 }, { - "epoch": 0.86, - "learning_rate": 1.128e-05, - "loss": 4.7153, + "epoch": 1.72, + "learning_rate": 0.00011339999999999999, + "loss": 4.9978, "step": 191 }, { - "epoch": 0.86, - "learning_rate": 1.134e-05, - "loss": 4.639, + "epoch": 1.73, + "learning_rate": 0.00011399999999999999, + "loss": 4.7483, "step": 192 }, { - "epoch": 0.87, - "learning_rate": 1.1400000000000001e-05, - "loss": 4.6371, + "epoch": 1.74, + "learning_rate": 0.0001146, + "loss": 4.6654, "step": 193 }, { - "epoch": 0.87, - "learning_rate": 1.146e-05, - "loss": 4.6644, + "epoch": 1.74, + "learning_rate": 0.0001152, + "loss": 4.585, "step": 194 }, { - "epoch": 0.87, - "learning_rate": 1.152e-05, - "loss": 4.6621, + "epoch": 1.75, + "learning_rate": 0.0001158, + "loss": 4.4605, "step": 195 }, { - "epoch": 0.88, - "learning_rate": 1.1580000000000001e-05, - "loss": 4.7406, + "epoch": 1.76, + "learning_rate": 0.0001164, + "loss": 4.4682, "step": 196 }, { - "epoch": 0.88, - "learning_rate": 1.164e-05, - "loss": 4.7166, + "epoch": 1.77, + "learning_rate": 0.000117, + "loss": 4.4438, "step": 197 }, { - "epoch": 0.89, - "learning_rate": 1.1700000000000001e-05, - "loss": 4.6911, + "epoch": 1.78, + "learning_rate": 0.0001176, + "loss": 4.3513, "step": 198 }, { - "epoch": 0.89, - "learning_rate": 1.1760000000000001e-05, - "loss": 4.8327, + "epoch": 1.79, + "learning_rate": 0.0001182, + "loss": 4.3653, "step": 199 }, { - "epoch": 0.9, - "learning_rate": 1.182e-05, - "loss": 4.5631, + "epoch": 1.8, + "learning_rate": 0.0001188, + "loss": 4.2853, "step": 200 }, { - "epoch": 0.9, - "learning_rate": 1.1880000000000001e-05, - "loss": 5.0977, + "epoch": 1.81, + "learning_rate": 0.0001194, + "loss": 4.2774, "step": 201 }, { - "epoch": 0.91, - "learning_rate": 1.1940000000000001e-05, - "loss": 5.2063, + "epoch": 1.82, + "learning_rate": 0.00011999999999999999, + "loss": 4.3431, "step": 202 }, { - "epoch": 0.91, - "learning_rate": 1.2e-05, - "loss": 5.2377, + "epoch": 1.83, + "learning_rate": 0.00012059999999999999, + "loss": 4.2969, "step": 203 }, { - "epoch": 0.91, - "learning_rate": 1.2060000000000001e-05, - "loss": 5.2376, + "epoch": 1.83, + "learning_rate": 0.00012119999999999999, + "loss": 4.3317, "step": 204 }, { - "epoch": 0.92, - "learning_rate": 1.2120000000000001e-05, - "loss": 5.1215, + "epoch": 1.84, + "learning_rate": 0.00012179999999999999, + "loss": 4.2546, "step": 205 }, { - "epoch": 0.92, - "learning_rate": 1.2180000000000002e-05, - "loss": 5.1138, + "epoch": 1.85, + "learning_rate": 0.0001224, + "loss": 4.3031, "step": 206 }, { - "epoch": 0.93, - "learning_rate": 1.224e-05, - "loss": 4.919, + "epoch": 1.86, + "learning_rate": 0.00012299999999999998, + "loss": 4.2682, "step": 207 }, { - "epoch": 0.93, - "learning_rate": 1.2299999999999999e-05, - "loss": 4.8602, + "epoch": 1.87, + "learning_rate": 0.0001236, + "loss": 4.3388, "step": 208 }, { - "epoch": 0.94, - "learning_rate": 1.236e-05, - "loss": 4.7021, + "epoch": 1.88, + "learning_rate": 0.00012419999999999998, + "loss": 4.2636, "step": 209 }, { - "epoch": 0.94, - "learning_rate": 1.242e-05, - "loss": 4.6941, + "epoch": 1.89, + "learning_rate": 0.00012479999999999997, + "loss": 4.1723, "step": 210 }, { - "epoch": 0.95, - "learning_rate": 1.2479999999999999e-05, - "loss": 4.6834, + "epoch": 1.9, + "learning_rate": 0.00012539999999999999, + "loss": 4.2299, "step": 211 }, { - "epoch": 0.95, - "learning_rate": 1.254e-05, - "loss": 4.7128, + "epoch": 1.91, + "learning_rate": 0.00012599999999999997, + "loss": 5.1353, "step": 212 }, { - "epoch": 0.96, - "learning_rate": 1.26e-05, - "loss": 4.7082, + "epoch": 1.91, + "learning_rate": 0.0001266, + "loss": 5.3656, "step": 213 }, { - "epoch": 0.96, - "learning_rate": 1.2659999999999999e-05, - "loss": 4.6521, + "epoch": 1.92, + "learning_rate": 0.00012719999999999997, + "loss": 5.2901, "step": 214 }, { - "epoch": 0.96, - "learning_rate": 1.272e-05, - "loss": 4.6062, + "epoch": 1.93, + "learning_rate": 0.0001278, + "loss": 4.9679, "step": 215 }, { - "epoch": 0.97, - "learning_rate": 1.278e-05, - "loss": 4.7968, + "epoch": 1.94, + "learning_rate": 0.00012839999999999998, + "loss": 4.74, "step": 216 }, { - "epoch": 0.97, - "learning_rate": 1.284e-05, - "loss": 4.6265, + "epoch": 1.95, + "learning_rate": 0.000129, + "loss": 4.6402, "step": 217 }, { - "epoch": 0.98, - "learning_rate": 1.29e-05, - "loss": 4.645, + "epoch": 1.96, + "learning_rate": 0.00012959999999999998, + "loss": 4.5565, "step": 218 }, { - "epoch": 0.98, - "learning_rate": 1.296e-05, - "loss": 4.7365, + "epoch": 1.97, + "learning_rate": 0.0001302, + "loss": 4.3116, "step": 219 }, { - "epoch": 0.99, - "learning_rate": 1.302e-05, - "loss": 4.712, + "epoch": 1.98, + "learning_rate": 0.00013079999999999998, + "loss": 4.3372, "step": 220 }, { - "epoch": 0.99, - "learning_rate": 1.308e-05, - "loss": 4.8344, + "epoch": 1.99, + "learning_rate": 0.0001314, + "loss": 4.4229, "step": 221 }, { - "epoch": 1.0, - "learning_rate": 1.314e-05, - "loss": 4.7704, + "epoch": 2.0, + "learning_rate": 0.00013199999999999998, + "loss": 4.2879, "step": 222 }, { - "epoch": 1.0, - "learning_rate": 1.32e-05, - "loss": 4.8638, + "epoch": 2.01, + "learning_rate": 0.0001326, + "loss": 6.599, "step": 223 }, { - "epoch": 1.0, - "learning_rate": 1.326e-05, - "loss": 4.6617, + "epoch": 2.02, + "learning_rate": 0.00013319999999999999, + "loss": 4.4173, "step": 224 }, { - "epoch": 1.01, - "learning_rate": 1.3320000000000001e-05, - "loss": 4.5823, + "epoch": 2.03, + "learning_rate": 0.0001338, + "loss": 4.423, "step": 225 }, { - "epoch": 1.01, - "learning_rate": 1.338e-05, - "loss": 4.6875, + "epoch": 2.04, + "learning_rate": 0.0001344, + "loss": 4.3792, "step": 226 }, { - "epoch": 1.02, - "learning_rate": 1.344e-05, - "loss": 4.6017, + "epoch": 2.04, + "learning_rate": 0.000135, + "loss": 4.3279, "step": 227 }, { - "epoch": 1.02, - "learning_rate": 1.3500000000000001e-05, - "loss": 4.6531, + "epoch": 2.05, + "learning_rate": 0.0001356, + "loss": 4.2576, "step": 228 }, { - "epoch": 1.03, - "learning_rate": 1.356e-05, - "loss": 4.6418, + "epoch": 2.06, + "learning_rate": 0.0001362, + "loss": 4.1748, "step": 229 }, { - "epoch": 1.03, - "learning_rate": 1.362e-05, - "loss": 4.605, + "epoch": 2.07, + "learning_rate": 0.0001368, + "loss": 4.1601, "step": 230 }, { - "epoch": 1.04, - "learning_rate": 1.3680000000000001e-05, - "loss": 4.5926, + "epoch": 2.08, + "learning_rate": 0.0001374, + "loss": 4.1532, "step": 231 }, { - "epoch": 1.04, - "learning_rate": 1.374e-05, - "loss": 4.5042, + "epoch": 2.09, + "learning_rate": 0.000138, + "loss": 4.08, "step": 232 }, { - "epoch": 1.04, - "learning_rate": 1.3800000000000002e-05, - "loss": 4.6279, + "epoch": 2.1, + "learning_rate": 0.0001386, + "loss": 4.089, "step": 233 }, { - "epoch": 1.05, - "learning_rate": 1.3860000000000001e-05, - "loss": 4.5776, + "epoch": 2.11, + "learning_rate": 0.0001392, + "loss": 4.0562, "step": 234 }, { - "epoch": 1.05, - "learning_rate": 1.392e-05, - "loss": 4.6267, + "epoch": 2.12, + "learning_rate": 0.00013979999999999998, + "loss": 4.0354, "step": 235 }, { - "epoch": 1.06, - "learning_rate": 1.3980000000000002e-05, - "loss": 4.6372, + "epoch": 2.13, + "learning_rate": 0.0001404, + "loss": 3.9942, "step": 236 }, { - "epoch": 1.06, - "learning_rate": 1.4040000000000001e-05, - "loss": 4.5405, + "epoch": 2.13, + "learning_rate": 0.00014099999999999998, + "loss": 3.9382, "step": 237 }, { - "epoch": 1.07, - "learning_rate": 1.4099999999999999e-05, - "loss": 4.6718, + "epoch": 2.14, + "learning_rate": 0.00014159999999999997, + "loss": 3.8565, "step": 238 }, { - "epoch": 1.07, - "learning_rate": 1.416e-05, - "loss": 4.5861, + "epoch": 2.15, + "learning_rate": 0.0001422, + "loss": 3.8866, "step": 239 }, { - "epoch": 1.08, - "learning_rate": 1.422e-05, - "loss": 4.6616, + "epoch": 2.16, + "learning_rate": 0.00014279999999999997, + "loss": 3.8903, "step": 240 }, { - "epoch": 1.08, - "learning_rate": 1.428e-05, - "loss": 4.5046, + "epoch": 2.17, + "learning_rate": 0.0001434, + "loss": 3.801, "step": 241 }, { - "epoch": 1.09, - "learning_rate": 1.434e-05, - "loss": 4.5972, + "epoch": 2.18, + "learning_rate": 0.00014399999999999998, + "loss": 3.8563, "step": 242 }, { - "epoch": 1.09, - "learning_rate": 1.44e-05, - "loss": 4.4748, + "epoch": 2.19, + "learning_rate": 0.0001446, + "loss": 3.8008, "step": 243 }, { - "epoch": 1.09, - "learning_rate": 1.446e-05, - "loss": 4.6557, + "epoch": 2.2, + "learning_rate": 0.00014519999999999998, + "loss": 3.7619, "step": 244 }, { - "epoch": 1.1, - "learning_rate": 1.452e-05, - "loss": 4.5391, + "epoch": 2.21, + "learning_rate": 0.0001458, + "loss": 3.7284, "step": 245 }, { - "epoch": 1.1, - "learning_rate": 1.458e-05, - "loss": 4.5457, + "epoch": 2.22, + "learning_rate": 0.00014639999999999998, + "loss": 3.6987, "step": 246 }, { - "epoch": 1.11, - "learning_rate": 1.464e-05, - "loss": 4.529, + "epoch": 2.22, + "learning_rate": 0.000147, + "loss": 3.5216, "step": 247 }, { - "epoch": 1.11, - "learning_rate": 1.47e-05, - "loss": 4.5921, + "epoch": 2.23, + "learning_rate": 0.00014759999999999998, + "loss": 5.8039, "step": 248 }, { - "epoch": 1.12, - "learning_rate": 1.4760000000000001e-05, - "loss": 4.5124, + "epoch": 2.24, + "learning_rate": 0.0001482, + "loss": 5.738, "step": 249 }, { - "epoch": 1.12, - "learning_rate": 1.482e-05, - "loss": 4.5682, + "epoch": 2.25, + "learning_rate": 0.00014879999999999998, + "loss": 5.4078, "step": 250 }, { - "epoch": 1.13, - "learning_rate": 1.488e-05, - "loss": 4.4989, + "epoch": 2.26, + "learning_rate": 0.0001494, + "loss": 5.0056, "step": 251 }, { - "epoch": 1.13, - "learning_rate": 1.4940000000000001e-05, - "loss": 4.424, + "epoch": 2.27, + "learning_rate": 0.00015, + "loss": 4.8029, "step": 252 }, { - "epoch": 1.13, - "learning_rate": 1.5e-05, - "loss": 4.5844, + "epoch": 2.28, + "learning_rate": 0.00015059999999999997, + "loss": 4.6233, "step": 253 }, { - "epoch": 1.14, - "learning_rate": 1.506e-05, - "loss": 4.5445, + "epoch": 2.29, + "learning_rate": 0.0001512, + "loss": 4.4243, "step": 254 }, { - "epoch": 1.14, - "learning_rate": 1.5120000000000001e-05, - "loss": 4.5088, + "epoch": 2.3, + "learning_rate": 0.00015179999999999998, + "loss": 4.2563, "step": 255 }, { - "epoch": 1.15, - "learning_rate": 1.518e-05, - "loss": 4.5028, + "epoch": 2.3, + "learning_rate": 0.0001524, + "loss": 4.2108, "step": 256 }, { - "epoch": 1.15, - "learning_rate": 1.524e-05, - "loss": 4.5224, + "epoch": 2.31, + "learning_rate": 0.00015299999999999998, + "loss": 4.1417, "step": 257 }, { - "epoch": 1.16, - "learning_rate": 1.53e-05, - "loss": 4.5915, + "epoch": 2.32, + "learning_rate": 0.0001536, + "loss": 4.2294, "step": 258 }, { - "epoch": 1.16, - "learning_rate": 1.5360000000000002e-05, - "loss": 4.4791, + "epoch": 2.33, + "learning_rate": 0.00015419999999999998, + "loss": 4.1063, "step": 259 }, { - "epoch": 1.17, - "learning_rate": 1.542e-05, - "loss": 4.493, + "epoch": 2.34, + "learning_rate": 0.0001548, + "loss": 4.0417, "step": 260 }, { - "epoch": 1.17, - "learning_rate": 1.548e-05, - "loss": 4.5977, + "epoch": 2.35, + "learning_rate": 0.00015539999999999998, + "loss": 3.9855, "step": 261 }, { - "epoch": 1.17, - "learning_rate": 1.554e-05, - "loss": 4.5526, + "epoch": 2.36, + "learning_rate": 0.000156, + "loss": 3.9049, "step": 262 }, { - "epoch": 1.18, - "learning_rate": 1.56e-05, - "loss": 4.4401, + "epoch": 2.37, + "learning_rate": 0.00015659999999999998, + "loss": 3.9212, "step": 263 }, { - "epoch": 1.18, - "learning_rate": 1.5660000000000003e-05, - "loss": 4.591, + "epoch": 2.38, + "learning_rate": 0.0001572, + "loss": 3.8797, "step": 264 }, { - "epoch": 1.19, - "learning_rate": 1.5720000000000002e-05, - "loss": 4.4974, + "epoch": 2.39, + "learning_rate": 0.0001578, + "loss": 3.9043, "step": 265 }, { - "epoch": 1.19, - "learning_rate": 1.578e-05, - "loss": 4.576, + "epoch": 2.39, + "learning_rate": 0.0001584, + "loss": 3.8784, "step": 266 }, { - "epoch": 1.2, - "learning_rate": 1.584e-05, - "loss": 4.5276, + "epoch": 2.4, + "learning_rate": 0.000159, + "loss": 3.8376, "step": 267 }, { - "epoch": 1.2, - "learning_rate": 1.59e-05, - "loss": 4.5828, + "epoch": 2.41, + "learning_rate": 0.0001596, + "loss": 3.916, "step": 268 }, { - "epoch": 1.21, - "learning_rate": 1.596e-05, - "loss": 4.4746, + "epoch": 2.42, + "learning_rate": 0.0001602, + "loss": 3.9107, "step": 269 }, { - "epoch": 1.21, - "learning_rate": 1.6020000000000002e-05, - "loss": 4.4368, + "epoch": 2.43, + "learning_rate": 0.0001608, + "loss": 3.7992, "step": 270 }, { - "epoch": 1.22, - "learning_rate": 1.6080000000000002e-05, - "loss": 4.5396, + "epoch": 2.44, + "learning_rate": 0.0001614, + "loss": 3.6413, "step": 271 }, { - "epoch": 1.22, - "learning_rate": 1.614e-05, - "loss": 4.5714, + "epoch": 2.45, + "learning_rate": 0.000162, + "loss": 3.5566, "step": 272 }, { - "epoch": 1.22, - "learning_rate": 1.62e-05, - "loss": 4.4684, + "epoch": 2.46, + "learning_rate": 0.0001626, + "loss": 5.6041, "step": 273 }, { - "epoch": 1.23, - "learning_rate": 1.626e-05, - "loss": 5.3867, + "epoch": 2.47, + "learning_rate": 0.0001632, + "loss": 5.8567, "step": 274 }, { - "epoch": 1.23, - "learning_rate": 1.6320000000000003e-05, - "loss": 5.2534, + "epoch": 2.48, + "learning_rate": 0.0001638, + "loss": 5.6862, "step": 275 }, { - "epoch": 1.24, - "learning_rate": 1.6380000000000002e-05, - "loss": 5.2386, + "epoch": 2.48, + "learning_rate": 0.0001644, + "loss": 5.4036, "step": 276 }, { - "epoch": 1.24, - "learning_rate": 1.6440000000000002e-05, - "loss": 5.1946, + "epoch": 2.49, + "learning_rate": 0.000165, + "loss": 4.9913, "step": 277 }, { - "epoch": 1.25, - "learning_rate": 1.65e-05, - "loss": 5.0371, + "epoch": 2.5, + "learning_rate": 0.0001656, + "loss": 4.7303, "step": 278 }, { - "epoch": 1.25, - "learning_rate": 1.656e-05, - "loss": 5.035, + "epoch": 2.51, + "learning_rate": 0.0001662, + "loss": 4.5465, "step": 279 }, { - "epoch": 1.26, - "learning_rate": 1.6620000000000004e-05, - "loss": 4.9167, + "epoch": 2.52, + "learning_rate": 0.0001668, + "loss": 4.33, "step": 280 }, { - "epoch": 1.26, - "learning_rate": 1.6680000000000003e-05, - "loss": 4.7318, + "epoch": 2.53, + "learning_rate": 0.0001674, + "loss": 4.2422, "step": 281 }, { - "epoch": 1.26, - "learning_rate": 1.6740000000000002e-05, - "loss": 4.7087, + "epoch": 2.54, + "learning_rate": 0.000168, + "loss": 4.2522, "step": 282 }, { - "epoch": 1.27, - "learning_rate": 1.6800000000000002e-05, - "loss": 4.5858, + "epoch": 2.55, + "learning_rate": 0.0001686, + "loss": 4.1577, "step": 283 }, { - "epoch": 1.27, - "learning_rate": 1.686e-05, - "loss": 4.5477, + "epoch": 2.56, + "learning_rate": 0.00016919999999999997, + "loss": 4.1028, "step": 284 }, { - "epoch": 1.28, - "learning_rate": 1.6919999999999997e-05, - "loss": 4.5948, + "epoch": 2.57, + "learning_rate": 0.00016979999999999998, + "loss": 4.1082, "step": 285 }, { - "epoch": 1.28, - "learning_rate": 1.698e-05, - "loss": 4.5715, + "epoch": 2.57, + "learning_rate": 0.00017039999999999997, + "loss": 4.0649, "step": 286 }, { - "epoch": 1.29, - "learning_rate": 1.704e-05, - "loss": 4.5578, + "epoch": 2.58, + "learning_rate": 0.00017099999999999998, + "loss": 4.0507, "step": 287 }, { - "epoch": 1.29, - "learning_rate": 1.71e-05, - "loss": 4.5813, + "epoch": 2.59, + "learning_rate": 0.00017159999999999997, + "loss": 3.921, "step": 288 }, { - "epoch": 1.3, - "learning_rate": 1.716e-05, - "loss": 4.5517, + "epoch": 2.6, + "learning_rate": 0.00017219999999999998, + "loss": 3.9083, "step": 289 }, { - "epoch": 1.3, - "learning_rate": 1.7219999999999998e-05, - "loss": 4.6325, + "epoch": 2.61, + "learning_rate": 0.00017279999999999997, + "loss": 3.8807, "step": 290 }, { - "epoch": 1.3, - "learning_rate": 1.728e-05, - "loss": 4.5377, + "epoch": 2.62, + "learning_rate": 0.00017339999999999996, + "loss": 3.9007, "step": 291 }, { - "epoch": 1.31, - "learning_rate": 1.734e-05, - "loss": 4.5294, + "epoch": 2.63, + "learning_rate": 0.00017399999999999997, + "loss": 3.8992, "step": 292 }, { - "epoch": 1.31, - "learning_rate": 1.74e-05, - "loss": 4.5711, + "epoch": 2.64, + "learning_rate": 0.00017459999999999996, + "loss": 3.9531, "step": 293 }, { - "epoch": 1.32, - "learning_rate": 1.746e-05, - "loss": 4.4966, + "epoch": 2.65, + "learning_rate": 0.00017519999999999998, + "loss": 3.8481, "step": 294 }, { - "epoch": 1.32, - "learning_rate": 1.7519999999999998e-05, - "loss": 4.4795, + "epoch": 2.65, + "learning_rate": 0.00017579999999999996, + "loss": 3.7191, "step": 295 }, { - "epoch": 1.33, - "learning_rate": 1.758e-05, - "loss": 4.4509, + "epoch": 2.66, + "learning_rate": 0.00017639999999999998, + "loss": 3.5684, "step": 296 }, { - "epoch": 1.33, - "learning_rate": 1.764e-05, - "loss": 4.458, + "epoch": 2.67, + "learning_rate": 0.00017699999999999997, + "loss": 3.4456, "step": 297 }, { - "epoch": 1.34, - "learning_rate": 1.77e-05, - "loss": 4.4804, + "epoch": 2.68, + "learning_rate": 0.00017759999999999998, + "loss": 5.4417, "step": 298 }, { - "epoch": 1.34, - "learning_rate": 1.77e-05, - "loss": 4.5604, + "epoch": 2.69, + "learning_rate": 0.00017819999999999997, + "loss": 5.4952, "step": 299 }, { - "epoch": 1.35, - "learning_rate": 1.776e-05, - "loss": 4.5135, + "epoch": 2.7, + "learning_rate": 0.00017879999999999998, + "loss": 5.2771, "step": 300 }, { - "epoch": 1.35, - "learning_rate": 1.782e-05, - "loss": 4.4916, + "epoch": 2.71, + "learning_rate": 0.00017939999999999997, + "loss": 4.9689, "step": 301 }, { - "epoch": 1.35, - "learning_rate": 1.7879999999999998e-05, - "loss": 4.4833, + "epoch": 2.72, + "learning_rate": 0.00017999999999999998, + "loss": 4.709, "step": 302 }, { - "epoch": 1.36, - "learning_rate": 1.794e-05, - "loss": 4.5261, + "epoch": 2.73, + "learning_rate": 0.00018059999999999997, + "loss": 4.507, "step": 303 }, { - "epoch": 1.36, - "learning_rate": 1.8e-05, - "loss": 4.4753, + "epoch": 2.74, + "learning_rate": 0.00018119999999999999, + "loss": 4.3383, "step": 304 }, { - "epoch": 1.37, - "learning_rate": 1.806e-05, - "loss": 4.4629, + "epoch": 2.74, + "learning_rate": 0.00018179999999999997, + "loss": 4.2212, "step": 305 }, { - "epoch": 1.37, - "learning_rate": 1.812e-05, - "loss": 4.5449, + "epoch": 2.75, + "learning_rate": 0.0001824, + "loss": 4.2254, "step": 306 }, { - "epoch": 1.38, - "learning_rate": 1.818e-05, - "loss": 4.5235, + "epoch": 2.76, + "learning_rate": 0.00018299999999999998, + "loss": 4.2006, "step": 307 }, { - "epoch": 1.38, - "learning_rate": 1.824e-05, - "loss": 4.5807, + "epoch": 2.77, + "learning_rate": 0.0001836, + "loss": 4.0609, "step": 308 }, { - "epoch": 1.39, - "learning_rate": 1.83e-05, - "loss": 4.5404, + "epoch": 2.78, + "learning_rate": 0.00018419999999999998, + "loss": 4.0235, "step": 309 }, { - "epoch": 1.39, - "learning_rate": 1.836e-05, - "loss": 4.5097, + "epoch": 2.79, + "learning_rate": 0.0001848, + "loss": 4.0431, "step": 310 }, { - "epoch": 1.39, - "learning_rate": 1.842e-05, - "loss": 4.4021, + "epoch": 2.8, + "learning_rate": 0.00018539999999999998, + "loss": 3.9653, "step": 311 }, { - "epoch": 1.4, - "learning_rate": 1.848e-05, - "loss": 4.3692, + "epoch": 2.81, + "learning_rate": 0.000186, + "loss": 3.9405, "step": 312 }, { - "epoch": 1.4, - "learning_rate": 1.854e-05, - "loss": 4.5206, + "epoch": 2.82, + "learning_rate": 0.00018659999999999998, + "loss": 3.8778, "step": 313 }, { - "epoch": 1.41, - "learning_rate": 1.86e-05, - "loss": 4.5324, + "epoch": 2.83, + "learning_rate": 0.0001872, + "loss": 3.8905, "step": 314 }, { - "epoch": 1.41, - "learning_rate": 1.866e-05, - "loss": 4.5604, + "epoch": 2.83, + "learning_rate": 0.00018779999999999998, + "loss": 3.8976, "step": 315 }, { - "epoch": 1.42, - "learning_rate": 1.872e-05, - "loss": 4.4855, + "epoch": 2.84, + "learning_rate": 0.00018839999999999997, + "loss": 3.8547, "step": 316 }, { - "epoch": 1.42, - "learning_rate": 1.878e-05, - "loss": 4.3344, + "epoch": 2.85, + "learning_rate": 0.00018899999999999999, + "loss": 3.8121, "step": 317 }, { - "epoch": 1.43, - "learning_rate": 1.884e-05, - "loss": 4.4, + "epoch": 2.86, + "learning_rate": 0.00018959999999999997, + "loss": 3.8189, "step": 318 }, { - "epoch": 1.43, - "learning_rate": 1.8900000000000002e-05, - "loss": 4.5497, + "epoch": 2.87, + "learning_rate": 0.0001902, + "loss": 3.7694, "step": 319 }, { - "epoch": 1.43, - "learning_rate": 1.896e-05, - "loss": 4.4678, + "epoch": 2.88, + "learning_rate": 0.00019079999999999998, + "loss": 3.6308, "step": 320 }, { - "epoch": 1.44, - "learning_rate": 1.902e-05, - "loss": 4.4474, + "epoch": 2.89, + "learning_rate": 0.0001914, + "loss": 3.5794, "step": 321 }, { - "epoch": 1.44, - "learning_rate": 1.908e-05, - "loss": 4.53, + "epoch": 2.9, + "learning_rate": 0.00019199999999999998, + "loss": 3.3702, "step": 322 }, { - "epoch": 1.45, - "learning_rate": 1.914e-05, - "loss": 4.5706, + "epoch": 2.91, + "learning_rate": 0.0001926, + "loss": 5.1264, "step": 323 }, { - "epoch": 1.45, - "learning_rate": 1.9200000000000003e-05, - "loss": 5.1229, + "epoch": 2.91, + "learning_rate": 0.00019319999999999998, + "loss": 4.9258, "step": 324 }, { - "epoch": 1.46, - "learning_rate": 1.9260000000000002e-05, - "loss": 5.1525, + "epoch": 2.92, + "learning_rate": 0.0001938, + "loss": 4.3596, "step": 325 }, { - "epoch": 1.46, - "learning_rate": 1.932e-05, - "loss": 5.2277, + "epoch": 2.93, + "learning_rate": 0.00019439999999999998, + "loss": 4.2863, "step": 326 }, { - "epoch": 1.47, - "learning_rate": 1.938e-05, - "loss": 5.0693, + "epoch": 2.94, + "learning_rate": 0.000195, + "loss": 4.1943, "step": 327 }, { - "epoch": 1.47, - "learning_rate": 1.944e-05, - "loss": 4.9382, + "epoch": 2.95, + "learning_rate": 0.00019559999999999998, + "loss": 4.2371, "step": 328 }, { - "epoch": 1.48, - "learning_rate": 1.95e-05, - "loss": 4.8279, + "epoch": 2.96, + "learning_rate": 0.0001962, + "loss": 4.0108, "step": 329 }, { - "epoch": 1.48, - "learning_rate": 1.9560000000000002e-05, - "loss": 4.7044, + "epoch": 2.97, + "learning_rate": 0.00019679999999999999, + "loss": 3.9665, "step": 330 }, { - "epoch": 1.48, - "learning_rate": 1.9620000000000002e-05, - "loss": 4.7312, + "epoch": 2.98, + "learning_rate": 0.0001974, + "loss": 3.9371, "step": 331 }, { - "epoch": 1.49, - "learning_rate": 1.968e-05, - "loss": 4.6491, + "epoch": 2.99, + "learning_rate": 0.000198, + "loss": 3.8094, "step": 332 }, { - "epoch": 1.49, - "learning_rate": 1.974e-05, - "loss": 4.598, + "epoch": 3.0, + "learning_rate": 0.0001986, + "loss": 3.6996, "step": 333 }, { - "epoch": 1.5, - "learning_rate": 1.98e-05, - "loss": 4.5046, + "epoch": 3.01, + "learning_rate": 0.0001992, + "loss": 6.1462, "step": 334 }, { - "epoch": 1.5, - "learning_rate": 1.9860000000000003e-05, - "loss": 4.5695, + "epoch": 3.02, + "learning_rate": 0.0001998, + "loss": 4.3462, "step": 335 }, { - "epoch": 1.51, - "learning_rate": 1.9920000000000002e-05, - "loss": 4.4972, + "epoch": 3.03, + "learning_rate": 0.0002004, + "loss": 4.1581, "step": 336 }, { - "epoch": 1.51, - "learning_rate": 1.9980000000000002e-05, - "loss": 4.5461, + "epoch": 3.04, + "learning_rate": 0.000201, + "loss": 4.1832, "step": 337 }, { - "epoch": 1.52, - "learning_rate": 2.004e-05, - "loss": 4.4988, + "epoch": 3.04, + "learning_rate": 0.0002016, + "loss": 4.1241, "step": 338 }, { - "epoch": 1.52, - "learning_rate": 2.01e-05, - "loss": 4.4832, + "epoch": 3.05, + "learning_rate": 0.0002022, + "loss": 3.9371, "step": 339 }, { - "epoch": 1.52, - "learning_rate": 2.016e-05, - "loss": 4.4368, + "epoch": 3.06, + "learning_rate": 0.0002028, + "loss": 3.8334, "step": 340 }, { - "epoch": 1.53, - "learning_rate": 2.0220000000000003e-05, - "loss": 4.5237, + "epoch": 3.07, + "learning_rate": 0.00020339999999999998, + "loss": 3.7693, "step": 341 }, { - "epoch": 1.53, - "learning_rate": 2.0280000000000002e-05, - "loss": 4.4749, + "epoch": 3.08, + "learning_rate": 0.000204, + "loss": 3.7465, "step": 342 }, { - "epoch": 1.54, - "learning_rate": 2.0340000000000002e-05, - "loss": 4.509, + "epoch": 3.09, + "learning_rate": 0.00020459999999999999, + "loss": 3.6217, "step": 343 }, { - "epoch": 1.54, - "learning_rate": 2.04e-05, - "loss": 4.4118, + "epoch": 3.1, + "learning_rate": 0.0002052, + "loss": 3.6467, "step": 344 }, { - "epoch": 1.55, - "learning_rate": 2.046e-05, - "loss": 4.5027, + "epoch": 3.11, + "learning_rate": 0.0002058, + "loss": 3.5401, "step": 345 }, { - "epoch": 1.55, - "learning_rate": 2.0520000000000003e-05, - "loss": 4.4594, + "epoch": 3.12, + "learning_rate": 0.00020639999999999998, + "loss": 3.5786, "step": 346 }, { - "epoch": 1.56, - "learning_rate": 2.0580000000000003e-05, - "loss": 4.4279, + "epoch": 3.13, + "learning_rate": 0.00020699999999999996, + "loss": 3.4404, "step": 347 }, { - "epoch": 1.56, - "learning_rate": 2.064e-05, - "loss": 4.4261, + "epoch": 3.13, + "learning_rate": 0.00020759999999999998, + "loss": 3.3864, "step": 348 }, { - "epoch": 1.57, - "learning_rate": 2.07e-05, - "loss": 4.5078, + "epoch": 3.14, + "learning_rate": 0.00020819999999999996, + "loss": 3.2147, "step": 349 }, { - "epoch": 1.57, - "learning_rate": 2.0759999999999998e-05, - "loss": 4.454, + "epoch": 3.15, + "learning_rate": 0.00020879999999999998, + "loss": 3.3568, "step": 350 }, { - "epoch": 1.57, - "learning_rate": 2.082e-05, - "loss": 4.4375, + "epoch": 3.16, + "learning_rate": 0.00020939999999999997, + "loss": 3.34, "step": 351 }, { - "epoch": 1.58, - "learning_rate": 2.088e-05, - "loss": 4.4272, + "epoch": 3.17, + "learning_rate": 0.00020999999999999998, + "loss": 3.2556, "step": 352 }, { - "epoch": 1.58, - "learning_rate": 2.094e-05, - "loss": 4.4982, + "epoch": 3.18, + "learning_rate": 0.00021059999999999997, + "loss": 3.2095, "step": 353 }, { - "epoch": 1.59, - "learning_rate": 2.1e-05, - "loss": 4.4815, + "epoch": 3.19, + "learning_rate": 0.00021119999999999996, + "loss": 3.237, "step": 354 }, { - "epoch": 1.59, - "learning_rate": 2.1059999999999998e-05, - "loss": 4.3681, + "epoch": 3.2, + "learning_rate": 0.00021179999999999997, + "loss": 3.0777, "step": 355 }, { - "epoch": 1.6, - "learning_rate": 2.1119999999999998e-05, - "loss": 4.3848, + "epoch": 3.21, + "learning_rate": 0.00021239999999999996, + "loss": 2.9569, "step": 356 }, { - "epoch": 1.6, - "learning_rate": 2.118e-05, - "loss": 4.4376, + "epoch": 3.22, + "learning_rate": 0.00021299999999999997, + "loss": 2.5873, "step": 357 }, { - "epoch": 1.61, - "learning_rate": 2.124e-05, - "loss": 4.4052, + "epoch": 3.22, + "learning_rate": 0.00021359999999999996, + "loss": 2.4589, "step": 358 }, { - "epoch": 1.61, - "learning_rate": 2.13e-05, - "loss": 4.4144, + "epoch": 3.23, + "learning_rate": 0.00021419999999999998, + "loss": 5.7497, "step": 359 }, { - "epoch": 1.61, - "learning_rate": 2.136e-05, - "loss": 4.4934, + "epoch": 3.24, + "learning_rate": 0.00021479999999999996, + "loss": 5.7524, "step": 360 }, { - "epoch": 1.62, - "learning_rate": 2.1419999999999998e-05, - "loss": 4.4825, + "epoch": 3.25, + "learning_rate": 0.00021539999999999998, + "loss": 5.2796, "step": 361 }, { - "epoch": 1.62, - "learning_rate": 2.148e-05, - "loss": 4.41, + "epoch": 3.26, + "learning_rate": 0.00021599999999999996, + "loss": 4.8365, "step": 362 }, { - "epoch": 1.63, - "learning_rate": 2.154e-05, - "loss": 4.5081, + "epoch": 3.27, + "learning_rate": 0.00021659999999999998, + "loss": 4.4809, "step": 363 }, { - "epoch": 1.63, - "learning_rate": 2.16e-05, - "loss": 4.4704, + "epoch": 3.28, + "learning_rate": 0.00021719999999999997, + "loss": 4.1632, "step": 364 }, { - "epoch": 1.64, - "learning_rate": 2.166e-05, - "loss": 4.4811, + "epoch": 3.29, + "learning_rate": 0.00021779999999999998, + "loss": 3.9612, "step": 365 }, { - "epoch": 1.64, - "learning_rate": 2.172e-05, - "loss": 4.5751, + "epoch": 3.3, + "learning_rate": 0.00021839999999999997, + "loss": 3.9396, "step": 366 }, { - "epoch": 1.65, - "learning_rate": 2.178e-05, - "loss": 4.5841, + "epoch": 3.3, + "learning_rate": 0.00021899999999999998, + "loss": 3.8539, "step": 367 }, { - "epoch": 1.65, - "learning_rate": 2.184e-05, - "loss": 4.5714, + "epoch": 3.31, + "learning_rate": 0.00021959999999999997, + "loss": 3.7321, "step": 368 }, { - "epoch": 1.65, - "learning_rate": 2.19e-05, - "loss": 4.6033, + "epoch": 3.32, + "learning_rate": 0.00022019999999999999, + "loss": 3.6681, "step": 369 }, { - "epoch": 1.66, - "learning_rate": 2.196e-05, - "loss": 4.4132, + "epoch": 3.33, + "learning_rate": 0.00022079999999999997, + "loss": 3.6725, "step": 370 }, { - "epoch": 1.66, - "learning_rate": 2.202e-05, - "loss": 4.6072, + "epoch": 3.34, + "learning_rate": 0.0002214, + "loss": 3.5996, "step": 371 }, { - "epoch": 1.67, - "learning_rate": 2.208e-05, - "loss": 4.4627, + "epoch": 3.35, + "learning_rate": 0.00022199999999999998, + "loss": 3.502, "step": 372 }, { - "epoch": 1.67, - "learning_rate": 2.214e-05, - "loss": 4.2765, + "epoch": 3.36, + "learning_rate": 0.0002226, + "loss": 3.4053, "step": 373 }, { - "epoch": 1.68, - "learning_rate": 2.22e-05, - "loss": 5.1555, + "epoch": 3.37, + "learning_rate": 0.00022319999999999998, + "loss": 3.3827, "step": 374 }, { - "epoch": 1.68, - "learning_rate": 2.226e-05, - "loss": 5.2, + "epoch": 3.38, + "learning_rate": 0.0002238, + "loss": 3.3173, "step": 375 }, { - "epoch": 1.69, - "learning_rate": 2.232e-05, - "loss": 5.211, + "epoch": 3.39, + "learning_rate": 0.00022439999999999998, + "loss": 3.315, "step": 376 }, { - "epoch": 1.69, - "learning_rate": 2.238e-05, - "loss": 5.0057, + "epoch": 3.39, + "learning_rate": 0.000225, + "loss": 3.3281, "step": 377 }, { - "epoch": 1.7, - "learning_rate": 2.2440000000000002e-05, - "loss": 4.9118, + "epoch": 3.4, + "learning_rate": 0.00022559999999999998, + "loss": 3.27, "step": 378 }, { - "epoch": 1.7, - "learning_rate": 2.25e-05, - "loss": 4.7976, + "epoch": 3.41, + "learning_rate": 0.00022619999999999997, + "loss": 3.2502, "step": 379 }, { - "epoch": 1.7, - "learning_rate": 2.256e-05, - "loss": 4.7151, + "epoch": 3.42, + "learning_rate": 0.00022679999999999998, + "loss": 3.1024, "step": 380 }, { - "epoch": 1.71, - "learning_rate": 2.262e-05, - "loss": 4.7074, + "epoch": 3.43, + "learning_rate": 0.00022739999999999997, + "loss": 2.9408, "step": 381 }, { - "epoch": 1.71, - "learning_rate": 2.268e-05, - "loss": 4.6109, + "epoch": 3.44, + "learning_rate": 0.00022799999999999999, + "loss": 2.7313, "step": 382 }, { - "epoch": 1.72, - "learning_rate": 2.274e-05, - "loss": 4.7031, + "epoch": 3.45, + "learning_rate": 0.00022859999999999997, + "loss": 2.4126, "step": 383 }, { - "epoch": 1.72, - "learning_rate": 2.2800000000000002e-05, - "loss": 4.4871, + "epoch": 3.46, + "learning_rate": 0.0002292, + "loss": 5.2794, "step": 384 }, { - "epoch": 1.73, - "learning_rate": 2.286e-05, - "loss": 4.4501, + "epoch": 3.47, + "learning_rate": 0.00022979999999999997, + "loss": 5.2546, "step": 385 }, { - "epoch": 1.73, - "learning_rate": 2.292e-05, - "loss": 4.4951, + "epoch": 3.48, + "learning_rate": 0.0002304, + "loss": 4.9179, "step": 386 }, { - "epoch": 1.74, - "learning_rate": 2.298e-05, - "loss": 4.448, + "epoch": 3.48, + "learning_rate": 0.00023099999999999998, + "loss": 4.6224, "step": 387 }, { - "epoch": 1.74, - "learning_rate": 2.304e-05, - "loss": 4.5137, + "epoch": 3.49, + "learning_rate": 0.0002316, + "loss": 4.3587, "step": 388 }, { - "epoch": 1.74, - "learning_rate": 2.3100000000000002e-05, - "loss": 4.4997, + "epoch": 3.5, + "learning_rate": 0.00023219999999999998, + "loss": 4.0705, "step": 389 }, { - "epoch": 1.75, - "learning_rate": 2.3160000000000002e-05, - "loss": 4.4241, + "epoch": 3.51, + "learning_rate": 0.0002328, + "loss": 3.9197, "step": 390 }, { - "epoch": 1.75, - "learning_rate": 2.322e-05, - "loss": 4.4799, + "epoch": 3.52, + "learning_rate": 0.00023339999999999998, + "loss": 3.8546, "step": 391 }, { - "epoch": 1.76, - "learning_rate": 2.328e-05, - "loss": 4.4917, + "epoch": 3.53, + "learning_rate": 0.000234, + "loss": 3.7795, "step": 392 }, { - "epoch": 1.76, - "learning_rate": 2.334e-05, - "loss": 4.5182, + "epoch": 3.54, + "learning_rate": 0.00023459999999999998, + "loss": 3.7303, "step": 393 }, { - "epoch": 1.77, - "learning_rate": 2.3400000000000003e-05, - "loss": 4.4918, + "epoch": 3.55, + "learning_rate": 0.0002352, + "loss": 3.7024, "step": 394 }, { - "epoch": 1.77, - "learning_rate": 2.3460000000000002e-05, - "loss": 4.4888, + "epoch": 3.56, + "learning_rate": 0.00023579999999999999, + "loss": 3.5815, "step": 395 }, { - "epoch": 1.78, - "learning_rate": 2.3520000000000002e-05, - "loss": 4.3878, + "epoch": 3.57, + "learning_rate": 0.0002364, + "loss": 3.5363, "step": 396 }, { - "epoch": 1.78, - "learning_rate": 2.358e-05, - "loss": 4.5381, + "epoch": 3.57, + "learning_rate": 0.000237, + "loss": 3.494, "step": 397 }, { - "epoch": 1.78, - "learning_rate": 2.364e-05, - "loss": 4.4603, + "epoch": 3.58, + "learning_rate": 0.0002376, + "loss": 3.3856, "step": 398 }, { - "epoch": 1.79, - "learning_rate": 2.37e-05, - "loss": 4.5001, + "epoch": 3.59, + "learning_rate": 0.0002382, + "loss": 3.3395, "step": 399 }, { - "epoch": 1.79, - "learning_rate": 2.3760000000000003e-05, - "loss": 4.4627, + "epoch": 3.6, + "learning_rate": 0.0002388, + "loss": 3.3198, "step": 400 }, { - "epoch": 1.8, - "learning_rate": 2.3820000000000002e-05, - "loss": 4.442, + "epoch": 3.61, + "learning_rate": 0.0002394, + "loss": 3.311, "step": 401 }, { - "epoch": 1.8, - "learning_rate": 2.3880000000000002e-05, - "loss": 4.3704, + "epoch": 3.62, + "learning_rate": 0.00023999999999999998, + "loss": 3.2043, "step": 402 }, { - "epoch": 1.81, - "learning_rate": 2.394e-05, - "loss": 4.4678, + "epoch": 3.63, + "learning_rate": 0.0002406, + "loss": 3.2077, "step": 403 }, { - "epoch": 1.81, - "learning_rate": 2.4e-05, - "loss": 4.5184, + "epoch": 3.64, + "learning_rate": 0.00024119999999999998, + "loss": 3.2145, "step": 404 }, { - "epoch": 1.82, - "learning_rate": 2.4060000000000003e-05, - "loss": 4.3222, + "epoch": 3.65, + "learning_rate": 0.0002418, + "loss": 3.0301, "step": 405 }, { - "epoch": 1.82, - "learning_rate": 2.4120000000000003e-05, - "loss": 4.4296, + "epoch": 3.65, + "learning_rate": 0.00024239999999999998, + "loss": 2.8861, "step": 406 }, { - "epoch": 1.83, - "learning_rate": 2.4180000000000002e-05, - "loss": 4.4505, + "epoch": 3.66, + "learning_rate": 0.000243, + "loss": 2.633, "step": 407 }, { - "epoch": 1.83, - "learning_rate": 2.4240000000000002e-05, - "loss": 4.4017, + "epoch": 3.67, + "learning_rate": 0.00024359999999999999, + "loss": 2.191, "step": 408 }, { - "epoch": 1.83, - "learning_rate": 2.43e-05, - "loss": 4.4974, + "epoch": 3.68, + "learning_rate": 0.00024419999999999997, + "loss": 5.444, "step": 409 }, { - "epoch": 1.84, - "learning_rate": 2.4360000000000004e-05, - "loss": 4.4035, + "epoch": 3.69, + "learning_rate": 0.0002448, + "loss": 5.6085, "step": 410 }, { - "epoch": 1.84, - "learning_rate": 2.442e-05, - "loss": 4.4994, + "epoch": 3.7, + "learning_rate": 0.00024539999999999995, + "loss": 5.2909, "step": 411 }, { - "epoch": 1.85, - "learning_rate": 2.448e-05, - "loss": 4.4141, + "epoch": 3.71, + "learning_rate": 0.00024599999999999996, + "loss": 4.7823, "step": 412 }, { - "epoch": 1.85, - "learning_rate": 2.454e-05, - "loss": 4.4548, + "epoch": 3.72, + "learning_rate": 0.0002466, + "loss": 4.4985, "step": 413 }, { - "epoch": 1.86, - "learning_rate": 2.4599999999999998e-05, - "loss": 4.4444, + "epoch": 3.73, + "learning_rate": 0.0002472, + "loss": 4.2019, "step": 414 }, { - "epoch": 1.86, - "learning_rate": 2.4659999999999998e-05, - "loss": 4.3861, + "epoch": 3.74, + "learning_rate": 0.00024779999999999995, + "loss": 4.024, "step": 415 }, { - "epoch": 1.87, - "learning_rate": 2.472e-05, - "loss": 4.4311, + "epoch": 3.74, + "learning_rate": 0.00024839999999999997, + "loss": 3.8789, "step": 416 }, { - "epoch": 1.87, - "learning_rate": 2.478e-05, - "loss": 4.4903, + "epoch": 3.75, + "learning_rate": 0.000249, + "loss": 3.7719, "step": 417 }, { - "epoch": 1.87, - "learning_rate": 2.484e-05, - "loss": 4.4648, + "epoch": 3.76, + "learning_rate": 0.00024959999999999994, + "loss": 3.773, "step": 418 }, { - "epoch": 1.88, - "learning_rate": 2.49e-05, - "loss": 4.3468, + "epoch": 3.77, + "learning_rate": 0.00025019999999999996, + "loss": 3.5885, "step": 419 }, { - "epoch": 1.88, - "learning_rate": 2.4959999999999998e-05, - "loss": 4.4791, + "epoch": 3.78, + "learning_rate": 0.00025079999999999997, + "loss": 3.5845, "step": 420 }, { - "epoch": 1.89, - "learning_rate": 2.502e-05, - "loss": 4.411, + "epoch": 3.79, + "learning_rate": 0.0002514, + "loss": 3.523, "step": 421 }, { - "epoch": 1.89, - "learning_rate": 2.508e-05, - "loss": 4.5466, + "epoch": 3.8, + "learning_rate": 0.00025199999999999995, + "loss": 3.4934, "step": 422 }, { - "epoch": 1.9, - "learning_rate": 2.514e-05, - "loss": 4.289, + "epoch": 3.81, + "learning_rate": 0.00025259999999999996, + "loss": 3.4074, "step": 423 }, { - "epoch": 1.9, - "learning_rate": 2.52e-05, - "loss": 5.1587, + "epoch": 3.82, + "learning_rate": 0.0002532, + "loss": 3.3231, "step": 424 }, { - "epoch": 1.91, - "learning_rate": 2.526e-05, - "loss": 5.2573, + "epoch": 3.83, + "learning_rate": 0.0002538, + "loss": 3.3202, "step": 425 }, { - "epoch": 1.91, - "learning_rate": 2.5319999999999998e-05, - "loss": 5.2852, + "epoch": 3.83, + "learning_rate": 0.00025439999999999995, + "loss": 3.2468, "step": 426 }, { - "epoch": 1.91, - "learning_rate": 2.538e-05, - "loss": 5.1566, + "epoch": 3.84, + "learning_rate": 0.00025499999999999996, + "loss": 3.2139, "step": 427 }, { - "epoch": 1.92, - "learning_rate": 2.544e-05, - "loss": 5.0713, + "epoch": 3.85, + "learning_rate": 0.0002556, + "loss": 3.1771, "step": 428 }, { - "epoch": 1.92, - "learning_rate": 2.55e-05, - "loss": 4.9891, + "epoch": 3.86, + "learning_rate": 0.0002562, + "loss": 3.056, "step": 429 }, { - "epoch": 1.93, - "learning_rate": 2.556e-05, - "loss": 4.6524, + "epoch": 3.87, + "learning_rate": 0.00025679999999999995, + "loss": 2.9464, "step": 430 }, { - "epoch": 1.93, - "learning_rate": 2.562e-05, - "loss": 4.6018, + "epoch": 3.88, + "learning_rate": 0.00025739999999999997, + "loss": 2.7896, "step": 431 }, { - "epoch": 1.94, - "learning_rate": 2.568e-05, - "loss": 4.5233, + "epoch": 3.89, + "learning_rate": 0.000258, + "loss": 2.5634, "step": 432 }, { - "epoch": 1.94, - "learning_rate": 2.574e-05, - "loss": 4.4211, + "epoch": 3.9, + "learning_rate": 0.0002586, + "loss": 2.1404, "step": 433 }, { - "epoch": 1.95, - "learning_rate": 2.58e-05, - "loss": 4.5052, + "epoch": 3.91, + "learning_rate": 0.00025919999999999996, + "loss": 5.0964, "step": 434 }, { - "epoch": 1.95, - "learning_rate": 2.586e-05, - "loss": 4.5205, + "epoch": 3.91, + "learning_rate": 0.00025979999999999997, + "loss": 5.1443, "step": 435 }, { - "epoch": 1.96, - "learning_rate": 2.592e-05, - "loss": 4.424, + "epoch": 3.92, + "learning_rate": 0.0002604, + "loss": 4.5983, "step": 436 }, { - "epoch": 1.96, - "learning_rate": 2.5980000000000002e-05, - "loss": 4.4676, + "epoch": 3.93, + "learning_rate": 0.000261, + "loss": 4.1566, "step": 437 }, { - "epoch": 1.96, - "learning_rate": 2.604e-05, - "loss": 4.4421, + "epoch": 3.94, + "learning_rate": 0.00026159999999999996, + "loss": 3.7846, "step": 438 }, { - "epoch": 1.97, - "learning_rate": 2.61e-05, - "loss": 4.3955, + "epoch": 3.95, + "learning_rate": 0.0002622, + "loss": 3.6671, "step": 439 }, { - "epoch": 1.97, - "learning_rate": 2.616e-05, - "loss": 4.453, + "epoch": 3.96, + "learning_rate": 0.0002628, + "loss": 3.4806, "step": 440 }, { - "epoch": 1.98, - "learning_rate": 2.622e-05, - "loss": 4.4004, + "epoch": 3.97, + "learning_rate": 0.00026339999999999995, + "loss": 3.5046, "step": 441 }, { - "epoch": 1.98, - "learning_rate": 2.628e-05, - "loss": 4.4617, + "epoch": 3.98, + "learning_rate": 0.00026399999999999997, + "loss": 3.3366, "step": 442 }, { - "epoch": 1.99, - "learning_rate": 2.6340000000000002e-05, - "loss": 4.5231, + "epoch": 3.99, + "learning_rate": 0.0002646, + "loss": 3.1014, "step": 443 }, { - "epoch": 1.99, - "learning_rate": 2.64e-05, - "loss": 4.518, + "epoch": 4.0, + "learning_rate": 0.0002652, + "loss": 2.7035, "step": 444 }, { - "epoch": 2.0, - "learning_rate": 2.646e-05, - "loss": 4.6626, + "epoch": 4.01, + "learning_rate": 0.00026579999999999996, + "loss": 5.0943, "step": 445 }, { - "epoch": 2.0, - "learning_rate": 2.652e-05, - "loss": 4.4557, + "epoch": 4.02, + "learning_rate": 0.00026639999999999997, + "loss": 3.9256, "step": 446 }, { - "epoch": 2.0, - "learning_rate": 2.658e-05, - "loss": 4.5692, + "epoch": 4.03, + "learning_rate": 0.000267, + "loss": 3.7867, "step": 447 }, { - "epoch": 2.01, - "learning_rate": 2.6640000000000002e-05, - "loss": 4.4734, + "epoch": 4.04, + "learning_rate": 0.0002676, + "loss": 3.6839, "step": 448 }, { - "epoch": 2.01, - "learning_rate": 2.6700000000000002e-05, - "loss": 4.534, + "epoch": 4.04, + "learning_rate": 0.00026819999999999996, + "loss": 3.6042, "step": 449 }, { - "epoch": 2.02, - "learning_rate": 2.676e-05, - "loss": 4.4035, + "epoch": 4.05, + "learning_rate": 0.0002688, + "loss": 3.3529, "step": 450 }, { - "epoch": 2.02, - "learning_rate": 2.682e-05, - "loss": 4.5474, + "epoch": 4.06, + "learning_rate": 0.0002694, + "loss": 3.3266, "step": 451 }, { - "epoch": 2.03, - "learning_rate": 2.688e-05, - "loss": 4.4534, + "epoch": 4.07, + "learning_rate": 0.00027, + "loss": 3.2218, "step": 452 }, { - "epoch": 2.03, - "learning_rate": 2.6940000000000003e-05, - "loss": 4.3725, + "epoch": 4.08, + "learning_rate": 0.00027059999999999996, + "loss": 3.1004, "step": 453 }, { - "epoch": 2.04, - "learning_rate": 2.7000000000000002e-05, - "loss": 4.397, + "epoch": 4.09, + "learning_rate": 0.0002712, + "loss": 3.0669, "step": 454 }, { - "epoch": 2.04, - "learning_rate": 2.7060000000000002e-05, - "loss": 4.3831, + "epoch": 4.1, + "learning_rate": 0.0002718, + "loss": 2.8998, "step": 455 }, { - "epoch": 2.04, - "learning_rate": 2.712e-05, - "loss": 4.4096, + "epoch": 4.11, + "learning_rate": 0.0002724, + "loss": 2.8966, "step": 456 }, { - "epoch": 2.05, - "learning_rate": 2.718e-05, - "loss": 4.2949, + "epoch": 4.12, + "learning_rate": 0.00027299999999999997, + "loss": 2.8114, "step": 457 }, { - "epoch": 2.05, - "learning_rate": 2.724e-05, - "loss": 4.344, + "epoch": 4.13, + "learning_rate": 0.0002736, + "loss": 2.6145, "step": 458 }, { - "epoch": 2.06, - "learning_rate": 2.7300000000000003e-05, - "loss": 4.2924, + "epoch": 4.13, + "learning_rate": 0.0002742, + "loss": 2.5475, "step": 459 }, { - "epoch": 2.06, - "learning_rate": 2.7360000000000002e-05, - "loss": 4.3708, + "epoch": 4.14, + "learning_rate": 0.0002742, + "loss": 2.5106, "step": 460 }, { - "epoch": 2.07, - "learning_rate": 2.7420000000000002e-05, - "loss": 4.3975, + "epoch": 4.15, + "learning_rate": 0.0002748, + "loss": 2.4872, "step": 461 }, { - "epoch": 2.07, - "learning_rate": 2.748e-05, - "loss": 4.4022, + "epoch": 4.16, + "learning_rate": 0.00027539999999999997, + "loss": 2.3962, "step": 462 }, { - "epoch": 2.08, - "learning_rate": 2.754e-05, - "loss": 4.3767, + "epoch": 4.17, + "learning_rate": 0.000276, + "loss": 2.2508, "step": 463 }, { - "epoch": 2.08, - "learning_rate": 2.7600000000000003e-05, - "loss": 4.2759, + "epoch": 4.18, + "learning_rate": 0.0002766, + "loss": 2.201, "step": 464 }, { - "epoch": 2.09, - "learning_rate": 2.7660000000000003e-05, - "loss": 4.3917, + "epoch": 4.19, + "learning_rate": 0.0002772, + "loss": 2.1194, "step": 465 }, { - "epoch": 2.09, - "learning_rate": 2.7720000000000002e-05, - "loss": 4.2681, + "epoch": 4.2, + "learning_rate": 0.0002778, + "loss": 1.8332, "step": 466 }, { - "epoch": 2.09, - "learning_rate": 2.778e-05, - "loss": 4.2556, + "epoch": 4.21, + "learning_rate": 0.0002784, + "loss": 1.6656, "step": 467 }, { - "epoch": 2.1, - "learning_rate": 2.784e-05, - "loss": 4.289, + "epoch": 4.22, + "learning_rate": 0.000279, + "loss": 1.2958, "step": 468 }, { - "epoch": 2.1, - "learning_rate": 2.79e-05, - "loss": 4.2804, + "epoch": 4.22, + "learning_rate": 0.00027959999999999997, + "loss": 1.0378, "step": 469 }, { - "epoch": 2.11, - "learning_rate": 2.7960000000000003e-05, - "loss": 4.2821, + "epoch": 4.23, + "learning_rate": 0.0002802, + "loss": 5.2736, "step": 470 }, { - "epoch": 2.11, - "learning_rate": 2.8020000000000003e-05, - "loss": 4.3523, + "epoch": 4.24, + "learning_rate": 0.0002808, + "loss": 4.6758, "step": 471 }, { - "epoch": 2.12, - "learning_rate": 2.8080000000000002e-05, - "loss": 4.2698, + "epoch": 4.25, + "learning_rate": 0.00028139999999999996, + "loss": 4.0751, "step": 472 }, { - "epoch": 2.12, - "learning_rate": 2.8139999999999998e-05, - "loss": 4.2848, + "epoch": 4.26, + "learning_rate": 0.00028199999999999997, + "loss": 3.7796, "step": 473 }, { - "epoch": 2.13, - "learning_rate": 2.8199999999999998e-05, - "loss": 4.1485, + "epoch": 4.27, + "learning_rate": 0.0002826, + "loss": 3.4917, "step": 474 }, { - "epoch": 2.13, - "learning_rate": 2.826e-05, - "loss": 4.1966, + "epoch": 4.28, + "learning_rate": 0.00028319999999999994, + "loss": 3.2089, "step": 475 }, { - "epoch": 2.13, - "learning_rate": 2.832e-05, - "loss": 4.2595, + "epoch": 4.29, + "learning_rate": 0.00028379999999999996, + "loss": 3.0105, "step": 476 }, { - "epoch": 2.14, - "learning_rate": 2.838e-05, - "loss": 4.3192, + "epoch": 4.3, + "learning_rate": 0.0002844, + "loss": 2.9742, "step": 477 }, { - "epoch": 2.14, - "learning_rate": 2.844e-05, - "loss": 4.2328, + "epoch": 4.3, + "learning_rate": 0.000285, + "loss": 2.8292, "step": 478 }, { - "epoch": 2.15, - "learning_rate": 2.8499999999999998e-05, - "loss": 4.2779, + "epoch": 4.31, + "learning_rate": 0.00028559999999999995, + "loss": 2.7116, "step": 479 }, { - "epoch": 2.15, - "learning_rate": 2.856e-05, - "loss": 4.2889, + "epoch": 4.32, + "learning_rate": 0.00028619999999999996, + "loss": 2.5631, "step": 480 }, { - "epoch": 2.16, - "learning_rate": 2.862e-05, - "loss": 4.2063, + "epoch": 4.33, + "learning_rate": 0.0002868, + "loss": 2.5057, "step": 481 }, { - "epoch": 2.16, - "learning_rate": 2.868e-05, - "loss": 4.2333, + "epoch": 4.34, + "learning_rate": 0.00028739999999999994, + "loss": 2.4719, "step": 482 }, { - "epoch": 2.17, - "learning_rate": 2.874e-05, - "loss": 4.289, + "epoch": 4.35, + "learning_rate": 0.00028799999999999995, + "loss": 2.2309, "step": 483 }, { - "epoch": 2.17, - "learning_rate": 2.88e-05, - "loss": 4.2304, + "epoch": 4.36, + "learning_rate": 0.00028859999999999997, + "loss": 2.2296, "step": 484 }, { - "epoch": 2.17, - "learning_rate": 2.8859999999999998e-05, - "loss": 4.2534, + "epoch": 4.37, + "learning_rate": 0.0002892, + "loss": 1.9529, "step": 485 }, { - "epoch": 2.18, - "learning_rate": 2.892e-05, - "loss": 4.1826, + "epoch": 4.38, + "learning_rate": 0.00028979999999999994, + "loss": 2.0596, "step": 486 }, { - "epoch": 2.18, - "learning_rate": 2.898e-05, - "loss": 4.2797, + "epoch": 4.39, + "learning_rate": 0.00029039999999999996, + "loss": 1.9794, "step": 487 }, { - "epoch": 2.19, - "learning_rate": 2.904e-05, - "loss": 4.2838, + "epoch": 4.39, + "learning_rate": 0.00029099999999999997, + "loss": 1.7684, "step": 488 }, { - "epoch": 2.19, - "learning_rate": 2.91e-05, - "loss": 4.2498, + "epoch": 4.4, + "learning_rate": 0.0002916, + "loss": 1.5721, "step": 489 }, { - "epoch": 2.2, - "learning_rate": 2.916e-05, - "loss": 4.205, + "epoch": 4.41, + "learning_rate": 0.00029219999999999995, + "loss": 1.6114, "step": 490 }, { - "epoch": 2.2, - "learning_rate": 2.922e-05, - "loss": 4.1591, + "epoch": 4.42, + "learning_rate": 0.00029279999999999996, + "loss": 1.4997, "step": 491 }, { - "epoch": 2.21, - "learning_rate": 2.928e-05, - "loss": 4.3021, + "epoch": 4.43, + "learning_rate": 0.0002934, + "loss": 1.2318, "step": 492 }, { - "epoch": 2.21, - "learning_rate": 2.934e-05, - "loss": 4.2822, + "epoch": 4.44, + "learning_rate": 0.000294, + "loss": 0.9552, "step": 493 }, { - "epoch": 2.22, - "learning_rate": 2.94e-05, - "loss": 4.1881, + "epoch": 4.45, + "learning_rate": 0.00029459999999999995, + "loss": 0.8515, "step": 494 }, { - "epoch": 2.22, - "learning_rate": 2.946e-05, - "loss": 4.3554, + "epoch": 4.46, + "learning_rate": 0.00029519999999999997, + "loss": 6.105, "step": 495 }, { - "epoch": 2.22, - "learning_rate": 2.9520000000000002e-05, - "loss": 4.0258, + "epoch": 4.47, + "learning_rate": 0.0002958, + "loss": 4.909, "step": 496 }, { - "epoch": 2.23, - "learning_rate": 2.958e-05, - "loss": 5.1196, + "epoch": 4.48, + "learning_rate": 0.0002964, + "loss": 3.761, "step": 497 }, { - "epoch": 2.23, - "learning_rate": 2.964e-05, - "loss": 5.1393, + "epoch": 4.48, + "learning_rate": 0.00029699999999999996, + "loss": 3.2873, "step": 498 }, { - "epoch": 2.24, - "learning_rate": 2.97e-05, - "loss": 5.1106, + "epoch": 4.49, + "learning_rate": 0.00029759999999999997, + "loss": 2.961, "step": 499 }, { - "epoch": 2.24, - "learning_rate": 2.976e-05, - "loss": 4.9509, + "epoch": 4.5, + "learning_rate": 0.0002982, + "loss": 2.7605, "step": 500 }, { - "epoch": 2.24, - "eval_loss": 4.63620138168335, - "eval_runtime": 439.1186, - "eval_samples_per_second": 6.017, - "eval_steps_per_second": 0.754, - "eval_wer": 1.0192383974613248, + "epoch": 4.5, + "eval_loss": 2.6299471855163574, + "eval_runtime": 336.2822, + "eval_samples_per_second": 7.856, + "eval_steps_per_second": 0.494, + "eval_wer": 1.4451408171360571, "step": 500 }, { - "epoch": 2.25, - "learning_rate": 2.982e-05, - "loss": 4.7798, + "epoch": 4.51, + "learning_rate": 0.0002988, + "loss": 2.3839, "step": 501 }, { - "epoch": 2.25, - "learning_rate": 2.9880000000000002e-05, - "loss": 4.7623, + "epoch": 4.52, + "learning_rate": 0.00029939999999999996, + "loss": 2.2854, "step": 502 }, { - "epoch": 2.26, - "learning_rate": 2.994e-05, - "loss": 4.589, + "epoch": 4.53, + "learning_rate": 0.0003, + "loss": 2.1582, "step": 503 }, { - "epoch": 2.26, - "learning_rate": 3e-05, - "loss": 4.6571, + "epoch": 4.54, + "learning_rate": 0.0002995081967213115, + "loss": 1.9157, "step": 504 }, { - "epoch": 2.26, - "learning_rate": 2.9982658959537573e-05, - "loss": 4.4665, + "epoch": 4.55, + "learning_rate": 0.0002990163934426229, + "loss": 1.851, "step": 505 }, { - "epoch": 2.27, - "learning_rate": 2.9965317919075145e-05, - "loss": 4.384, + "epoch": 4.56, + "learning_rate": 0.00029852459016393437, + "loss": 1.8247, "step": 506 }, { - "epoch": 2.27, - "learning_rate": 2.9947976878612717e-05, - "loss": 4.3958, + "epoch": 4.57, + "learning_rate": 0.00029803278688524587, + "loss": 1.7489, "step": 507 }, { - "epoch": 2.28, - "learning_rate": 2.993063583815029e-05, - "loss": 4.465, + "epoch": 4.57, + "learning_rate": 0.00029754098360655737, + "loss": 1.6941, "step": 508 }, { - "epoch": 2.28, - "learning_rate": 2.991329479768786e-05, - "loss": 4.4328, + "epoch": 4.58, + "learning_rate": 0.0002970491803278688, + "loss": 1.513, "step": 509 }, { - "epoch": 2.29, - "learning_rate": 2.9895953757225433e-05, - "loss": 4.3279, + "epoch": 4.59, + "learning_rate": 0.0002965573770491803, + "loss": 1.401, "step": 510 }, { - "epoch": 2.29, - "learning_rate": 2.987861271676301e-05, - "loss": 4.418, + "epoch": 4.6, + "learning_rate": 0.0002960655737704918, + "loss": 1.3516, "step": 511 }, { - "epoch": 2.3, - "learning_rate": 2.9861271676300577e-05, - "loss": 4.3604, + "epoch": 4.61, + "learning_rate": 0.00029557377049180326, + "loss": 1.27, "step": 512 }, { - "epoch": 2.3, - "learning_rate": 2.984393063583815e-05, - "loss": 4.3442, + "epoch": 4.62, + "learning_rate": 0.0002950819672131147, + "loss": 1.2286, "step": 513 }, { - "epoch": 2.3, - "learning_rate": 2.982658959537572e-05, - "loss": 4.3416, + "epoch": 4.63, + "learning_rate": 0.0002945901639344262, + "loss": 1.1139, "step": 514 }, { - "epoch": 2.31, - "learning_rate": 2.9809248554913297e-05, - "loss": 4.3385, + "epoch": 4.64, + "learning_rate": 0.0002940983606557377, + "loss": 1.1178, "step": 515 }, { - "epoch": 2.31, - "learning_rate": 2.979190751445087e-05, - "loss": 4.2913, + "epoch": 4.65, + "learning_rate": 0.00029360655737704916, + "loss": 1.0057, "step": 516 }, { - "epoch": 2.32, - "learning_rate": 2.977456647398844e-05, - "loss": 4.279, + "epoch": 4.65, + "learning_rate": 0.0002931147540983606, + "loss": 0.8247, "step": 517 }, { - "epoch": 2.32, - "learning_rate": 2.975722543352601e-05, - "loss": 4.2749, + "epoch": 4.66, + "learning_rate": 0.0002926229508196721, + "loss": 0.7677, "step": 518 }, { - "epoch": 2.33, - "learning_rate": 2.9739884393063585e-05, - "loss": 4.2359, + "epoch": 4.67, + "learning_rate": 0.00029213114754098355, + "loss": 0.8346, "step": 519 }, { - "epoch": 2.33, - "learning_rate": 2.9722543352601157e-05, - "loss": 4.2566, + "epoch": 4.68, + "learning_rate": 0.00029163934426229505, + "loss": 5.4198, "step": 520 }, { - "epoch": 2.34, - "learning_rate": 2.970520231213873e-05, - "loss": 4.2878, + "epoch": 4.69, + "learning_rate": 0.00029114754098360655, + "loss": 4.7027, "step": 521 }, { - "epoch": 2.34, - "learning_rate": 2.96878612716763e-05, - "loss": 4.4059, + "epoch": 4.7, + "learning_rate": 0.000290655737704918, + "loss": 3.6879, "step": 522 }, { - "epoch": 2.35, - "learning_rate": 2.9670520231213873e-05, - "loss": 4.3049, + "epoch": 4.71, + "learning_rate": 0.00029016393442622945, + "loss": 2.9378, "step": 523 }, { - "epoch": 2.35, - "learning_rate": 2.9653179190751446e-05, - "loss": 4.2217, + "epoch": 4.72, + "learning_rate": 0.00028967213114754095, + "loss": 2.511, "step": 524 }, { - "epoch": 2.35, - "learning_rate": 2.9635838150289018e-05, - "loss": 4.2402, + "epoch": 4.73, + "learning_rate": 0.00028918032786885245, + "loss": 2.1899, "step": 525 }, { - "epoch": 2.36, - "learning_rate": 2.961849710982659e-05, - "loss": 4.2514, + "epoch": 4.74, + "learning_rate": 0.0002886885245901639, + "loss": 1.9893, "step": 526 }, { - "epoch": 2.36, - "learning_rate": 2.9601156069364165e-05, - "loss": 4.2706, + "epoch": 4.74, + "learning_rate": 0.0002881967213114754, + "loss": 1.7267, "step": 527 }, { - "epoch": 2.37, - "learning_rate": 2.9583815028901734e-05, - "loss": 4.0966, + "epoch": 4.75, + "learning_rate": 0.00028770491803278684, + "loss": 1.5322, "step": 528 }, { - "epoch": 2.37, - "learning_rate": 2.9566473988439306e-05, - "loss": 4.3039, + "epoch": 4.76, + "learning_rate": 0.00028721311475409834, + "loss": 1.4467, "step": 529 }, { - "epoch": 2.38, - "learning_rate": 2.9549132947976878e-05, - "loss": 4.1996, + "epoch": 4.77, + "learning_rate": 0.0002867213114754098, + "loss": 1.3211, "step": 530 }, { - "epoch": 2.38, - "learning_rate": 2.9531791907514453e-05, - "loss": 4.3097, + "epoch": 4.78, + "learning_rate": 0.0002862295081967213, + "loss": 1.1658, "step": 531 }, { - "epoch": 2.39, - "learning_rate": 2.9514450867052025e-05, - "loss": 4.2551, + "epoch": 4.79, + "learning_rate": 0.0002857377049180328, + "loss": 1.1107, "step": 532 }, { - "epoch": 2.39, - "learning_rate": 2.9497109826589594e-05, - "loss": 4.252, + "epoch": 4.8, + "learning_rate": 0.00028524590163934424, + "loss": 1.0426, "step": 533 }, { - "epoch": 2.39, - "learning_rate": 2.9479768786127166e-05, - "loss": 4.2636, + "epoch": 4.81, + "learning_rate": 0.0002847540983606557, + "loss": 1.1146, "step": 534 }, { - "epoch": 2.4, - "learning_rate": 2.946242774566474e-05, - "loss": 4.1686, + "epoch": 4.82, + "learning_rate": 0.0002842622950819672, + "loss": 0.9751, "step": 535 }, { - "epoch": 2.4, - "learning_rate": 2.9445086705202314e-05, - "loss": 4.2744, + "epoch": 4.83, + "learning_rate": 0.0002837704918032787, + "loss": 0.9805, "step": 536 }, { - "epoch": 2.41, - "learning_rate": 2.9427745664739886e-05, - "loss": 4.3114, + "epoch": 4.83, + "learning_rate": 0.00028327868852459013, + "loss": 0.9604, "step": 537 }, { - "epoch": 2.41, - "learning_rate": 2.9410404624277454e-05, - "loss": 4.2249, + "epoch": 4.84, + "learning_rate": 0.00028278688524590163, + "loss": 0.825, "step": 538 }, { - "epoch": 2.42, - "learning_rate": 2.939306358381503e-05, - "loss": 4.3479, + "epoch": 4.85, + "learning_rate": 0.0002822950819672131, + "loss": 0.8417, "step": 539 }, { - "epoch": 2.42, - "learning_rate": 2.9375722543352602e-05, - "loss": 4.3015, + "epoch": 4.86, + "learning_rate": 0.0002818032786885246, + "loss": 0.851, "step": 540 }, { - "epoch": 2.43, - "learning_rate": 2.9358381502890174e-05, - "loss": 4.1955, + "epoch": 4.87, + "learning_rate": 0.00028131147540983603, + "loss": 0.6944, "step": 541 }, { - "epoch": 2.43, - "learning_rate": 2.9341040462427746e-05, - "loss": 4.283, + "epoch": 4.88, + "learning_rate": 0.00028081967213114753, + "loss": 0.6604, "step": 542 }, { - "epoch": 2.43, - "learning_rate": 2.9323699421965318e-05, - "loss": 4.2069, + "epoch": 4.89, + "learning_rate": 0.00028032786885245903, + "loss": 0.5602, "step": 543 }, { - "epoch": 2.44, - "learning_rate": 2.930635838150289e-05, - "loss": 4.2541, + "epoch": 4.9, + "learning_rate": 0.0002798360655737705, + "loss": 0.577, "step": 544 }, { - "epoch": 2.44, - "learning_rate": 2.9289017341040462e-05, - "loss": 4.2081, + "epoch": 4.91, + "learning_rate": 0.0002793442622950819, + "loss": 5.3539, "step": 545 }, { - "epoch": 2.45, - "learning_rate": 2.9271676300578034e-05, - "loss": 4.1246, + "epoch": 4.91, + "learning_rate": 0.0002788524590163934, + "loss": 3.4873, "step": 546 }, { - "epoch": 2.45, - "learning_rate": 2.925433526011561e-05, - "loss": 5.2825, + "epoch": 4.92, + "learning_rate": 0.00027836065573770487, + "loss": 1.9983, "step": 547 }, { - "epoch": 2.46, - "learning_rate": 2.9236994219653182e-05, - "loss": 5.1339, + "epoch": 4.93, + "learning_rate": 0.00027786885245901637, + "loss": 1.6671, "step": 548 }, { - "epoch": 2.46, - "learning_rate": 2.921965317919075e-05, - "loss": 5.171, + "epoch": 4.94, + "learning_rate": 0.00027737704918032787, + "loss": 1.535, "step": 549 }, { - "epoch": 2.47, - "learning_rate": 2.9202312138728323e-05, - "loss": 5.0012, + "epoch": 4.95, + "learning_rate": 0.0002768852459016393, + "loss": 1.2795, "step": 550 }, { - "epoch": 2.47, - "learning_rate": 2.9184971098265898e-05, - "loss": 4.8356, + "epoch": 4.96, + "learning_rate": 0.00027639344262295076, + "loss": 1.1159, "step": 551 }, { - "epoch": 2.48, - "learning_rate": 2.916763005780347e-05, - "loss": 4.7004, + "epoch": 4.97, + "learning_rate": 0.00027590163934426227, + "loss": 0.9898, "step": 552 }, { - "epoch": 2.48, - "learning_rate": 2.9150289017341042e-05, - "loss": 4.6067, + "epoch": 4.98, + "learning_rate": 0.00027540983606557377, + "loss": 0.8807, "step": 553 }, { - "epoch": 2.48, - "learning_rate": 2.913294797687861e-05, - "loss": 4.5256, + "epoch": 4.99, + "learning_rate": 0.0002749180327868852, + "loss": 0.8109, "step": 554 }, { - "epoch": 2.49, - "learning_rate": 2.9115606936416186e-05, - "loss": 4.5049, + "epoch": 5.0, + "learning_rate": 0.00027442622950819666, + "loss": 0.6494, "step": 555 }, { - "epoch": 2.49, - "learning_rate": 2.909826589595376e-05, - "loss": 4.5028, + "epoch": 5.01, + "learning_rate": 0.00027393442622950816, + "loss": 1.9571, "step": 556 }, { - "epoch": 2.5, - "learning_rate": 2.908092485549133e-05, - "loss": 4.3853, + "epoch": 5.02, + "learning_rate": 0.00027344262295081966, + "loss": 1.5384, "step": 557 }, { - "epoch": 2.5, - "learning_rate": 2.9063583815028903e-05, - "loss": 4.3765, + "epoch": 5.03, + "learning_rate": 0.0002729508196721311, + "loss": 1.2256, "step": 558 }, { - "epoch": 2.51, - "learning_rate": 2.9046242774566475e-05, - "loss": 4.4652, + "epoch": 5.04, + "learning_rate": 0.0002724590163934426, + "loss": 1.0547, "step": 559 }, { - "epoch": 2.51, - "learning_rate": 2.9028901734104047e-05, - "loss": 4.2795, + "epoch": 5.04, + "learning_rate": 0.0002719672131147541, + "loss": 0.9737, "step": 560 }, { - "epoch": 2.52, - "learning_rate": 2.901156069364162e-05, - "loss": 4.3639, + "epoch": 5.05, + "learning_rate": 0.00027147540983606556, + "loss": 0.8202, "step": 561 }, { - "epoch": 2.52, - "learning_rate": 2.899421965317919e-05, - "loss": 4.3035, + "epoch": 5.06, + "learning_rate": 0.000270983606557377, + "loss": 0.7816, "step": 562 }, { - "epoch": 2.52, - "learning_rate": 2.8976878612716766e-05, - "loss": 4.3586, + "epoch": 5.07, + "learning_rate": 0.0002704918032786885, + "loss": 0.7362, "step": 563 }, { - "epoch": 2.53, - "learning_rate": 2.8959537572254335e-05, - "loss": 4.2871, + "epoch": 5.08, + "learning_rate": 0.00027, + "loss": 0.6287, "step": 564 }, { - "epoch": 2.53, - "learning_rate": 2.8942196531791907e-05, - "loss": 4.3294, + "epoch": 5.09, + "learning_rate": 0.00026950819672131145, + "loss": 0.5932, "step": 565 }, { - "epoch": 2.54, - "learning_rate": 2.892485549132948e-05, - "loss": 4.3578, + "epoch": 5.1, + "learning_rate": 0.0002690163934426229, + "loss": 0.5249, "step": 566 }, { - "epoch": 2.54, - "learning_rate": 2.8907514450867055e-05, - "loss": 4.3041, + "epoch": 5.11, + "learning_rate": 0.0002685245901639344, + "loss": 0.5345, "step": 567 }, { - "epoch": 2.55, - "learning_rate": 2.8890173410404627e-05, - "loss": 4.2709, + "epoch": 5.12, + "learning_rate": 0.0002680327868852459, + "loss": 0.5351, "step": 568 }, { - "epoch": 2.55, - "learning_rate": 2.8872832369942195e-05, - "loss": 4.2124, + "epoch": 5.13, + "learning_rate": 0.00026754098360655734, + "loss": 0.4818, "step": 569 }, { - "epoch": 2.56, - "learning_rate": 2.8855491329479767e-05, - "loss": 4.2613, + "epoch": 5.13, + "learning_rate": 0.00026704918032786885, + "loss": 0.4507, "step": 570 }, { - "epoch": 2.56, - "learning_rate": 2.8838150289017343e-05, - "loss": 4.3141, + "epoch": 5.14, + "learning_rate": 0.00026655737704918035, + "loss": 0.4339, "step": 571 }, { - "epoch": 2.57, - "learning_rate": 2.8820809248554915e-05, - "loss": 4.2084, + "epoch": 5.15, + "learning_rate": 0.0002660655737704918, + "loss": 0.4029, "step": 572 }, { - "epoch": 2.57, - "learning_rate": 2.8803468208092487e-05, - "loss": 4.2483, + "epoch": 5.16, + "learning_rate": 0.00026557377049180324, + "loss": 0.4651, "step": 573 }, { - "epoch": 2.57, - "learning_rate": 2.878612716763006e-05, - "loss": 4.2259, + "epoch": 5.17, + "learning_rate": 0.00026508196721311474, + "loss": 0.3721, "step": 574 }, { - "epoch": 2.58, - "learning_rate": 2.876878612716763e-05, - "loss": 4.2839, + "epoch": 5.18, + "learning_rate": 0.0002645901639344262, + "loss": 0.3766, "step": 575 }, { - "epoch": 2.58, - "learning_rate": 2.8751445086705203e-05, - "loss": 4.2728, + "epoch": 5.19, + "learning_rate": 0.0002640983606557377, + "loss": 0.3318, "step": 576 }, { - "epoch": 2.59, - "learning_rate": 2.8734104046242775e-05, - "loss": 4.2867, + "epoch": 5.2, + "learning_rate": 0.00026360655737704913, + "loss": 0.331, "step": 577 }, { - "epoch": 2.59, - "learning_rate": 2.8716763005780347e-05, - "loss": 4.2084, + "epoch": 5.21, + "learning_rate": 0.00026311475409836063, + "loss": 0.2748, "step": 578 }, { - "epoch": 2.6, - "learning_rate": 2.8699421965317923e-05, - "loss": 4.2008, + "epoch": 5.22, + "learning_rate": 0.0002626229508196721, + "loss": 0.2241, "step": 579 }, { - "epoch": 2.6, - "learning_rate": 2.868208092485549e-05, - "loss": 4.2647, + "epoch": 5.22, + "learning_rate": 0.0002621311475409836, + "loss": 0.3001, "step": 580 }, { - "epoch": 2.61, - "learning_rate": 2.8664739884393064e-05, - "loss": 4.2243, + "epoch": 5.23, + "learning_rate": 0.0002616393442622951, + "loss": 3.1772, "step": 581 }, { - "epoch": 2.61, - "learning_rate": 2.8647398843930636e-05, - "loss": 4.3127, + "epoch": 5.24, + "learning_rate": 0.00026114754098360653, + "loss": 2.0168, "step": 582 }, { - "epoch": 2.61, - "learning_rate": 2.863005780346821e-05, - "loss": 4.287, + "epoch": 5.25, + "learning_rate": 0.000260655737704918, + "loss": 1.2479, "step": 583 }, { - "epoch": 2.62, - "learning_rate": 2.8612716763005783e-05, - "loss": 4.2541, + "epoch": 5.26, + "learning_rate": 0.0002601639344262295, + "loss": 1.2017, "step": 584 }, { - "epoch": 2.62, - "learning_rate": 2.8595375722543352e-05, - "loss": 4.3456, + "epoch": 5.27, + "learning_rate": 0.000259672131147541, + "loss": 1.1079, "step": 585 }, { - "epoch": 2.63, - "learning_rate": 2.8578034682080924e-05, - "loss": 4.1791, + "epoch": 5.28, + "learning_rate": 0.0002591803278688524, + "loss": 0.8699, "step": 586 }, { - "epoch": 2.63, - "learning_rate": 2.85606936416185e-05, - "loss": 4.2295, + "epoch": 5.29, + "learning_rate": 0.0002586885245901639, + "loss": 0.7881, "step": 587 }, { - "epoch": 2.64, - "learning_rate": 2.854335260115607e-05, - "loss": 4.2505, + "epoch": 5.3, + "learning_rate": 0.00025819672131147537, + "loss": 0.7279, "step": 588 }, { - "epoch": 2.64, - "learning_rate": 2.8526011560693643e-05, - "loss": 4.2815, + "epoch": 5.3, + "learning_rate": 0.00025770491803278687, + "loss": 0.6572, "step": 589 }, { - "epoch": 2.65, - "learning_rate": 2.8508670520231212e-05, - "loss": 4.264, + "epoch": 5.31, + "learning_rate": 0.0002572131147540983, + "loss": 0.6395, "step": 590 }, { - "epoch": 2.65, - "learning_rate": 2.8491329479768788e-05, - "loss": 4.2155, + "epoch": 5.32, + "learning_rate": 0.0002567213114754098, + "loss": 0.5414, "step": 591 }, { - "epoch": 2.65, - "learning_rate": 2.847398843930636e-05, - "loss": 4.264, + "epoch": 5.33, + "learning_rate": 0.0002562295081967213, + "loss": 0.5121, "step": 592 }, { - "epoch": 2.66, - "learning_rate": 2.8456647398843932e-05, - "loss": 4.1971, + "epoch": 5.34, + "learning_rate": 0.00025573770491803277, + "loss": 0.4897, "step": 593 }, { - "epoch": 2.66, - "learning_rate": 2.8439306358381504e-05, - "loss": 4.2744, + "epoch": 5.35, + "learning_rate": 0.0002552459016393442, + "loss": 0.4312, "step": 594 }, { - "epoch": 2.67, - "learning_rate": 2.8421965317919076e-05, - "loss": 4.3306, + "epoch": 5.36, + "learning_rate": 0.0002547540983606557, + "loss": 0.4397, "step": 595 }, { - "epoch": 2.67, - "learning_rate": 2.8404624277456648e-05, - "loss": 4.1645, + "epoch": 5.37, + "learning_rate": 0.0002542622950819672, + "loss": 0.3896, "step": 596 }, { - "epoch": 2.68, - "learning_rate": 2.838728323699422e-05, - "loss": 5.2117, + "epoch": 5.38, + "learning_rate": 0.00025377049180327866, + "loss": 0.3908, "step": 597 }, { - "epoch": 2.68, - "learning_rate": 2.8369942196531792e-05, - "loss": 5.3487, + "epoch": 5.39, + "learning_rate": 0.00025327868852459016, + "loss": 0.3858, "step": 598 }, { - "epoch": 2.69, - "learning_rate": 2.8352601156069368e-05, - "loss": 5.3785, + "epoch": 5.39, + "learning_rate": 0.0002527868852459016, + "loss": 0.3354, "step": 599 }, { - "epoch": 2.69, - "learning_rate": 2.833526011560694e-05, - "loss": 5.2952, + "epoch": 5.4, + "learning_rate": 0.0002522950819672131, + "loss": 0.3314, "step": 600 }, { - "epoch": 2.7, - "learning_rate": 2.8317919075144508e-05, - "loss": 5.2478, + "epoch": 5.41, + "learning_rate": 0.00025180327868852456, + "loss": 0.3332, "step": 601 }, { - "epoch": 2.7, - "learning_rate": 2.830057803468208e-05, - "loss": 5.0231, + "epoch": 5.42, + "learning_rate": 0.00025131147540983606, + "loss": 0.297, "step": 602 }, { - "epoch": 2.7, - "learning_rate": 2.8283236994219652e-05, - "loss": 4.8553, + "epoch": 5.43, + "learning_rate": 0.00025081967213114756, + "loss": 0.2414, "step": 603 }, { - "epoch": 2.71, - "learning_rate": 2.8265895953757228e-05, - "loss": 4.6718, + "epoch": 5.44, + "learning_rate": 0.000250327868852459, + "loss": 0.2282, "step": 604 }, { - "epoch": 2.71, - "learning_rate": 2.82485549132948e-05, - "loss": 4.5994, + "epoch": 5.45, + "learning_rate": 0.00024983606557377045, + "loss": 0.3007, "step": 605 }, { - "epoch": 2.72, - "learning_rate": 2.823121387283237e-05, - "loss": 4.4981, + "epoch": 5.46, + "learning_rate": 0.00024934426229508195, + "loss": 1.9302, "step": 606 }, { - "epoch": 2.72, - "learning_rate": 2.821387283236994e-05, - "loss": 4.4212, + "epoch": 5.47, + "learning_rate": 0.0002488524590163934, + "loss": 1.4471, "step": 607 }, { - "epoch": 2.73, - "learning_rate": 2.8196531791907516e-05, - "loss": 4.4431, + "epoch": 5.48, + "learning_rate": 0.0002483606557377049, + "loss": 1.0509, "step": 608 }, { - "epoch": 2.73, - "learning_rate": 2.8179190751445088e-05, - "loss": 4.3458, + "epoch": 5.48, + "learning_rate": 0.0002478688524590164, + "loss": 0.9157, "step": 609 }, { - "epoch": 2.74, - "learning_rate": 2.816184971098266e-05, - "loss": 4.3888, + "epoch": 5.49, + "learning_rate": 0.00024737704918032785, + "loss": 0.8162, "step": 610 }, { - "epoch": 2.74, - "learning_rate": 2.814450867052023e-05, - "loss": 4.2951, + "epoch": 5.5, + "learning_rate": 0.0002468852459016393, + "loss": 0.6961, "step": 611 }, { - "epoch": 2.74, - "learning_rate": 2.8127167630057804e-05, - "loss": 4.2744, + "epoch": 5.51, + "learning_rate": 0.0002463934426229508, + "loss": 0.6048, "step": 612 }, { - "epoch": 2.75, - "learning_rate": 2.8109826589595376e-05, - "loss": 4.3792, + "epoch": 5.52, + "learning_rate": 0.0002459016393442623, + "loss": 0.6278, "step": 613 }, { - "epoch": 2.75, - "learning_rate": 2.809248554913295e-05, - "loss": 4.3104, + "epoch": 5.53, + "learning_rate": 0.00024540983606557374, + "loss": 0.5126, "step": 614 }, { - "epoch": 2.76, - "learning_rate": 2.807514450867052e-05, - "loss": 4.1727, + "epoch": 5.54, + "learning_rate": 0.0002449180327868852, + "loss": 0.4451, "step": 615 }, { - "epoch": 2.76, - "learning_rate": 2.8057803468208093e-05, - "loss": 4.3698, + "epoch": 5.55, + "learning_rate": 0.0002444262295081967, + "loss": 0.4496, "step": 616 }, { - "epoch": 2.77, - "learning_rate": 2.8040462427745665e-05, - "loss": 4.297, + "epoch": 5.56, + "learning_rate": 0.00024393442622950816, + "loss": 0.3534, "step": 617 }, { - "epoch": 2.77, - "learning_rate": 2.8023121387283237e-05, - "loss": 4.3592, + "epoch": 5.57, + "learning_rate": 0.00024344262295081966, + "loss": 0.3675, "step": 618 }, { - "epoch": 2.78, - "learning_rate": 2.800578034682081e-05, - "loss": 4.2746, + "epoch": 5.57, + "learning_rate": 0.00024295081967213114, + "loss": 0.3927, "step": 619 }, { - "epoch": 2.78, - "learning_rate": 2.7988439306358384e-05, - "loss": 4.3185, + "epoch": 5.58, + "learning_rate": 0.0002424590163934426, + "loss": 0.3711, "step": 620 }, { - "epoch": 2.78, - "learning_rate": 2.7971098265895953e-05, - "loss": 4.3034, + "epoch": 5.59, + "learning_rate": 0.00024196721311475406, + "loss": 0.3521, "step": 621 }, { - "epoch": 2.79, - "learning_rate": 2.7953757225433525e-05, - "loss": 4.2336, + "epoch": 5.6, + "learning_rate": 0.00024147540983606556, + "loss": 0.3142, "step": 622 }, { - "epoch": 2.79, - "learning_rate": 2.7936416184971097e-05, - "loss": 4.2943, + "epoch": 5.61, + "learning_rate": 0.00024098360655737703, + "loss": 0.3412, "step": 623 }, { - "epoch": 2.8, - "learning_rate": 2.7919075144508673e-05, - "loss": 4.1664, + "epoch": 5.62, + "learning_rate": 0.0002404918032786885, + "loss": 0.2855, "step": 624 }, { - "epoch": 2.8, - "learning_rate": 2.7901734104046245e-05, - "loss": 4.2394, + "epoch": 5.63, + "learning_rate": 0.00023999999999999998, + "loss": 0.3093, "step": 625 }, { - "epoch": 2.81, - "learning_rate": 2.7884393063583817e-05, - "loss": 4.3155, + "epoch": 5.64, + "learning_rate": 0.00023950819672131145, + "loss": 0.3254, "step": 626 }, { - "epoch": 2.81, - "learning_rate": 2.7867052023121385e-05, - "loss": 4.3109, + "epoch": 5.65, + "learning_rate": 0.00023901639344262293, + "loss": 0.3263, "step": 627 }, { - "epoch": 2.82, - "learning_rate": 2.784971098265896e-05, - "loss": 4.1935, + "epoch": 5.65, + "learning_rate": 0.0002385245901639344, + "loss": 0.2527, "step": 628 }, { - "epoch": 2.82, - "learning_rate": 2.7832369942196533e-05, - "loss": 4.224, + "epoch": 5.66, + "learning_rate": 0.00023803278688524587, + "loss": 0.2214, "step": 629 }, { - "epoch": 2.83, - "learning_rate": 2.7815028901734105e-05, - "loss": 4.2271, + "epoch": 5.67, + "learning_rate": 0.00023754098360655737, + "loss": 0.305, "step": 630 }, { - "epoch": 2.83, - "learning_rate": 2.7797687861271677e-05, - "loss": 4.2469, + "epoch": 5.68, + "learning_rate": 0.00023704918032786882, + "loss": 1.1692, "step": 631 }, { - "epoch": 2.83, - "learning_rate": 2.778034682080925e-05, - "loss": 4.2321, + "epoch": 5.69, + "learning_rate": 0.0002365573770491803, + "loss": 0.832, "step": 632 }, { - "epoch": 2.84, - "learning_rate": 2.776300578034682e-05, - "loss": 4.1939, + "epoch": 5.7, + "learning_rate": 0.00023606557377049177, + "loss": 0.6695, "step": 633 }, { - "epoch": 2.84, - "learning_rate": 2.7745664739884393e-05, - "loss": 4.2496, + "epoch": 5.71, + "learning_rate": 0.00023557377049180327, + "loss": 0.6161, "step": 634 }, { - "epoch": 2.85, - "learning_rate": 2.7728323699421965e-05, - "loss": 4.2124, + "epoch": 5.72, + "learning_rate": 0.00023508196721311474, + "loss": 0.5685, "step": 635 }, { - "epoch": 2.85, - "learning_rate": 2.771098265895954e-05, - "loss": 4.1494, + "epoch": 5.73, + "learning_rate": 0.00023459016393442622, + "loss": 0.5094, "step": 636 }, { - "epoch": 2.86, - "learning_rate": 2.769364161849711e-05, - "loss": 4.2576, + "epoch": 5.74, + "learning_rate": 0.00023409836065573766, + "loss": 0.4618, "step": 637 }, { - "epoch": 2.86, - "learning_rate": 2.767630057803468e-05, - "loss": 4.2654, + "epoch": 5.74, + "learning_rate": 0.00023360655737704916, + "loss": 0.4221, "step": 638 }, { - "epoch": 2.87, - "learning_rate": 2.7658959537572254e-05, - "loss": 4.2854, + "epoch": 5.75, + "learning_rate": 0.00023311475409836064, + "loss": 0.3998, "step": 639 }, { - "epoch": 2.87, - "learning_rate": 2.764161849710983e-05, - "loss": 4.3885, + "epoch": 5.76, + "learning_rate": 0.0002326229508196721, + "loss": 0.369, "step": 640 }, { - "epoch": 2.87, - "learning_rate": 2.76242774566474e-05, - "loss": 4.2676, + "epoch": 5.77, + "learning_rate": 0.00023213114754098358, + "loss": 0.3503, "step": 641 }, { - "epoch": 2.88, - "learning_rate": 2.760693641618497e-05, - "loss": 4.2418, + "epoch": 5.78, + "learning_rate": 0.00023163934426229506, + "loss": 0.3426, "step": 642 }, { - "epoch": 2.88, - "learning_rate": 2.7589595375722542e-05, - "loss": 4.1392, + "epoch": 5.79, + "learning_rate": 0.00023114754098360653, + "loss": 0.3081, "step": 643 }, { - "epoch": 2.89, - "learning_rate": 2.7572254335260117e-05, - "loss": 4.2529, + "epoch": 5.8, + "learning_rate": 0.000230655737704918, + "loss": 0.321, "step": 644 }, { - "epoch": 2.89, - "learning_rate": 2.755491329479769e-05, - "loss": 4.1777, + "epoch": 5.81, + "learning_rate": 0.00023016393442622948, + "loss": 0.3081, "step": 645 }, { - "epoch": 2.9, - "learning_rate": 2.753757225433526e-05, - "loss": 4.1001, + "epoch": 5.82, + "learning_rate": 0.00022967213114754098, + "loss": 0.3378, "step": 646 }, { - "epoch": 2.9, - "learning_rate": 2.752023121387283e-05, - "loss": 5.2074, + "epoch": 5.83, + "learning_rate": 0.00022918032786885245, + "loss": 0.2946, "step": 647 }, { - "epoch": 2.91, - "learning_rate": 2.7502890173410406e-05, - "loss": 5.2898, + "epoch": 5.83, + "learning_rate": 0.0002286885245901639, + "loss": 0.3051, "step": 648 }, { - "epoch": 2.91, - "learning_rate": 2.7485549132947978e-05, - "loss": 5.2925, + "epoch": 5.84, + "learning_rate": 0.00022819672131147537, + "loss": 0.2399, "step": 649 }, { - "epoch": 2.91, - "learning_rate": 2.746820809248555e-05, - "loss": 5.307, + "epoch": 5.85, + "learning_rate": 0.00022770491803278688, + "loss": 0.2973, "step": 650 }, { - "epoch": 2.92, - "learning_rate": 2.7450867052023122e-05, - "loss": 5.0791, + "epoch": 5.86, + "learning_rate": 0.00022721311475409835, + "loss": 0.2952, "step": 651 }, { - "epoch": 2.92, - "learning_rate": 2.7433526011560697e-05, - "loss": 4.8647, + "epoch": 5.87, + "learning_rate": 0.00022672131147540982, + "loss": 0.2593, "step": 652 }, { - "epoch": 2.93, - "learning_rate": 2.7416184971098266e-05, - "loss": 4.8926, + "epoch": 5.88, + "learning_rate": 0.00022622950819672127, + "loss": 0.2495, "step": 653 }, { - "epoch": 2.93, - "learning_rate": 2.7398843930635838e-05, - "loss": 4.6648, + "epoch": 5.89, + "learning_rate": 0.00022573770491803277, + "loss": 0.2275, "step": 654 }, { - "epoch": 2.94, - "learning_rate": 2.738150289017341e-05, - "loss": 4.5792, + "epoch": 5.9, + "learning_rate": 0.00022524590163934424, + "loss": 0.2896, "step": 655 }, { - "epoch": 2.94, - "learning_rate": 2.7364161849710986e-05, - "loss": 4.4372, + "epoch": 5.91, + "learning_rate": 0.00022475409836065572, + "loss": 0.8577, "step": 656 }, { - "epoch": 2.95, - "learning_rate": 2.7346820809248558e-05, - "loss": 4.3201, + "epoch": 5.91, + "learning_rate": 0.0002242622950819672, + "loss": 0.6569, "step": 657 }, { - "epoch": 2.95, - "learning_rate": 2.7329479768786126e-05, - "loss": 4.3811, + "epoch": 5.92, + "learning_rate": 0.0002237704918032787, + "loss": 0.5387, "step": 658 }, { - "epoch": 2.96, - "learning_rate": 2.73121387283237e-05, - "loss": 4.3014, + "epoch": 5.93, + "learning_rate": 0.00022327868852459014, + "loss": 0.4774, "step": 659 }, { - "epoch": 2.96, - "learning_rate": 2.7294797687861274e-05, - "loss": 4.3152, + "epoch": 5.94, + "learning_rate": 0.0002227868852459016, + "loss": 0.4175, "step": 660 }, { - "epoch": 2.96, - "learning_rate": 2.7277456647398846e-05, - "loss": 4.3751, + "epoch": 5.95, + "learning_rate": 0.00022229508196721309, + "loss": 0.3658, "step": 661 }, { - "epoch": 2.97, - "learning_rate": 2.7260115606936418e-05, - "loss": 4.2581, + "epoch": 5.96, + "learning_rate": 0.00022180327868852459, + "loss": 0.3594, "step": 662 }, { - "epoch": 2.97, - "learning_rate": 2.7242774566473987e-05, - "loss": 4.2483, + "epoch": 5.97, + "learning_rate": 0.00022131147540983606, + "loss": 0.3114, "step": 663 }, { - "epoch": 2.98, - "learning_rate": 2.7225433526011562e-05, - "loss": 4.2555, + "epoch": 5.98, + "learning_rate": 0.0002208196721311475, + "loss": 0.2732, "step": 664 }, { - "epoch": 2.98, - "learning_rate": 2.7208092485549134e-05, - "loss": 4.3171, + "epoch": 5.99, + "learning_rate": 0.00022032786885245898, + "loss": 0.2827, "step": 665 }, { - "epoch": 2.99, - "learning_rate": 2.7190751445086706e-05, - "loss": 4.1631, + "epoch": 6.0, + "learning_rate": 0.00021983606557377048, + "loss": 0.2918, "step": 666 }, { - "epoch": 2.99, - "learning_rate": 2.717341040462428e-05, - "loss": 4.288, + "epoch": 6.01, + "learning_rate": 0.00021934426229508195, + "loss": 0.6729, "step": 667 }, { - "epoch": 3.0, - "learning_rate": 2.715606936416185e-05, - "loss": 4.2888, + "epoch": 6.02, + "learning_rate": 0.00021885245901639343, + "loss": 0.4497, "step": 668 }, { - "epoch": 3.0, - "learning_rate": 2.7138728323699422e-05, - "loss": 4.3506, + "epoch": 6.03, + "learning_rate": 0.0002183606557377049, + "loss": 0.3901, "step": 669 }, { - "epoch": 3.0, - "learning_rate": 2.7121387283236995e-05, - "loss": 4.3771, + "epoch": 6.04, + "learning_rate": 0.00021786885245901638, + "loss": 0.3872, "step": 670 }, { - "epoch": 3.01, - "learning_rate": 2.7104046242774567e-05, - "loss": 4.3568, + "epoch": 6.04, + "learning_rate": 0.00021737704918032785, + "loss": 0.3052, "step": 671 }, { - "epoch": 3.01, - "learning_rate": 2.7086705202312142e-05, - "loss": 4.3573, + "epoch": 6.05, + "learning_rate": 0.00021688524590163932, + "loss": 0.2726, "step": 672 }, { - "epoch": 3.02, - "learning_rate": 2.706936416184971e-05, - "loss": 4.3189, + "epoch": 6.06, + "learning_rate": 0.0002163934426229508, + "loss": 0.2653, "step": 673 }, { - "epoch": 3.02, - "learning_rate": 2.7052023121387283e-05, - "loss": 4.3255, + "epoch": 6.07, + "learning_rate": 0.0002159016393442623, + "loss": 0.2387, "step": 674 }, { - "epoch": 3.03, - "learning_rate": 2.7034682080924855e-05, - "loss": 4.3107, + "epoch": 6.08, + "learning_rate": 0.00021540983606557374, + "loss": 0.2052, "step": 675 }, { - "epoch": 3.03, - "learning_rate": 2.701734104046243e-05, - "loss": 4.3297, + "epoch": 6.09, + "learning_rate": 0.00021491803278688522, + "loss": 0.2342, "step": 676 }, { - "epoch": 3.04, - "learning_rate": 2.7000000000000002e-05, - "loss": 4.2507, + "epoch": 6.1, + "learning_rate": 0.0002144262295081967, + "loss": 0.1993, "step": 677 }, { - "epoch": 3.04, - "learning_rate": 2.6982658959537574e-05, - "loss": 4.1454, + "epoch": 6.11, + "learning_rate": 0.0002139344262295082, + "loss": 0.1983, "step": 678 }, { - "epoch": 3.04, - "learning_rate": 2.6965317919075143e-05, - "loss": 4.2008, + "epoch": 6.12, + "learning_rate": 0.00021344262295081967, + "loss": 0.1869, "step": 679 }, { - "epoch": 3.05, - "learning_rate": 2.694797687861272e-05, - "loss": 4.1477, + "epoch": 6.13, + "learning_rate": 0.00021295081967213114, + "loss": 0.1725, "step": 680 }, { - "epoch": 3.05, - "learning_rate": 2.693063583815029e-05, - "loss": 4.1979, + "epoch": 6.13, + "learning_rate": 0.00021245901639344259, + "loss": 0.1686, "step": 681 }, { - "epoch": 3.06, - "learning_rate": 2.6913294797687863e-05, - "loss": 4.2501, + "epoch": 6.14, + "learning_rate": 0.0002119672131147541, + "loss": 0.1684, "step": 682 }, { - "epoch": 3.06, - "learning_rate": 2.6895953757225435e-05, - "loss": 4.149, + "epoch": 6.15, + "learning_rate": 0.00021147540983606556, + "loss": 0.1432, "step": 683 }, { - "epoch": 3.07, - "learning_rate": 2.6878612716763007e-05, - "loss": 4.1838, + "epoch": 6.16, + "learning_rate": 0.00021098360655737703, + "loss": 0.1873, "step": 684 }, { - "epoch": 3.07, - "learning_rate": 2.686127167630058e-05, - "loss": 4.2453, + "epoch": 6.17, + "learning_rate": 0.0002104918032786885, + "loss": 0.1583, "step": 685 }, { - "epoch": 3.08, - "learning_rate": 2.684393063583815e-05, - "loss": 4.1949, + "epoch": 6.18, + "learning_rate": 0.00020999999999999998, + "loss": 0.1472, "step": 686 }, { - "epoch": 3.08, - "learning_rate": 2.6826589595375723e-05, - "loss": 4.1344, + "epoch": 6.19, + "learning_rate": 0.00020950819672131146, + "loss": 0.1488, "step": 687 }, { - "epoch": 3.09, - "learning_rate": 2.68092485549133e-05, - "loss": 4.1343, + "epoch": 6.2, + "learning_rate": 0.00020901639344262293, + "loss": 0.1143, "step": 688 }, { - "epoch": 3.09, - "learning_rate": 2.6791907514450867e-05, - "loss": 4.1509, + "epoch": 6.21, + "learning_rate": 0.0002085245901639344, + "loss": 0.1343, "step": 689 }, { - "epoch": 3.09, - "learning_rate": 2.677456647398844e-05, - "loss": 4.1241, + "epoch": 6.22, + "learning_rate": 0.0002080327868852459, + "loss": 0.1251, "step": 690 }, { - "epoch": 3.1, - "learning_rate": 2.675722543352601e-05, - "loss": 4.0753, + "epoch": 6.22, + "learning_rate": 0.00020754098360655735, + "loss": 0.1643, "step": 691 }, { - "epoch": 3.1, - "learning_rate": 2.6739884393063587e-05, - "loss": 4.1107, + "epoch": 6.23, + "learning_rate": 0.00020704918032786882, + "loss": 0.7004, "step": 692 }, { - "epoch": 3.11, - "learning_rate": 2.672254335260116e-05, - "loss": 4.0779, + "epoch": 6.24, + "learning_rate": 0.0002065573770491803, + "loss": 0.6188, "step": 693 }, { - "epoch": 3.11, - "learning_rate": 2.6705202312138728e-05, - "loss": 4.0421, + "epoch": 6.25, + "learning_rate": 0.0002060655737704918, + "loss": 0.423, "step": 694 }, { - "epoch": 3.12, - "learning_rate": 2.66878612716763e-05, - "loss": 4.0448, + "epoch": 6.26, + "learning_rate": 0.00020557377049180327, + "loss": 0.3769, "step": 695 }, { - "epoch": 3.12, - "learning_rate": 2.6670520231213875e-05, - "loss": 4.1082, + "epoch": 6.27, + "learning_rate": 0.00020508196721311475, + "loss": 0.376, "step": 696 }, { - "epoch": 3.13, - "learning_rate": 2.6653179190751447e-05, - "loss": 4.0395, + "epoch": 6.28, + "learning_rate": 0.0002045901639344262, + "loss": 0.2984, "step": 697 }, { - "epoch": 3.13, - "learning_rate": 2.663583815028902e-05, - "loss": 4.0556, + "epoch": 6.29, + "learning_rate": 0.0002040983606557377, + "loss": 0.2631, "step": 698 }, { - "epoch": 3.13, - "learning_rate": 2.6618497109826588e-05, - "loss": 4.0162, + "epoch": 6.3, + "learning_rate": 0.00020360655737704917, + "loss": 0.254, "step": 699 }, { - "epoch": 3.14, - "learning_rate": 2.660115606936416e-05, - "loss": 3.9868, + "epoch": 6.3, + "learning_rate": 0.00020311475409836064, + "loss": 0.252, "step": 700 }, { - "epoch": 3.14, - "learning_rate": 2.6583815028901735e-05, - "loss": 3.9384, + "epoch": 6.31, + "learning_rate": 0.00020262295081967211, + "loss": 0.2213, "step": 701 }, { - "epoch": 3.15, - "learning_rate": 2.6566473988439307e-05, - "loss": 4.0049, + "epoch": 6.32, + "learning_rate": 0.00020213114754098356, + "loss": 0.2044, "step": 702 }, { - "epoch": 3.15, - "learning_rate": 2.654913294797688e-05, - "loss": 3.9742, + "epoch": 6.33, + "learning_rate": 0.00020163934426229506, + "loss": 0.1789, "step": 703 }, { - "epoch": 3.16, - "learning_rate": 2.653179190751445e-05, - "loss": 3.9905, + "epoch": 6.34, + "learning_rate": 0.00020114754098360653, + "loss": 0.2126, "step": 704 }, { - "epoch": 3.16, - "learning_rate": 2.6514450867052024e-05, - "loss": 4.0019, + "epoch": 6.35, + "learning_rate": 0.000200655737704918, + "loss": 0.1658, "step": 705 }, { - "epoch": 3.17, - "learning_rate": 2.6497109826589596e-05, - "loss": 3.9217, + "epoch": 6.36, + "learning_rate": 0.0002001639344262295, + "loss": 0.1732, "step": 706 }, { - "epoch": 3.17, - "learning_rate": 2.6479768786127168e-05, - "loss": 4.0279, + "epoch": 6.37, + "learning_rate": 0.00019967213114754098, + "loss": 0.1748, "step": 707 }, { - "epoch": 3.17, - "learning_rate": 2.646242774566474e-05, - "loss": 4.0279, + "epoch": 6.38, + "learning_rate": 0.00019918032786885243, + "loss": 0.1638, "step": 708 }, { - "epoch": 3.18, - "learning_rate": 2.6445086705202315e-05, - "loss": 3.85, + "epoch": 6.39, + "learning_rate": 0.0001986885245901639, + "loss": 0.157, "step": 709 }, { - "epoch": 3.18, - "learning_rate": 2.6427745664739884e-05, - "loss": 4.0435, + "epoch": 6.39, + "learning_rate": 0.0001981967213114754, + "loss": 0.1418, "step": 710 }, { - "epoch": 3.19, - "learning_rate": 2.6410404624277456e-05, - "loss": 4.0146, + "epoch": 6.4, + "learning_rate": 0.00019770491803278688, + "loss": 0.1552, "step": 711 }, { - "epoch": 3.19, - "learning_rate": 2.6393063583815028e-05, - "loss": 4.0106, + "epoch": 6.41, + "learning_rate": 0.00019721311475409835, + "loss": 0.1415, "step": 712 }, { - "epoch": 3.2, - "learning_rate": 2.6375722543352604e-05, - "loss": 4.1137, + "epoch": 6.42, + "learning_rate": 0.0001967213114754098, + "loss": 0.126, "step": 713 }, { - "epoch": 3.2, - "learning_rate": 2.6358381502890176e-05, - "loss": 3.9141, + "epoch": 6.43, + "learning_rate": 0.0001962295081967213, + "loss": 0.1129, "step": 714 }, { - "epoch": 3.21, - "learning_rate": 2.6341040462427744e-05, - "loss": 3.9825, + "epoch": 6.44, + "learning_rate": 0.00019573770491803277, + "loss": 0.1441, "step": 715 }, { - "epoch": 3.21, - "learning_rate": 2.6323699421965316e-05, - "loss": 3.8994, + "epoch": 6.45, + "learning_rate": 0.00019524590163934425, + "loss": 0.1373, "step": 716 }, { - "epoch": 3.22, - "learning_rate": 2.6306358381502892e-05, - "loss": 3.9, + "epoch": 6.46, + "learning_rate": 0.00019475409836065572, + "loss": 0.5316, "step": 717 }, { - "epoch": 3.22, - "learning_rate": 2.6289017341040464e-05, - "loss": 4.0405, + "epoch": 6.47, + "learning_rate": 0.00019426229508196722, + "loss": 0.4373, "step": 718 }, { - "epoch": 3.22, - "learning_rate": 2.6271676300578036e-05, - "loss": 3.8299, + "epoch": 6.48, + "learning_rate": 0.00019377049180327867, + "loss": 0.3649, "step": 719 }, { - "epoch": 3.23, - "learning_rate": 2.6254335260115605e-05, - "loss": 5.0524, + "epoch": 6.48, + "learning_rate": 0.00019327868852459014, + "loss": 0.3233, "step": 720 }, { - "epoch": 3.23, - "learning_rate": 2.623699421965318e-05, - "loss": 5.1449, + "epoch": 6.49, + "learning_rate": 0.00019278688524590161, + "loss": 0.2931, "step": 721 }, { - "epoch": 3.24, - "learning_rate": 2.6219653179190752e-05, - "loss": 5.2232, + "epoch": 6.5, + "learning_rate": 0.00019229508196721312, + "loss": 0.2771, "step": 722 }, { - "epoch": 3.24, - "learning_rate": 2.6202312138728324e-05, - "loss": 5.1146, + "epoch": 6.51, + "learning_rate": 0.0001918032786885246, + "loss": 0.2295, "step": 723 }, { - "epoch": 3.25, - "learning_rate": 2.6184971098265896e-05, - "loss": 5.0103, + "epoch": 6.52, + "learning_rate": 0.00019131147540983604, + "loss": 0.2316, "step": 724 }, { - "epoch": 3.25, - "learning_rate": 2.616763005780347e-05, - "loss": 4.9405, + "epoch": 6.53, + "learning_rate": 0.0001908196721311475, + "loss": 0.2157, "step": 725 }, { - "epoch": 3.26, - "learning_rate": 2.615028901734104e-05, - "loss": 4.8401, + "epoch": 6.54, + "learning_rate": 0.000190327868852459, + "loss": 0.2125, "step": 726 }, { - "epoch": 3.26, - "learning_rate": 2.6132947976878613e-05, - "loss": 4.6854, + "epoch": 6.55, + "learning_rate": 0.00018983606557377048, + "loss": 0.1578, "step": 727 }, { - "epoch": 3.26, - "learning_rate": 2.6115606936416185e-05, - "loss": 4.657, + "epoch": 6.56, + "learning_rate": 0.00018934426229508196, + "loss": 0.1633, "step": 728 }, { - "epoch": 3.27, - "learning_rate": 2.609826589595376e-05, - "loss": 4.4908, + "epoch": 6.57, + "learning_rate": 0.00018885245901639343, + "loss": 0.1553, "step": 729 }, { - "epoch": 3.27, - "learning_rate": 2.608092485549133e-05, - "loss": 4.2998, + "epoch": 6.57, + "learning_rate": 0.00018836065573770488, + "loss": 0.1544, "step": 730 }, { - "epoch": 3.28, - "learning_rate": 2.60635838150289e-05, - "loss": 4.2922, + "epoch": 6.58, + "learning_rate": 0.00018786885245901638, + "loss": 0.1958, "step": 731 }, { - "epoch": 3.28, - "learning_rate": 2.6046242774566473e-05, - "loss": 4.2949, + "epoch": 6.59, + "learning_rate": 0.00018737704918032785, + "loss": 0.1519, "step": 732 }, { - "epoch": 3.29, - "learning_rate": 2.602890173410405e-05, - "loss": 4.2303, + "epoch": 6.6, + "learning_rate": 0.00018688524590163933, + "loss": 0.1557, "step": 733 }, { - "epoch": 3.29, - "learning_rate": 2.601156069364162e-05, - "loss": 4.2311, + "epoch": 6.61, + "learning_rate": 0.00018639344262295083, + "loss": 0.1656, "step": 734 }, { - "epoch": 3.3, - "learning_rate": 2.5994219653179192e-05, - "loss": 4.2388, + "epoch": 6.62, + "learning_rate": 0.00018590163934426227, + "loss": 0.1322, "step": 735 }, { - "epoch": 3.3, - "learning_rate": 2.597687861271676e-05, - "loss": 4.1948, + "epoch": 6.63, + "learning_rate": 0.00018540983606557375, + "loss": 0.1391, "step": 736 }, { - "epoch": 3.3, - "learning_rate": 2.5959537572254337e-05, - "loss": 4.2017, + "epoch": 6.64, + "learning_rate": 0.00018491803278688522, + "loss": 0.151, "step": 737 }, { - "epoch": 3.31, - "learning_rate": 2.594219653179191e-05, - "loss": 4.1867, + "epoch": 6.65, + "learning_rate": 0.00018442622950819672, + "loss": 0.1497, "step": 738 }, { - "epoch": 3.31, - "learning_rate": 2.592485549132948e-05, - "loss": 4.1769, + "epoch": 6.65, + "learning_rate": 0.0001839344262295082, + "loss": 0.1293, "step": 739 }, { - "epoch": 3.32, - "learning_rate": 2.5907514450867053e-05, - "loss": 4.0745, + "epoch": 6.66, + "learning_rate": 0.00018344262295081964, + "loss": 0.1088, "step": 740 }, { - "epoch": 3.32, - "learning_rate": 2.5890173410404625e-05, - "loss": 4.0938, + "epoch": 6.67, + "learning_rate": 0.00018295081967213112, + "loss": 0.1561, "step": 741 }, { - "epoch": 3.33, - "learning_rate": 2.5872832369942197e-05, - "loss": 4.0325, + "epoch": 6.68, + "learning_rate": 0.00018245901639344262, + "loss": 0.5004, "step": 742 }, { - "epoch": 3.33, - "learning_rate": 2.585549132947977e-05, - "loss": 4.1052, + "epoch": 6.69, + "learning_rate": 0.0001819672131147541, + "loss": 0.4009, "step": 743 }, { - "epoch": 3.34, - "learning_rate": 2.583815028901734e-05, - "loss": 4.1427, + "epoch": 6.7, + "learning_rate": 0.00018147540983606556, + "loss": 0.3291, "step": 744 }, { - "epoch": 3.34, - "learning_rate": 2.5820809248554917e-05, - "loss": 4.0941, + "epoch": 6.71, + "learning_rate": 0.00018098360655737704, + "loss": 0.2757, "step": 745 }, { - "epoch": 3.35, - "learning_rate": 2.5803468208092485e-05, - "loss": 4.0728, + "epoch": 6.72, + "learning_rate": 0.00018049180327868848, + "loss": 0.2992, "step": 746 }, { - "epoch": 3.35, - "learning_rate": 2.5786127167630057e-05, - "loss": 4.0014, + "epoch": 6.73, + "learning_rate": 0.00017999999999999998, + "loss": 0.2224, "step": 747 }, { - "epoch": 3.35, - "learning_rate": 2.576878612716763e-05, - "loss": 4.0966, + "epoch": 6.74, + "learning_rate": 0.00017950819672131146, + "loss": 0.2268, "step": 748 }, { - "epoch": 3.36, - "learning_rate": 2.5751445086705205e-05, - "loss": 4.0551, + "epoch": 6.74, + "learning_rate": 0.00017901639344262293, + "loss": 0.2091, "step": 749 }, { - "epoch": 3.36, - "learning_rate": 2.5734104046242777e-05, - "loss": 4.1112, + "epoch": 6.75, + "learning_rate": 0.00017852459016393443, + "loss": 0.2108, "step": 750 }, { - "epoch": 3.37, - "learning_rate": 2.5716763005780346e-05, - "loss": 4.0396, + "epoch": 6.76, + "learning_rate": 0.00017803278688524588, + "loss": 0.1898, "step": 751 }, { - "epoch": 3.37, - "learning_rate": 2.5699421965317918e-05, - "loss": 4.0486, + "epoch": 6.77, + "learning_rate": 0.00017754098360655735, + "loss": 0.1764, "step": 752 }, { - "epoch": 3.38, - "learning_rate": 2.5682080924855493e-05, - "loss": 4.0519, + "epoch": 6.78, + "learning_rate": 0.00017704918032786883, + "loss": 0.1669, "step": 753 }, { - "epoch": 3.38, - "learning_rate": 2.5664739884393065e-05, - "loss": 4.0912, + "epoch": 6.79, + "learning_rate": 0.00017655737704918033, + "loss": 0.1502, "step": 754 }, { - "epoch": 3.39, - "learning_rate": 2.5647398843930637e-05, - "loss": 4.1006, + "epoch": 6.8, + "learning_rate": 0.0001760655737704918, + "loss": 0.1734, "step": 755 }, { - "epoch": 3.39, - "learning_rate": 2.5630057803468206e-05, - "loss": 3.9934, + "epoch": 6.81, + "learning_rate": 0.00017557377049180327, + "loss": 0.1519, "step": 756 }, { - "epoch": 3.39, - "learning_rate": 2.561271676300578e-05, - "loss": 4.0426, + "epoch": 6.82, + "learning_rate": 0.00017508196721311472, + "loss": 0.1561, "step": 757 }, { - "epoch": 3.4, - "learning_rate": 2.5595375722543353e-05, - "loss": 4.0186, + "epoch": 6.83, + "learning_rate": 0.0001745901639344262, + "loss": 0.1471, "step": 758 }, { - "epoch": 3.4, - "learning_rate": 2.5578034682080925e-05, - "loss": 3.956, + "epoch": 6.83, + "learning_rate": 0.0001740983606557377, + "loss": 0.1443, "step": 759 }, { - "epoch": 3.41, - "learning_rate": 2.5560693641618498e-05, - "loss": 4.0575, + "epoch": 6.84, + "learning_rate": 0.00017360655737704917, + "loss": 0.1161, "step": 760 }, { - "epoch": 3.41, - "learning_rate": 2.5543352601156073e-05, - "loss": 3.9721, + "epoch": 6.85, + "learning_rate": 0.00017311475409836064, + "loss": 0.1166, "step": 761 }, { - "epoch": 3.42, - "learning_rate": 2.5526011560693642e-05, - "loss": 3.9682, + "epoch": 6.86, + "learning_rate": 0.0001726229508196721, + "loss": 0.1304, "step": 762 }, { - "epoch": 3.42, - "learning_rate": 2.5508670520231214e-05, - "loss": 4.0261, + "epoch": 6.87, + "learning_rate": 0.0001721311475409836, + "loss": 0.12, "step": 763 }, { - "epoch": 3.43, - "learning_rate": 2.5491329479768786e-05, - "loss": 4.0748, + "epoch": 6.88, + "learning_rate": 0.00017163934426229506, + "loss": 0.1263, "step": 764 }, { - "epoch": 3.43, - "learning_rate": 2.547398843930636e-05, - "loss": 3.9467, + "epoch": 6.89, + "learning_rate": 0.00017114754098360654, + "loss": 0.1376, "step": 765 }, { - "epoch": 3.43, - "learning_rate": 2.5456647398843933e-05, - "loss": 4.0798, + "epoch": 6.9, + "learning_rate": 0.00017065573770491804, + "loss": 0.1581, "step": 766 }, { - "epoch": 3.44, - "learning_rate": 2.5439306358381502e-05, - "loss": 3.8732, + "epoch": 6.91, + "learning_rate": 0.0001701639344262295, + "loss": 0.398, "step": 767 }, { - "epoch": 3.44, - "learning_rate": 2.5421965317919074e-05, - "loss": 3.9332, + "epoch": 6.91, + "learning_rate": 0.00016967213114754096, + "loss": 0.3025, "step": 768 }, { - "epoch": 3.45, - "learning_rate": 2.540462427745665e-05, - "loss": 3.7809, + "epoch": 6.92, + "learning_rate": 0.00016918032786885243, + "loss": 0.2683, "step": 769 }, { - "epoch": 3.45, - "learning_rate": 2.538728323699422e-05, - "loss": 4.88, + "epoch": 6.93, + "learning_rate": 0.00016868852459016393, + "loss": 0.2023, "step": 770 }, { - "epoch": 3.46, - "learning_rate": 2.5369942196531794e-05, - "loss": 5.0725, + "epoch": 6.94, + "learning_rate": 0.0001681967213114754, + "loss": 0.184, "step": 771 }, { - "epoch": 3.46, - "learning_rate": 2.5352601156069362e-05, - "loss": 5.1848, + "epoch": 6.95, + "learning_rate": 0.00016770491803278688, + "loss": 0.1605, "step": 772 }, { - "epoch": 3.47, - "learning_rate": 2.5335260115606938e-05, - "loss": 5.0027, + "epoch": 6.96, + "learning_rate": 0.00016721311475409833, + "loss": 0.142, "step": 773 }, { - "epoch": 3.47, - "learning_rate": 2.531791907514451e-05, - "loss": 4.9422, + "epoch": 6.97, + "learning_rate": 0.0001667213114754098, + "loss": 0.1362, "step": 774 }, { - "epoch": 3.48, - "learning_rate": 2.5300578034682082e-05, - "loss": 4.9045, + "epoch": 6.98, + "learning_rate": 0.0001662295081967213, + "loss": 0.1242, "step": 775 }, { - "epoch": 3.48, - "learning_rate": 2.5283236994219654e-05, - "loss": 4.8252, + "epoch": 6.99, + "learning_rate": 0.00016573770491803278, + "loss": 0.1229, "step": 776 }, { - "epoch": 3.48, - "learning_rate": 2.5265895953757226e-05, - "loss": 4.5949, + "epoch": 7.0, + "learning_rate": 0.00016524590163934425, + "loss": 0.1273, "step": 777 }, { - "epoch": 3.49, - "learning_rate": 2.5248554913294798e-05, - "loss": 4.5225, + "epoch": 7.01, + "learning_rate": 0.00016475409836065575, + "loss": 0.3292, "step": 778 }, { - "epoch": 3.49, - "learning_rate": 2.523121387283237e-05, - "loss": 4.3801, + "epoch": 7.02, + "learning_rate": 0.0001642622950819672, + "loss": 0.1917, "step": 779 }, { - "epoch": 3.5, - "learning_rate": 2.5213872832369942e-05, - "loss": 4.3487, + "epoch": 7.03, + "learning_rate": 0.00016377049180327867, + "loss": 0.1808, "step": 780 }, { - "epoch": 3.5, - "learning_rate": 2.5196531791907518e-05, - "loss": 4.264, + "epoch": 7.04, + "learning_rate": 0.00016327868852459014, + "loss": 0.1459, "step": 781 }, { - "epoch": 3.51, - "learning_rate": 2.5179190751445086e-05, - "loss": 4.2761, + "epoch": 7.04, + "learning_rate": 0.00016278688524590164, + "loss": 0.1363, "step": 782 }, { - "epoch": 3.51, - "learning_rate": 2.516184971098266e-05, - "loss": 4.1625, + "epoch": 7.05, + "learning_rate": 0.00016229508196721312, + "loss": 0.1299, "step": 783 }, { - "epoch": 3.52, - "learning_rate": 2.514450867052023e-05, - "loss": 4.14, + "epoch": 7.06, + "learning_rate": 0.00016180327868852456, + "loss": 0.1337, "step": 784 }, { - "epoch": 3.52, - "learning_rate": 2.5127167630057806e-05, - "loss": 4.1771, + "epoch": 7.07, + "learning_rate": 0.00016131147540983604, + "loss": 0.1175, "step": 785 }, { - "epoch": 3.52, - "learning_rate": 2.5109826589595378e-05, - "loss": 4.2, + "epoch": 7.08, + "learning_rate": 0.00016081967213114754, + "loss": 0.1139, "step": 786 }, { - "epoch": 3.53, - "learning_rate": 2.509248554913295e-05, - "loss": 4.1604, + "epoch": 7.09, + "learning_rate": 0.000160327868852459, + "loss": 0.1132, "step": 787 }, { - "epoch": 3.53, - "learning_rate": 2.507514450867052e-05, - "loss": 4.0762, + "epoch": 7.1, + "learning_rate": 0.00015983606557377049, + "loss": 0.0938, "step": 788 }, { - "epoch": 3.54, - "learning_rate": 2.5057803468208094e-05, - "loss": 4.1025, + "epoch": 7.11, + "learning_rate": 0.00015934426229508193, + "loss": 0.0853, "step": 789 }, { - "epoch": 3.54, - "learning_rate": 2.5040462427745666e-05, - "loss": 4.1368, + "epoch": 7.12, + "learning_rate": 0.0001588524590163934, + "loss": 0.0911, "step": 790 }, { - "epoch": 3.55, - "learning_rate": 2.502312138728324e-05, - "loss": 4.131, + "epoch": 7.13, + "learning_rate": 0.0001583606557377049, + "loss": 0.0876, "step": 791 }, { - "epoch": 3.55, - "learning_rate": 2.500578034682081e-05, - "loss": 4.0851, + "epoch": 7.13, + "learning_rate": 0.00015786885245901638, + "loss": 0.0952, "step": 792 }, { - "epoch": 3.56, - "learning_rate": 2.498843930635838e-05, - "loss": 4.0771, + "epoch": 7.14, + "learning_rate": 0.00015737704918032785, + "loss": 0.0965, "step": 793 }, { - "epoch": 3.56, - "learning_rate": 2.4971098265895955e-05, - "loss": 4.1087, + "epoch": 7.15, + "learning_rate": 0.00015688524590163936, + "loss": 0.0917, "step": 794 }, { - "epoch": 3.57, - "learning_rate": 2.4953757225433527e-05, - "loss": 4.1521, + "epoch": 7.16, + "learning_rate": 0.0001563934426229508, + "loss": 0.0931, "step": 795 }, { - "epoch": 3.57, - "learning_rate": 2.49364161849711e-05, - "loss": 4.086, + "epoch": 7.17, + "learning_rate": 0.00015590163934426228, + "loss": 0.0801, "step": 796 }, { - "epoch": 3.57, - "learning_rate": 2.491907514450867e-05, - "loss": 4.0091, + "epoch": 7.18, + "learning_rate": 0.00015540983606557375, + "loss": 0.081, "step": 797 }, { - "epoch": 3.58, - "learning_rate": 2.4901734104046243e-05, - "loss": 4.079, + "epoch": 7.19, + "learning_rate": 0.00015491803278688525, + "loss": 0.0888, "step": 798 }, { - "epoch": 3.58, - "learning_rate": 2.4884393063583815e-05, - "loss": 4.1123, + "epoch": 7.2, + "learning_rate": 0.00015442622950819672, + "loss": 0.0644, "step": 799 }, { - "epoch": 3.59, - "learning_rate": 2.4867052023121387e-05, - "loss": 4.0259, + "epoch": 7.21, + "learning_rate": 0.00015393442622950817, + "loss": 0.0697, "step": 800 }, { - "epoch": 3.59, - "learning_rate": 2.484971098265896e-05, - "loss": 4.0563, + "epoch": 7.22, + "learning_rate": 0.00015344262295081964, + "loss": 0.0626, "step": 801 }, { - "epoch": 3.6, - "learning_rate": 2.4832369942196535e-05, - "loss": 4.0352, + "epoch": 7.22, + "learning_rate": 0.00015295081967213112, + "loss": 0.0777, "step": 802 }, { - "epoch": 3.6, - "learning_rate": 2.4815028901734103e-05, - "loss": 4.0531, + "epoch": 7.23, + "learning_rate": 0.00015245901639344262, + "loss": 0.2966, "step": 803 }, { - "epoch": 3.61, - "learning_rate": 2.4797687861271675e-05, - "loss": 4.0419, + "epoch": 7.24, + "learning_rate": 0.0001519672131147541, + "loss": 0.2267, "step": 804 }, { - "epoch": 3.61, - "learning_rate": 2.4780346820809247e-05, - "loss": 4.0331, + "epoch": 7.25, + "learning_rate": 0.00015147540983606557, + "loss": 0.1676, "step": 805 }, { - "epoch": 3.61, - "learning_rate": 2.4763005780346823e-05, - "loss": 4.0502, + "epoch": 7.26, + "learning_rate": 0.000150983606557377, + "loss": 0.18, "step": 806 }, { - "epoch": 3.62, - "learning_rate": 2.4745664739884395e-05, - "loss": 3.9469, + "epoch": 7.27, + "learning_rate": 0.0001504918032786885, + "loss": 0.1531, "step": 807 }, { - "epoch": 3.62, - "learning_rate": 2.4728323699421964e-05, - "loss": 4.1118, + "epoch": 7.28, + "learning_rate": 0.00015, + "loss": 0.1334, "step": 808 }, { - "epoch": 3.63, - "learning_rate": 2.4710982658959536e-05, - "loss": 4.0224, + "epoch": 7.29, + "learning_rate": 0.00014950819672131146, + "loss": 0.1211, "step": 809 }, { - "epoch": 3.63, - "learning_rate": 2.469364161849711e-05, - "loss": 4.0334, + "epoch": 7.3, + "learning_rate": 0.00014901639344262293, + "loss": 0.1199, "step": 810 }, { - "epoch": 3.64, - "learning_rate": 2.4676300578034683e-05, - "loss": 3.9915, + "epoch": 7.3, + "learning_rate": 0.0001485245901639344, + "loss": 0.1123, "step": 811 }, { - "epoch": 3.64, - "learning_rate": 2.4658959537572255e-05, - "loss": 3.9869, + "epoch": 7.31, + "learning_rate": 0.0001480327868852459, + "loss": 0.1059, "step": 812 }, { - "epoch": 3.65, - "learning_rate": 2.4641618497109827e-05, - "loss": 4.031, + "epoch": 7.32, + "learning_rate": 0.00014754098360655736, + "loss": 0.0931, "step": 813 }, { - "epoch": 3.65, - "learning_rate": 2.46242774566474e-05, - "loss": 4.1438, + "epoch": 7.33, + "learning_rate": 0.00014704918032786886, + "loss": 0.0969, "step": 814 }, { - "epoch": 3.65, - "learning_rate": 2.460693641618497e-05, - "loss": 3.8887, + "epoch": 7.34, + "learning_rate": 0.0001465573770491803, + "loss": 0.0967, "step": 815 }, { - "epoch": 3.66, - "learning_rate": 2.4589595375722544e-05, - "loss": 4.0366, + "epoch": 7.35, + "learning_rate": 0.00014606557377049178, + "loss": 0.0934, "step": 816 }, { - "epoch": 3.66, - "learning_rate": 2.4572254335260116e-05, - "loss": 4.046, + "epoch": 7.36, + "learning_rate": 0.00014557377049180328, + "loss": 0.0883, "step": 817 }, { - "epoch": 3.67, - "learning_rate": 2.455491329479769e-05, - "loss": 3.9899, + "epoch": 7.37, + "learning_rate": 0.00014508196721311472, + "loss": 0.0828, "step": 818 }, { - "epoch": 3.67, - "learning_rate": 2.453757225433526e-05, - "loss": 3.9826, + "epoch": 7.38, + "learning_rate": 0.00014459016393442622, + "loss": 0.0905, "step": 819 }, { - "epoch": 3.68, - "learning_rate": 2.4520231213872832e-05, - "loss": 4.8933, + "epoch": 7.39, + "learning_rate": 0.0001440983606557377, + "loss": 0.0853, "step": 820 }, { - "epoch": 3.68, - "learning_rate": 2.4502890173410404e-05, - "loss": 4.8635, + "epoch": 7.39, + "learning_rate": 0.00014360655737704917, + "loss": 0.0755, "step": 821 }, { - "epoch": 3.69, - "learning_rate": 2.448554913294798e-05, - "loss": 4.9638, + "epoch": 7.4, + "learning_rate": 0.00014311475409836065, + "loss": 0.0726, "step": 822 }, { - "epoch": 3.69, - "learning_rate": 2.446820809248555e-05, - "loss": 5.0011, + "epoch": 7.41, + "learning_rate": 0.00014262295081967212, + "loss": 0.0775, "step": 823 }, { - "epoch": 3.7, - "learning_rate": 2.445086705202312e-05, - "loss": 4.9523, + "epoch": 7.42, + "learning_rate": 0.0001421311475409836, + "loss": 0.0798, "step": 824 }, { - "epoch": 3.7, - "learning_rate": 2.4433526011560692e-05, - "loss": 4.8568, + "epoch": 7.43, + "learning_rate": 0.00014163934426229507, + "loss": 0.0591, "step": 825 }, { - "epoch": 3.7, - "learning_rate": 2.4416184971098268e-05, - "loss": 4.7478, + "epoch": 7.44, + "learning_rate": 0.00014114754098360654, + "loss": 0.0644, "step": 826 }, { - "epoch": 3.71, - "learning_rate": 2.439884393063584e-05, - "loss": 4.7502, + "epoch": 7.45, + "learning_rate": 0.00014065573770491801, + "loss": 0.0976, "step": 827 }, { - "epoch": 3.71, - "learning_rate": 2.4381502890173412e-05, - "loss": 4.5984, + "epoch": 7.46, + "learning_rate": 0.00014016393442622951, + "loss": 0.2755, "step": 828 }, { - "epoch": 3.72, - "learning_rate": 2.436416184971098e-05, - "loss": 4.4919, + "epoch": 7.47, + "learning_rate": 0.00013967213114754096, + "loss": 0.2098, "step": 829 }, { - "epoch": 3.72, - "learning_rate": 2.4346820809248556e-05, - "loss": 4.2935, + "epoch": 7.48, + "learning_rate": 0.00013918032786885243, + "loss": 0.1766, "step": 830 }, { - "epoch": 3.73, - "learning_rate": 2.4329479768786128e-05, - "loss": 4.3023, + "epoch": 7.48, + "learning_rate": 0.00013868852459016394, + "loss": 0.1588, "step": 831 }, { - "epoch": 3.73, - "learning_rate": 2.43121387283237e-05, - "loss": 4.2509, + "epoch": 7.49, + "learning_rate": 0.00013819672131147538, + "loss": 0.143, "step": 832 }, { - "epoch": 3.74, - "learning_rate": 2.4294797687861272e-05, - "loss": 4.2187, + "epoch": 7.5, + "learning_rate": 0.00013770491803278688, + "loss": 0.1298, "step": 833 }, { - "epoch": 3.74, - "learning_rate": 2.4277456647398844e-05, - "loss": 4.1768, + "epoch": 7.51, + "learning_rate": 0.00013721311475409833, + "loss": 0.1163, "step": 834 }, { - "epoch": 3.74, - "learning_rate": 2.4260115606936416e-05, - "loss": 4.092, + "epoch": 7.52, + "learning_rate": 0.00013672131147540983, + "loss": 0.1181, "step": 835 }, { - "epoch": 3.75, - "learning_rate": 2.4242774566473988e-05, - "loss": 4.1135, + "epoch": 7.53, + "learning_rate": 0.0001362295081967213, + "loss": 0.116, "step": 836 }, { - "epoch": 3.75, - "learning_rate": 2.422543352601156e-05, - "loss": 4.0844, + "epoch": 7.54, + "learning_rate": 0.00013573770491803278, + "loss": 0.0969, "step": 837 }, { - "epoch": 3.76, - "learning_rate": 2.4208092485549136e-05, - "loss": 4.1666, + "epoch": 7.55, + "learning_rate": 0.00013524590163934425, + "loss": 0.0956, "step": 838 }, { - "epoch": 3.76, - "learning_rate": 2.4190751445086708e-05, - "loss": 4.1177, + "epoch": 7.56, + "learning_rate": 0.00013475409836065573, + "loss": 0.0921, "step": 839 }, { - "epoch": 3.77, - "learning_rate": 2.4173410404624277e-05, - "loss": 4.086, + "epoch": 7.57, + "learning_rate": 0.0001342622950819672, + "loss": 0.1068, "step": 840 }, { - "epoch": 3.77, - "learning_rate": 2.415606936416185e-05, - "loss": 4.1248, + "epoch": 7.57, + "learning_rate": 0.00013377049180327867, + "loss": 0.0846, "step": 841 }, { - "epoch": 3.78, - "learning_rate": 2.4138728323699424e-05, - "loss": 3.984, + "epoch": 7.58, + "learning_rate": 0.00013327868852459017, + "loss": 0.0797, "step": 842 }, { - "epoch": 3.78, - "learning_rate": 2.4121387283236996e-05, - "loss": 4.0219, + "epoch": 7.59, + "learning_rate": 0.00013278688524590162, + "loss": 0.0789, "step": 843 }, { - "epoch": 3.78, - "learning_rate": 2.4104046242774568e-05, - "loss": 4.0683, + "epoch": 7.6, + "learning_rate": 0.0001322950819672131, + "loss": 0.0802, "step": 844 }, { - "epoch": 3.79, - "learning_rate": 2.4086705202312137e-05, - "loss": 4.1054, + "epoch": 7.61, + "learning_rate": 0.00013180327868852457, + "loss": 0.0798, "step": 845 }, { - "epoch": 3.79, - "learning_rate": 2.4069364161849712e-05, - "loss": 4.0374, + "epoch": 7.62, + "learning_rate": 0.00013131147540983604, + "loss": 0.0625, "step": 846 }, { - "epoch": 3.8, - "learning_rate": 2.4052023121387284e-05, - "loss": 3.9663, + "epoch": 7.63, + "learning_rate": 0.00013081967213114754, + "loss": 0.0729, "step": 847 }, { - "epoch": 3.8, - "learning_rate": 2.4034682080924856e-05, - "loss": 4.0037, + "epoch": 7.64, + "learning_rate": 0.000130327868852459, + "loss": 0.0702, "step": 848 }, { - "epoch": 3.81, - "learning_rate": 2.401734104046243e-05, - "loss": 4.1018, + "epoch": 7.65, + "learning_rate": 0.0001298360655737705, + "loss": 0.0749, "step": 849 }, { - "epoch": 3.81, - "learning_rate": 2.4e-05, - "loss": 4.1347, + "epoch": 7.65, + "learning_rate": 0.00012934426229508196, + "loss": 0.1538, "step": 850 }, { - "epoch": 3.82, - "learning_rate": 2.3982658959537573e-05, - "loss": 3.9942, + "epoch": 7.66, + "learning_rate": 0.00012885245901639344, + "loss": 0.0793, "step": 851 }, { - "epoch": 3.82, - "learning_rate": 2.3965317919075145e-05, - "loss": 4.014, + "epoch": 7.67, + "learning_rate": 0.0001283606557377049, + "loss": 0.1118, "step": 852 }, { - "epoch": 3.83, - "learning_rate": 2.3947976878612717e-05, - "loss": 3.9866, + "epoch": 7.68, + "learning_rate": 0.00012786885245901638, + "loss": 0.2584, "step": 853 }, { - "epoch": 3.83, - "learning_rate": 2.3930635838150292e-05, - "loss": 3.9975, + "epoch": 7.69, + "learning_rate": 0.00012737704918032786, + "loss": 0.2042, "step": 854 }, { - "epoch": 3.83, - "learning_rate": 2.391329479768786e-05, - "loss": 4.0508, + "epoch": 7.7, + "learning_rate": 0.00012688524590163933, + "loss": 0.1538, "step": 855 }, { - "epoch": 3.84, - "learning_rate": 2.3895953757225433e-05, - "loss": 4.1077, + "epoch": 7.71, + "learning_rate": 0.0001263934426229508, + "loss": 0.1337, "step": 856 }, { - "epoch": 3.84, - "learning_rate": 2.3878612716763005e-05, - "loss": 4.0541, + "epoch": 7.72, + "learning_rate": 0.00012590163934426228, + "loss": 0.1333, "step": 857 }, { - "epoch": 3.85, - "learning_rate": 2.386127167630058e-05, - "loss": 4.0191, + "epoch": 7.73, + "learning_rate": 0.00012540983606557378, + "loss": 0.1308, "step": 858 }, { - "epoch": 3.85, - "learning_rate": 2.3843930635838153e-05, - "loss": 3.9792, + "epoch": 7.74, + "learning_rate": 0.00012491803278688523, + "loss": 0.1062, "step": 859 }, { - "epoch": 3.86, - "learning_rate": 2.382658959537572e-05, - "loss": 3.9911, + "epoch": 7.74, + "learning_rate": 0.0001244262295081967, + "loss": 0.1091, "step": 860 }, { - "epoch": 3.86, - "learning_rate": 2.3809248554913293e-05, - "loss": 4.0872, + "epoch": 7.75, + "learning_rate": 0.0001239344262295082, + "loss": 0.1118, "step": 861 }, { - "epoch": 3.87, - "learning_rate": 2.379190751445087e-05, - "loss": 4.0612, + "epoch": 7.76, + "learning_rate": 0.00012344262295081965, + "loss": 0.1769, "step": 862 }, { - "epoch": 3.87, - "learning_rate": 2.377456647398844e-05, - "loss": 3.9739, + "epoch": 7.77, + "learning_rate": 0.00012295081967213115, + "loss": 0.0905, "step": 863 }, { - "epoch": 3.87, - "learning_rate": 2.3757225433526013e-05, - "loss": 3.9459, + "epoch": 7.78, + "learning_rate": 0.0001224590163934426, + "loss": 0.1006, "step": 864 }, { - "epoch": 3.88, - "learning_rate": 2.3739884393063585e-05, - "loss": 4.0336, + "epoch": 7.79, + "learning_rate": 0.00012196721311475408, + "loss": 0.0995, "step": 865 }, { - "epoch": 3.88, - "learning_rate": 2.3722543352601157e-05, - "loss": 4.0283, + "epoch": 7.8, + "learning_rate": 0.00012147540983606557, + "loss": 0.0921, "step": 866 }, { - "epoch": 3.89, - "learning_rate": 2.370520231213873e-05, - "loss": 4.0666, + "epoch": 7.81, + "learning_rate": 0.00012098360655737703, + "loss": 0.0971, "step": 867 }, { - "epoch": 3.89, - "learning_rate": 2.36878612716763e-05, - "loss": 3.9129, + "epoch": 7.82, + "learning_rate": 0.00012049180327868852, + "loss": 0.0774, "step": 868 }, { - "epoch": 3.9, - "learning_rate": 2.3670520231213873e-05, - "loss": 3.8494, + "epoch": 7.83, + "learning_rate": 0.00011999999999999999, + "loss": 0.0866, "step": 869 }, { - "epoch": 3.9, - "learning_rate": 2.365317919075145e-05, - "loss": 4.7372, + "epoch": 7.83, + "learning_rate": 0.00011950819672131146, + "loss": 0.0981, "step": 870 }, { - "epoch": 3.91, - "learning_rate": 2.3635838150289017e-05, - "loss": 4.7829, + "epoch": 7.84, + "learning_rate": 0.00011901639344262294, + "loss": 0.072, "step": 871 }, { - "epoch": 3.91, - "learning_rate": 2.361849710982659e-05, - "loss": 4.7124, + "epoch": 7.85, + "learning_rate": 0.00011852459016393441, + "loss": 0.0674, "step": 872 }, { - "epoch": 3.91, - "learning_rate": 2.360115606936416e-05, - "loss": 4.8475, + "epoch": 7.86, + "learning_rate": 0.00011803278688524588, + "loss": 0.0639, "step": 873 }, { - "epoch": 3.92, - "learning_rate": 2.3583815028901737e-05, - "loss": 4.6983, + "epoch": 7.87, + "learning_rate": 0.00011754098360655737, + "loss": 0.0839, "step": 874 }, { - "epoch": 3.92, - "learning_rate": 2.356647398843931e-05, - "loss": 4.6769, + "epoch": 7.88, + "learning_rate": 0.00011704918032786883, + "loss": 0.066, "step": 875 }, { - "epoch": 3.93, - "learning_rate": 2.3549132947976878e-05, - "loss": 4.6902, + "epoch": 7.89, + "learning_rate": 0.00011655737704918032, + "loss": 0.0597, "step": 876 }, { - "epoch": 3.93, - "learning_rate": 2.353179190751445e-05, - "loss": 4.485, + "epoch": 7.9, + "learning_rate": 0.00011606557377049179, + "loss": 0.0819, "step": 877 }, { - "epoch": 3.94, - "learning_rate": 2.3514450867052025e-05, - "loss": 4.4053, + "epoch": 7.91, + "learning_rate": 0.00011557377049180327, + "loss": 0.1785, "step": 878 }, { - "epoch": 3.94, - "learning_rate": 2.3497109826589597e-05, - "loss": 4.2649, + "epoch": 7.91, + "learning_rate": 0.00011508196721311474, + "loss": 0.1619, "step": 879 }, { - "epoch": 3.95, - "learning_rate": 2.347976878612717e-05, - "loss": 4.3011, + "epoch": 7.92, + "learning_rate": 0.00011459016393442623, + "loss": 0.1235, "step": 880 }, { - "epoch": 3.95, - "learning_rate": 2.3462427745664738e-05, - "loss": 4.1987, + "epoch": 7.93, + "learning_rate": 0.00011409836065573769, + "loss": 0.0996, "step": 881 }, { - "epoch": 3.96, - "learning_rate": 2.3445086705202314e-05, - "loss": 4.0956, + "epoch": 7.94, + "learning_rate": 0.00011360655737704917, + "loss": 0.0896, "step": 882 }, { - "epoch": 3.96, - "learning_rate": 2.3427745664739886e-05, - "loss": 4.1256, + "epoch": 7.95, + "learning_rate": 0.00011311475409836063, + "loss": 0.0785, "step": 883 }, { - "epoch": 3.96, - "learning_rate": 2.3410404624277458e-05, - "loss": 3.951, + "epoch": 7.96, + "learning_rate": 0.00011262295081967212, + "loss": 0.0756, "step": 884 }, { - "epoch": 3.97, - "learning_rate": 2.339306358381503e-05, - "loss": 4.1155, + "epoch": 7.97, + "learning_rate": 0.0001121311475409836, + "loss": 0.094, "step": 885 }, { - "epoch": 3.97, - "learning_rate": 2.3375722543352602e-05, - "loss": 4.0756, + "epoch": 7.98, + "learning_rate": 0.00011163934426229507, + "loss": 0.0629, "step": 886 }, { - "epoch": 3.98, - "learning_rate": 2.3358381502890174e-05, - "loss": 4.0417, + "epoch": 7.99, + "learning_rate": 0.00011114754098360654, + "loss": 0.0694, "step": 887 }, { - "epoch": 3.98, - "learning_rate": 2.3341040462427746e-05, - "loss": 4.1136, + "epoch": 8.0, + "learning_rate": 0.00011065573770491803, + "loss": 0.0818, "step": 888 }, { - "epoch": 3.99, - "learning_rate": 2.3323699421965318e-05, - "loss": 4.1349, + "epoch": 8.01, + "learning_rate": 0.00011016393442622949, + "loss": 0.1714, "step": 889 }, { - "epoch": 3.99, - "learning_rate": 2.330635838150289e-05, - "loss": 4.0961, + "epoch": 8.02, + "learning_rate": 0.00010967213114754098, + "loss": 0.1262, "step": 890 }, { - "epoch": 4.0, - "learning_rate": 2.3289017341040466e-05, - "loss": 3.9922, + "epoch": 8.03, + "learning_rate": 0.00010918032786885245, + "loss": 0.089, "step": 891 }, { - "epoch": 4.0, - "learning_rate": 2.3271676300578034e-05, - "loss": 4.1601, + "epoch": 8.04, + "learning_rate": 0.00010868852459016392, + "loss": 0.1003, "step": 892 }, { - "epoch": 4.0, - "learning_rate": 2.3254335260115606e-05, - "loss": 4.3071, + "epoch": 8.04, + "learning_rate": 0.0001081967213114754, + "loss": 0.0913, "step": 893 }, { - "epoch": 4.01, - "learning_rate": 2.323699421965318e-05, - "loss": 4.2509, + "epoch": 8.05, + "learning_rate": 0.00010770491803278687, + "loss": 0.0799, "step": 894 }, { - "epoch": 4.01, - "learning_rate": 2.3219653179190754e-05, - "loss": 4.2892, + "epoch": 8.06, + "learning_rate": 0.00010721311475409835, + "loss": 0.0765, "step": 895 }, { - "epoch": 4.02, - "learning_rate": 2.3202312138728326e-05, - "loss": 4.2278, + "epoch": 8.07, + "learning_rate": 0.00010672131147540983, + "loss": 0.0802, "step": 896 }, { - "epoch": 4.02, - "learning_rate": 2.3184971098265895e-05, - "loss": 4.1769, + "epoch": 8.08, + "learning_rate": 0.00010622950819672129, + "loss": 0.0641, "step": 897 }, { - "epoch": 4.03, - "learning_rate": 2.3167630057803467e-05, - "loss": 4.3061, + "epoch": 8.09, + "learning_rate": 0.00010573770491803278, + "loss": 0.0614, "step": 898 }, { - "epoch": 4.03, - "learning_rate": 2.3150289017341042e-05, - "loss": 4.1789, + "epoch": 8.1, + "learning_rate": 0.00010524590163934425, + "loss": 0.064, "step": 899 }, { - "epoch": 4.04, - "learning_rate": 2.3132947976878614e-05, - "loss": 4.1885, + "epoch": 8.11, + "learning_rate": 0.00010475409836065573, + "loss": 0.0568, "step": 900 }, { - "epoch": 4.04, - "learning_rate": 2.3115606936416186e-05, - "loss": 4.0922, + "epoch": 8.12, + "learning_rate": 0.0001042622950819672, + "loss": 0.0609, "step": 901 }, { - "epoch": 4.04, - "learning_rate": 2.3098265895953755e-05, - "loss": 4.1367, + "epoch": 8.13, + "learning_rate": 0.00010377049180327867, + "loss": 0.0569, "step": 902 }, { - "epoch": 4.05, - "learning_rate": 2.308092485549133e-05, - "loss": 4.0081, + "epoch": 8.13, + "learning_rate": 0.00010327868852459015, + "loss": 0.0584, "step": 903 }, { - "epoch": 4.05, - "learning_rate": 2.3063583815028902e-05, - "loss": 4.0683, + "epoch": 8.14, + "learning_rate": 0.00010278688524590164, + "loss": 0.0536, "step": 904 }, { - "epoch": 4.06, - "learning_rate": 2.3046242774566475e-05, - "loss": 4.0, + "epoch": 8.15, + "learning_rate": 0.0001022950819672131, + "loss": 0.0542, "step": 905 }, { - "epoch": 4.06, - "learning_rate": 2.3028901734104047e-05, - "loss": 4.145, + "epoch": 8.16, + "learning_rate": 0.00010180327868852458, + "loss": 0.0597, "step": 906 }, { - "epoch": 4.07, - "learning_rate": 2.301156069364162e-05, - "loss": 4.0468, + "epoch": 8.17, + "learning_rate": 0.00010131147540983606, + "loss": 0.0608, "step": 907 }, { - "epoch": 4.07, - "learning_rate": 2.299421965317919e-05, - "loss": 3.9863, + "epoch": 8.18, + "learning_rate": 0.00010081967213114753, + "loss": 0.0479, "step": 908 }, { - "epoch": 4.08, - "learning_rate": 2.2976878612716763e-05, - "loss": 4.0802, + "epoch": 8.19, + "learning_rate": 0.000100327868852459, + "loss": 0.0487, "step": 909 }, { - "epoch": 4.08, - "learning_rate": 2.2959537572254335e-05, - "loss": 3.9497, + "epoch": 8.2, + "learning_rate": 9.983606557377049e-05, + "loss": 0.0546, "step": 910 }, { - "epoch": 4.09, - "learning_rate": 2.294219653179191e-05, - "loss": 3.9416, + "epoch": 8.21, + "learning_rate": 9.934426229508195e-05, + "loss": 0.0393, "step": 911 }, { - "epoch": 4.09, - "learning_rate": 2.292485549132948e-05, - "loss": 3.8963, + "epoch": 8.22, + "learning_rate": 9.885245901639344e-05, + "loss": 0.0577, "step": 912 }, { - "epoch": 4.09, - "learning_rate": 2.290751445086705e-05, - "loss": 3.911, + "epoch": 8.22, + "learning_rate": 9.83606557377049e-05, + "loss": 0.0428, "step": 913 }, { - "epoch": 4.1, - "learning_rate": 2.2890173410404623e-05, - "loss": 3.9315, + "epoch": 8.23, + "learning_rate": 9.786885245901639e-05, + "loss": 0.1259, "step": 914 }, { - "epoch": 4.1, - "learning_rate": 2.28728323699422e-05, - "loss": 3.8278, + "epoch": 8.24, + "learning_rate": 9.737704918032786e-05, + "loss": 0.1007, "step": 915 }, { - "epoch": 4.11, - "learning_rate": 2.285549132947977e-05, - "loss": 3.9261, + "epoch": 8.25, + "learning_rate": 9.688524590163933e-05, + "loss": 0.0901, "step": 916 }, { - "epoch": 4.11, - "learning_rate": 2.2838150289017343e-05, - "loss": 3.8533, + "epoch": 8.26, + "learning_rate": 9.639344262295081e-05, + "loss": 0.0894, "step": 917 }, { - "epoch": 4.12, - "learning_rate": 2.282080924855491e-05, - "loss": 3.852, + "epoch": 8.27, + "learning_rate": 9.59016393442623e-05, + "loss": 0.0852, "step": 918 }, { - "epoch": 4.12, - "learning_rate": 2.2803468208092487e-05, - "loss": 3.8898, + "epoch": 8.28, + "learning_rate": 9.540983606557375e-05, + "loss": 0.0744, "step": 919 }, { - "epoch": 4.13, - "learning_rate": 2.278612716763006e-05, - "loss": 3.7262, + "epoch": 8.29, + "learning_rate": 9.491803278688524e-05, + "loss": 0.067, "step": 920 }, { - "epoch": 4.13, - "learning_rate": 2.276878612716763e-05, - "loss": 3.8449, + "epoch": 8.3, + "learning_rate": 9.442622950819672e-05, + "loss": 0.0717, "step": 921 }, { - "epoch": 4.13, - "learning_rate": 2.2751445086705203e-05, - "loss": 3.8773, + "epoch": 8.3, + "learning_rate": 9.393442622950819e-05, + "loss": 0.0604, "step": 922 }, { - "epoch": 4.14, - "learning_rate": 2.2734104046242775e-05, - "loss": 3.8296, + "epoch": 8.31, + "learning_rate": 9.344262295081966e-05, + "loss": 0.0617, "step": 923 }, { - "epoch": 4.14, - "learning_rate": 2.2716763005780347e-05, - "loss": 3.8417, + "epoch": 8.32, + "learning_rate": 9.295081967213114e-05, + "loss": 0.0527, "step": 924 }, { - "epoch": 4.15, - "learning_rate": 2.269942196531792e-05, - "loss": 3.8742, + "epoch": 8.33, + "learning_rate": 9.245901639344261e-05, + "loss": 0.057, "step": 925 }, { - "epoch": 4.15, - "learning_rate": 2.268208092485549e-05, - "loss": 3.8602, + "epoch": 8.34, + "learning_rate": 9.19672131147541e-05, + "loss": 0.0499, "step": 926 }, { - "epoch": 4.16, - "learning_rate": 2.2664739884393067e-05, - "loss": 3.8711, + "epoch": 8.35, + "learning_rate": 9.147540983606556e-05, + "loss": 0.0535, "step": 927 }, { - "epoch": 4.16, - "learning_rate": 2.2647398843930635e-05, - "loss": 3.8169, + "epoch": 8.36, + "learning_rate": 9.098360655737704e-05, + "loss": 0.0546, "step": 928 }, { - "epoch": 4.17, - "learning_rate": 2.2630057803468208e-05, - "loss": 3.8873, + "epoch": 8.37, + "learning_rate": 9.049180327868852e-05, + "loss": 0.0565, "step": 929 }, { - "epoch": 4.17, - "learning_rate": 2.261271676300578e-05, - "loss": 3.8648, + "epoch": 8.38, + "learning_rate": 8.999999999999999e-05, + "loss": 0.0507, "step": 930 }, { - "epoch": 4.17, - "learning_rate": 2.2595375722543355e-05, - "loss": 3.859, + "epoch": 8.39, + "learning_rate": 8.950819672131147e-05, + "loss": 0.0496, "step": 931 }, { - "epoch": 4.18, - "learning_rate": 2.2578034682080927e-05, - "loss": 3.7546, + "epoch": 8.39, + "learning_rate": 8.901639344262294e-05, + "loss": 0.0445, "step": 932 }, { - "epoch": 4.18, - "learning_rate": 2.2560693641618496e-05, - "loss": 3.784, + "epoch": 8.4, + "learning_rate": 8.852459016393441e-05, + "loss": 0.0436, "step": 933 }, { - "epoch": 4.19, - "learning_rate": 2.2543352601156068e-05, - "loss": 3.8621, + "epoch": 8.41, + "learning_rate": 8.80327868852459e-05, + "loss": 0.0449, "step": 934 }, { - "epoch": 4.19, - "learning_rate": 2.2526011560693643e-05, - "loss": 3.7674, + "epoch": 8.42, + "learning_rate": 8.754098360655736e-05, + "loss": 0.0465, "step": 935 }, { - "epoch": 4.2, - "learning_rate": 2.2508670520231215e-05, - "loss": 3.8919, + "epoch": 8.43, + "learning_rate": 8.704918032786885e-05, + "loss": 0.0365, "step": 936 }, { - "epoch": 4.2, - "learning_rate": 2.2491329479768787e-05, - "loss": 3.7846, + "epoch": 8.44, + "learning_rate": 8.655737704918032e-05, + "loss": 0.0361, "step": 937 }, { - "epoch": 4.21, - "learning_rate": 2.2473988439306356e-05, - "loss": 3.8464, + "epoch": 8.45, + "learning_rate": 8.60655737704918e-05, + "loss": 0.0566, "step": 938 }, { - "epoch": 4.21, - "learning_rate": 2.245664739884393e-05, - "loss": 3.7018, + "epoch": 8.46, + "learning_rate": 8.557377049180327e-05, + "loss": 0.1376, "step": 939 }, { - "epoch": 4.22, - "learning_rate": 2.2439306358381504e-05, - "loss": 3.6997, + "epoch": 8.47, + "learning_rate": 8.508196721311476e-05, + "loss": 0.1047, "step": 940 }, { - "epoch": 4.22, - "learning_rate": 2.2421965317919076e-05, - "loss": 3.739, + "epoch": 8.48, + "learning_rate": 8.459016393442622e-05, + "loss": 0.0878, "step": 941 }, { - "epoch": 4.22, - "learning_rate": 2.2404624277456648e-05, - "loss": 3.5986, + "epoch": 8.48, + "learning_rate": 8.40983606557377e-05, + "loss": 0.0905, "step": 942 }, { - "epoch": 4.23, - "learning_rate": 2.238728323699422e-05, - "loss": 4.7915, + "epoch": 8.49, + "learning_rate": 8.360655737704916e-05, + "loss": 0.0747, "step": 943 }, { - "epoch": 4.23, - "learning_rate": 2.2369942196531792e-05, - "loss": 4.7906, + "epoch": 8.5, + "learning_rate": 8.311475409836065e-05, + "loss": 0.0621, "step": 944 }, { - "epoch": 4.24, - "learning_rate": 2.2352601156069364e-05, - "loss": 4.7964, + "epoch": 8.51, + "learning_rate": 8.262295081967212e-05, + "loss": 0.075, "step": 945 }, { - "epoch": 4.24, - "learning_rate": 2.2335260115606936e-05, - "loss": 4.8029, + "epoch": 8.52, + "learning_rate": 8.21311475409836e-05, + "loss": 0.0683, "step": 946 }, { - "epoch": 4.25, - "learning_rate": 2.231791907514451e-05, - "loss": 4.7965, + "epoch": 8.53, + "learning_rate": 8.163934426229507e-05, + "loss": 0.0557, "step": 947 }, { - "epoch": 4.25, - "learning_rate": 2.2300578034682084e-05, - "loss": 4.6215, + "epoch": 8.54, + "learning_rate": 8.114754098360656e-05, + "loss": 0.0588, "step": 948 }, { - "epoch": 4.26, - "learning_rate": 2.2283236994219652e-05, - "loss": 4.527, + "epoch": 8.55, + "learning_rate": 8.065573770491802e-05, + "loss": 0.0597, "step": 949 }, { - "epoch": 4.26, - "learning_rate": 2.2265895953757224e-05, - "loss": 4.5305, + "epoch": 8.56, + "learning_rate": 8.01639344262295e-05, + "loss": 0.0598, "step": 950 }, { - "epoch": 4.26, - "learning_rate": 2.22485549132948e-05, - "loss": 4.4029, + "epoch": 8.57, + "learning_rate": 7.967213114754097e-05, + "loss": 0.0542, "step": 951 }, { - "epoch": 4.27, - "learning_rate": 2.2231213872832372e-05, - "loss": 4.3071, + "epoch": 8.57, + "learning_rate": 7.918032786885245e-05, + "loss": 0.051, "step": 952 }, { - "epoch": 4.27, - "learning_rate": 2.2213872832369944e-05, - "loss": 4.1186, + "epoch": 8.58, + "learning_rate": 7.868852459016393e-05, + "loss": 0.0587, "step": 953 }, { - "epoch": 4.28, - "learning_rate": 2.2196531791907513e-05, - "loss": 4.1175, + "epoch": 8.59, + "learning_rate": 7.81967213114754e-05, + "loss": 0.0526, "step": 954 }, { - "epoch": 4.28, - "learning_rate": 2.2179190751445088e-05, - "loss": 4.216, + "epoch": 8.6, + "learning_rate": 7.770491803278687e-05, + "loss": 0.0481, "step": 955 }, { - "epoch": 4.29, - "learning_rate": 2.216184971098266e-05, - "loss": 4.1222, + "epoch": 8.61, + "learning_rate": 7.721311475409836e-05, + "loss": 0.0511, "step": 956 }, { - "epoch": 4.29, - "learning_rate": 2.2144508670520232e-05, - "loss": 4.064, + "epoch": 8.62, + "learning_rate": 7.672131147540982e-05, + "loss": 0.038, "step": 957 }, { - "epoch": 4.3, - "learning_rate": 2.2127167630057804e-05, - "loss": 3.9848, + "epoch": 8.63, + "learning_rate": 7.622950819672131e-05, + "loss": 0.0412, "step": 958 }, { - "epoch": 4.3, - "learning_rate": 2.2109826589595376e-05, - "loss": 4.0032, + "epoch": 8.64, + "learning_rate": 7.573770491803278e-05, + "loss": 0.052, "step": 959 }, { - "epoch": 4.3, - "learning_rate": 2.209248554913295e-05, - "loss": 3.9009, + "epoch": 8.65, + "learning_rate": 7.524590163934426e-05, + "loss": 0.0426, "step": 960 }, { - "epoch": 4.31, - "learning_rate": 2.207514450867052e-05, - "loss": 4.0057, + "epoch": 8.65, + "learning_rate": 7.475409836065573e-05, + "loss": 0.0514, "step": 961 }, { - "epoch": 4.31, - "learning_rate": 2.2057803468208093e-05, - "loss": 4.0278, + "epoch": 8.66, + "learning_rate": 7.42622950819672e-05, + "loss": 0.0348, "step": 962 }, { - "epoch": 4.32, - "learning_rate": 2.2040462427745668e-05, - "loss": 3.945, + "epoch": 8.67, + "learning_rate": 7.377049180327868e-05, + "loss": 0.0697, "step": 963 }, { - "epoch": 4.32, - "learning_rate": 2.2023121387283237e-05, - "loss": 3.9708, + "epoch": 8.68, + "learning_rate": 7.327868852459015e-05, + "loss": 0.1125, "step": 964 }, { - "epoch": 4.33, - "learning_rate": 2.200578034682081e-05, - "loss": 3.9399, + "epoch": 8.69, + "learning_rate": 7.278688524590164e-05, + "loss": 0.0963, "step": 965 }, { - "epoch": 4.33, - "learning_rate": 2.198843930635838e-05, - "loss": 3.9303, + "epoch": 8.7, + "learning_rate": 7.229508196721311e-05, + "loss": 0.0785, "step": 966 }, { - "epoch": 4.34, - "learning_rate": 2.1971098265895956e-05, - "loss": 3.9469, + "epoch": 8.71, + "learning_rate": 7.180327868852459e-05, + "loss": 0.0818, "step": 967 }, { - "epoch": 4.34, - "learning_rate": 2.195375722543353e-05, - "loss": 3.9381, + "epoch": 8.72, + "learning_rate": 7.131147540983606e-05, + "loss": 0.0798, "step": 968 }, { - "epoch": 4.35, - "learning_rate": 2.1936416184971097e-05, - "loss": 3.9298, + "epoch": 8.73, + "learning_rate": 7.081967213114753e-05, + "loss": 0.064, "step": 969 }, { - "epoch": 4.35, - "learning_rate": 2.191907514450867e-05, - "loss": 3.9481, + "epoch": 8.74, + "learning_rate": 7.032786885245901e-05, + "loss": 0.0586, "step": 970 }, { - "epoch": 4.35, - "learning_rate": 2.1901734104046245e-05, - "loss": 3.824, + "epoch": 8.74, + "learning_rate": 6.983606557377048e-05, + "loss": 0.0594, "step": 971 }, { - "epoch": 4.36, - "learning_rate": 2.1884393063583817e-05, - "loss": 3.8024, + "epoch": 8.75, + "learning_rate": 6.934426229508197e-05, + "loss": 0.0661, "step": 972 }, { - "epoch": 4.36, - "learning_rate": 2.186705202312139e-05, - "loss": 3.9022, + "epoch": 8.76, + "learning_rate": 6.885245901639344e-05, + "loss": 0.0698, "step": 973 }, { - "epoch": 4.37, - "learning_rate": 2.184971098265896e-05, - "loss": 3.7766, + "epoch": 8.77, + "learning_rate": 6.836065573770492e-05, + "loss": 0.0558, "step": 974 }, { - "epoch": 4.37, - "learning_rate": 2.1832369942196533e-05, - "loss": 3.8805, + "epoch": 8.78, + "learning_rate": 6.786885245901639e-05, + "loss": 0.0524, "step": 975 }, { - "epoch": 4.38, - "learning_rate": 2.1815028901734105e-05, - "loss": 3.8413, + "epoch": 8.79, + "learning_rate": 6.737704918032786e-05, + "loss": 0.0562, "step": 976 }, { - "epoch": 4.38, - "learning_rate": 2.1797687861271677e-05, - "loss": 3.8996, + "epoch": 8.8, + "learning_rate": 6.688524590163934e-05, + "loss": 0.0507, "step": 977 }, { - "epoch": 4.39, - "learning_rate": 2.178034682080925e-05, - "loss": 3.8482, + "epoch": 8.81, + "learning_rate": 6.639344262295081e-05, + "loss": 0.0477, "step": 978 }, { - "epoch": 4.39, - "learning_rate": 2.1763005780346824e-05, - "loss": 3.8851, + "epoch": 8.82, + "learning_rate": 6.590163934426228e-05, + "loss": 0.0525, "step": 979 }, { - "epoch": 4.39, - "learning_rate": 2.1745664739884393e-05, - "loss": 3.8244, + "epoch": 8.83, + "learning_rate": 6.540983606557377e-05, + "loss": 0.0456, "step": 980 }, { - "epoch": 4.4, - "learning_rate": 2.1728323699421965e-05, - "loss": 3.8388, + "epoch": 8.83, + "learning_rate": 6.491803278688524e-05, + "loss": 0.0446, "step": 981 }, { - "epoch": 4.4, - "learning_rate": 2.1710982658959537e-05, - "loss": 3.7814, + "epoch": 8.84, + "learning_rate": 6.442622950819672e-05, + "loss": 0.0443, "step": 982 }, { - "epoch": 4.41, - "learning_rate": 2.1693641618497113e-05, - "loss": 3.7747, + "epoch": 8.85, + "learning_rate": 6.393442622950819e-05, + "loss": 0.0344, "step": 983 }, { - "epoch": 4.41, - "learning_rate": 2.1676300578034685e-05, - "loss": 3.8686, + "epoch": 8.86, + "learning_rate": 6.344262295081967e-05, + "loss": 0.0557, "step": 984 }, { - "epoch": 4.42, - "learning_rate": 2.1658959537572253e-05, - "loss": 3.8083, + "epoch": 8.87, + "learning_rate": 6.295081967213114e-05, + "loss": 0.0299, "step": 985 }, { - "epoch": 4.42, - "learning_rate": 2.1641618497109826e-05, - "loss": 3.9, + "epoch": 8.88, + "learning_rate": 6.245901639344261e-05, + "loss": 0.0364, "step": 986 }, { - "epoch": 4.43, - "learning_rate": 2.1624277456647398e-05, - "loss": 3.7535, + "epoch": 8.89, + "learning_rate": 6.19672131147541e-05, + "loss": 0.0459, "step": 987 }, { - "epoch": 4.43, - "learning_rate": 2.1606936416184973e-05, - "loss": 3.8612, + "epoch": 8.9, + "learning_rate": 6.147540983606557e-05, + "loss": 0.0529, "step": 988 }, { - "epoch": 4.43, - "learning_rate": 2.1589595375722545e-05, - "loss": 3.668, + "epoch": 8.91, + "learning_rate": 6.098360655737704e-05, + "loss": 0.1122, "step": 989 }, { - "epoch": 4.44, - "learning_rate": 2.1572254335260114e-05, - "loss": 3.9476, + "epoch": 8.91, + "learning_rate": 6.0491803278688514e-05, + "loss": 0.0814, "step": 990 }, { - "epoch": 4.44, - "learning_rate": 2.1554913294797686e-05, - "loss": 3.8109, + "epoch": 8.92, + "learning_rate": 5.9999999999999995e-05, + "loss": 0.0625, "step": 991 }, { - "epoch": 4.45, - "learning_rate": 2.153757225433526e-05, - "loss": 3.7516, + "epoch": 8.93, + "learning_rate": 5.950819672131147e-05, + "loss": 0.0625, "step": 992 }, { - "epoch": 4.45, - "learning_rate": 2.1520231213872833e-05, - "loss": 4.5727, + "epoch": 8.94, + "learning_rate": 5.901639344262294e-05, + "loss": 0.0568, "step": 993 }, { - "epoch": 4.46, - "learning_rate": 2.1502890173410405e-05, - "loss": 4.653, + "epoch": 8.95, + "learning_rate": 5.8524590163934416e-05, + "loss": 0.0485, "step": 994 }, { - "epoch": 4.46, - "learning_rate": 2.1485549132947974e-05, - "loss": 4.7266, + "epoch": 8.96, + "learning_rate": 5.8032786885245896e-05, + "loss": 0.0434, "step": 995 }, { - "epoch": 4.47, - "learning_rate": 2.146820809248555e-05, - "loss": 4.5639, + "epoch": 8.97, + "learning_rate": 5.754098360655737e-05, + "loss": 0.0434, "step": 996 }, { - "epoch": 4.47, - "learning_rate": 2.145086705202312e-05, - "loss": 4.7074, + "epoch": 8.98, + "learning_rate": 5.7049180327868844e-05, + "loss": 0.0416, "step": 997 }, { - "epoch": 4.48, - "learning_rate": 2.1433526011560694e-05, - "loss": 4.5743, + "epoch": 8.99, + "learning_rate": 5.655737704918032e-05, + "loss": 0.0464, "step": 998 }, { - "epoch": 4.48, - "learning_rate": 2.1416184971098266e-05, - "loss": 4.532, + "epoch": 9.0, + "learning_rate": 5.60655737704918e-05, + "loss": 0.0321, "step": 999 }, { - "epoch": 4.48, - "learning_rate": 2.139884393063584e-05, - "loss": 4.4811, + "epoch": 9.01, + "learning_rate": 5.557377049180327e-05, + "loss": 0.1177, "step": 1000 }, { - "epoch": 4.48, - "eval_loss": 4.471442222595215, - "eval_runtime": 410.0311, - "eval_samples_per_second": 6.443, - "eval_steps_per_second": 0.807, - "eval_wer": 0.9456564855216184, + "epoch": 9.01, + "eval_loss": 0.35239124298095703, + "eval_runtime": 326.5742, + "eval_samples_per_second": 8.09, + "eval_steps_per_second": 0.508, + "eval_wer": 0.10420468068226894, "step": 1000 }, { - "epoch": 4.49, - "learning_rate": 2.138150289017341e-05, - "loss": 4.4202, + "epoch": 9.02, + "learning_rate": 5.5081967213114745e-05, + "loss": 0.0658, "step": 1001 }, { - "epoch": 4.49, - "learning_rate": 2.1364161849710982e-05, - "loss": 4.3316, + "epoch": 9.03, + "learning_rate": 5.4590163934426226e-05, + "loss": 0.0527, "step": 1002 }, { - "epoch": 4.5, - "learning_rate": 2.1346820809248554e-05, - "loss": 4.1793, + "epoch": 9.04, + "learning_rate": 5.40983606557377e-05, + "loss": 0.0639, "step": 1003 }, { - "epoch": 4.5, - "learning_rate": 2.132947976878613e-05, - "loss": 4.1143, + "epoch": 9.04, + "learning_rate": 5.360655737704917e-05, + "loss": 0.0622, "step": 1004 }, { - "epoch": 4.51, - "learning_rate": 2.13121387283237e-05, - "loss": 4.1652, + "epoch": 9.05, + "learning_rate": 5.3114754098360647e-05, + "loss": 0.0566, "step": 1005 }, { - "epoch": 4.51, - "learning_rate": 2.129479768786127e-05, - "loss": 3.9561, + "epoch": 9.06, + "learning_rate": 5.262295081967213e-05, + "loss": 0.0423, "step": 1006 }, { - "epoch": 4.52, - "learning_rate": 2.1277456647398842e-05, - "loss": 4.0482, + "epoch": 9.07, + "learning_rate": 5.21311475409836e-05, + "loss": 0.0496, "step": 1007 }, { - "epoch": 4.52, - "learning_rate": 2.1260115606936418e-05, - "loss": 4.0044, + "epoch": 9.08, + "learning_rate": 5.1639344262295074e-05, + "loss": 0.0438, "step": 1008 }, { - "epoch": 4.52, - "learning_rate": 2.124277456647399e-05, - "loss": 4.0244, + "epoch": 9.09, + "learning_rate": 5.114754098360655e-05, + "loss": 0.04, "step": 1009 }, { - "epoch": 4.53, - "learning_rate": 2.1225433526011562e-05, - "loss": 3.9018, + "epoch": 9.1, + "learning_rate": 5.065573770491803e-05, + "loss": 0.0399, "step": 1010 }, { - "epoch": 4.53, - "learning_rate": 2.120809248554913e-05, - "loss": 3.9488, + "epoch": 9.11, + "learning_rate": 5.01639344262295e-05, + "loss": 0.0494, "step": 1011 }, { - "epoch": 4.54, - "learning_rate": 2.1190751445086706e-05, - "loss": 3.9324, + "epoch": 9.12, + "learning_rate": 4.9672131147540976e-05, + "loss": 0.0367, "step": 1012 }, { - "epoch": 4.54, - "learning_rate": 2.1173410404624278e-05, - "loss": 4.0152, + "epoch": 9.13, + "learning_rate": 4.918032786885245e-05, + "loss": 0.0387, "step": 1013 }, { - "epoch": 4.55, - "learning_rate": 2.115606936416185e-05, - "loss": 3.9643, + "epoch": 9.13, + "learning_rate": 4.868852459016393e-05, + "loss": 0.0372, "step": 1014 }, { - "epoch": 4.55, - "learning_rate": 2.1138728323699422e-05, - "loss": 3.8474, + "epoch": 9.14, + "learning_rate": 4.8196721311475404e-05, + "loss": 0.0334, "step": 1015 }, { - "epoch": 4.56, - "learning_rate": 2.1121387283236994e-05, - "loss": 3.91, + "epoch": 9.15, + "learning_rate": 4.770491803278688e-05, + "loss": 0.0286, "step": 1016 }, { - "epoch": 4.56, - "learning_rate": 2.1104046242774566e-05, - "loss": 3.9484, + "epoch": 9.16, + "learning_rate": 4.721311475409836e-05, + "loss": 0.0352, "step": 1017 }, { - "epoch": 4.57, - "learning_rate": 2.108670520231214e-05, - "loss": 3.9356, + "epoch": 9.17, + "learning_rate": 4.672131147540983e-05, + "loss": 0.0299, "step": 1018 }, { - "epoch": 4.57, - "learning_rate": 2.106936416184971e-05, - "loss": 3.8633, + "epoch": 9.18, + "learning_rate": 4.6229508196721305e-05, + "loss": 0.0288, "step": 1019 }, { - "epoch": 4.57, - "learning_rate": 2.1052023121387286e-05, - "loss": 3.8511, + "epoch": 9.19, + "learning_rate": 4.573770491803278e-05, + "loss": 0.0322, "step": 1020 }, { - "epoch": 4.58, - "learning_rate": 2.1034682080924855e-05, - "loss": 3.8245, + "epoch": 9.2, + "learning_rate": 4.524590163934426e-05, + "loss": 0.0304, "step": 1021 }, { - "epoch": 4.58, - "learning_rate": 2.1017341040462427e-05, - "loss": 3.8697, + "epoch": 9.21, + "learning_rate": 4.475409836065573e-05, + "loss": 0.0386, "step": 1022 }, { - "epoch": 4.59, - "learning_rate": 2.1e-05, - "loss": 3.8685, + "epoch": 9.22, + "learning_rate": 4.4262295081967207e-05, + "loss": 0.0332, "step": 1023 }, { - "epoch": 4.59, - "learning_rate": 2.0982658959537574e-05, - "loss": 3.7468, + "epoch": 9.22, + "learning_rate": 4.377049180327868e-05, + "loss": 0.0501, "step": 1024 }, { - "epoch": 4.6, - "learning_rate": 2.0965317919075146e-05, - "loss": 3.7574, + "epoch": 9.23, + "learning_rate": 4.327868852459016e-05, + "loss": 0.0687, "step": 1025 }, { - "epoch": 4.6, - "learning_rate": 2.094797687861272e-05, - "loss": 3.8157, + "epoch": 9.24, + "learning_rate": 4.2786885245901634e-05, + "loss": 0.071, "step": 1026 }, { - "epoch": 4.61, - "learning_rate": 2.0930635838150287e-05, - "loss": 3.8817, + "epoch": 9.25, + "learning_rate": 4.229508196721311e-05, + "loss": 0.0587, "step": 1027 }, { - "epoch": 4.61, - "learning_rate": 2.0913294797687863e-05, - "loss": 3.761, + "epoch": 9.26, + "learning_rate": 4.180327868852458e-05, + "loss": 0.0558, "step": 1028 }, { - "epoch": 4.61, - "learning_rate": 2.0895953757225435e-05, - "loss": 3.8213, + "epoch": 9.27, + "learning_rate": 4.131147540983606e-05, + "loss": 0.0559, "step": 1029 }, { - "epoch": 4.62, - "learning_rate": 2.0878612716763007e-05, - "loss": 3.7712, + "epoch": 9.28, + "learning_rate": 4.0819672131147536e-05, + "loss": 0.0525, "step": 1030 }, { - "epoch": 4.62, - "learning_rate": 2.086127167630058e-05, - "loss": 3.8045, + "epoch": 9.29, + "learning_rate": 4.032786885245901e-05, + "loss": 0.0428, "step": 1031 }, { - "epoch": 4.63, - "learning_rate": 2.084393063583815e-05, - "loss": 3.8181, + "epoch": 9.3, + "learning_rate": 3.983606557377048e-05, + "loss": 0.05, "step": 1032 }, { - "epoch": 4.63, - "learning_rate": 2.0826589595375723e-05, - "loss": 3.915, + "epoch": 9.3, + "learning_rate": 3.9344262295081964e-05, + "loss": 0.043, "step": 1033 }, { - "epoch": 4.64, - "learning_rate": 2.0809248554913295e-05, - "loss": 3.7518, + "epoch": 9.31, + "learning_rate": 3.885245901639344e-05, + "loss": 0.0455, "step": 1034 }, { - "epoch": 4.64, - "learning_rate": 2.0791907514450867e-05, - "loss": 3.8364, + "epoch": 9.32, + "learning_rate": 3.836065573770491e-05, + "loss": 0.0381, "step": 1035 }, { - "epoch": 4.65, - "learning_rate": 2.0774566473988442e-05, - "loss": 3.9519, + "epoch": 9.33, + "learning_rate": 3.786885245901639e-05, + "loss": 0.0315, "step": 1036 }, { - "epoch": 4.65, - "learning_rate": 2.075722543352601e-05, - "loss": 3.8736, + "epoch": 9.34, + "learning_rate": 3.7377049180327865e-05, + "loss": 0.0366, "step": 1037 }, { - "epoch": 4.65, - "learning_rate": 2.0739884393063583e-05, - "loss": 3.6868, + "epoch": 9.35, + "learning_rate": 3.688524590163934e-05, + "loss": 0.0286, "step": 1038 }, { - "epoch": 4.66, - "learning_rate": 2.0722543352601155e-05, - "loss": 3.8773, + "epoch": 9.36, + "learning_rate": 3.639344262295082e-05, + "loss": 0.0401, "step": 1039 }, { - "epoch": 4.66, - "learning_rate": 2.070520231213873e-05, - "loss": 3.6867, + "epoch": 9.37, + "learning_rate": 3.590163934426229e-05, + "loss": 0.0402, "step": 1040 }, { - "epoch": 4.67, - "learning_rate": 2.0687861271676303e-05, - "loss": 3.8537, + "epoch": 9.38, + "learning_rate": 3.540983606557377e-05, + "loss": 0.0349, "step": 1041 }, { - "epoch": 4.67, - "learning_rate": 2.067052023121387e-05, - "loss": 3.77, + "epoch": 9.39, + "learning_rate": 3.491803278688524e-05, + "loss": 0.035, "step": 1042 }, { - "epoch": 4.68, - "learning_rate": 2.0653179190751444e-05, - "loss": 4.5353, + "epoch": 9.39, + "learning_rate": 3.442622950819672e-05, + "loss": 0.0276, "step": 1043 }, { - "epoch": 4.68, - "learning_rate": 2.063583815028902e-05, - "loss": 4.4984, + "epoch": 9.4, + "learning_rate": 3.3934426229508194e-05, + "loss": 0.0299, "step": 1044 }, { - "epoch": 4.69, - "learning_rate": 2.061849710982659e-05, - "loss": 4.5213, + "epoch": 9.41, + "learning_rate": 3.344262295081967e-05, + "loss": 0.0323, "step": 1045 }, { - "epoch": 4.69, - "learning_rate": 2.0601156069364163e-05, - "loss": 4.5536, + "epoch": 9.42, + "learning_rate": 3.295081967213114e-05, + "loss": 0.027, "step": 1046 }, { - "epoch": 4.7, - "learning_rate": 2.0583815028901732e-05, - "loss": 4.5843, + "epoch": 9.43, + "learning_rate": 3.245901639344262e-05, + "loss": 0.0245, "step": 1047 }, { - "epoch": 4.7, - "learning_rate": 2.0566473988439307e-05, - "loss": 4.5179, + "epoch": 9.44, + "learning_rate": 3.1967213114754096e-05, + "loss": 0.0424, "step": 1048 }, { - "epoch": 4.7, - "learning_rate": 2.054913294797688e-05, - "loss": 4.4883, + "epoch": 9.45, + "learning_rate": 3.147540983606557e-05, + "loss": 0.0517, "step": 1049 }, { - "epoch": 4.71, - "learning_rate": 2.053179190751445e-05, - "loss": 4.4311, + "epoch": 9.46, + "learning_rate": 3.098360655737705e-05, + "loss": 0.081, "step": 1050 }, { - "epoch": 4.71, - "learning_rate": 2.0514450867052024e-05, - "loss": 4.2903, + "epoch": 9.47, + "learning_rate": 3.049180327868852e-05, + "loss": 0.0621, "step": 1051 }, { - "epoch": 4.72, - "learning_rate": 2.04971098265896e-05, - "loss": 4.2673, + "epoch": 9.48, + "learning_rate": 2.9999999999999997e-05, + "loss": 0.054, "step": 1052 }, { - "epoch": 4.72, - "learning_rate": 2.0479768786127168e-05, - "loss": 4.2055, + "epoch": 9.48, + "learning_rate": 2.950819672131147e-05, + "loss": 0.0524, "step": 1053 }, { - "epoch": 4.73, - "learning_rate": 2.046242774566474e-05, - "loss": 4.1595, + "epoch": 9.49, + "learning_rate": 2.9016393442622948e-05, + "loss": 0.0471, "step": 1054 }, { - "epoch": 4.73, - "learning_rate": 2.0445086705202312e-05, - "loss": 4.0374, + "epoch": 9.5, + "learning_rate": 2.8524590163934422e-05, + "loss": 0.048, "step": 1055 }, { - "epoch": 4.74, - "learning_rate": 2.0427745664739887e-05, - "loss": 3.9281, + "epoch": 9.51, + "learning_rate": 2.80327868852459e-05, + "loss": 0.0498, "step": 1056 }, { - "epoch": 4.74, - "learning_rate": 2.041040462427746e-05, - "loss": 4.02, + "epoch": 9.52, + "learning_rate": 2.7540983606557373e-05, + "loss": 0.0381, "step": 1057 }, { - "epoch": 4.74, - "learning_rate": 2.0393063583815028e-05, - "loss": 3.9746, + "epoch": 9.53, + "learning_rate": 2.704918032786885e-05, + "loss": 0.0503, "step": 1058 }, { - "epoch": 4.75, - "learning_rate": 2.03757225433526e-05, - "loss": 4.0699, + "epoch": 9.54, + "learning_rate": 2.6557377049180323e-05, + "loss": 0.0317, "step": 1059 }, { - "epoch": 4.75, - "learning_rate": 2.0358381502890176e-05, - "loss": 4.0389, + "epoch": 9.55, + "learning_rate": 2.60655737704918e-05, + "loss": 0.0365, "step": 1060 }, { - "epoch": 4.76, - "learning_rate": 2.0341040462427748e-05, - "loss": 3.9078, + "epoch": 9.56, + "learning_rate": 2.5573770491803274e-05, + "loss": 0.0385, "step": 1061 }, { - "epoch": 4.76, - "learning_rate": 2.032369942196532e-05, - "loss": 3.9825, + "epoch": 9.57, + "learning_rate": 2.508196721311475e-05, + "loss": 0.0298, "step": 1062 }, { - "epoch": 4.77, - "learning_rate": 2.030635838150289e-05, - "loss": 3.9332, + "epoch": 9.57, + "learning_rate": 2.4590163934426225e-05, + "loss": 0.0325, "step": 1063 }, { - "epoch": 4.77, - "learning_rate": 2.0289017341040464e-05, - "loss": 3.9562, + "epoch": 9.58, + "learning_rate": 2.4098360655737702e-05, + "loss": 0.0314, "step": 1064 }, { - "epoch": 4.78, - "learning_rate": 2.0271676300578036e-05, - "loss": 3.8246, + "epoch": 9.59, + "learning_rate": 2.360655737704918e-05, + "loss": 0.0316, "step": 1065 }, { - "epoch": 4.78, - "learning_rate": 2.0254335260115608e-05, - "loss": 3.9159, + "epoch": 9.6, + "learning_rate": 2.3114754098360653e-05, + "loss": 0.0467, "step": 1066 }, { - "epoch": 4.78, - "learning_rate": 2.023699421965318e-05, - "loss": 3.912, + "epoch": 9.61, + "learning_rate": 2.262295081967213e-05, + "loss": 0.0386, "step": 1067 }, { - "epoch": 4.79, - "learning_rate": 2.0219653179190752e-05, - "loss": 3.9218, + "epoch": 9.62, + "learning_rate": 2.2131147540983603e-05, + "loss": 0.0289, "step": 1068 }, { - "epoch": 4.79, - "learning_rate": 2.0202312138728324e-05, - "loss": 3.9524, + "epoch": 9.63, + "learning_rate": 2.163934426229508e-05, + "loss": 0.0298, "step": 1069 }, { - "epoch": 4.8, - "learning_rate": 2.0184971098265896e-05, - "loss": 3.8963, + "epoch": 9.64, + "learning_rate": 2.1147540983606554e-05, + "loss": 0.0383, "step": 1070 }, { - "epoch": 4.8, - "learning_rate": 2.0167630057803468e-05, - "loss": 3.9146, + "epoch": 9.65, + "learning_rate": 2.065573770491803e-05, + "loss": 0.0362, "step": 1071 }, { - "epoch": 4.81, - "learning_rate": 2.0150289017341044e-05, - "loss": 3.9796, + "epoch": 9.65, + "learning_rate": 2.0163934426229505e-05, + "loss": 0.0308, "step": 1072 }, { - "epoch": 4.81, - "learning_rate": 2.0132947976878612e-05, - "loss": 3.9317, + "epoch": 9.66, + "learning_rate": 1.9672131147540982e-05, + "loss": 0.0252, "step": 1073 }, { - "epoch": 4.82, - "learning_rate": 2.0115606936416184e-05, - "loss": 3.7378, + "epoch": 9.67, + "learning_rate": 1.9180327868852456e-05, + "loss": 0.0467, "step": 1074 }, { - "epoch": 4.82, - "learning_rate": 2.0098265895953757e-05, - "loss": 3.7175, + "epoch": 9.68, + "learning_rate": 1.8688524590163933e-05, + "loss": 0.07, "step": 1075 }, { - "epoch": 4.83, - "learning_rate": 2.0080924855491332e-05, - "loss": 3.8474, + "epoch": 9.69, + "learning_rate": 1.819672131147541e-05, + "loss": 0.0574, "step": 1076 }, { - "epoch": 4.83, - "learning_rate": 2.0063583815028904e-05, - "loss": 3.9127, + "epoch": 9.7, + "learning_rate": 1.7704918032786883e-05, + "loss": 0.0483, "step": 1077 }, { - "epoch": 4.83, - "learning_rate": 2.0046242774566476e-05, - "loss": 3.7991, + "epoch": 9.71, + "learning_rate": 1.721311475409836e-05, + "loss": 0.0526, "step": 1078 }, { - "epoch": 4.84, - "learning_rate": 2.0028901734104045e-05, - "loss": 3.7866, + "epoch": 9.72, + "learning_rate": 1.6721311475409834e-05, + "loss": 0.0521, "step": 1079 }, { - "epoch": 4.84, - "learning_rate": 2.001156069364162e-05, - "loss": 3.8647, + "epoch": 9.73, + "learning_rate": 1.622950819672131e-05, + "loss": 0.0498, "step": 1080 }, { - "epoch": 4.85, - "learning_rate": 1.9994219653179192e-05, - "loss": 3.7537, + "epoch": 9.74, + "learning_rate": 1.5737704918032785e-05, + "loss": 0.0438, "step": 1081 }, { - "epoch": 4.85, - "learning_rate": 1.9976878612716764e-05, - "loss": 3.8178, + "epoch": 9.74, + "learning_rate": 1.524590163934426e-05, + "loss": 0.0459, "step": 1082 }, { - "epoch": 4.86, - "learning_rate": 1.9959537572254336e-05, - "loss": 3.8901, + "epoch": 9.75, + "learning_rate": 1.4754098360655736e-05, + "loss": 0.0403, "step": 1083 }, { - "epoch": 4.86, - "learning_rate": 1.9942196531791905e-05, - "loss": 3.8224, + "epoch": 9.76, + "learning_rate": 1.4262295081967211e-05, + "loss": 0.0334, "step": 1084 }, { - "epoch": 4.87, - "learning_rate": 1.992485549132948e-05, - "loss": 3.8457, + "epoch": 9.77, + "learning_rate": 1.3770491803278686e-05, + "loss": 0.0457, "step": 1085 }, { - "epoch": 4.87, - "learning_rate": 1.9907514450867053e-05, - "loss": 3.8248, + "epoch": 9.78, + "learning_rate": 1.3278688524590162e-05, + "loss": 0.0393, "step": 1086 }, { - "epoch": 4.87, - "learning_rate": 1.9890173410404625e-05, - "loss": 3.8016, + "epoch": 9.79, + "learning_rate": 1.2786885245901637e-05, + "loss": 0.0364, "step": 1087 }, { - "epoch": 4.88, - "learning_rate": 1.9872832369942197e-05, - "loss": 3.8961, + "epoch": 9.8, + "learning_rate": 1.2295081967213112e-05, + "loss": 0.0329, "step": 1088 }, { - "epoch": 4.88, - "learning_rate": 1.985549132947977e-05, - "loss": 3.8621, + "epoch": 9.81, + "learning_rate": 1.180327868852459e-05, + "loss": 0.042, "step": 1089 }, { - "epoch": 4.89, - "learning_rate": 1.983815028901734e-05, - "loss": 3.7512, + "epoch": 9.82, + "learning_rate": 1.1311475409836065e-05, + "loss": 0.0375, "step": 1090 }, { - "epoch": 4.89, - "learning_rate": 1.9820809248554913e-05, - "loss": 3.8534, + "epoch": 9.83, + "learning_rate": 1.081967213114754e-05, + "loss": 0.0371, "step": 1091 }, { - "epoch": 4.9, - "learning_rate": 1.9803468208092485e-05, - "loss": 3.7219, + "epoch": 9.83, + "learning_rate": 1.0327868852459016e-05, + "loss": 0.0285, "step": 1092 }, { - "epoch": 4.9, - "learning_rate": 1.978612716763006e-05, - "loss": 4.4019, + "epoch": 9.84, + "learning_rate": 9.836065573770491e-06, + "loss": 0.0278, "step": 1093 }, { - "epoch": 4.91, - "learning_rate": 1.976878612716763e-05, - "loss": 4.4389, + "epoch": 9.85, + "learning_rate": 9.344262295081966e-06, + "loss": 0.0275, "step": 1094 }, { - "epoch": 4.91, - "learning_rate": 1.97514450867052e-05, - "loss": 4.4819, + "epoch": 9.86, + "learning_rate": 8.852459016393442e-06, + "loss": 0.0324, "step": 1095 }, { - "epoch": 4.91, - "learning_rate": 1.9734104046242773e-05, - "loss": 4.436, + "epoch": 9.87, + "learning_rate": 8.360655737704917e-06, + "loss": 0.0287, "step": 1096 }, { - "epoch": 4.92, - "learning_rate": 1.971676300578035e-05, - "loss": 4.3388, + "epoch": 9.88, + "learning_rate": 7.868852459016392e-06, + "loss": 0.0307, "step": 1097 }, { - "epoch": 4.92, - "learning_rate": 1.969942196531792e-05, - "loss": 4.2991, + "epoch": 9.89, + "learning_rate": 7.377049180327868e-06, + "loss": 0.0327, "step": 1098 }, { - "epoch": 4.93, - "learning_rate": 1.968208092485549e-05, - "loss": 4.162, + "epoch": 9.9, + "learning_rate": 6.885245901639343e-06, + "loss": 0.0448, "step": 1099 }, - { - "epoch": 4.93, - "learning_rate": 1.966473988439306e-05, - "loss": 4.1708, - "step": 1100 - }, - { - "epoch": 4.94, - "learning_rate": 1.9647398843930637e-05, - "loss": 4.1403, - "step": 1101 - }, - { - "epoch": 4.94, - "learning_rate": 1.963005780346821e-05, - "loss": 3.989, - "step": 1102 - }, - { - "epoch": 4.95, - "learning_rate": 1.961271676300578e-05, - "loss": 4.0543, - "step": 1103 - }, - { - "epoch": 4.95, - "learning_rate": 1.9595375722543353e-05, - "loss": 4.0557, - "step": 1104 - }, - { - "epoch": 4.96, - "learning_rate": 1.9578034682080925e-05, - "loss": 3.939, - "step": 1105 - }, - { - "epoch": 4.96, - "learning_rate": 1.9560693641618497e-05, - "loss": 3.8684, - "step": 1106 - }, - { - "epoch": 4.96, - "learning_rate": 1.954335260115607e-05, - "loss": 3.8731, - "step": 1107 - }, - { - "epoch": 4.97, - "learning_rate": 1.952601156069364e-05, - "loss": 3.9208, - "step": 1108 - }, - { - "epoch": 4.97, - "learning_rate": 1.9508670520231217e-05, - "loss": 3.8534, - "step": 1109 - }, - { - "epoch": 4.98, - "learning_rate": 1.9491329479768786e-05, - "loss": 3.8254, - "step": 1110 - }, - { - "epoch": 4.98, - "learning_rate": 1.9473988439306358e-05, - "loss": 3.8774, - "step": 1111 - }, - { - "epoch": 4.99, - "learning_rate": 1.945664739884393e-05, - "loss": 3.832, - "step": 1112 - }, - { - "epoch": 4.99, - "learning_rate": 1.9439306358381505e-05, - "loss": 3.8714, - "step": 1113 - }, - { - "epoch": 5.0, - "learning_rate": 1.9421965317919077e-05, - "loss": 3.9393, - "step": 1114 - }, - { - "epoch": 5.0, - "learning_rate": 1.9404624277456646e-05, - "loss": 4.1588, - "step": 1115 - }, - { - "epoch": 5.0, - "learning_rate": 1.9387283236994218e-05, - "loss": 4.2121, - "step": 1116 - }, - { - "epoch": 5.01, - "learning_rate": 1.9369942196531794e-05, - "loss": 4.1375, - "step": 1117 - }, - { - "epoch": 5.01, - "learning_rate": 1.9352601156069366e-05, - "loss": 4.1691, - "step": 1118 - }, - { - "epoch": 5.02, - "learning_rate": 1.9335260115606938e-05, - "loss": 4.1969, - "step": 1119 - }, - { - "epoch": 5.02, - "learning_rate": 1.9317919075144506e-05, - "loss": 4.1504, - "step": 1120 - }, - { - "epoch": 5.03, - "learning_rate": 1.9300578034682082e-05, - "loss": 4.0819, - "step": 1121 - }, - { - "epoch": 5.03, - "learning_rate": 1.9283236994219654e-05, - "loss": 4.0956, - "step": 1122 - }, - { - "epoch": 5.04, - "learning_rate": 1.9265895953757226e-05, - "loss": 4.0521, - "step": 1123 - }, - { - "epoch": 5.04, - "learning_rate": 1.9248554913294798e-05, - "loss": 4.1007, - "step": 1124 - }, - { - "epoch": 5.04, - "learning_rate": 1.923121387283237e-05, - "loss": 4.0153, - "step": 1125 - }, - { - "epoch": 5.05, - "learning_rate": 1.9213872832369942e-05, - "loss": 3.9042, - "step": 1126 - }, - { - "epoch": 5.05, - "learning_rate": 1.9196531791907514e-05, - "loss": 3.9075, - "step": 1127 - }, - { - "epoch": 5.06, - "learning_rate": 1.9179190751445086e-05, - "loss": 3.9057, - "step": 1128 - }, - { - "epoch": 5.06, - "learning_rate": 1.9161849710982662e-05, - "loss": 3.9364, - "step": 1129 - }, - { - "epoch": 5.07, - "learning_rate": 1.9144508670520234e-05, - "loss": 3.8627, - "step": 1130 - }, - { - "epoch": 5.07, - "learning_rate": 1.9127167630057802e-05, - "loss": 3.8161, - "step": 1131 - }, - { - "epoch": 5.08, - "learning_rate": 1.9109826589595375e-05, - "loss": 3.8231, - "step": 1132 - }, - { - "epoch": 5.08, - "learning_rate": 1.909248554913295e-05, - "loss": 3.7929, - "step": 1133 - }, - { - "epoch": 5.09, - "learning_rate": 1.9075144508670522e-05, - "loss": 3.8117, - "step": 1134 - }, - { - "epoch": 5.09, - "learning_rate": 1.9057803468208094e-05, - "loss": 3.772, - "step": 1135 - }, - { - "epoch": 5.09, - "learning_rate": 1.9040462427745663e-05, - "loss": 3.7204, - "step": 1136 - }, - { - "epoch": 5.1, - "learning_rate": 1.9023121387283238e-05, - "loss": 3.7641, - "step": 1137 - }, - { - "epoch": 5.1, - "learning_rate": 1.900578034682081e-05, - "loss": 3.8326, - "step": 1138 - }, - { - "epoch": 5.11, - "learning_rate": 1.8988439306358382e-05, - "loss": 3.7707, - "step": 1139 - }, - { - "epoch": 5.11, - "learning_rate": 1.8971098265895954e-05, - "loss": 3.7888, - "step": 1140 - }, - { - "epoch": 5.12, - "learning_rate": 1.8953757225433527e-05, - "loss": 3.7731, - "step": 1141 - }, - { - "epoch": 5.12, - "learning_rate": 1.89364161849711e-05, - "loss": 3.7375, - "step": 1142 - }, - { - "epoch": 5.13, - "learning_rate": 1.891907514450867e-05, - "loss": 3.687, - "step": 1143 - }, - { - "epoch": 5.13, - "learning_rate": 1.8901734104046243e-05, - "loss": 3.7132, - "step": 1144 - }, - { - "epoch": 5.13, - "learning_rate": 1.8884393063583818e-05, - "loss": 3.6417, - "step": 1145 - }, - { - "epoch": 5.14, - "learning_rate": 1.8867052023121387e-05, - "loss": 3.7163, - "step": 1146 - }, - { - "epoch": 5.14, - "learning_rate": 1.884971098265896e-05, - "loss": 3.5894, - "step": 1147 - }, - { - "epoch": 5.15, - "learning_rate": 1.883236994219653e-05, - "loss": 3.7036, - "step": 1148 - }, - { - "epoch": 5.15, - "learning_rate": 1.8815028901734106e-05, - "loss": 3.5606, - "step": 1149 - }, - { - "epoch": 5.16, - "learning_rate": 1.879768786127168e-05, - "loss": 3.6319, - "step": 1150 - }, - { - "epoch": 5.16, - "learning_rate": 1.8780346820809247e-05, - "loss": 3.6076, - "step": 1151 - }, - { - "epoch": 5.17, - "learning_rate": 1.876300578034682e-05, - "loss": 3.6795, - "step": 1152 - }, - { - "epoch": 5.17, - "learning_rate": 1.8745664739884395e-05, - "loss": 3.6955, - "step": 1153 - }, - { - "epoch": 5.17, - "learning_rate": 1.8728323699421967e-05, - "loss": 3.6129, - "step": 1154 - }, - { - "epoch": 5.18, - "learning_rate": 1.871098265895954e-05, - "loss": 3.5779, - "step": 1155 - }, - { - "epoch": 5.18, - "learning_rate": 1.869364161849711e-05, - "loss": 3.6706, - "step": 1156 - }, - { - "epoch": 5.19, - "learning_rate": 1.8676300578034683e-05, - "loss": 3.5448, - "step": 1157 - }, - { - "epoch": 5.19, - "learning_rate": 1.8658959537572255e-05, - "loss": 3.684, - "step": 1158 - }, - { - "epoch": 5.2, - "learning_rate": 1.8641618497109827e-05, - "loss": 3.5315, - "step": 1159 - }, - { - "epoch": 5.2, - "learning_rate": 1.86242774566474e-05, - "loss": 3.7197, - "step": 1160 - }, - { - "epoch": 5.21, - "learning_rate": 1.8606936416184975e-05, - "loss": 3.6238, - "step": 1161 - }, - { - "epoch": 5.21, - "learning_rate": 1.8589595375722543e-05, - "loss": 3.544, - "step": 1162 - }, - { - "epoch": 5.22, - "learning_rate": 1.8572254335260115e-05, - "loss": 3.6976, - "step": 1163 - }, - { - "epoch": 5.22, - "learning_rate": 1.8554913294797688e-05, - "loss": 3.6154, - "step": 1164 - }, - { - "epoch": 5.22, - "learning_rate": 1.8537572254335263e-05, - "loss": 3.4891, - "step": 1165 - }, - { - "epoch": 5.23, - "learning_rate": 1.8520231213872835e-05, - "loss": 4.4885, - "step": 1166 - }, - { - "epoch": 5.23, - "learning_rate": 1.8502890173410404e-05, - "loss": 4.5534, - "step": 1167 - }, - { - "epoch": 5.24, - "learning_rate": 1.8485549132947976e-05, - "loss": 4.6256, - "step": 1168 - }, - { - "epoch": 5.24, - "learning_rate": 1.846820809248555e-05, - "loss": 4.6323, - "step": 1169 - }, - { - "epoch": 5.25, - "learning_rate": 1.8450867052023123e-05, - "loss": 4.4773, - "step": 1170 - }, - { - "epoch": 5.25, - "learning_rate": 1.8433526011560695e-05, - "loss": 4.5565, - "step": 1171 - }, - { - "epoch": 5.26, - "learning_rate": 1.8416184971098264e-05, - "loss": 4.4722, - "step": 1172 - }, - { - "epoch": 5.26, - "learning_rate": 1.839884393063584e-05, - "loss": 4.4305, - "step": 1173 - }, - { - "epoch": 5.26, - "learning_rate": 1.838150289017341e-05, - "loss": 4.2747, - "step": 1174 - }, - { - "epoch": 5.27, - "learning_rate": 1.8364161849710984e-05, - "loss": 4.2715, - "step": 1175 - }, - { - "epoch": 5.27, - "learning_rate": 1.8346820809248556e-05, - "loss": 4.0829, - "step": 1176 - }, - { - "epoch": 5.28, - "learning_rate": 1.8329479768786124e-05, - "loss": 4.0985, - "step": 1177 - }, - { - "epoch": 5.28, - "learning_rate": 1.83121387283237e-05, - "loss": 4.1009, - "step": 1178 - }, - { - "epoch": 5.29, - "learning_rate": 1.8294797687861272e-05, - "loss": 3.9659, - "step": 1179 - }, - { - "epoch": 5.29, - "learning_rate": 1.8277456647398844e-05, - "loss": 3.8449, - "step": 1180 - }, - { - "epoch": 5.3, - "learning_rate": 1.8260115606936416e-05, - "loss": 3.8618, - "step": 1181 - }, - { - "epoch": 5.3, - "learning_rate": 1.8242774566473988e-05, - "loss": 3.9116, - "step": 1182 - }, - { - "epoch": 5.3, - "learning_rate": 1.822543352601156e-05, - "loss": 3.8054, - "step": 1183 - }, - { - "epoch": 5.31, - "learning_rate": 1.8208092485549132e-05, - "loss": 3.7647, - "step": 1184 - }, - { - "epoch": 5.31, - "learning_rate": 1.8190751445086704e-05, - "loss": 3.8045, - "step": 1185 - }, - { - "epoch": 5.32, - "learning_rate": 1.817341040462428e-05, - "loss": 3.8661, - "step": 1186 - }, - { - "epoch": 5.32, - "learning_rate": 1.8156069364161852e-05, - "loss": 3.8399, - "step": 1187 - }, - { - "epoch": 5.33, - "learning_rate": 1.813872832369942e-05, - "loss": 3.7362, - "step": 1188 - }, - { - "epoch": 5.33, - "learning_rate": 1.8121387283236993e-05, - "loss": 3.7025, - "step": 1189 - }, - { - "epoch": 5.34, - "learning_rate": 1.8104046242774568e-05, - "loss": 3.7664, - "step": 1190 - }, - { - "epoch": 5.34, - "learning_rate": 1.808670520231214e-05, - "loss": 3.7872, - "step": 1191 - }, - { - "epoch": 5.35, - "learning_rate": 1.8069364161849712e-05, - "loss": 3.7064, - "step": 1192 - }, - { - "epoch": 5.35, - "learning_rate": 1.805202312138728e-05, - "loss": 3.7757, - "step": 1193 - }, - { - "epoch": 5.35, - "learning_rate": 1.8034682080924856e-05, - "loss": 3.6683, - "step": 1194 - }, - { - "epoch": 5.36, - "learning_rate": 1.801734104046243e-05, - "loss": 3.8014, - "step": 1195 - }, - { - "epoch": 5.36, - "learning_rate": 1.8e-05, - "loss": 3.7297, - "step": 1196 - }, - { - "epoch": 5.37, - "learning_rate": 1.7982658959537573e-05, - "loss": 3.6839, - "step": 1197 - }, - { - "epoch": 5.37, - "learning_rate": 1.7965317919075145e-05, - "loss": 3.6291, - "step": 1198 - }, - { - "epoch": 5.38, - "learning_rate": 1.7947976878612717e-05, - "loss": 3.6654, - "step": 1199 - }, - { - "epoch": 5.38, - "learning_rate": 1.793063583815029e-05, - "loss": 3.723, - "step": 1200 - }, - { - "epoch": 5.39, - "learning_rate": 1.791329479768786e-05, - "loss": 3.6468, - "step": 1201 - }, - { - "epoch": 5.39, - "learning_rate": 1.7895953757225436e-05, - "loss": 3.6865, - "step": 1202 - }, - { - "epoch": 5.39, - "learning_rate": 1.7878612716763005e-05, - "loss": 3.6262, - "step": 1203 - }, - { - "epoch": 5.4, - "learning_rate": 1.7861271676300577e-05, - "loss": 3.7134, - "step": 1204 - }, - { - "epoch": 5.4, - "learning_rate": 1.784393063583815e-05, - "loss": 3.5908, - "step": 1205 - }, - { - "epoch": 5.41, - "learning_rate": 1.7826589595375725e-05, - "loss": 3.7011, - "step": 1206 - }, - { - "epoch": 5.41, - "learning_rate": 1.7809248554913297e-05, - "loss": 3.6593, - "step": 1207 - }, - { - "epoch": 5.42, - "learning_rate": 1.7791907514450865e-05, - "loss": 3.665, - "step": 1208 - }, - { - "epoch": 5.42, - "learning_rate": 1.7774566473988437e-05, - "loss": 3.7782, - "step": 1209 - }, - { - "epoch": 5.43, - "learning_rate": 1.7757225433526013e-05, - "loss": 3.6439, - "step": 1210 - }, - { - "epoch": 5.43, - "learning_rate": 1.7739884393063585e-05, - "loss": 3.8337, - "step": 1211 - }, - { - "epoch": 5.43, - "learning_rate": 1.7722543352601157e-05, - "loss": 3.578, - "step": 1212 - }, - { - "epoch": 5.44, - "learning_rate": 1.770520231213873e-05, - "loss": 3.6506, - "step": 1213 - }, - { - "epoch": 5.44, - "learning_rate": 1.76878612716763e-05, - "loss": 3.7176, - "step": 1214 - }, - { - "epoch": 5.45, - "learning_rate": 1.7670520231213873e-05, - "loss": 3.6124, - "step": 1215 - }, - { - "epoch": 5.45, - "learning_rate": 1.7653179190751445e-05, - "loss": 4.501, - "step": 1216 - }, - { - "epoch": 5.46, - "learning_rate": 1.7635838150289017e-05, - "loss": 4.3737, - "step": 1217 - }, - { - "epoch": 5.46, - "learning_rate": 1.7618497109826593e-05, - "loss": 4.4793, - "step": 1218 - }, - { - "epoch": 5.47, - "learning_rate": 1.760115606936416e-05, - "loss": 4.4172, - "step": 1219 - }, - { - "epoch": 5.47, - "learning_rate": 1.7583815028901733e-05, - "loss": 4.3874, - "step": 1220 - }, - { - "epoch": 5.48, - "learning_rate": 1.7566473988439306e-05, - "loss": 4.406, - "step": 1221 - }, - { - "epoch": 5.48, - "learning_rate": 1.754913294797688e-05, - "loss": 4.3033, - "step": 1222 - }, - { - "epoch": 5.48, - "learning_rate": 1.7531791907514453e-05, - "loss": 4.287, - "step": 1223 - }, - { - "epoch": 5.49, - "learning_rate": 1.7514450867052022e-05, - "loss": 4.2098, - "step": 1224 - }, - { - "epoch": 5.49, - "learning_rate": 1.7497109826589594e-05, - "loss": 4.2035, - "step": 1225 - }, - { - "epoch": 5.5, - "learning_rate": 1.747976878612717e-05, - "loss": 4.0716, - "step": 1226 - }, - { - "epoch": 5.5, - "learning_rate": 1.746242774566474e-05, - "loss": 4.1066, - "step": 1227 - }, - { - "epoch": 5.51, - "learning_rate": 1.7445086705202313e-05, - "loss": 3.8452, - "step": 1228 - }, - { - "epoch": 5.51, - "learning_rate": 1.7427745664739882e-05, - "loss": 3.93, - "step": 1229 - }, - { - "epoch": 5.52, - "learning_rate": 1.7410404624277458e-05, - "loss": 3.8458, - "step": 1230 - }, - { - "epoch": 5.52, - "learning_rate": 1.739306358381503e-05, - "loss": 3.9127, - "step": 1231 - }, - { - "epoch": 5.52, - "learning_rate": 1.73757225433526e-05, - "loss": 3.8718, - "step": 1232 - }, - { - "epoch": 5.53, - "learning_rate": 1.7358381502890174e-05, - "loss": 3.8352, - "step": 1233 - }, - { - "epoch": 5.53, - "learning_rate": 1.7341040462427746e-05, - "loss": 3.8066, - "step": 1234 - }, - { - "epoch": 5.54, - "learning_rate": 1.7323699421965318e-05, - "loss": 3.8209, - "step": 1235 - }, - { - "epoch": 5.54, - "learning_rate": 1.730635838150289e-05, - "loss": 3.7231, - "step": 1236 - }, - { - "epoch": 5.55, - "learning_rate": 1.7289017341040462e-05, - "loss": 3.8296, - "step": 1237 - }, - { - "epoch": 5.55, - "learning_rate": 1.7271676300578037e-05, - "loss": 3.7024, - "step": 1238 - }, - { - "epoch": 5.56, - "learning_rate": 1.725433526011561e-05, - "loss": 3.7866, - "step": 1239 - }, - { - "epoch": 5.56, - "learning_rate": 1.7236994219653178e-05, - "loss": 3.8055, - "step": 1240 - }, - { - "epoch": 5.57, - "learning_rate": 1.721965317919075e-05, - "loss": 3.8048, - "step": 1241 - }, - { - "epoch": 5.57, - "learning_rate": 1.7202312138728326e-05, - "loss": 3.7299, - "step": 1242 - }, - { - "epoch": 5.57, - "learning_rate": 1.7184971098265898e-05, - "loss": 3.6614, - "step": 1243 - }, - { - "epoch": 5.58, - "learning_rate": 1.716763005780347e-05, - "loss": 3.7104, - "step": 1244 - }, - { - "epoch": 5.58, - "learning_rate": 1.715028901734104e-05, - "loss": 3.7007, - "step": 1245 - }, - { - "epoch": 5.59, - "learning_rate": 1.7132947976878614e-05, - "loss": 3.7255, - "step": 1246 - }, - { - "epoch": 5.59, - "learning_rate": 1.7115606936416186e-05, - "loss": 3.611, - "step": 1247 - }, - { - "epoch": 5.6, - "learning_rate": 1.7098265895953758e-05, - "loss": 3.6779, - "step": 1248 - }, - { - "epoch": 5.6, - "learning_rate": 1.708092485549133e-05, - "loss": 3.6963, - "step": 1249 - }, - { - "epoch": 5.61, - "learning_rate": 1.7063583815028902e-05, - "loss": 3.6617, - "step": 1250 - }, - { - "epoch": 5.61, - "learning_rate": 1.7046242774566474e-05, - "loss": 3.5974, - "step": 1251 - }, - { - "epoch": 5.61, - "learning_rate": 1.7028901734104046e-05, - "loss": 3.6514, - "step": 1252 - }, - { - "epoch": 5.62, - "learning_rate": 1.701156069364162e-05, - "loss": 3.6334, - "step": 1253 - }, - { - "epoch": 5.62, - "learning_rate": 1.6994219653179194e-05, - "loss": 3.5855, - "step": 1254 - }, - { - "epoch": 5.63, - "learning_rate": 1.6976878612716763e-05, - "loss": 3.5942, - "step": 1255 - }, - { - "epoch": 5.63, - "learning_rate": 1.6959537572254335e-05, - "loss": 3.6141, - "step": 1256 - }, - { - "epoch": 5.64, - "learning_rate": 1.6942196531791907e-05, - "loss": 3.6875, - "step": 1257 - }, - { - "epoch": 5.64, - "learning_rate": 1.6924855491329482e-05, - "loss": 3.732, - "step": 1258 - }, - { - "epoch": 5.65, - "learning_rate": 1.6907514450867054e-05, - "loss": 3.5305, - "step": 1259 - }, - { - "epoch": 5.65, - "learning_rate": 1.6890173410404623e-05, - "loss": 3.5889, - "step": 1260 - }, - { - "epoch": 5.65, - "learning_rate": 1.6872832369942195e-05, - "loss": 3.6374, - "step": 1261 - }, - { - "epoch": 5.66, - "learning_rate": 1.685549132947977e-05, - "loss": 3.6227, - "step": 1262 - }, - { - "epoch": 5.66, - "learning_rate": 1.6838150289017343e-05, - "loss": 3.584, - "step": 1263 - }, - { - "epoch": 5.67, - "learning_rate": 1.6820809248554915e-05, - "loss": 3.7017, - "step": 1264 - }, - { - "epoch": 5.67, - "learning_rate": 1.6803468208092487e-05, - "loss": 3.5791, - "step": 1265 - }, - { - "epoch": 5.68, - "learning_rate": 1.678612716763006e-05, - "loss": 4.4901, - "step": 1266 - }, - { - "epoch": 5.68, - "learning_rate": 1.676878612716763e-05, - "loss": 4.4509, - "step": 1267 - }, - { - "epoch": 5.69, - "learning_rate": 1.6751445086705203e-05, - "loss": 4.4861, - "step": 1268 - }, - { - "epoch": 5.69, - "learning_rate": 1.6734104046242775e-05, - "loss": 4.4029, - "step": 1269 - }, - { - "epoch": 5.7, - "learning_rate": 1.671676300578035e-05, - "loss": 4.4611, - "step": 1270 - }, - { - "epoch": 5.7, - "learning_rate": 1.669942196531792e-05, - "loss": 4.3711, - "step": 1271 - }, - { - "epoch": 5.7, - "learning_rate": 1.668208092485549e-05, - "loss": 4.2994, - "step": 1272 - }, - { - "epoch": 5.71, - "learning_rate": 1.6664739884393063e-05, - "loss": 4.3213, - "step": 1273 - }, - { - "epoch": 5.71, - "learning_rate": 1.6647398843930635e-05, - "loss": 4.2141, - "step": 1274 - }, - { - "epoch": 5.72, - "learning_rate": 1.663005780346821e-05, - "loss": 4.189, - "step": 1275 - }, - { - "epoch": 5.72, - "learning_rate": 1.661271676300578e-05, - "loss": 4.0621, - "step": 1276 - }, - { - "epoch": 5.73, - "learning_rate": 1.659537572254335e-05, - "loss": 3.9399, - "step": 1277 - }, - { - "epoch": 5.73, - "learning_rate": 1.6578034682080924e-05, - "loss": 4.0211, - "step": 1278 - }, - { - "epoch": 5.74, - "learning_rate": 1.65606936416185e-05, - "loss": 3.9832, - "step": 1279 - }, - { - "epoch": 5.74, - "learning_rate": 1.654335260115607e-05, - "loss": 3.8816, - "step": 1280 - }, - { - "epoch": 5.74, - "learning_rate": 1.652601156069364e-05, - "loss": 3.8468, - "step": 1281 - }, - { - "epoch": 5.75, - "learning_rate": 1.6508670520231212e-05, - "loss": 3.9062, - "step": 1282 - }, - { - "epoch": 5.75, - "learning_rate": 1.6491329479768787e-05, - "loss": 3.9067, - "step": 1283 - }, - { - "epoch": 5.76, - "learning_rate": 1.647398843930636e-05, - "loss": 3.8635, - "step": 1284 - }, - { - "epoch": 5.76, - "learning_rate": 1.645664739884393e-05, - "loss": 3.8684, - "step": 1285 - }, - { - "epoch": 5.77, - "learning_rate": 1.64393063583815e-05, - "loss": 3.7676, - "step": 1286 - }, - { - "epoch": 5.77, - "learning_rate": 1.6421965317919076e-05, - "loss": 3.7876, - "step": 1287 - }, - { - "epoch": 5.78, - "learning_rate": 1.6404624277456648e-05, - "loss": 3.7201, - "step": 1288 - }, - { - "epoch": 5.78, - "learning_rate": 1.638728323699422e-05, - "loss": 3.7107, - "step": 1289 - }, - { - "epoch": 5.78, - "learning_rate": 1.6369942196531792e-05, - "loss": 3.7353, - "step": 1290 - }, - { - "epoch": 5.79, - "learning_rate": 1.6352601156069367e-05, - "loss": 3.8101, - "step": 1291 - }, - { - "epoch": 5.79, - "learning_rate": 1.6335260115606936e-05, - "loss": 3.7945, - "step": 1292 - }, - { - "epoch": 5.8, - "learning_rate": 1.6317919075144508e-05, - "loss": 3.7453, - "step": 1293 - }, - { - "epoch": 5.8, - "learning_rate": 1.630057803468208e-05, - "loss": 3.7175, - "step": 1294 - }, - { - "epoch": 5.81, - "learning_rate": 1.6283236994219656e-05, - "loss": 3.7157, - "step": 1295 - }, - { - "epoch": 5.81, - "learning_rate": 1.6265895953757228e-05, - "loss": 3.6373, - "step": 1296 - }, - { - "epoch": 5.82, - "learning_rate": 1.6248554913294796e-05, - "loss": 3.6859, - "step": 1297 - }, - { - "epoch": 5.82, - "learning_rate": 1.623121387283237e-05, - "loss": 3.7236, - "step": 1298 - }, - { - "epoch": 5.83, - "learning_rate": 1.6213872832369944e-05, - "loss": 3.7574, - "step": 1299 - }, - { - "epoch": 5.83, - "learning_rate": 1.6196531791907516e-05, - "loss": 3.7723, - "step": 1300 - }, - { - "epoch": 5.83, - "learning_rate": 1.6179190751445088e-05, - "loss": 3.6934, - "step": 1301 - }, - { - "epoch": 5.84, - "learning_rate": 1.6161849710982657e-05, - "loss": 3.6567, - "step": 1302 - }, - { - "epoch": 5.84, - "learning_rate": 1.6144508670520232e-05, - "loss": 3.6286, - "step": 1303 - }, - { - "epoch": 5.85, - "learning_rate": 1.6127167630057804e-05, - "loss": 3.7219, - "step": 1304 - }, - { - "epoch": 5.85, - "learning_rate": 1.6109826589595376e-05, - "loss": 3.74, - "step": 1305 - }, - { - "epoch": 5.86, - "learning_rate": 1.6092485549132948e-05, - "loss": 3.6348, - "step": 1306 - }, - { - "epoch": 5.86, - "learning_rate": 1.607514450867052e-05, - "loss": 3.7186, - "step": 1307 - }, - { - "epoch": 5.87, - "learning_rate": 1.6057803468208092e-05, - "loss": 3.6526, - "step": 1308 - }, - { - "epoch": 5.87, - "learning_rate": 1.6040462427745664e-05, - "loss": 3.7628, - "step": 1309 - }, - { - "epoch": 5.87, - "learning_rate": 1.6023121387283237e-05, - "loss": 3.7244, - "step": 1310 - }, - { - "epoch": 5.88, - "learning_rate": 1.6005780346820812e-05, - "loss": 3.6791, - "step": 1311 - }, - { - "epoch": 5.88, - "learning_rate": 1.598843930635838e-05, - "loss": 3.679, - "step": 1312 - }, - { - "epoch": 5.89, - "learning_rate": 1.5971098265895953e-05, - "loss": 3.6038, - "step": 1313 - }, - { - "epoch": 5.89, - "learning_rate": 1.5953757225433525e-05, - "loss": 3.6377, - "step": 1314 - }, - { - "epoch": 5.9, - "learning_rate": 1.59364161849711e-05, - "loss": 3.6217, - "step": 1315 - }, - { - "epoch": 5.9, - "learning_rate": 1.5919075144508672e-05, - "loss": 4.394, - "step": 1316 - }, - { - "epoch": 5.91, - "learning_rate": 1.5901734104046244e-05, - "loss": 4.2921, - "step": 1317 - }, - { - "epoch": 5.91, - "learning_rate": 1.5884393063583813e-05, - "loss": 4.3858, - "step": 1318 - }, - { - "epoch": 5.91, - "learning_rate": 1.586705202312139e-05, - "loss": 4.3503, - "step": 1319 - }, - { - "epoch": 5.92, - "learning_rate": 1.584971098265896e-05, - "loss": 4.2609, - "step": 1320 - }, - { - "epoch": 5.92, - "learning_rate": 1.5832369942196533e-05, - "loss": 4.1411, - "step": 1321 - }, - { - "epoch": 5.93, - "learning_rate": 1.5815028901734105e-05, - "loss": 4.1702, - "step": 1322 - }, - { - "epoch": 5.93, - "learning_rate": 1.5797687861271677e-05, - "loss": 3.9806, - "step": 1323 - }, - { - "epoch": 5.94, - "learning_rate": 1.578034682080925e-05, - "loss": 4.0033, - "step": 1324 - }, - { - "epoch": 5.94, - "learning_rate": 1.576300578034682e-05, - "loss": 3.9419, - "step": 1325 - }, - { - "epoch": 5.95, - "learning_rate": 1.5745664739884393e-05, - "loss": 3.8458, - "step": 1326 - }, - { - "epoch": 5.95, - "learning_rate": 1.572832369942197e-05, - "loss": 3.8642, - "step": 1327 - }, - { - "epoch": 5.96, - "learning_rate": 1.5710982658959537e-05, - "loss": 3.7582, - "step": 1328 - }, - { - "epoch": 5.96, - "learning_rate": 1.569364161849711e-05, - "loss": 3.7628, - "step": 1329 - }, - { - "epoch": 5.96, - "learning_rate": 1.567630057803468e-05, - "loss": 3.6299, - "step": 1330 - }, - { - "epoch": 5.97, - "learning_rate": 1.5658959537572257e-05, - "loss": 3.6943, - "step": 1331 - }, - { - "epoch": 5.97, - "learning_rate": 1.564161849710983e-05, - "loss": 3.6493, - "step": 1332 - }, - { - "epoch": 5.98, - "learning_rate": 1.5624277456647397e-05, - "loss": 3.7038, - "step": 1333 - }, - { - "epoch": 5.98, - "learning_rate": 1.560693641618497e-05, - "loss": 3.7886, - "step": 1334 - }, - { - "epoch": 5.99, - "learning_rate": 1.5589595375722545e-05, - "loss": 3.7207, - "step": 1335 - }, - { - "epoch": 5.99, - "learning_rate": 1.5572254335260117e-05, - "loss": 3.6326, - "step": 1336 - }, - { - "epoch": 6.0, - "learning_rate": 1.555491329479769e-05, - "loss": 3.7846, - "step": 1337 - }, - { - "epoch": 6.0, - "learning_rate": 1.5537572254335258e-05, - "loss": 3.9457, - "step": 1338 - }, - { - "epoch": 6.0, - "learning_rate": 1.5520231213872833e-05, - "loss": 4.1404, - "step": 1339 - }, - { - "epoch": 6.01, - "learning_rate": 1.5502890173410405e-05, - "loss": 4.0547, - "step": 1340 - }, - { - "epoch": 6.01, - "learning_rate": 1.5485549132947977e-05, - "loss": 4.1299, - "step": 1341 - }, - { - "epoch": 6.02, - "learning_rate": 1.546820809248555e-05, - "loss": 4.0622, - "step": 1342 - }, - { - "epoch": 6.02, - "learning_rate": 1.545086705202312e-05, - "loss": 4.1236, - "step": 1343 - }, - { - "epoch": 6.03, - "learning_rate": 1.5433526011560694e-05, - "loss": 4.099, - "step": 1344 - }, - { - "epoch": 6.03, - "learning_rate": 1.5416184971098266e-05, - "loss": 4.0269, - "step": 1345 - }, - { - "epoch": 6.04, - "learning_rate": 1.5398843930635838e-05, - "loss": 4.102, - "step": 1346 - }, - { - "epoch": 6.04, - "learning_rate": 1.5381502890173413e-05, - "loss": 4.0486, - "step": 1347 - }, - { - "epoch": 6.04, - "learning_rate": 1.5364161849710985e-05, - "loss": 3.9785, - "step": 1348 - }, - { - "epoch": 6.05, - "learning_rate": 1.5346820809248554e-05, - "loss": 3.881, - "step": 1349 - }, - { - "epoch": 6.05, - "learning_rate": 1.5329479768786126e-05, - "loss": 3.8907, - "step": 1350 - }, - { - "epoch": 6.06, - "learning_rate": 1.53121387283237e-05, - "loss": 3.9125, - "step": 1351 - }, - { - "epoch": 6.06, - "learning_rate": 1.5294797687861274e-05, - "loss": 3.8936, - "step": 1352 - }, - { - "epoch": 6.07, - "learning_rate": 1.5277456647398846e-05, - "loss": 3.7634, - "step": 1353 - }, - { - "epoch": 6.07, - "learning_rate": 1.5260115606936414e-05, - "loss": 3.7423, - "step": 1354 - }, - { - "epoch": 6.08, - "learning_rate": 1.524277456647399e-05, - "loss": 3.7204, - "step": 1355 - }, - { - "epoch": 6.08, - "learning_rate": 1.5225433526011562e-05, - "loss": 3.6845, - "step": 1356 - }, - { - "epoch": 6.09, - "learning_rate": 1.5208092485549134e-05, - "loss": 3.7513, - "step": 1357 - }, - { - "epoch": 6.09, - "learning_rate": 1.5190751445086704e-05, - "loss": 3.7427, - "step": 1358 - }, - { - "epoch": 6.09, - "learning_rate": 1.517341040462428e-05, - "loss": 3.6635, - "step": 1359 - }, - { - "epoch": 6.1, - "learning_rate": 1.515606936416185e-05, - "loss": 3.6328, - "step": 1360 - }, - { - "epoch": 6.1, - "learning_rate": 1.5138728323699422e-05, - "loss": 3.613, - "step": 1361 - }, - { - "epoch": 6.11, - "learning_rate": 1.5121387283236994e-05, - "loss": 3.5995, - "step": 1362 - }, - { - "epoch": 6.11, - "learning_rate": 1.5104046242774568e-05, - "loss": 3.5823, - "step": 1363 - }, - { - "epoch": 6.12, - "learning_rate": 1.508670520231214e-05, - "loss": 3.6251, - "step": 1364 - }, - { - "epoch": 6.12, - "learning_rate": 1.5069364161849712e-05, - "loss": 3.6519, - "step": 1365 - }, - { - "epoch": 6.13, - "learning_rate": 1.5052023121387282e-05, - "loss": 3.5135, - "step": 1366 - }, - { - "epoch": 6.13, - "learning_rate": 1.5034682080924858e-05, - "loss": 3.526, - "step": 1367 - }, - { - "epoch": 6.13, - "learning_rate": 1.5017341040462428e-05, - "loss": 3.5666, - "step": 1368 - }, - { - "epoch": 6.14, - "learning_rate": 1.5e-05, - "loss": 3.5295, - "step": 1369 - }, - { - "epoch": 6.14, - "learning_rate": 1.4982658959537572e-05, - "loss": 3.4438, - "step": 1370 - }, - { - "epoch": 6.15, - "learning_rate": 1.4965317919075145e-05, - "loss": 3.4523, - "step": 1371 - }, - { - "epoch": 6.15, - "learning_rate": 1.4947976878612717e-05, - "loss": 3.4697, - "step": 1372 - }, - { - "epoch": 6.16, - "learning_rate": 1.4930635838150289e-05, - "loss": 3.6016, - "step": 1373 - }, - { - "epoch": 6.16, - "learning_rate": 1.491329479768786e-05, - "loss": 3.469, - "step": 1374 - }, - { - "epoch": 6.17, - "learning_rate": 1.4895953757225434e-05, - "loss": 3.563, - "step": 1375 - }, - { - "epoch": 6.17, - "learning_rate": 1.4878612716763005e-05, - "loss": 3.4659, - "step": 1376 - }, - { - "epoch": 6.17, - "learning_rate": 1.4861271676300579e-05, - "loss": 3.4131, - "step": 1377 - }, - { - "epoch": 6.18, - "learning_rate": 1.484393063583815e-05, - "loss": 3.5381, - "step": 1378 - }, - { - "epoch": 6.18, - "learning_rate": 1.4826589595375723e-05, - "loss": 3.5364, - "step": 1379 - }, - { - "epoch": 6.19, - "learning_rate": 1.4809248554913295e-05, - "loss": 3.4761, - "step": 1380 - }, - { - "epoch": 6.19, - "learning_rate": 1.4791907514450867e-05, - "loss": 3.5366, - "step": 1381 - }, - { - "epoch": 6.2, - "learning_rate": 1.4774566473988439e-05, - "loss": 3.5096, - "step": 1382 - }, - { - "epoch": 6.2, - "learning_rate": 1.4757225433526013e-05, - "loss": 3.4822, - "step": 1383 - }, - { - "epoch": 6.21, - "learning_rate": 1.4739884393063583e-05, - "loss": 3.6349, - "step": 1384 - }, - { - "epoch": 6.21, - "learning_rate": 1.4722543352601157e-05, - "loss": 3.5348, - "step": 1385 - }, - { - "epoch": 6.22, - "learning_rate": 1.4705202312138727e-05, - "loss": 3.4334, - "step": 1386 - }, - { - "epoch": 6.22, - "learning_rate": 1.4687861271676301e-05, - "loss": 3.4998, - "step": 1387 - }, - { - "epoch": 6.22, - "learning_rate": 1.4670520231213873e-05, - "loss": 3.4966, - "step": 1388 - }, - { - "epoch": 6.23, - "learning_rate": 1.4653179190751445e-05, - "loss": 4.3748, - "step": 1389 - }, - { - "epoch": 6.23, - "learning_rate": 1.4635838150289017e-05, - "loss": 4.2535, - "step": 1390 - }, - { - "epoch": 6.24, - "learning_rate": 1.4618497109826591e-05, - "loss": 4.3442, - "step": 1391 - }, - { - "epoch": 6.24, - "learning_rate": 1.4601156069364161e-05, - "loss": 4.3623, - "step": 1392 - }, - { - "epoch": 6.25, - "learning_rate": 1.4583815028901735e-05, - "loss": 4.3147, - "step": 1393 - }, - { - "epoch": 6.25, - "learning_rate": 1.4566473988439305e-05, - "loss": 4.2769, - "step": 1394 - }, - { - "epoch": 6.26, - "learning_rate": 1.454913294797688e-05, - "loss": 4.2314, - "step": 1395 - }, - { - "epoch": 6.26, - "learning_rate": 1.4531791907514451e-05, - "loss": 4.2258, - "step": 1396 - }, - { - "epoch": 6.26, - "learning_rate": 1.4514450867052023e-05, - "loss": 4.1569, - "step": 1397 - }, - { - "epoch": 6.27, - "learning_rate": 1.4497109826589595e-05, - "loss": 4.0524, - "step": 1398 - }, - { - "epoch": 6.27, - "learning_rate": 1.4479768786127167e-05, - "loss": 3.9985, - "step": 1399 - }, - { - "epoch": 6.28, - "learning_rate": 1.446242774566474e-05, - "loss": 3.9162, - "step": 1400 - }, - { - "epoch": 6.28, - "learning_rate": 1.4445086705202313e-05, - "loss": 3.899, - "step": 1401 - }, - { - "epoch": 6.29, - "learning_rate": 1.4427745664739884e-05, - "loss": 3.7695, - "step": 1402 - }, - { - "epoch": 6.29, - "learning_rate": 1.4410404624277457e-05, - "loss": 3.7958, - "step": 1403 - }, - { - "epoch": 6.3, - "learning_rate": 1.439306358381503e-05, - "loss": 3.7724, - "step": 1404 - }, - { - "epoch": 6.3, - "learning_rate": 1.4375722543352602e-05, - "loss": 3.7631, - "step": 1405 - }, - { - "epoch": 6.3, - "learning_rate": 1.4358381502890174e-05, - "loss": 3.6711, - "step": 1406 - }, - { - "epoch": 6.31, - "learning_rate": 1.4341040462427746e-05, - "loss": 3.6774, - "step": 1407 - }, - { - "epoch": 6.31, - "learning_rate": 1.4323699421965318e-05, - "loss": 3.6973, - "step": 1408 - }, - { - "epoch": 6.32, - "learning_rate": 1.4306358381502892e-05, - "loss": 3.6787, - "step": 1409 - }, - { - "epoch": 6.32, - "learning_rate": 1.4289017341040462e-05, - "loss": 3.606, - "step": 1410 - }, - { - "epoch": 6.33, - "learning_rate": 1.4271676300578036e-05, - "loss": 3.7522, - "step": 1411 - }, - { - "epoch": 6.33, - "learning_rate": 1.4254335260115606e-05, - "loss": 3.6174, - "step": 1412 - }, - { - "epoch": 6.34, - "learning_rate": 1.423699421965318e-05, - "loss": 3.6904, - "step": 1413 - }, - { - "epoch": 6.34, - "learning_rate": 1.4219653179190752e-05, - "loss": 3.5759, - "step": 1414 - }, - { - "epoch": 6.35, - "learning_rate": 1.4202312138728324e-05, - "loss": 3.6298, - "step": 1415 - }, - { - "epoch": 6.35, - "learning_rate": 1.4184971098265896e-05, - "loss": 3.5669, - "step": 1416 - }, - { - "epoch": 6.35, - "learning_rate": 1.416763005780347e-05, - "loss": 3.5675, - "step": 1417 - }, - { - "epoch": 6.36, - "learning_rate": 1.415028901734104e-05, - "loss": 3.526, - "step": 1418 - }, - { - "epoch": 6.36, - "learning_rate": 1.4132947976878614e-05, - "loss": 3.6871, - "step": 1419 - }, - { - "epoch": 6.37, - "learning_rate": 1.4115606936416184e-05, - "loss": 3.491, - "step": 1420 - }, - { - "epoch": 6.37, - "learning_rate": 1.4098265895953758e-05, - "loss": 3.5132, - "step": 1421 - }, - { - "epoch": 6.38, - "learning_rate": 1.408092485549133e-05, - "loss": 3.4734, - "step": 1422 - }, - { - "epoch": 6.38, - "learning_rate": 1.4063583815028902e-05, - "loss": 3.5613, - "step": 1423 - }, - { - "epoch": 6.39, - "learning_rate": 1.4046242774566474e-05, - "loss": 3.5734, - "step": 1424 - }, - { - "epoch": 6.39, - "learning_rate": 1.4028901734104046e-05, - "loss": 3.5342, - "step": 1425 - }, - { - "epoch": 6.39, - "learning_rate": 1.4011560693641618e-05, - "loss": 3.5138, - "step": 1426 - }, - { - "epoch": 6.4, - "learning_rate": 1.3994219653179192e-05, - "loss": 3.4809, - "step": 1427 - }, - { - "epoch": 6.4, - "learning_rate": 1.3976878612716763e-05, - "loss": 3.4592, - "step": 1428 - }, - { - "epoch": 6.41, - "learning_rate": 1.3959537572254336e-05, - "loss": 3.4913, - "step": 1429 - }, - { - "epoch": 6.41, - "learning_rate": 1.3942196531791908e-05, - "loss": 3.5048, - "step": 1430 - }, - { - "epoch": 6.42, - "learning_rate": 1.392485549132948e-05, - "loss": 3.4408, - "step": 1431 - }, - { - "epoch": 6.42, - "learning_rate": 1.3907514450867053e-05, - "loss": 3.5879, - "step": 1432 - }, - { - "epoch": 6.43, - "learning_rate": 1.3890173410404625e-05, - "loss": 3.5279, - "step": 1433 - }, - { - "epoch": 6.43, - "learning_rate": 1.3872832369942197e-05, - "loss": 3.4807, - "step": 1434 - }, - { - "epoch": 6.43, - "learning_rate": 1.385549132947977e-05, - "loss": 3.5377, - "step": 1435 - }, - { - "epoch": 6.44, - "learning_rate": 1.383815028901734e-05, - "loss": 3.5555, - "step": 1436 - }, - { - "epoch": 6.44, - "learning_rate": 1.3820809248554915e-05, - "loss": 3.5234, - "step": 1437 - }, - { - "epoch": 6.45, - "learning_rate": 1.3803468208092485e-05, - "loss": 3.3992, - "step": 1438 - }, - { - "epoch": 6.45, - "learning_rate": 1.3786127167630059e-05, - "loss": 4.3738, - "step": 1439 - }, - { - "epoch": 6.46, - "learning_rate": 1.376878612716763e-05, - "loss": 4.3301, - "step": 1440 - }, - { - "epoch": 6.46, - "learning_rate": 1.3751445086705203e-05, - "loss": 4.38, - "step": 1441 - }, - { - "epoch": 6.47, - "learning_rate": 1.3734104046242775e-05, - "loss": 4.3963, - "step": 1442 - }, - { - "epoch": 6.47, - "learning_rate": 1.3716763005780349e-05, - "loss": 4.3242, - "step": 1443 - }, - { - "epoch": 6.48, - "learning_rate": 1.3699421965317919e-05, - "loss": 4.2459, - "step": 1444 - }, - { - "epoch": 6.48, - "learning_rate": 1.3682080924855493e-05, - "loss": 4.2176, - "step": 1445 - }, - { - "epoch": 6.48, - "learning_rate": 1.3664739884393063e-05, - "loss": 4.1511, - "step": 1446 - }, - { - "epoch": 6.49, - "learning_rate": 1.3647398843930637e-05, - "loss": 4.0639, - "step": 1447 - }, - { - "epoch": 6.49, - "learning_rate": 1.3630057803468209e-05, - "loss": 4.0748, - "step": 1448 - }, - { - "epoch": 6.5, - "learning_rate": 1.3612716763005781e-05, - "loss": 3.9933, - "step": 1449 - }, - { - "epoch": 6.5, - "learning_rate": 1.3595375722543353e-05, - "loss": 3.9647, - "step": 1450 - }, - { - "epoch": 6.51, - "learning_rate": 1.3578034682080925e-05, - "loss": 3.8892, - "step": 1451 - }, - { - "epoch": 6.51, - "learning_rate": 1.3560693641618497e-05, - "loss": 3.8595, - "step": 1452 - }, - { - "epoch": 6.52, - "learning_rate": 1.3543352601156071e-05, - "loss": 3.8028, - "step": 1453 - }, - { - "epoch": 6.52, - "learning_rate": 1.3526011560693641e-05, - "loss": 3.6911, - "step": 1454 - }, - { - "epoch": 6.52, - "learning_rate": 1.3508670520231215e-05, - "loss": 3.811, - "step": 1455 - }, - { - "epoch": 6.53, - "learning_rate": 1.3491329479768787e-05, - "loss": 3.7722, - "step": 1456 - }, - { - "epoch": 6.53, - "learning_rate": 1.347398843930636e-05, - "loss": 3.6371, - "step": 1457 - }, - { - "epoch": 6.54, - "learning_rate": 1.3456647398843931e-05, - "loss": 3.6797, - "step": 1458 - }, - { - "epoch": 6.54, - "learning_rate": 1.3439306358381503e-05, - "loss": 3.6933, - "step": 1459 - }, - { - "epoch": 6.55, - "learning_rate": 1.3421965317919075e-05, - "loss": 3.6937, - "step": 1460 - }, - { - "epoch": 6.55, - "learning_rate": 1.340462427745665e-05, - "loss": 3.5847, - "step": 1461 - }, - { - "epoch": 6.56, - "learning_rate": 1.338728323699422e-05, - "loss": 3.6837, - "step": 1462 - }, - { - "epoch": 6.56, - "learning_rate": 1.3369942196531793e-05, - "loss": 3.6907, - "step": 1463 - }, - { - "epoch": 6.57, - "learning_rate": 1.3352601156069364e-05, - "loss": 3.8257, - "step": 1464 - }, - { - "epoch": 6.57, - "learning_rate": 1.3335260115606938e-05, - "loss": 3.6479, - "step": 1465 - }, - { - "epoch": 6.57, - "learning_rate": 1.331791907514451e-05, - "loss": 3.681, - "step": 1466 - }, - { - "epoch": 6.58, - "learning_rate": 1.330057803468208e-05, - "loss": 3.5989, - "step": 1467 - }, - { - "epoch": 6.58, - "learning_rate": 1.3283236994219654e-05, - "loss": 3.5327, - "step": 1468 - }, - { - "epoch": 6.59, - "learning_rate": 1.3265895953757226e-05, - "loss": 3.6351, - "step": 1469 - }, - { - "epoch": 6.59, - "learning_rate": 1.3248554913294798e-05, - "loss": 3.5191, - "step": 1470 - }, - { - "epoch": 6.6, - "learning_rate": 1.323121387283237e-05, - "loss": 3.6232, - "step": 1471 - }, - { - "epoch": 6.6, - "learning_rate": 1.3213872832369942e-05, - "loss": 3.5038, - "step": 1472 - }, - { - "epoch": 6.61, - "learning_rate": 1.3196531791907514e-05, - "loss": 3.5143, - "step": 1473 - }, - { - "epoch": 6.61, - "learning_rate": 1.3179190751445088e-05, - "loss": 3.5653, - "step": 1474 - }, - { - "epoch": 6.61, - "learning_rate": 1.3161849710982658e-05, - "loss": 3.4478, - "step": 1475 - }, - { - "epoch": 6.62, - "learning_rate": 1.3144508670520232e-05, - "loss": 3.5128, - "step": 1476 - }, - { - "epoch": 6.62, - "learning_rate": 1.3127167630057802e-05, - "loss": 3.4933, - "step": 1477 - }, - { - "epoch": 6.63, - "learning_rate": 1.3109826589595376e-05, - "loss": 3.395, - "step": 1478 - }, - { - "epoch": 6.63, - "learning_rate": 1.3092485549132948e-05, - "loss": 3.5296, - "step": 1479 - }, - { - "epoch": 6.64, - "learning_rate": 1.307514450867052e-05, - "loss": 3.5239, - "step": 1480 - }, - { - "epoch": 6.64, - "learning_rate": 1.3057803468208092e-05, - "loss": 3.5912, - "step": 1481 - }, - { - "epoch": 6.65, - "learning_rate": 1.3040462427745664e-05, - "loss": 3.5952, - "step": 1482 - }, - { - "epoch": 6.65, - "learning_rate": 1.3023121387283236e-05, - "loss": 3.5399, - "step": 1483 - }, - { - "epoch": 6.65, - "learning_rate": 1.300578034682081e-05, - "loss": 3.5792, - "step": 1484 - }, - { - "epoch": 6.66, - "learning_rate": 1.298843930635838e-05, - "loss": 3.4492, - "step": 1485 - }, - { - "epoch": 6.66, - "learning_rate": 1.2971098265895954e-05, - "loss": 3.5801, - "step": 1486 - }, - { - "epoch": 6.67, - "learning_rate": 1.2953757225433526e-05, - "loss": 3.5486, - "step": 1487 - }, - { - "epoch": 6.67, - "learning_rate": 1.2936416184971098e-05, - "loss": 3.4543, - "step": 1488 - }, - { - "epoch": 6.68, - "learning_rate": 1.291907514450867e-05, - "loss": 4.3831, - "step": 1489 - }, - { - "epoch": 6.68, - "learning_rate": 1.2901734104046243e-05, - "loss": 4.3403, - "step": 1490 - }, - { - "epoch": 6.69, - "learning_rate": 1.2884393063583815e-05, - "loss": 4.2843, - "step": 1491 - }, - { - "epoch": 6.69, - "learning_rate": 1.2867052023121388e-05, - "loss": 4.1974, - "step": 1492 - }, - { - "epoch": 6.7, - "learning_rate": 1.2849710982658959e-05, - "loss": 4.1307, - "step": 1493 - }, - { - "epoch": 6.7, - "learning_rate": 1.2832369942196533e-05, - "loss": 4.2715, - "step": 1494 - }, - { - "epoch": 6.7, - "learning_rate": 1.2815028901734103e-05, - "loss": 4.1558, - "step": 1495 - }, - { - "epoch": 6.71, - "learning_rate": 1.2797687861271677e-05, - "loss": 4.1503, - "step": 1496 - }, - { - "epoch": 6.71, - "learning_rate": 1.2780346820809249e-05, - "loss": 4.1042, - "step": 1497 - }, - { - "epoch": 6.72, - "learning_rate": 1.2763005780346821e-05, - "loss": 4.0457, - "step": 1498 - }, - { - "epoch": 6.72, - "learning_rate": 1.2745664739884393e-05, - "loss": 3.8454, - "step": 1499 - }, - { - "epoch": 6.73, - "learning_rate": 1.2728323699421967e-05, - "loss": 3.8931, - "step": 1500 - }, - { - "epoch": 6.73, - "eval_loss": 4.250516891479492, - "eval_runtime": 463.1359, - "eval_samples_per_second": 5.705, - "eval_steps_per_second": 0.715, - "eval_wer": 0.9688218960729869, - "step": 1500 - }, - { - "epoch": 6.73, - "learning_rate": 1.2710982658959537e-05, - "loss": 3.8691, - "step": 1501 - }, - { - "epoch": 6.74, - "learning_rate": 1.269364161849711e-05, - "loss": 3.7624, - "step": 1502 - }, - { - "epoch": 6.74, - "learning_rate": 1.2676300578034681e-05, - "loss": 3.7811, - "step": 1503 - }, - { - "epoch": 6.74, - "learning_rate": 1.2658959537572255e-05, - "loss": 3.7243, - "step": 1504 - }, - { - "epoch": 6.75, - "learning_rate": 1.2641618497109827e-05, - "loss": 3.7005, - "step": 1505 - }, - { - "epoch": 6.75, - "learning_rate": 1.2624277456647399e-05, - "loss": 3.7262, - "step": 1506 - }, - { - "epoch": 6.76, - "learning_rate": 1.2606936416184971e-05, - "loss": 3.6828, - "step": 1507 - }, - { - "epoch": 6.76, - "learning_rate": 1.2589595375722543e-05, - "loss": 3.7144, - "step": 1508 - }, - { - "epoch": 6.77, - "learning_rate": 1.2572254335260115e-05, - "loss": 3.778, - "step": 1509 - }, - { - "epoch": 6.77, - "learning_rate": 1.2554913294797689e-05, - "loss": 3.6453, - "step": 1510 - }, - { - "epoch": 6.78, - "learning_rate": 1.253757225433526e-05, - "loss": 3.6811, - "step": 1511 - }, - { - "epoch": 6.78, - "learning_rate": 1.2520231213872833e-05, - "loss": 3.7192, - "step": 1512 - }, - { - "epoch": 6.78, - "learning_rate": 1.2502890173410405e-05, - "loss": 3.6474, - "step": 1513 - }, - { - "epoch": 6.79, - "learning_rate": 1.2485549132947977e-05, - "loss": 3.5742, - "step": 1514 - }, - { - "epoch": 6.79, - "learning_rate": 1.246820809248555e-05, - "loss": 3.6431, - "step": 1515 - }, - { - "epoch": 6.8, - "learning_rate": 1.2450867052023121e-05, - "loss": 3.5768, - "step": 1516 - }, - { - "epoch": 6.8, - "learning_rate": 1.2433526011560694e-05, - "loss": 3.6144, - "step": 1517 - }, - { - "epoch": 6.81, - "learning_rate": 1.2416184971098267e-05, - "loss": 3.5914, - "step": 1518 - }, - { - "epoch": 6.81, - "learning_rate": 1.2398843930635838e-05, - "loss": 3.5791, - "step": 1519 - }, - { - "epoch": 6.82, - "learning_rate": 1.2381502890173411e-05, - "loss": 3.5251, - "step": 1520 - }, - { - "epoch": 6.82, - "learning_rate": 1.2364161849710982e-05, - "loss": 3.577, - "step": 1521 - }, - { - "epoch": 6.83, - "learning_rate": 1.2346820809248556e-05, - "loss": 3.5221, - "step": 1522 - }, - { - "epoch": 6.83, - "learning_rate": 1.2329479768786128e-05, - "loss": 3.5566, - "step": 1523 - }, - { - "epoch": 6.83, - "learning_rate": 1.23121387283237e-05, - "loss": 3.4536, - "step": 1524 - }, - { - "epoch": 6.84, - "learning_rate": 1.2294797687861272e-05, - "loss": 3.4887, - "step": 1525 - }, - { - "epoch": 6.84, - "learning_rate": 1.2277456647398846e-05, - "loss": 3.5162, - "step": 1526 - }, - { - "epoch": 6.85, - "learning_rate": 1.2260115606936416e-05, - "loss": 3.607, - "step": 1527 - }, - { - "epoch": 6.85, - "learning_rate": 1.224277456647399e-05, - "loss": 3.5811, - "step": 1528 - }, - { - "epoch": 6.86, - "learning_rate": 1.222543352601156e-05, - "loss": 3.5118, - "step": 1529 - }, - { - "epoch": 6.86, - "learning_rate": 1.2208092485549134e-05, - "loss": 3.474, - "step": 1530 - }, - { - "epoch": 6.87, - "learning_rate": 1.2190751445086706e-05, - "loss": 3.5014, - "step": 1531 - }, - { - "epoch": 6.87, - "learning_rate": 1.2173410404624278e-05, - "loss": 3.5547, - "step": 1532 - }, - { - "epoch": 6.87, - "learning_rate": 1.215606936416185e-05, - "loss": 3.4679, - "step": 1533 - }, - { - "epoch": 6.88, - "learning_rate": 1.2138728323699422e-05, - "loss": 3.495, - "step": 1534 - }, - { - "epoch": 6.88, - "learning_rate": 1.2121387283236994e-05, - "loss": 3.503, - "step": 1535 - }, - { - "epoch": 6.89, - "learning_rate": 1.2104046242774568e-05, - "loss": 3.4486, - "step": 1536 - }, - { - "epoch": 6.89, - "learning_rate": 1.2086705202312138e-05, - "loss": 3.6023, - "step": 1537 - }, - { - "epoch": 6.9, - "learning_rate": 1.2069364161849712e-05, - "loss": 3.4638, - "step": 1538 - }, - { - "epoch": 6.9, - "learning_rate": 1.2052023121387284e-05, - "loss": 4.2609, - "step": 1539 - }, - { - "epoch": 6.91, - "learning_rate": 1.2034682080924856e-05, - "loss": 4.2512, - "step": 1540 - }, - { - "epoch": 6.91, - "learning_rate": 1.2017341040462428e-05, - "loss": 4.2611, - "step": 1541 - }, - { - "epoch": 6.91, - "learning_rate": 1.2e-05, - "loss": 4.1527, - "step": 1542 - }, - { - "epoch": 6.92, - "learning_rate": 1.1982658959537572e-05, - "loss": 4.1289, - "step": 1543 - }, - { - "epoch": 6.92, - "learning_rate": 1.1965317919075146e-05, - "loss": 3.9784, - "step": 1544 - }, - { - "epoch": 6.93, - "learning_rate": 1.1947976878612717e-05, - "loss": 3.9353, - "step": 1545 - }, - { - "epoch": 6.93, - "learning_rate": 1.193063583815029e-05, - "loss": 3.8506, - "step": 1546 - }, - { - "epoch": 6.94, - "learning_rate": 1.191329479768786e-05, - "loss": 3.8273, - "step": 1547 - }, - { - "epoch": 6.94, - "learning_rate": 1.1895953757225434e-05, - "loss": 3.6927, - "step": 1548 - }, - { - "epoch": 6.95, - "learning_rate": 1.1878612716763006e-05, - "loss": 3.6501, - "step": 1549 - }, - { - "epoch": 6.95, - "learning_rate": 1.1861271676300579e-05, - "loss": 3.7406, - "step": 1550 - }, - { - "epoch": 6.96, - "learning_rate": 1.184393063583815e-05, - "loss": 3.5916, - "step": 1551 - }, - { - "epoch": 6.96, - "learning_rate": 1.1826589595375724e-05, - "loss": 3.6129, - "step": 1552 - }, - { - "epoch": 6.96, - "learning_rate": 1.1809248554913295e-05, - "loss": 3.5014, - "step": 1553 - }, - { - "epoch": 6.97, - "learning_rate": 1.1791907514450869e-05, - "loss": 3.5187, - "step": 1554 - }, - { - "epoch": 6.97, - "learning_rate": 1.1774566473988439e-05, - "loss": 3.5085, - "step": 1555 - }, - { - "epoch": 6.98, - "learning_rate": 1.1757225433526013e-05, - "loss": 3.5914, - "step": 1556 - }, - { - "epoch": 6.98, - "learning_rate": 1.1739884393063585e-05, - "loss": 3.528, - "step": 1557 - }, - { - "epoch": 6.99, - "learning_rate": 1.1722543352601157e-05, - "loss": 3.5764, - "step": 1558 - }, - { - "epoch": 6.99, - "learning_rate": 1.1705202312138729e-05, - "loss": 3.499, - "step": 1559 - }, - { - "epoch": 7.0, - "learning_rate": 1.1687861271676301e-05, - "loss": 3.4236, - "step": 1560 - }, - { - "epoch": 7.0, - "learning_rate": 1.1670520231213873e-05, - "loss": 3.7797, - "step": 1561 - }, - { - "epoch": 7.0, - "learning_rate": 1.1653179190751445e-05, - "loss": 4.0943, - "step": 1562 - }, - { - "epoch": 7.01, - "learning_rate": 1.1635838150289017e-05, - "loss": 4.0141, - "step": 1563 - }, - { - "epoch": 7.01, - "learning_rate": 1.161849710982659e-05, - "loss": 4.037, - "step": 1564 - }, - { - "epoch": 7.02, - "learning_rate": 1.1601156069364163e-05, - "loss": 4.0608, - "step": 1565 - }, - { - "epoch": 7.02, - "learning_rate": 1.1583815028901733e-05, - "loss": 4.001, - "step": 1566 - }, - { - "epoch": 7.03, - "learning_rate": 1.1566473988439307e-05, - "loss": 4.0066, - "step": 1567 - }, - { - "epoch": 7.03, - "learning_rate": 1.1549132947976877e-05, - "loss": 4.0685, - "step": 1568 - }, - { - "epoch": 7.04, - "learning_rate": 1.1531791907514451e-05, - "loss": 3.9795, - "step": 1569 - }, - { - "epoch": 7.04, - "learning_rate": 1.1514450867052023e-05, - "loss": 3.9148, - "step": 1570 - }, - { - "epoch": 7.04, - "learning_rate": 1.1497109826589595e-05, - "loss": 3.8969, - "step": 1571 - }, - { - "epoch": 7.05, - "learning_rate": 1.1479768786127167e-05, - "loss": 3.8096, - "step": 1572 - }, - { - "epoch": 7.05, - "learning_rate": 1.146242774566474e-05, - "loss": 3.8556, - "step": 1573 - }, - { - "epoch": 7.06, - "learning_rate": 1.1445086705202312e-05, - "loss": 3.7949, - "step": 1574 - }, - { - "epoch": 7.06, - "learning_rate": 1.1427745664739885e-05, - "loss": 3.7384, - "step": 1575 - }, - { - "epoch": 7.07, - "learning_rate": 1.1410404624277456e-05, - "loss": 3.6781, - "step": 1576 - }, - { - "epoch": 7.07, - "learning_rate": 1.139306358381503e-05, - "loss": 3.7412, - "step": 1577 - }, - { - "epoch": 7.08, - "learning_rate": 1.1375722543352602e-05, - "loss": 3.6372, - "step": 1578 - }, - { - "epoch": 7.08, - "learning_rate": 1.1358381502890174e-05, - "loss": 3.6381, - "step": 1579 - }, - { - "epoch": 7.09, - "learning_rate": 1.1341040462427746e-05, - "loss": 3.5741, - "step": 1580 - }, - { - "epoch": 7.09, - "learning_rate": 1.1323699421965318e-05, - "loss": 3.5746, - "step": 1581 - }, - { - "epoch": 7.09, - "learning_rate": 1.130635838150289e-05, - "loss": 3.5849, - "step": 1582 - }, - { - "epoch": 7.1, - "learning_rate": 1.1289017341040464e-05, - "loss": 3.601, - "step": 1583 - }, - { - "epoch": 7.1, - "learning_rate": 1.1271676300578034e-05, - "loss": 3.5217, - "step": 1584 - }, - { - "epoch": 7.11, - "learning_rate": 1.1254335260115608e-05, - "loss": 3.5282, - "step": 1585 - }, - { - "epoch": 7.11, - "learning_rate": 1.1236994219653178e-05, - "loss": 3.4845, - "step": 1586 - }, - { - "epoch": 7.12, - "learning_rate": 1.1219653179190752e-05, - "loss": 3.5286, - "step": 1587 - }, - { - "epoch": 7.12, - "learning_rate": 1.1202312138728324e-05, - "loss": 3.4687, - "step": 1588 - }, - { - "epoch": 7.13, - "learning_rate": 1.1184971098265896e-05, - "loss": 3.4723, - "step": 1589 - }, - { - "epoch": 7.13, - "learning_rate": 1.1167630057803468e-05, - "loss": 3.4709, - "step": 1590 - }, - { - "epoch": 7.13, - "learning_rate": 1.1150289017341042e-05, - "loss": 3.4791, - "step": 1591 - }, - { - "epoch": 7.14, - "learning_rate": 1.1132947976878612e-05, - "loss": 3.515, - "step": 1592 - }, - { - "epoch": 7.14, - "learning_rate": 1.1115606936416186e-05, - "loss": 3.3335, - "step": 1593 - }, - { - "epoch": 7.15, - "learning_rate": 1.1098265895953756e-05, - "loss": 3.3489, - "step": 1594 - }, - { - "epoch": 7.15, - "learning_rate": 1.108092485549133e-05, - "loss": 3.3639, - "step": 1595 - }, - { - "epoch": 7.16, - "learning_rate": 1.1063583815028902e-05, - "loss": 3.4261, - "step": 1596 - }, - { - "epoch": 7.16, - "learning_rate": 1.1046242774566474e-05, - "loss": 3.3638, - "step": 1597 - }, - { - "epoch": 7.17, - "learning_rate": 1.1028901734104046e-05, - "loss": 3.4047, - "step": 1598 - }, - { - "epoch": 7.17, - "learning_rate": 1.1011560693641618e-05, - "loss": 3.4339, - "step": 1599 - }, - { - "epoch": 7.17, - "learning_rate": 1.099421965317919e-05, - "loss": 3.4643, - "step": 1600 - }, - { - "epoch": 7.18, - "learning_rate": 1.0976878612716764e-05, - "loss": 3.4215, - "step": 1601 - }, - { - "epoch": 7.18, - "learning_rate": 1.0959537572254335e-05, - "loss": 3.3619, - "step": 1602 - }, - { - "epoch": 7.19, - "learning_rate": 1.0942196531791908e-05, - "loss": 3.3778, - "step": 1603 - }, - { - "epoch": 7.19, - "learning_rate": 1.092485549132948e-05, - "loss": 3.5079, - "step": 1604 - }, - { - "epoch": 7.2, - "learning_rate": 1.0907514450867052e-05, - "loss": 3.447, - "step": 1605 - }, - { - "epoch": 7.2, - "learning_rate": 1.0890173410404625e-05, - "loss": 3.3798, - "step": 1606 - }, - { - "epoch": 7.21, - "learning_rate": 1.0872832369942197e-05, - "loss": 3.4926, - "step": 1607 - }, - { - "epoch": 7.21, - "learning_rate": 1.0855491329479769e-05, - "loss": 3.4982, - "step": 1608 - }, - { - "epoch": 7.22, - "learning_rate": 1.0838150289017342e-05, - "loss": 3.2367, - "step": 1609 - }, - { - "epoch": 7.22, - "learning_rate": 1.0820809248554913e-05, - "loss": 3.4525, - "step": 1610 - }, - { - "epoch": 7.22, - "learning_rate": 1.0803468208092487e-05, - "loss": 3.355, - "step": 1611 - }, - { - "epoch": 7.23, - "learning_rate": 1.0786127167630057e-05, - "loss": 4.2457, - "step": 1612 - }, - { - "epoch": 7.23, - "learning_rate": 1.076878612716763e-05, - "loss": 4.2183, - "step": 1613 - }, - { - "epoch": 7.24, - "learning_rate": 1.0751445086705203e-05, - "loss": 4.2492, - "step": 1614 - }, - { - "epoch": 7.24, - "learning_rate": 1.0734104046242775e-05, - "loss": 4.2798, - "step": 1615 - }, - { - "epoch": 7.25, - "learning_rate": 1.0716763005780347e-05, - "loss": 4.214, - "step": 1616 - }, - { - "epoch": 7.25, - "learning_rate": 1.069942196531792e-05, - "loss": 4.1478, - "step": 1617 - }, - { - "epoch": 7.26, - "learning_rate": 1.0682080924855491e-05, - "loss": 4.1246, - "step": 1618 - }, - { - "epoch": 7.26, - "learning_rate": 1.0664739884393065e-05, - "loss": 4.1029, - "step": 1619 - }, - { - "epoch": 7.26, - "learning_rate": 1.0647398843930635e-05, - "loss": 4.0118, - "step": 1620 - }, - { - "epoch": 7.27, - "learning_rate": 1.0630057803468209e-05, - "loss": 3.9219, - "step": 1621 - }, - { - "epoch": 7.27, - "learning_rate": 1.0612716763005781e-05, - "loss": 3.9431, - "step": 1622 - }, - { - "epoch": 7.28, - "learning_rate": 1.0595375722543353e-05, - "loss": 3.7916, - "step": 1623 - }, - { - "epoch": 7.28, - "learning_rate": 1.0578034682080925e-05, - "loss": 3.855, - "step": 1624 - }, - { - "epoch": 7.29, - "learning_rate": 1.0560693641618497e-05, - "loss": 3.7926, - "step": 1625 - }, - { - "epoch": 7.29, - "learning_rate": 1.054335260115607e-05, - "loss": 3.6741, - "step": 1626 - }, - { - "epoch": 7.3, - "learning_rate": 1.0526011560693643e-05, - "loss": 3.6696, - "step": 1627 - }, - { - "epoch": 7.3, - "learning_rate": 1.0508670520231213e-05, - "loss": 3.6256, - "step": 1628 - }, - { - "epoch": 7.3, - "learning_rate": 1.0491329479768787e-05, - "loss": 3.6661, - "step": 1629 - }, - { - "epoch": 7.31, - "learning_rate": 1.047398843930636e-05, - "loss": 3.6186, - "step": 1630 - }, - { - "epoch": 7.31, - "learning_rate": 1.0456647398843931e-05, - "loss": 3.5236, - "step": 1631 - }, - { - "epoch": 7.32, - "learning_rate": 1.0439306358381503e-05, - "loss": 3.5413, - "step": 1632 - }, - { - "epoch": 7.32, - "learning_rate": 1.0421965317919075e-05, - "loss": 3.5598, - "step": 1633 - }, - { - "epoch": 7.33, - "learning_rate": 1.0404624277456647e-05, - "loss": 3.4842, - "step": 1634 - }, - { - "epoch": 7.33, - "learning_rate": 1.0387283236994221e-05, - "loss": 3.5515, - "step": 1635 - }, - { - "epoch": 7.34, - "learning_rate": 1.0369942196531792e-05, - "loss": 3.4904, - "step": 1636 - }, - { - "epoch": 7.34, - "learning_rate": 1.0352601156069365e-05, - "loss": 3.6319, - "step": 1637 - }, - { - "epoch": 7.35, - "learning_rate": 1.0335260115606936e-05, - "loss": 3.5538, - "step": 1638 - }, - { - "epoch": 7.35, - "learning_rate": 1.031791907514451e-05, - "loss": 3.5301, - "step": 1639 - }, - { - "epoch": 7.35, - "learning_rate": 1.0300578034682082e-05, - "loss": 3.3783, - "step": 1640 - }, - { - "epoch": 7.36, - "learning_rate": 1.0283236994219654e-05, - "loss": 3.4897, - "step": 1641 - }, - { - "epoch": 7.36, - "learning_rate": 1.0265895953757226e-05, - "loss": 3.5303, - "step": 1642 - }, - { - "epoch": 7.37, - "learning_rate": 1.02485549132948e-05, - "loss": 3.4219, - "step": 1643 - }, - { - "epoch": 7.37, - "learning_rate": 1.023121387283237e-05, - "loss": 3.4381, - "step": 1644 - }, - { - "epoch": 7.38, - "learning_rate": 1.0213872832369944e-05, - "loss": 3.4455, - "step": 1645 - }, - { - "epoch": 7.38, - "learning_rate": 1.0196531791907514e-05, - "loss": 3.4048, - "step": 1646 - }, - { - "epoch": 7.39, - "learning_rate": 1.0179190751445088e-05, - "loss": 3.3894, - "step": 1647 - }, - { - "epoch": 7.39, - "learning_rate": 1.016184971098266e-05, - "loss": 3.4203, - "step": 1648 - }, - { - "epoch": 7.39, - "learning_rate": 1.0144508670520232e-05, - "loss": 3.4005, - "step": 1649 - }, - { - "epoch": 7.4, - "learning_rate": 1.0127167630057804e-05, - "loss": 3.4222, - "step": 1650 - }, - { - "epoch": 7.4, - "learning_rate": 1.0109826589595376e-05, - "loss": 3.3879, - "step": 1651 - }, - { - "epoch": 7.41, - "learning_rate": 1.0092485549132948e-05, - "loss": 3.4199, - "step": 1652 - }, - { - "epoch": 7.41, - "learning_rate": 1.0075144508670522e-05, - "loss": 3.3692, - "step": 1653 - }, - { - "epoch": 7.42, - "learning_rate": 1.0057803468208092e-05, - "loss": 3.3981, - "step": 1654 - }, - { - "epoch": 7.42, - "learning_rate": 1.0040462427745666e-05, - "loss": 3.4547, - "step": 1655 - }, - { - "epoch": 7.43, - "learning_rate": 1.0023121387283238e-05, - "loss": 3.4876, - "step": 1656 - }, - { - "epoch": 7.43, - "learning_rate": 1.000578034682081e-05, - "loss": 3.3046, - "step": 1657 - }, - { - "epoch": 7.43, - "learning_rate": 9.988439306358382e-06, - "loss": 3.3553, - "step": 1658 - }, - { - "epoch": 7.44, - "learning_rate": 9.971098265895953e-06, - "loss": 3.3635, - "step": 1659 - }, - { - "epoch": 7.44, - "learning_rate": 9.953757225433526e-06, - "loss": 3.4029, - "step": 1660 - }, - { - "epoch": 7.45, - "learning_rate": 9.936416184971098e-06, - "loss": 3.2374, - "step": 1661 - }, - { - "epoch": 7.45, - "learning_rate": 9.91907514450867e-06, - "loss": 4.3088, - "step": 1662 - }, - { - "epoch": 7.46, - "learning_rate": 9.901734104046243e-06, - "loss": 4.2106, - "step": 1663 - }, - { - "epoch": 7.46, - "learning_rate": 9.884393063583815e-06, - "loss": 4.2904, - "step": 1664 - }, - { - "epoch": 7.47, - "learning_rate": 9.867052023121387e-06, - "loss": 4.0952, - "step": 1665 - }, - { - "epoch": 7.47, - "learning_rate": 9.84971098265896e-06, - "loss": 4.232, - "step": 1666 - }, - { - "epoch": 7.48, - "learning_rate": 9.83236994219653e-06, - "loss": 4.1302, - "step": 1667 - }, - { - "epoch": 7.48, - "learning_rate": 9.815028901734105e-06, - "loss": 4.1167, - "step": 1668 - }, - { - "epoch": 7.48, - "learning_rate": 9.797687861271677e-06, - "loss": 4.0666, - "step": 1669 - }, - { - "epoch": 7.49, - "learning_rate": 9.780346820809249e-06, - "loss": 4.0464, - "step": 1670 - }, - { - "epoch": 7.49, - "learning_rate": 9.76300578034682e-06, - "loss": 4.0075, - "step": 1671 - }, - { - "epoch": 7.5, - "learning_rate": 9.745664739884393e-06, - "loss": 3.8552, - "step": 1672 - }, - { - "epoch": 7.5, - "learning_rate": 9.728323699421965e-06, - "loss": 3.8782, - "step": 1673 - }, - { - "epoch": 7.51, - "learning_rate": 9.710982658959539e-06, - "loss": 3.8213, - "step": 1674 - }, - { - "epoch": 7.51, - "learning_rate": 9.693641618497109e-06, - "loss": 3.7914, - "step": 1675 - }, - { - "epoch": 7.52, - "learning_rate": 9.676300578034683e-06, - "loss": 3.702, - "step": 1676 - }, - { - "epoch": 7.52, - "learning_rate": 9.658959537572253e-06, - "loss": 3.5826, - "step": 1677 - }, - { - "epoch": 7.52, - "learning_rate": 9.641618497109827e-06, - "loss": 3.624, - "step": 1678 - }, - { - "epoch": 7.53, - "learning_rate": 9.624277456647399e-06, - "loss": 3.5874, - "step": 1679 - }, - { - "epoch": 7.53, - "learning_rate": 9.606936416184971e-06, - "loss": 3.5884, - "step": 1680 - }, - { - "epoch": 7.54, - "learning_rate": 9.589595375722543e-06, - "loss": 3.5871, - "step": 1681 - }, - { - "epoch": 7.54, - "learning_rate": 9.572254335260117e-06, - "loss": 3.5571, - "step": 1682 - }, - { - "epoch": 7.55, - "learning_rate": 9.554913294797687e-06, - "loss": 3.5846, - "step": 1683 - }, - { - "epoch": 7.55, - "learning_rate": 9.537572254335261e-06, - "loss": 3.5604, - "step": 1684 - }, - { - "epoch": 7.56, - "learning_rate": 9.520231213872831e-06, - "loss": 3.5964, - "step": 1685 - }, - { - "epoch": 7.56, - "learning_rate": 9.502890173410405e-06, - "loss": 3.5461, - "step": 1686 - }, - { - "epoch": 7.57, - "learning_rate": 9.485549132947977e-06, - "loss": 3.5809, - "step": 1687 - }, - { - "epoch": 7.57, - "learning_rate": 9.46820809248555e-06, - "loss": 3.522, - "step": 1688 - }, - { - "epoch": 7.57, - "learning_rate": 9.450867052023121e-06, - "loss": 3.4954, - "step": 1689 - }, - { - "epoch": 7.58, - "learning_rate": 9.433526011560693e-06, - "loss": 3.4639, - "step": 1690 - }, - { - "epoch": 7.58, - "learning_rate": 9.416184971098266e-06, - "loss": 3.5024, - "step": 1691 - }, - { - "epoch": 7.59, - "learning_rate": 9.39884393063584e-06, - "loss": 3.4632, - "step": 1692 - }, - { - "epoch": 7.59, - "learning_rate": 9.38150289017341e-06, - "loss": 3.3797, - "step": 1693 - }, - { - "epoch": 7.6, - "learning_rate": 9.364161849710983e-06, - "loss": 3.3826, - "step": 1694 - }, - { - "epoch": 7.6, - "learning_rate": 9.346820809248555e-06, - "loss": 3.3758, - "step": 1695 - }, - { - "epoch": 7.61, - "learning_rate": 9.329479768786128e-06, - "loss": 3.4563, - "step": 1696 - }, - { - "epoch": 7.61, - "learning_rate": 9.3121387283237e-06, - "loss": 3.3694, - "step": 1697 - }, - { - "epoch": 7.61, - "learning_rate": 9.294797687861272e-06, - "loss": 3.4125, - "step": 1698 - }, - { - "epoch": 7.62, - "learning_rate": 9.277456647398844e-06, - "loss": 3.4251, - "step": 1699 - }, - { - "epoch": 7.62, - "learning_rate": 9.260115606936418e-06, - "loss": 3.3842, - "step": 1700 - }, - { - "epoch": 7.63, - "learning_rate": 9.242774566473988e-06, - "loss": 3.3716, - "step": 1701 - }, - { - "epoch": 7.63, - "learning_rate": 9.225433526011562e-06, - "loss": 3.4167, - "step": 1702 - }, - { - "epoch": 7.64, - "learning_rate": 9.208092485549132e-06, - "loss": 3.4427, - "step": 1703 - }, - { - "epoch": 7.64, - "learning_rate": 9.190751445086706e-06, - "loss": 3.399, - "step": 1704 - }, - { - "epoch": 7.65, - "learning_rate": 9.173410404624278e-06, - "loss": 3.3828, - "step": 1705 - }, - { - "epoch": 7.65, - "learning_rate": 9.15606936416185e-06, - "loss": 3.3617, - "step": 1706 - }, - { - "epoch": 7.65, - "learning_rate": 9.138728323699422e-06, - "loss": 3.4299, - "step": 1707 - }, - { - "epoch": 7.66, - "learning_rate": 9.121387283236994e-06, - "loss": 3.4361, - "step": 1708 - }, - { - "epoch": 7.66, - "learning_rate": 9.104046242774566e-06, - "loss": 3.4543, - "step": 1709 - }, - { - "epoch": 7.67, - "learning_rate": 9.08670520231214e-06, - "loss": 3.3756, - "step": 1710 - }, - { - "epoch": 7.67, - "learning_rate": 9.06936416184971e-06, - "loss": 3.4149, - "step": 1711 - }, - { - "epoch": 7.68, - "learning_rate": 9.052023121387284e-06, - "loss": 4.2696, - "step": 1712 - }, - { - "epoch": 7.68, - "learning_rate": 9.034682080924856e-06, - "loss": 4.1461, - "step": 1713 - }, - { - "epoch": 7.69, - "learning_rate": 9.017341040462428e-06, - "loss": 4.2457, - "step": 1714 - }, - { - "epoch": 7.69, - "learning_rate": 9e-06, - "loss": 4.1243, - "step": 1715 - }, - { - "epoch": 7.7, - "learning_rate": 8.982658959537572e-06, - "loss": 4.1863, - "step": 1716 - }, - { - "epoch": 7.7, - "learning_rate": 8.965317919075144e-06, - "loss": 4.0753, - "step": 1717 - }, - { - "epoch": 7.7, - "learning_rate": 8.947976878612718e-06, - "loss": 4.0233, - "step": 1718 - }, - { - "epoch": 7.71, - "learning_rate": 8.930635838150288e-06, - "loss": 4.1104, - "step": 1719 - }, - { - "epoch": 7.71, - "learning_rate": 8.913294797687862e-06, - "loss": 4.0023, - "step": 1720 - }, - { - "epoch": 7.72, - "learning_rate": 8.895953757225433e-06, - "loss": 4.0233, - "step": 1721 - }, - { - "epoch": 7.72, - "learning_rate": 8.878612716763006e-06, - "loss": 3.9096, - "step": 1722 - }, - { - "epoch": 7.73, - "learning_rate": 8.861271676300578e-06, - "loss": 3.8776, - "step": 1723 - }, - { - "epoch": 7.73, - "learning_rate": 8.84393063583815e-06, - "loss": 3.7885, - "step": 1724 - }, - { - "epoch": 7.74, - "learning_rate": 8.826589595375723e-06, - "loss": 3.6858, - "step": 1725 - }, - { - "epoch": 7.74, - "learning_rate": 8.809248554913296e-06, - "loss": 3.6899, - "step": 1726 - }, - { - "epoch": 7.74, - "learning_rate": 8.791907514450867e-06, - "loss": 3.641, - "step": 1727 - }, - { - "epoch": 7.75, - "learning_rate": 8.77456647398844e-06, - "loss": 3.6487, - "step": 1728 - }, - { - "epoch": 7.75, - "learning_rate": 8.757225433526011e-06, - "loss": 3.5889, - "step": 1729 - }, - { - "epoch": 7.76, - "learning_rate": 8.739884393063585e-06, - "loss": 3.5744, - "step": 1730 - }, - { - "epoch": 7.76, - "learning_rate": 8.722543352601157e-06, - "loss": 3.6601, - "step": 1731 - }, - { - "epoch": 7.77, - "learning_rate": 8.705202312138729e-06, - "loss": 3.5943, - "step": 1732 - }, - { - "epoch": 7.77, - "learning_rate": 8.6878612716763e-06, - "loss": 3.6264, - "step": 1733 - }, - { - "epoch": 7.78, - "learning_rate": 8.670520231213873e-06, - "loss": 3.5722, - "step": 1734 - }, - { - "epoch": 7.78, - "learning_rate": 8.653179190751445e-06, - "loss": 3.5256, - "step": 1735 - }, - { - "epoch": 7.78, - "learning_rate": 8.635838150289019e-06, - "loss": 3.6254, - "step": 1736 - }, - { - "epoch": 7.79, - "learning_rate": 8.618497109826589e-06, - "loss": 3.5778, - "step": 1737 - }, - { - "epoch": 7.79, - "learning_rate": 8.601156069364163e-06, - "loss": 3.4736, - "step": 1738 - }, - { - "epoch": 7.8, - "learning_rate": 8.583815028901735e-06, - "loss": 3.4854, - "step": 1739 - }, - { - "epoch": 7.8, - "learning_rate": 8.566473988439307e-06, - "loss": 3.4268, - "step": 1740 - }, - { - "epoch": 7.81, - "learning_rate": 8.549132947976879e-06, - "loss": 3.4583, - "step": 1741 - }, - { - "epoch": 7.81, - "learning_rate": 8.531791907514451e-06, - "loss": 3.4335, - "step": 1742 - }, - { - "epoch": 7.82, - "learning_rate": 8.514450867052023e-06, - "loss": 3.4451, - "step": 1743 - }, - { - "epoch": 7.82, - "learning_rate": 8.497109826589597e-06, - "loss": 3.42, - "step": 1744 - }, - { - "epoch": 7.83, - "learning_rate": 8.479768786127167e-06, - "loss": 3.4913, - "step": 1745 - }, - { - "epoch": 7.83, - "learning_rate": 8.462427745664741e-06, - "loss": 3.4534, - "step": 1746 - }, - { - "epoch": 7.83, - "learning_rate": 8.445086705202311e-06, - "loss": 3.3673, - "step": 1747 - }, - { - "epoch": 7.84, - "learning_rate": 8.427745664739885e-06, - "loss": 3.3476, - "step": 1748 - }, - { - "epoch": 7.84, - "learning_rate": 8.410404624277457e-06, - "loss": 3.327, - "step": 1749 - }, - { - "epoch": 7.85, - "learning_rate": 8.39306358381503e-06, - "loss": 3.3339, - "step": 1750 - }, - { - "epoch": 7.85, - "learning_rate": 8.375722543352601e-06, - "loss": 3.3453, - "step": 1751 - }, - { - "epoch": 7.86, - "learning_rate": 8.358381502890175e-06, - "loss": 3.3937, - "step": 1752 - }, - { - "epoch": 7.86, - "learning_rate": 8.341040462427746e-06, - "loss": 3.4472, - "step": 1753 - }, - { - "epoch": 7.87, - "learning_rate": 8.323699421965318e-06, - "loss": 3.3535, - "step": 1754 - }, - { - "epoch": 7.87, - "learning_rate": 8.30635838150289e-06, - "loss": 3.4178, - "step": 1755 - }, - { - "epoch": 7.87, - "learning_rate": 8.289017341040462e-06, - "loss": 3.409, - "step": 1756 - }, - { - "epoch": 7.88, - "learning_rate": 8.271676300578036e-06, - "loss": 3.3731, - "step": 1757 - }, - { - "epoch": 7.88, - "learning_rate": 8.254335260115606e-06, - "loss": 3.3778, - "step": 1758 - }, - { - "epoch": 7.89, - "learning_rate": 8.23699421965318e-06, - "loss": 3.4355, - "step": 1759 - }, - { - "epoch": 7.89, - "learning_rate": 8.21965317919075e-06, - "loss": 3.4453, - "step": 1760 - }, - { - "epoch": 7.9, - "learning_rate": 8.202312138728324e-06, - "loss": 3.4076, - "step": 1761 - }, - { - "epoch": 7.9, - "learning_rate": 8.184971098265896e-06, - "loss": 4.2028, - "step": 1762 - }, - { - "epoch": 7.91, - "learning_rate": 8.167630057803468e-06, - "loss": 4.1265, - "step": 1763 - }, - { - "epoch": 7.91, - "learning_rate": 8.15028901734104e-06, - "loss": 4.1616, - "step": 1764 - }, - { - "epoch": 7.91, - "learning_rate": 8.132947976878614e-06, - "loss": 4.0691, - "step": 1765 - }, - { - "epoch": 7.92, - "learning_rate": 8.115606936416184e-06, - "loss": 3.998, - "step": 1766 - }, - { - "epoch": 7.92, - "learning_rate": 8.098265895953758e-06, - "loss": 3.7823, - "step": 1767 - }, - { - "epoch": 7.93, - "learning_rate": 8.080924855491328e-06, - "loss": 3.7425, - "step": 1768 - }, - { - "epoch": 7.93, - "learning_rate": 8.063583815028902e-06, - "loss": 3.7305, - "step": 1769 - }, - { - "epoch": 7.94, - "learning_rate": 8.046242774566474e-06, - "loss": 3.7151, - "step": 1770 - }, - { - "epoch": 7.94, - "learning_rate": 8.028901734104046e-06, - "loss": 3.5602, - "step": 1771 - }, - { - "epoch": 7.95, - "learning_rate": 8.011560693641618e-06, - "loss": 3.5218, - "step": 1772 - }, - { - "epoch": 7.95, - "learning_rate": 7.99421965317919e-06, - "loss": 3.6544, - "step": 1773 - }, - { - "epoch": 7.96, - "learning_rate": 7.976878612716762e-06, - "loss": 3.4446, - "step": 1774 - }, - { - "epoch": 7.96, - "learning_rate": 7.959537572254336e-06, - "loss": 3.5295, - "step": 1775 - }, - { - "epoch": 7.96, - "learning_rate": 7.942196531791907e-06, - "loss": 3.441, - "step": 1776 - }, - { - "epoch": 7.97, - "learning_rate": 7.92485549132948e-06, - "loss": 3.4074, - "step": 1777 - }, - { - "epoch": 7.97, - "learning_rate": 7.907514450867052e-06, - "loss": 3.4914, - "step": 1778 - }, - { - "epoch": 7.98, - "learning_rate": 7.890173410404624e-06, - "loss": 3.2935, - "step": 1779 - }, - { - "epoch": 7.98, - "learning_rate": 7.872832369942196e-06, - "loss": 3.4236, - "step": 1780 - }, - { - "epoch": 7.99, - "learning_rate": 7.855491329479769e-06, - "loss": 3.3437, - "step": 1781 - }, - { - "epoch": 7.99, - "learning_rate": 7.83815028901734e-06, - "loss": 3.378, - "step": 1782 - }, - { - "epoch": 8.0, - "learning_rate": 7.820809248554914e-06, - "loss": 3.4117, - "step": 1783 - }, - { - "epoch": 8.0, - "learning_rate": 7.803468208092485e-06, - "loss": 3.4898, - "step": 1784 - }, - { - "epoch": 8.0, - "learning_rate": 7.786127167630059e-06, - "loss": 4.1182, - "step": 1785 - }, - { - "epoch": 8.01, - "learning_rate": 7.768786127167629e-06, - "loss": 3.9994, - "step": 1786 - }, - { - "epoch": 8.01, - "learning_rate": 7.751445086705203e-06, - "loss": 4.0592, - "step": 1787 - }, - { - "epoch": 8.02, - "learning_rate": 7.734104046242775e-06, - "loss": 3.9964, - "step": 1788 - }, - { - "epoch": 8.02, - "learning_rate": 7.716763005780347e-06, - "loss": 4.0107, - "step": 1789 - }, - { - "epoch": 8.03, - "learning_rate": 7.699421965317919e-06, - "loss": 3.931, - "step": 1790 - }, - { - "epoch": 8.03, - "learning_rate": 7.682080924855493e-06, - "loss": 3.922, - "step": 1791 - }, - { - "epoch": 8.04, - "learning_rate": 7.664739884393063e-06, - "loss": 3.8991, - "step": 1792 - }, - { - "epoch": 8.04, - "learning_rate": 7.647398843930637e-06, - "loss": 3.8779, - "step": 1793 - }, - { - "epoch": 8.04, - "learning_rate": 7.630057803468207e-06, - "loss": 3.8272, - "step": 1794 - }, - { - "epoch": 8.05, - "learning_rate": 7.612716763005781e-06, - "loss": 3.7461, - "step": 1795 - }, - { - "epoch": 8.05, - "learning_rate": 7.595375722543352e-06, - "loss": 3.7225, - "step": 1796 - }, - { - "epoch": 8.06, - "learning_rate": 7.578034682080925e-06, - "loss": 3.6657, - "step": 1797 - }, - { - "epoch": 8.06, - "learning_rate": 7.560693641618497e-06, - "loss": 3.6324, - "step": 1798 - }, - { - "epoch": 8.07, - "learning_rate": 7.54335260115607e-06, - "loss": 3.6222, - "step": 1799 - }, - { - "epoch": 8.07, - "learning_rate": 7.526011560693641e-06, - "loss": 3.5819, - "step": 1800 - }, - { - "epoch": 8.08, - "learning_rate": 7.508670520231214e-06, - "loss": 3.6389, - "step": 1801 - }, - { - "epoch": 8.08, - "learning_rate": 7.491329479768786e-06, - "loss": 3.5601, - "step": 1802 - }, - { - "epoch": 8.09, - "learning_rate": 7.473988439306358e-06, - "loss": 3.5371, - "step": 1803 - }, - { - "epoch": 8.09, - "learning_rate": 7.45664739884393e-06, - "loss": 3.4591, - "step": 1804 - }, - { - "epoch": 8.09, - "learning_rate": 7.4393063583815024e-06, - "loss": 3.4749, - "step": 1805 - }, - { - "epoch": 8.1, - "learning_rate": 7.421965317919075e-06, - "loss": 3.4705, - "step": 1806 - }, - { - "epoch": 8.1, - "learning_rate": 7.404624277456647e-06, - "loss": 3.4022, - "step": 1807 - }, - { - "epoch": 8.11, - "learning_rate": 7.3872832369942195e-06, - "loss": 3.4355, - "step": 1808 - }, - { - "epoch": 8.11, - "learning_rate": 7.3699421965317915e-06, - "loss": 3.5067, - "step": 1809 - }, - { - "epoch": 8.12, - "learning_rate": 7.352601156069364e-06, - "loss": 3.4097, - "step": 1810 - }, - { - "epoch": 8.12, - "learning_rate": 7.3352601156069365e-06, - "loss": 3.4438, - "step": 1811 - }, - { - "epoch": 8.13, - "learning_rate": 7.317919075144509e-06, - "loss": 3.3129, - "step": 1812 - }, - { - "epoch": 8.13, - "learning_rate": 7.300578034682081e-06, - "loss": 3.3894, - "step": 1813 - }, - { - "epoch": 8.13, - "learning_rate": 7.283236994219653e-06, - "loss": 3.3518, - "step": 1814 - }, - { - "epoch": 8.14, - "learning_rate": 7.265895953757226e-06, - "loss": 3.3617, - "step": 1815 - }, - { - "epoch": 8.14, - "learning_rate": 7.248554913294798e-06, - "loss": 3.3222, - "step": 1816 - }, - { - "epoch": 8.15, - "learning_rate": 7.23121387283237e-06, - "loss": 3.2778, - "step": 1817 - }, - { - "epoch": 8.15, - "learning_rate": 7.213872832369942e-06, - "loss": 3.2908, - "step": 1818 - }, - { - "epoch": 8.16, - "learning_rate": 7.196531791907515e-06, - "loss": 3.3253, - "step": 1819 - }, - { - "epoch": 8.16, - "learning_rate": 7.179190751445087e-06, - "loss": 3.3726, - "step": 1820 - }, - { - "epoch": 8.17, - "learning_rate": 7.161849710982659e-06, - "loss": 3.3145, - "step": 1821 - }, - { - "epoch": 8.17, - "learning_rate": 7.144508670520231e-06, - "loss": 3.3403, - "step": 1822 - }, - { - "epoch": 8.17, - "learning_rate": 7.127167630057803e-06, - "loss": 3.3802, - "step": 1823 - }, - { - "epoch": 8.18, - "learning_rate": 7.109826589595376e-06, - "loss": 3.2439, - "step": 1824 - }, - { - "epoch": 8.18, - "learning_rate": 7.092485549132948e-06, - "loss": 3.3549, - "step": 1825 - }, - { - "epoch": 8.19, - "learning_rate": 7.07514450867052e-06, - "loss": 3.2981, - "step": 1826 - }, - { - "epoch": 8.19, - "learning_rate": 7.057803468208092e-06, - "loss": 3.277, - "step": 1827 - }, - { - "epoch": 8.2, - "learning_rate": 7.040462427745665e-06, - "loss": 3.343, - "step": 1828 - }, - { - "epoch": 8.2, - "learning_rate": 7.023121387283237e-06, - "loss": 3.38, - "step": 1829 - }, - { - "epoch": 8.21, - "learning_rate": 7.005780346820809e-06, - "loss": 3.3683, - "step": 1830 - }, - { - "epoch": 8.21, - "learning_rate": 6.988439306358381e-06, - "loss": 3.2754, - "step": 1831 - }, - { - "epoch": 8.22, - "learning_rate": 6.971098265895954e-06, - "loss": 3.3122, - "step": 1832 - }, - { - "epoch": 8.22, - "learning_rate": 6.953757225433526e-06, - "loss": 3.4639, - "step": 1833 - }, - { - "epoch": 8.22, - "learning_rate": 6.936416184971098e-06, - "loss": 3.1142, - "step": 1834 - }, - { - "epoch": 8.23, - "learning_rate": 6.91907514450867e-06, - "loss": 4.1776, - "step": 1835 - }, - { - "epoch": 8.23, - "learning_rate": 6.9017341040462425e-06, - "loss": 4.1406, - "step": 1836 - }, - { - "epoch": 8.24, - "learning_rate": 6.884393063583815e-06, - "loss": 4.1619, - "step": 1837 - }, - { - "epoch": 8.24, - "learning_rate": 6.8670520231213874e-06, - "loss": 4.1597, - "step": 1838 - }, - { - "epoch": 8.25, - "learning_rate": 6.8497109826589595e-06, - "loss": 4.1007, - "step": 1839 - }, - { - "epoch": 8.25, - "learning_rate": 6.832369942196532e-06, - "loss": 4.0917, - "step": 1840 - }, - { - "epoch": 8.26, - "learning_rate": 6.8150289017341045e-06, - "loss": 4.05, - "step": 1841 - }, - { - "epoch": 8.26, - "learning_rate": 6.7976878612716766e-06, - "loss": 4.0253, - "step": 1842 - }, - { - "epoch": 8.26, - "learning_rate": 6.780346820809249e-06, - "loss": 4.0109, - "step": 1843 - }, - { - "epoch": 8.27, - "learning_rate": 6.763005780346821e-06, - "loss": 3.9088, - "step": 1844 - }, - { - "epoch": 8.27, - "learning_rate": 6.745664739884394e-06, - "loss": 3.8613, - "step": 1845 - }, - { - "epoch": 8.28, - "learning_rate": 6.728323699421966e-06, - "loss": 3.7595, - "step": 1846 - }, - { - "epoch": 8.28, - "learning_rate": 6.710982658959538e-06, - "loss": 3.8061, - "step": 1847 - }, - { - "epoch": 8.29, - "learning_rate": 6.69364161849711e-06, - "loss": 3.7033, - "step": 1848 - }, - { - "epoch": 8.29, - "learning_rate": 6.676300578034682e-06, - "loss": 3.5736, - "step": 1849 - }, - { - "epoch": 8.3, - "learning_rate": 6.658959537572255e-06, - "loss": 3.5619, - "step": 1850 - }, - { - "epoch": 8.3, - "learning_rate": 6.641618497109827e-06, - "loss": 3.623, - "step": 1851 - }, - { - "epoch": 8.3, - "learning_rate": 6.624277456647399e-06, - "loss": 3.5274, - "step": 1852 - }, - { - "epoch": 8.31, - "learning_rate": 6.606936416184971e-06, - "loss": 3.476, - "step": 1853 - }, - { - "epoch": 8.31, - "learning_rate": 6.589595375722544e-06, - "loss": 3.4913, - "step": 1854 - }, - { - "epoch": 8.32, - "learning_rate": 6.572254335260116e-06, - "loss": 3.4383, - "step": 1855 - }, - { - "epoch": 8.32, - "learning_rate": 6.554913294797688e-06, - "loss": 3.5418, - "step": 1856 - }, - { - "epoch": 8.33, - "learning_rate": 6.53757225433526e-06, - "loss": 3.4382, - "step": 1857 - }, - { - "epoch": 8.33, - "learning_rate": 6.520231213872832e-06, - "loss": 3.4381, - "step": 1858 - }, - { - "epoch": 8.34, - "learning_rate": 6.502890173410405e-06, - "loss": 3.4033, - "step": 1859 - }, - { - "epoch": 8.34, - "learning_rate": 6.485549132947977e-06, - "loss": 3.5293, - "step": 1860 - }, - { - "epoch": 8.35, - "learning_rate": 6.468208092485549e-06, - "loss": 3.4931, - "step": 1861 - }, - { - "epoch": 8.35, - "learning_rate": 6.450867052023121e-06, - "loss": 3.4443, - "step": 1862 - }, - { - "epoch": 8.35, - "learning_rate": 6.433526011560694e-06, - "loss": 3.3363, - "step": 1863 - }, - { - "epoch": 8.36, - "learning_rate": 6.416184971098266e-06, - "loss": 3.4288, - "step": 1864 - }, - { - "epoch": 8.36, - "learning_rate": 6.398843930635838e-06, - "loss": 3.3535, - "step": 1865 - }, - { - "epoch": 8.37, - "learning_rate": 6.3815028901734104e-06, - "loss": 3.258, - "step": 1866 - }, - { - "epoch": 8.37, - "learning_rate": 6.364161849710983e-06, - "loss": 3.3366, - "step": 1867 - }, - { - "epoch": 8.38, - "learning_rate": 6.346820809248555e-06, - "loss": 3.4033, - "step": 1868 - }, - { - "epoch": 8.38, - "learning_rate": 6.3294797687861275e-06, - "loss": 3.3799, - "step": 1869 - }, - { - "epoch": 8.39, - "learning_rate": 6.3121387283236995e-06, - "loss": 3.2493, - "step": 1870 - }, - { - "epoch": 8.39, - "learning_rate": 6.294797687861272e-06, - "loss": 3.2931, - "step": 1871 - }, - { - "epoch": 8.39, - "learning_rate": 6.2774566473988445e-06, - "loss": 3.3144, - "step": 1872 - }, - { - "epoch": 8.4, - "learning_rate": 6.260115606936417e-06, - "loss": 3.2651, - "step": 1873 - }, - { - "epoch": 8.4, - "learning_rate": 6.242774566473989e-06, - "loss": 3.3027, - "step": 1874 - }, - { - "epoch": 8.41, - "learning_rate": 6.225433526011561e-06, - "loss": 3.2677, - "step": 1875 - }, - { - "epoch": 8.41, - "learning_rate": 6.208092485549134e-06, - "loss": 3.5069, - "step": 1876 - }, - { - "epoch": 8.42, - "learning_rate": 6.190751445086706e-06, - "loss": 3.2993, - "step": 1877 - }, - { - "epoch": 8.42, - "learning_rate": 6.173410404624278e-06, - "loss": 3.3499, - "step": 1878 - }, - { - "epoch": 8.43, - "learning_rate": 6.15606936416185e-06, - "loss": 3.1909, - "step": 1879 - }, - { - "epoch": 8.43, - "learning_rate": 6.138728323699423e-06, - "loss": 3.372, - "step": 1880 - }, - { - "epoch": 8.43, - "learning_rate": 6.121387283236995e-06, - "loss": 3.2667, - "step": 1881 - }, - { - "epoch": 8.44, - "learning_rate": 6.104046242774567e-06, - "loss": 3.3584, - "step": 1882 - }, - { - "epoch": 8.44, - "learning_rate": 6.086705202312139e-06, - "loss": 3.3955, - "step": 1883 - }, - { - "epoch": 8.45, - "learning_rate": 6.069364161849711e-06, - "loss": 3.2677, - "step": 1884 - }, - { - "epoch": 8.45, - "learning_rate": 6.052023121387284e-06, - "loss": 4.1139, - "step": 1885 - }, - { - "epoch": 8.46, - "learning_rate": 6.034682080924856e-06, - "loss": 4.017, - "step": 1886 - }, - { - "epoch": 8.46, - "learning_rate": 6.017341040462428e-06, - "loss": 4.1157, - "step": 1887 - }, - { - "epoch": 8.47, - "learning_rate": 6e-06, - "loss": 4.0058, - "step": 1888 - }, - { - "epoch": 8.47, - "learning_rate": 5.982658959537573e-06, - "loss": 3.9954, - "step": 1889 - }, - { - "epoch": 8.48, - "learning_rate": 5.965317919075145e-06, - "loss": 4.0446, - "step": 1890 - }, - { - "epoch": 8.48, - "learning_rate": 5.947976878612717e-06, - "loss": 4.0586, - "step": 1891 - }, - { - "epoch": 8.48, - "learning_rate": 5.930635838150289e-06, - "loss": 4.0104, - "step": 1892 - }, - { - "epoch": 8.49, - "learning_rate": 5.913294797687862e-06, - "loss": 3.9089, - "step": 1893 - }, - { - "epoch": 8.49, - "learning_rate": 5.895953757225434e-06, - "loss": 3.8737, - "step": 1894 - }, - { - "epoch": 8.5, - "learning_rate": 5.878612716763006e-06, - "loss": 3.8226, - "step": 1895 - }, - { - "epoch": 8.5, - "learning_rate": 5.861271676300578e-06, - "loss": 3.7705, - "step": 1896 - }, - { - "epoch": 8.51, - "learning_rate": 5.8439306358381505e-06, - "loss": 3.8212, - "step": 1897 - }, - { - "epoch": 8.51, - "learning_rate": 5.8265895953757225e-06, - "loss": 3.6777, - "step": 1898 - }, - { - "epoch": 8.52, - "learning_rate": 5.809248554913295e-06, - "loss": 3.663, - "step": 1899 - }, - { - "epoch": 8.52, - "learning_rate": 5.791907514450867e-06, - "loss": 3.5272, - "step": 1900 - }, - { - "epoch": 8.52, - "learning_rate": 5.774566473988439e-06, - "loss": 3.4951, - "step": 1901 - }, - { - "epoch": 8.53, - "learning_rate": 5.757225433526012e-06, - "loss": 3.5092, - "step": 1902 - }, - { - "epoch": 8.53, - "learning_rate": 5.739884393063584e-06, - "loss": 3.5343, - "step": 1903 - }, - { - "epoch": 8.54, - "learning_rate": 5.722543352601156e-06, - "loss": 3.4855, - "step": 1904 - }, - { - "epoch": 8.54, - "learning_rate": 5.705202312138728e-06, - "loss": 3.4923, - "step": 1905 - }, - { - "epoch": 8.55, - "learning_rate": 5.687861271676301e-06, - "loss": 3.5079, - "step": 1906 - }, - { - "epoch": 8.55, - "learning_rate": 5.670520231213873e-06, - "loss": 3.4391, - "step": 1907 - }, - { - "epoch": 8.56, - "learning_rate": 5.653179190751445e-06, - "loss": 3.4836, - "step": 1908 - }, - { - "epoch": 8.56, - "learning_rate": 5.635838150289017e-06, - "loss": 3.4855, - "step": 1909 - }, - { - "epoch": 8.57, - "learning_rate": 5.618497109826589e-06, - "loss": 3.4756, - "step": 1910 - }, - { - "epoch": 8.57, - "learning_rate": 5.601156069364162e-06, - "loss": 3.4713, - "step": 1911 - }, - { - "epoch": 8.57, - "learning_rate": 5.583815028901734e-06, - "loss": 3.372, - "step": 1912 - }, - { - "epoch": 8.58, - "learning_rate": 5.566473988439306e-06, - "loss": 3.4141, - "step": 1913 - }, - { - "epoch": 8.58, - "learning_rate": 5.549132947976878e-06, - "loss": 3.3146, - "step": 1914 - }, - { - "epoch": 8.59, - "learning_rate": 5.531791907514451e-06, - "loss": 3.4147, - "step": 1915 - }, - { - "epoch": 8.59, - "learning_rate": 5.514450867052023e-06, - "loss": 3.346, - "step": 1916 - }, - { - "epoch": 8.6, - "learning_rate": 5.497109826589595e-06, - "loss": 3.3628, - "step": 1917 - }, - { - "epoch": 8.6, - "learning_rate": 5.479768786127167e-06, - "loss": 3.289, - "step": 1918 - }, - { - "epoch": 8.61, - "learning_rate": 5.46242774566474e-06, - "loss": 3.2644, - "step": 1919 - }, - { - "epoch": 8.61, - "learning_rate": 5.445086705202312e-06, - "loss": 3.3341, - "step": 1920 - }, - { - "epoch": 8.61, - "learning_rate": 5.427745664739884e-06, - "loss": 3.3634, - "step": 1921 - }, - { - "epoch": 8.62, - "learning_rate": 5.410404624277456e-06, - "loss": 3.2382, - "step": 1922 - }, - { - "epoch": 8.62, - "learning_rate": 5.3930635838150285e-06, - "loss": 3.2531, - "step": 1923 - }, - { - "epoch": 8.63, - "learning_rate": 5.375722543352601e-06, - "loss": 3.3276, - "step": 1924 - }, - { - "epoch": 8.63, - "learning_rate": 5.3583815028901734e-06, - "loss": 3.3509, - "step": 1925 - }, - { - "epoch": 8.64, - "learning_rate": 5.3410404624277455e-06, - "loss": 3.224, - "step": 1926 - }, - { - "epoch": 8.64, - "learning_rate": 5.3236994219653176e-06, - "loss": 3.2597, - "step": 1927 - }, - { - "epoch": 8.65, - "learning_rate": 5.3063583815028905e-06, - "loss": 3.3581, - "step": 1928 - }, - { - "epoch": 8.65, - "learning_rate": 5.2890173410404626e-06, - "loss": 3.4059, - "step": 1929 - }, - { - "epoch": 8.65, - "learning_rate": 5.271676300578035e-06, - "loss": 3.3169, - "step": 1930 - }, - { - "epoch": 8.66, - "learning_rate": 5.254335260115607e-06, - "loss": 3.2648, - "step": 1931 - }, - { - "epoch": 8.66, - "learning_rate": 5.23699421965318e-06, - "loss": 3.4101, - "step": 1932 - }, - { - "epoch": 8.67, - "learning_rate": 5.219653179190752e-06, - "loss": 3.242, - "step": 1933 - }, - { - "epoch": 8.67, - "learning_rate": 5.202312138728324e-06, - "loss": 3.338, - "step": 1934 - }, - { - "epoch": 8.68, - "learning_rate": 5.184971098265896e-06, - "loss": 4.1682, - "step": 1935 - }, - { - "epoch": 8.68, - "learning_rate": 5.167630057803468e-06, - "loss": 4.1346, - "step": 1936 - }, - { - "epoch": 8.69, - "learning_rate": 5.150289017341041e-06, - "loss": 4.1245, - "step": 1937 - }, - { - "epoch": 8.69, - "learning_rate": 5.132947976878613e-06, - "loss": 4.052, - "step": 1938 - }, - { - "epoch": 8.7, - "learning_rate": 5.115606936416185e-06, - "loss": 3.9706, - "step": 1939 - }, - { - "epoch": 8.7, - "learning_rate": 5.098265895953757e-06, - "loss": 4.0667, - "step": 1940 - }, - { - "epoch": 8.7, - "learning_rate": 5.08092485549133e-06, - "loss": 3.9514, - "step": 1941 - }, - { - "epoch": 8.71, - "learning_rate": 5.063583815028902e-06, - "loss": 3.9641, - "step": 1942 - }, - { - "epoch": 8.71, - "learning_rate": 5.046242774566474e-06, - "loss": 3.9298, - "step": 1943 - }, - { - "epoch": 8.72, - "learning_rate": 5.028901734104046e-06, - "loss": 3.8935, - "step": 1944 - }, - { - "epoch": 8.72, - "learning_rate": 5.011560693641619e-06, - "loss": 3.7538, - "step": 1945 - }, - { - "epoch": 8.73, - "learning_rate": 4.994219653179191e-06, - "loss": 3.7974, - "step": 1946 - }, - { - "epoch": 8.73, - "learning_rate": 4.976878612716763e-06, - "loss": 3.6656, - "step": 1947 - }, - { - "epoch": 8.74, - "learning_rate": 4.959537572254335e-06, - "loss": 3.6146, - "step": 1948 - }, - { - "epoch": 8.74, - "learning_rate": 4.942196531791907e-06, - "loss": 3.6184, - "step": 1949 - }, - { - "epoch": 8.74, - "learning_rate": 4.92485549132948e-06, - "loss": 3.574, - "step": 1950 - }, - { - "epoch": 8.75, - "learning_rate": 4.907514450867052e-06, - "loss": 3.5685, - "step": 1951 - }, - { - "epoch": 8.75, - "learning_rate": 4.890173410404624e-06, - "loss": 3.5844, - "step": 1952 - }, - { - "epoch": 8.76, - "learning_rate": 4.872832369942196e-06, - "loss": 3.5489, - "step": 1953 - }, - { - "epoch": 8.76, - "learning_rate": 4.855491329479769e-06, - "loss": 3.63, - "step": 1954 - }, - { - "epoch": 8.77, - "learning_rate": 4.838150289017341e-06, - "loss": 3.5089, - "step": 1955 - }, - { - "epoch": 8.77, - "learning_rate": 4.8208092485549135e-06, - "loss": 3.5381, - "step": 1956 - }, - { - "epoch": 8.78, - "learning_rate": 4.8034682080924855e-06, - "loss": 3.4585, - "step": 1957 - }, - { - "epoch": 8.78, - "learning_rate": 4.7861271676300585e-06, - "loss": 3.4748, - "step": 1958 - }, - { - "epoch": 8.78, - "learning_rate": 4.7687861271676305e-06, - "loss": 3.4835, - "step": 1959 - }, - { - "epoch": 8.79, - "learning_rate": 4.751445086705203e-06, - "loss": 3.4503, - "step": 1960 - }, - { - "epoch": 8.79, - "learning_rate": 4.734104046242775e-06, - "loss": 3.451, - "step": 1961 - }, - { - "epoch": 8.8, - "learning_rate": 4.716763005780347e-06, - "loss": 3.4158, - "step": 1962 - }, - { - "epoch": 8.8, - "learning_rate": 4.69942196531792e-06, - "loss": 3.397, - "step": 1963 - }, - { - "epoch": 8.81, - "learning_rate": 4.682080924855492e-06, - "loss": 3.3698, - "step": 1964 - }, - { - "epoch": 8.81, - "learning_rate": 4.664739884393064e-06, - "loss": 3.4446, - "step": 1965 - }, - { - "epoch": 8.82, - "learning_rate": 4.647398843930636e-06, - "loss": 3.2909, - "step": 1966 - }, - { - "epoch": 8.82, - "learning_rate": 4.630057803468209e-06, - "loss": 3.3365, - "step": 1967 - }, - { - "epoch": 8.83, - "learning_rate": 4.612716763005781e-06, - "loss": 3.3514, - "step": 1968 - }, - { - "epoch": 8.83, - "learning_rate": 4.595375722543353e-06, - "loss": 3.3568, - "step": 1969 - }, - { - "epoch": 8.83, - "learning_rate": 4.578034682080925e-06, - "loss": 3.2758, - "step": 1970 - }, - { - "epoch": 8.84, - "learning_rate": 4.560693641618497e-06, - "loss": 3.2929, - "step": 1971 - }, - { - "epoch": 8.84, - "learning_rate": 4.54335260115607e-06, - "loss": 3.3295, - "step": 1972 - }, - { - "epoch": 8.85, - "learning_rate": 4.526011560693642e-06, - "loss": 3.282, - "step": 1973 - }, - { - "epoch": 8.85, - "learning_rate": 4.508670520231214e-06, - "loss": 3.2687, - "step": 1974 - }, - { - "epoch": 8.86, - "learning_rate": 4.491329479768786e-06, - "loss": 3.339, - "step": 1975 - }, - { - "epoch": 8.86, - "learning_rate": 4.473988439306359e-06, - "loss": 3.2867, - "step": 1976 - }, - { - "epoch": 8.87, - "learning_rate": 4.456647398843931e-06, - "loss": 3.3589, - "step": 1977 - }, - { - "epoch": 8.87, - "learning_rate": 4.439306358381503e-06, - "loss": 3.3917, - "step": 1978 - }, - { - "epoch": 8.87, - "learning_rate": 4.421965317919075e-06, - "loss": 3.2579, - "step": 1979 - }, - { - "epoch": 8.88, - "learning_rate": 4.404624277456648e-06, - "loss": 3.3597, - "step": 1980 - }, - { - "epoch": 8.88, - "learning_rate": 4.38728323699422e-06, - "loss": 3.3344, - "step": 1981 - }, - { - "epoch": 8.89, - "learning_rate": 4.369942196531792e-06, - "loss": 3.2327, - "step": 1982 - }, - { - "epoch": 8.89, - "learning_rate": 4.352601156069364e-06, - "loss": 3.3952, - "step": 1983 - }, - { - "epoch": 8.9, - "learning_rate": 4.3352601156069365e-06, - "loss": 3.3123, - "step": 1984 - }, - { - "epoch": 8.9, - "learning_rate": 4.317919075144509e-06, - "loss": 3.98, - "step": 1985 - }, - { - "epoch": 8.91, - "learning_rate": 4.3005780346820814e-06, - "loss": 4.0363, - "step": 1986 - }, - { - "epoch": 8.91, - "learning_rate": 4.2832369942196535e-06, - "loss": 4.1265, - "step": 1987 - }, - { - "epoch": 8.91, - "learning_rate": 4.2658959537572256e-06, - "loss": 3.9383, - "step": 1988 - }, - { - "epoch": 8.92, - "learning_rate": 4.2485549132947985e-06, - "loss": 3.9433, - "step": 1989 - }, - { - "epoch": 8.92, - "learning_rate": 4.2312138728323706e-06, - "loss": 3.8571, - "step": 1990 - }, - { - "epoch": 8.93, - "learning_rate": 4.213872832369943e-06, - "loss": 3.699, - "step": 1991 - }, - { - "epoch": 8.93, - "learning_rate": 4.196531791907515e-06, - "loss": 3.5891, - "step": 1992 - }, - { - "epoch": 8.94, - "learning_rate": 4.179190751445088e-06, - "loss": 3.555, - "step": 1993 - }, - { - "epoch": 8.94, - "learning_rate": 4.161849710982659e-06, - "loss": 3.5435, - "step": 1994 - }, - { - "epoch": 8.95, - "learning_rate": 4.144508670520231e-06, - "loss": 3.5203, - "step": 1995 - }, - { - "epoch": 8.95, - "learning_rate": 4.127167630057803e-06, - "loss": 3.5198, - "step": 1996 - }, - { - "epoch": 8.96, - "learning_rate": 4.109826589595375e-06, - "loss": 3.4158, - "step": 1997 - }, - { - "epoch": 8.96, - "learning_rate": 4.092485549132948e-06, - "loss": 3.4458, - "step": 1998 - }, - { - "epoch": 8.96, - "learning_rate": 4.07514450867052e-06, - "loss": 3.3295, - "step": 1999 - }, - { - "epoch": 8.97, - "learning_rate": 4.057803468208092e-06, - "loss": 3.3698, - "step": 2000 - }, - { - "epoch": 8.97, - "eval_loss": 4.259941577911377, - "eval_runtime": 524.0014, - "eval_samples_per_second": 5.042, - "eval_steps_per_second": 0.632, - "eval_wer": 1.0190995636652123, - "step": 2000 - }, - { - "epoch": 8.97, - "learning_rate": 4.040462427745664e-06, - "loss": 3.3549, - "step": 2001 - }, - { - "epoch": 8.98, - "learning_rate": 4.023121387283237e-06, - "loss": 3.3073, - "step": 2002 - }, - { - "epoch": 8.98, - "learning_rate": 4.005780346820809e-06, - "loss": 3.2608, - "step": 2003 - }, - { - "epoch": 8.99, - "learning_rate": 3.988439306358381e-06, - "loss": 3.376, - "step": 2004 - }, - { - "epoch": 8.99, - "learning_rate": 3.971098265895953e-06, - "loss": 3.2901, - "step": 2005 - }, - { - "epoch": 9.0, - "learning_rate": 3.953757225433526e-06, - "loss": 3.4347, - "step": 2006 - }, - { - "epoch": 9.0, - "learning_rate": 3.936416184971098e-06, - "loss": 3.3191, - "step": 2007 - }, - { - "epoch": 9.0, - "learning_rate": 3.91907514450867e-06, - "loss": 4.0172, - "step": 2008 - }, - { - "epoch": 9.01, - "learning_rate": 3.901734104046242e-06, - "loss": 3.8855, - "step": 2009 - }, - { - "epoch": 9.01, - "learning_rate": 3.8843930635838145e-06, - "loss": 3.9422, - "step": 2010 - }, - { - "epoch": 9.02, - "learning_rate": 3.867052023121387e-06, - "loss": 3.9771, - "step": 2011 - }, - { - "epoch": 9.02, - "learning_rate": 3.8497109826589594e-06, - "loss": 3.995, - "step": 2012 - }, - { - "epoch": 9.03, - "learning_rate": 3.8323699421965315e-06, - "loss": 3.881, - "step": 2013 - }, - { - "epoch": 9.03, - "learning_rate": 3.8150289017341036e-06, - "loss": 3.9283, - "step": 2014 - }, - { - "epoch": 9.04, - "learning_rate": 3.797687861271676e-06, - "loss": 3.8494, - "step": 2015 - }, - { - "epoch": 9.04, - "learning_rate": 3.7803468208092486e-06, - "loss": 3.8015, - "step": 2016 - }, - { - "epoch": 9.04, - "learning_rate": 3.7630057803468206e-06, - "loss": 3.8063, - "step": 2017 - }, - { - "epoch": 9.05, - "learning_rate": 3.745664739884393e-06, - "loss": 3.7155, - "step": 2018 - }, - { - "epoch": 9.05, - "learning_rate": 3.728323699421965e-06, - "loss": 3.7095, - "step": 2019 - }, - { - "epoch": 9.06, - "learning_rate": 3.7109826589595377e-06, - "loss": 3.6091, - "step": 2020 - }, - { - "epoch": 9.06, - "learning_rate": 3.6936416184971097e-06, - "loss": 3.5699, - "step": 2021 - }, - { - "epoch": 9.07, - "learning_rate": 3.676300578034682e-06, - "loss": 3.5183, - "step": 2022 - }, - { - "epoch": 9.07, - "learning_rate": 3.6589595375722543e-06, - "loss": 3.5259, - "step": 2023 - }, - { - "epoch": 9.08, - "learning_rate": 3.6416184971098264e-06, - "loss": 3.4793, - "step": 2024 - }, - { - "epoch": 9.08, - "learning_rate": 3.624277456647399e-06, - "loss": 3.4506, - "step": 2025 - }, - { - "epoch": 9.09, - "learning_rate": 3.606936416184971e-06, - "loss": 3.4071, - "step": 2026 - }, - { - "epoch": 9.09, - "learning_rate": 3.5895953757225434e-06, - "loss": 3.4044, - "step": 2027 - }, - { - "epoch": 9.09, - "learning_rate": 3.5722543352601155e-06, - "loss": 3.398, - "step": 2028 - }, - { - "epoch": 9.1, - "learning_rate": 3.554913294797688e-06, - "loss": 3.4986, - "step": 2029 - }, - { - "epoch": 9.1, - "learning_rate": 3.53757225433526e-06, - "loss": 3.4046, - "step": 2030 - }, - { - "epoch": 9.11, - "learning_rate": 3.5202312138728325e-06, - "loss": 3.3652, - "step": 2031 - }, - { - "epoch": 9.11, - "learning_rate": 3.5028901734104046e-06, - "loss": 3.4288, - "step": 2032 - }, - { - "epoch": 9.12, - "learning_rate": 3.485549132947977e-06, - "loss": 3.3382, - "step": 2033 - }, - { - "epoch": 9.12, - "learning_rate": 3.468208092485549e-06, - "loss": 3.3078, - "step": 2034 - }, - { - "epoch": 9.13, - "learning_rate": 3.4508670520231212e-06, - "loss": 3.3217, - "step": 2035 - }, - { - "epoch": 9.13, - "learning_rate": 3.4335260115606937e-06, - "loss": 3.2776, - "step": 2036 - }, - { - "epoch": 9.13, - "learning_rate": 3.416184971098266e-06, - "loss": 3.3606, - "step": 2037 - }, - { - "epoch": 9.14, - "learning_rate": 3.3988439306358383e-06, - "loss": 3.3422, - "step": 2038 - }, - { - "epoch": 9.14, - "learning_rate": 3.3815028901734103e-06, - "loss": 3.3055, - "step": 2039 - }, - { - "epoch": 9.15, - "learning_rate": 3.364161849710983e-06, - "loss": 3.2051, - "step": 2040 - }, - { - "epoch": 9.15, - "learning_rate": 3.346820809248555e-06, - "loss": 3.2975, - "step": 2041 - }, - { - "epoch": 9.16, - "learning_rate": 3.3294797687861274e-06, - "loss": 3.2273, - "step": 2042 - }, - { - "epoch": 9.16, - "learning_rate": 3.3121387283236995e-06, - "loss": 3.2341, - "step": 2043 - }, - { - "epoch": 9.17, - "learning_rate": 3.294797687861272e-06, - "loss": 3.2985, - "step": 2044 - }, - { - "epoch": 9.17, - "learning_rate": 3.277456647398844e-06, - "loss": 3.278, - "step": 2045 - }, - { - "epoch": 9.17, - "learning_rate": 3.260115606936416e-06, - "loss": 3.2753, - "step": 2046 - }, - { - "epoch": 9.18, - "learning_rate": 3.2427745664739886e-06, - "loss": 3.261, - "step": 2047 - }, - { - "epoch": 9.18, - "learning_rate": 3.2254335260115607e-06, - "loss": 3.1894, - "step": 2048 - }, - { - "epoch": 9.19, - "learning_rate": 3.208092485549133e-06, - "loss": 3.3104, - "step": 2049 - }, - { - "epoch": 9.19, - "learning_rate": 3.1907514450867052e-06, - "loss": 3.2267, - "step": 2050 - }, - { - "epoch": 9.2, - "learning_rate": 3.1734104046242777e-06, - "loss": 3.3057, - "step": 2051 - }, - { - "epoch": 9.2, - "learning_rate": 3.1560693641618498e-06, - "loss": 3.231, - "step": 2052 - }, - { - "epoch": 9.21, - "learning_rate": 3.1387283236994223e-06, - "loss": 3.3067, - "step": 2053 - }, - { - "epoch": 9.21, - "learning_rate": 3.1213872832369943e-06, - "loss": 3.2419, - "step": 2054 - }, - { - "epoch": 9.22, - "learning_rate": 3.104046242774567e-06, - "loss": 3.2409, - "step": 2055 - }, - { - "epoch": 9.22, - "learning_rate": 3.086705202312139e-06, - "loss": 3.3373, - "step": 2056 - }, - { - "epoch": 9.22, - "learning_rate": 3.0693641618497114e-06, - "loss": 3.2751, - "step": 2057 - }, - { - "epoch": 9.23, - "learning_rate": 3.0520231213872834e-06, - "loss": 4.1168, - "step": 2058 - }, - { - "epoch": 9.23, - "learning_rate": 3.0346820809248555e-06, - "loss": 3.9596, - "step": 2059 - }, - { - "epoch": 9.24, - "learning_rate": 3.017341040462428e-06, - "loss": 4.0989, - "step": 2060 - }, - { - "epoch": 9.24, - "learning_rate": 3e-06, - "loss": 4.0323, - "step": 2061 - }, - { - "epoch": 9.25, - "learning_rate": 2.9826589595375726e-06, - "loss": 4.0487, - "step": 2062 - }, - { - "epoch": 9.25, - "learning_rate": 2.9653179190751446e-06, - "loss": 3.9915, - "step": 2063 - }, - { - "epoch": 9.26, - "learning_rate": 2.947976878612717e-06, - "loss": 3.8636, - "step": 2064 - }, - { - "epoch": 9.26, - "learning_rate": 2.930635838150289e-06, - "loss": 3.9274, - "step": 2065 - }, - { - "epoch": 9.26, - "learning_rate": 2.9132947976878613e-06, - "loss": 3.8802, - "step": 2066 - }, - { - "epoch": 9.27, - "learning_rate": 2.8959537572254333e-06, - "loss": 3.8013, - "step": 2067 - }, - { - "epoch": 9.27, - "learning_rate": 2.878612716763006e-06, - "loss": 3.8256, - "step": 2068 - }, - { - "epoch": 9.28, - "learning_rate": 2.861271676300578e-06, - "loss": 3.6916, - "step": 2069 - }, - { - "epoch": 9.28, - "learning_rate": 2.8439306358381504e-06, - "loss": 3.6942, - "step": 2070 - }, - { - "epoch": 9.29, - "learning_rate": 2.8265895953757224e-06, - "loss": 3.7135, - "step": 2071 - }, - { - "epoch": 9.29, - "learning_rate": 2.8092485549132945e-06, - "loss": 3.5903, - "step": 2072 - }, - { - "epoch": 9.3, - "learning_rate": 2.791907514450867e-06, - "loss": 3.5691, - "step": 2073 - }, - { - "epoch": 9.3, - "learning_rate": 2.774566473988439e-06, - "loss": 3.4745, - "step": 2074 - }, - { - "epoch": 9.3, - "learning_rate": 2.7572254335260116e-06, - "loss": 3.4411, - "step": 2075 - }, - { - "epoch": 9.31, - "learning_rate": 2.7398843930635836e-06, - "loss": 3.4509, - "step": 2076 - }, - { - "epoch": 9.31, - "learning_rate": 2.722543352601156e-06, - "loss": 3.4391, - "step": 2077 - }, - { - "epoch": 9.32, - "learning_rate": 2.705202312138728e-06, - "loss": 3.5249, - "step": 2078 - }, - { - "epoch": 9.32, - "learning_rate": 2.6878612716763007e-06, - "loss": 3.4409, - "step": 2079 - }, - { - "epoch": 9.33, - "learning_rate": 2.6705202312138728e-06, - "loss": 3.3896, - "step": 2080 - }, - { - "epoch": 9.33, - "learning_rate": 2.6531791907514452e-06, - "loss": 3.4046, - "step": 2081 - }, - { - "epoch": 9.34, - "learning_rate": 2.6358381502890173e-06, - "loss": 3.4891, - "step": 2082 - }, - { - "epoch": 9.34, - "learning_rate": 2.61849710982659e-06, - "loss": 3.4354, - "step": 2083 - }, - { - "epoch": 9.35, - "learning_rate": 2.601156069364162e-06, - "loss": 3.3831, - "step": 2084 - }, - { - "epoch": 9.35, - "learning_rate": 2.583815028901734e-06, - "loss": 3.3975, - "step": 2085 - }, - { - "epoch": 9.35, - "learning_rate": 2.5664739884393064e-06, - "loss": 3.3543, - "step": 2086 - }, - { - "epoch": 9.36, - "learning_rate": 2.5491329479768785e-06, - "loss": 3.3374, - "step": 2087 - }, - { - "epoch": 9.36, - "learning_rate": 2.531791907514451e-06, - "loss": 3.4501, - "step": 2088 - }, - { - "epoch": 9.37, - "learning_rate": 2.514450867052023e-06, - "loss": 3.239, - "step": 2089 - }, - { - "epoch": 9.37, - "learning_rate": 2.4971098265895955e-06, - "loss": 3.3719, - "step": 2090 - }, - { - "epoch": 9.38, - "learning_rate": 2.4797687861271676e-06, - "loss": 3.3023, - "step": 2091 - }, - { - "epoch": 9.38, - "learning_rate": 2.46242774566474e-06, - "loss": 3.3659, - "step": 2092 - }, - { - "epoch": 9.39, - "learning_rate": 2.445086705202312e-06, - "loss": 3.2331, - "step": 2093 - }, - { - "epoch": 9.39, - "learning_rate": 2.4277456647398847e-06, - "loss": 3.2789, - "step": 2094 - }, - { - "epoch": 9.39, - "learning_rate": 2.4104046242774567e-06, - "loss": 3.2706, - "step": 2095 - }, - { - "epoch": 9.4, - "learning_rate": 2.3930635838150292e-06, - "loss": 3.2017, - "step": 2096 - }, - { - "epoch": 9.4, - "learning_rate": 2.3757225433526013e-06, - "loss": 3.2796, - "step": 2097 - }, - { - "epoch": 9.41, - "learning_rate": 2.3583815028901734e-06, - "loss": 3.2567, - "step": 2098 - }, - { - "epoch": 9.41, - "learning_rate": 2.341040462427746e-06, - "loss": 3.2948, - "step": 2099 - }, - { - "epoch": 9.42, - "learning_rate": 2.323699421965318e-06, - "loss": 3.2829, - "step": 2100 - }, - { - "epoch": 9.42, - "learning_rate": 2.3063583815028904e-06, - "loss": 3.3335, - "step": 2101 - }, - { - "epoch": 9.43, - "learning_rate": 2.2890173410404625e-06, - "loss": 3.2214, - "step": 2102 - }, - { - "epoch": 9.43, - "learning_rate": 2.271676300578035e-06, - "loss": 3.3146, - "step": 2103 - }, - { - "epoch": 9.43, - "learning_rate": 2.254335260115607e-06, - "loss": 3.2607, - "step": 2104 - }, - { - "epoch": 9.44, - "learning_rate": 2.2369942196531795e-06, - "loss": 3.219, - "step": 2105 - }, - { - "epoch": 9.44, - "learning_rate": 2.2196531791907516e-06, - "loss": 3.332, - "step": 2106 - }, - { - "epoch": 9.45, - "learning_rate": 2.202312138728324e-06, - "loss": 3.3571, - "step": 2107 - }, - { - "epoch": 9.45, - "learning_rate": 2.184971098265896e-06, - "loss": 4.061, - "step": 2108 - }, - { - "epoch": 9.46, - "learning_rate": 2.1676300578034682e-06, - "loss": 4.0652, - "step": 2109 - }, - { - "epoch": 9.46, - "learning_rate": 2.1502890173410407e-06, - "loss": 4.0563, - "step": 2110 - }, - { - "epoch": 9.47, - "learning_rate": 2.1329479768786128e-06, - "loss": 3.9696, - "step": 2111 - }, - { - "epoch": 9.47, - "learning_rate": 2.1156069364161853e-06, - "loss": 3.9416, - "step": 2112 - }, - { - "epoch": 9.48, - "learning_rate": 2.0982658959537573e-06, - "loss": 4.0014, - "step": 2113 - }, - { - "epoch": 9.48, - "learning_rate": 2.0809248554913294e-06, - "loss": 4.0486, - "step": 2114 - }, - { - "epoch": 9.48, - "learning_rate": 2.0635838150289015e-06, - "loss": 3.8973, - "step": 2115 - }, - { - "epoch": 9.49, - "learning_rate": 2.046242774566474e-06, - "loss": 3.8664, - "step": 2116 - }, - { - "epoch": 9.49, - "learning_rate": 2.028901734104046e-06, - "loss": 3.8263, - "step": 2117 - }, - { - "epoch": 9.5, - "learning_rate": 2.0115606936416185e-06, - "loss": 3.7801, - "step": 2118 - }, - { - "epoch": 9.5, - "learning_rate": 1.9942196531791906e-06, - "loss": 3.7214, - "step": 2119 - }, - { - "epoch": 9.51, - "learning_rate": 1.976878612716763e-06, - "loss": 3.623, - "step": 2120 - }, - { - "epoch": 9.51, - "learning_rate": 1.959537572254335e-06, - "loss": 3.6586, - "step": 2121 - }, - { - "epoch": 9.52, - "learning_rate": 1.9421965317919072e-06, - "loss": 3.5181, - "step": 2122 - }, - { - "epoch": 9.52, - "learning_rate": 1.9248554913294797e-06, - "loss": 3.4883, - "step": 2123 - }, - { - "epoch": 9.52, - "learning_rate": 1.9075144508670518e-06, - "loss": 3.6009, - "step": 2124 - }, - { - "epoch": 9.53, - "learning_rate": 1.8901734104046243e-06, - "loss": 3.5483, - "step": 2125 - }, - { - "epoch": 9.53, - "learning_rate": 1.8728323699421966e-06, - "loss": 3.4663, - "step": 2126 - }, - { - "epoch": 9.54, - "learning_rate": 1.8554913294797688e-06, - "loss": 3.424, - "step": 2127 - }, - { - "epoch": 9.54, - "learning_rate": 1.838150289017341e-06, - "loss": 3.4261, - "step": 2128 - }, - { - "epoch": 9.55, - "learning_rate": 1.8208092485549132e-06, - "loss": 3.4097, - "step": 2129 - }, - { - "epoch": 9.55, - "learning_rate": 1.8034682080924855e-06, - "loss": 3.4119, - "step": 2130 - }, - { - "epoch": 9.56, - "learning_rate": 1.7861271676300577e-06, - "loss": 3.3827, - "step": 2131 - }, - { - "epoch": 9.56, - "learning_rate": 1.76878612716763e-06, - "loss": 3.4139, - "step": 2132 - }, - { - "epoch": 9.57, - "learning_rate": 1.7514450867052023e-06, - "loss": 3.4792, - "step": 2133 - }, - { - "epoch": 9.57, - "learning_rate": 1.7341040462427746e-06, - "loss": 3.3544, - "step": 2134 - }, - { - "epoch": 9.57, - "learning_rate": 1.7167630057803469e-06, - "loss": 3.3638, - "step": 2135 - }, - { - "epoch": 9.58, - "learning_rate": 1.6994219653179191e-06, - "loss": 3.3551, - "step": 2136 - }, - { - "epoch": 9.58, - "learning_rate": 1.6820809248554914e-06, - "loss": 3.3162, - "step": 2137 - }, - { - "epoch": 9.59, - "learning_rate": 1.6647398843930637e-06, - "loss": 3.3552, - "step": 2138 - }, - { - "epoch": 9.59, - "learning_rate": 1.647398843930636e-06, - "loss": 3.2356, - "step": 2139 - }, - { - "epoch": 9.6, - "learning_rate": 1.630057803468208e-06, - "loss": 3.2151, - "step": 2140 - }, - { - "epoch": 9.6, - "learning_rate": 1.6127167630057803e-06, - "loss": 3.3076, - "step": 2141 - }, - { - "epoch": 9.61, - "learning_rate": 1.5953757225433526e-06, - "loss": 3.3143, - "step": 2142 - }, - { - "epoch": 9.61, - "learning_rate": 1.5780346820809249e-06, - "loss": 3.2887, - "step": 2143 - }, - { - "epoch": 9.61, - "learning_rate": 1.5606936416184972e-06, - "loss": 3.2681, - "step": 2144 - }, - { - "epoch": 9.62, - "learning_rate": 1.5433526011560694e-06, - "loss": 3.2485, - "step": 2145 - }, - { - "epoch": 9.62, - "learning_rate": 1.5260115606936417e-06, - "loss": 3.2595, - "step": 2146 - }, - { - "epoch": 9.63, - "learning_rate": 1.508670520231214e-06, - "loss": 3.2182, - "step": 2147 - }, - { - "epoch": 9.63, - "learning_rate": 1.4913294797687863e-06, - "loss": 3.3599, - "step": 2148 - }, - { - "epoch": 9.64, - "learning_rate": 1.4739884393063586e-06, - "loss": 3.2669, - "step": 2149 - }, - { - "epoch": 9.64, - "learning_rate": 1.4566473988439306e-06, - "loss": 3.2302, - "step": 2150 - }, - { - "epoch": 9.65, - "learning_rate": 1.439306358381503e-06, - "loss": 3.3325, - "step": 2151 - }, - { - "epoch": 9.65, - "learning_rate": 1.4219653179190752e-06, - "loss": 3.3096, - "step": 2152 - }, - { - "epoch": 9.65, - "learning_rate": 1.4046242774566473e-06, - "loss": 3.252, - "step": 2153 - }, - { - "epoch": 9.66, - "learning_rate": 1.3872832369942195e-06, - "loss": 3.3566, - "step": 2154 - }, - { - "epoch": 9.66, - "learning_rate": 1.3699421965317918e-06, - "loss": 3.318, - "step": 2155 - }, - { - "epoch": 9.67, - "learning_rate": 1.352601156069364e-06, - "loss": 3.344, - "step": 2156 - }, - { - "epoch": 9.67, - "learning_rate": 1.3352601156069364e-06, - "loss": 3.2417, - "step": 2157 - }, - { - "epoch": 9.68, - "learning_rate": 1.3179190751445087e-06, - "loss": 3.9626, - "step": 2158 - }, - { - "epoch": 9.68, - "learning_rate": 1.300578034682081e-06, - "loss": 4.0286, - "step": 2159 - }, - { - "epoch": 9.69, - "learning_rate": 1.2832369942196532e-06, - "loss": 4.0529, - "step": 2160 - }, - { - "epoch": 9.69, - "learning_rate": 1.2658959537572255e-06, - "loss": 3.9245, - "step": 2161 - }, - { - "epoch": 9.7, - "learning_rate": 1.2485549132947978e-06, - "loss": 3.8779, - "step": 2162 - }, - { - "epoch": 9.7, - "learning_rate": 1.23121387283237e-06, - "loss": 3.8989, - "step": 2163 - }, - { - "epoch": 9.7, - "learning_rate": 1.2138728323699423e-06, - "loss": 3.8897, - "step": 2164 - }, - { - "epoch": 9.71, - "learning_rate": 1.1965317919075146e-06, - "loss": 3.9576, - "step": 2165 - }, - { - "epoch": 9.71, - "learning_rate": 1.1791907514450867e-06, - "loss": 3.8363, - "step": 2166 - }, - { - "epoch": 9.72, - "learning_rate": 1.161849710982659e-06, - "loss": 3.9311, - "step": 2167 - }, - { - "epoch": 9.72, - "learning_rate": 1.1445086705202312e-06, - "loss": 3.7241, - "step": 2168 - }, - { - "epoch": 9.73, - "learning_rate": 1.1271676300578035e-06, - "loss": 3.7679, - "step": 2169 - }, - { - "epoch": 9.73, - "learning_rate": 1.1098265895953758e-06, - "loss": 3.7678, - "step": 2170 - }, - { - "epoch": 9.74, - "learning_rate": 1.092485549132948e-06, - "loss": 3.5697, - "step": 2171 - }, - { - "epoch": 9.74, - "learning_rate": 1.0751445086705204e-06, - "loss": 3.6388, - "step": 2172 - }, - { - "epoch": 9.74, - "learning_rate": 1.0578034682080926e-06, - "loss": 3.4868, - "step": 2173 - }, - { - "epoch": 9.75, - "learning_rate": 1.0404624277456647e-06, - "loss": 3.5181, - "step": 2174 - }, - { - "epoch": 9.75, - "learning_rate": 1.023121387283237e-06, - "loss": 3.5205, - "step": 2175 - }, - { - "epoch": 9.76, - "learning_rate": 1.0057803468208093e-06, - "loss": 3.5082, - "step": 2176 - }, - { - "epoch": 9.76, - "learning_rate": 9.884393063583815e-07, - "loss": 3.4763, - "step": 2177 - }, - { - "epoch": 9.77, - "learning_rate": 9.710982658959536e-07, - "loss": 3.4069, - "step": 2178 - }, - { - "epoch": 9.77, - "learning_rate": 9.537572254335259e-07, - "loss": 3.4645, - "step": 2179 - }, - { - "epoch": 9.78, - "learning_rate": 9.364161849710983e-07, - "loss": 3.3022, - "step": 2180 - }, - { - "epoch": 9.78, - "learning_rate": 9.190751445086705e-07, - "loss": 3.44, - "step": 2181 - }, - { - "epoch": 9.78, - "learning_rate": 9.017341040462427e-07, - "loss": 3.3826, - "step": 2182 - }, - { - "epoch": 9.79, - "learning_rate": 8.84393063583815e-07, - "loss": 3.4476, - "step": 2183 - }, - { - "epoch": 9.79, - "learning_rate": 8.670520231213873e-07, - "loss": 3.3724, - "step": 2184 - }, - { - "epoch": 9.8, - "learning_rate": 8.497109826589596e-07, - "loss": 3.3177, - "step": 2185 - }, - { - "epoch": 9.8, - "learning_rate": 8.323699421965318e-07, - "loss": 3.3174, - "step": 2186 - }, - { - "epoch": 9.81, - "learning_rate": 8.15028901734104e-07, - "loss": 3.399, - "step": 2187 - }, - { - "epoch": 9.81, - "learning_rate": 7.976878612716763e-07, - "loss": 3.289, - "step": 2188 - }, - { - "epoch": 9.82, - "learning_rate": 7.803468208092486e-07, - "loss": 3.2944, - "step": 2189 - }, - { - "epoch": 9.82, - "learning_rate": 7.630057803468209e-07, - "loss": 3.2746, - "step": 2190 - }, - { - "epoch": 9.83, - "learning_rate": 7.456647398843931e-07, - "loss": 3.2808, - "step": 2191 - }, - { - "epoch": 9.83, - "learning_rate": 7.283236994219653e-07, - "loss": 3.3136, - "step": 2192 - }, - { - "epoch": 9.83, - "learning_rate": 7.109826589595376e-07, - "loss": 3.2613, - "step": 2193 - }, - { - "epoch": 9.84, - "learning_rate": 6.936416184971098e-07, - "loss": 3.2466, - "step": 2194 - }, - { - "epoch": 9.84, - "learning_rate": 6.76300578034682e-07, - "loss": 3.246, - "step": 2195 - }, - { - "epoch": 9.85, - "learning_rate": 6.589595375722543e-07, - "loss": 3.2324, - "step": 2196 - }, - { - "epoch": 9.85, - "learning_rate": 6.416184971098266e-07, - "loss": 3.2604, - "step": 2197 - }, - { - "epoch": 9.86, - "learning_rate": 6.242774566473989e-07, - "loss": 3.2229, - "step": 2198 - }, - { - "epoch": 9.86, - "learning_rate": 6.069364161849712e-07, - "loss": 3.3276, - "step": 2199 - }, - { - "epoch": 9.87, - "learning_rate": 5.895953757225433e-07, - "loss": 3.3062, - "step": 2200 - }, - { - "epoch": 9.87, - "learning_rate": 5.722543352601156e-07, - "loss": 3.3162, - "step": 2201 - }, - { - "epoch": 9.87, - "learning_rate": 5.549132947976879e-07, - "loss": 3.2932, - "step": 2202 - }, - { - "epoch": 9.88, - "learning_rate": 5.375722543352602e-07, - "loss": 3.3479, - "step": 2203 - }, - { - "epoch": 9.88, - "learning_rate": 5.202312138728324e-07, - "loss": 3.2576, - "step": 2204 - }, - { - "epoch": 9.89, - "learning_rate": 5.028901734104046e-07, - "loss": 3.2652, - "step": 2205 - }, - { - "epoch": 9.89, - "learning_rate": 4.855491329479768e-07, - "loss": 3.3432, - "step": 2206 - }, - { - "epoch": 9.9, - "learning_rate": 4.6820809248554914e-07, - "loss": 3.2673, - "step": 2207 - }, - { - "epoch": 9.9, - "learning_rate": 4.5086705202312137e-07, - "loss": 3.9483, - "step": 2208 - }, { "epoch": 9.91, - "learning_rate": 4.3352601156069365e-07, - "loss": 3.8833, - "step": 2209 - }, - { - "epoch": 9.91, - "learning_rate": 4.161849710982659e-07, - "loss": 4.0039, - "step": 2210 + "learning_rate": 6.3934426229508185e-06, + "loss": 0.0575, + "step": 1100 }, { "epoch": 9.91, - "learning_rate": 3.9884393063583815e-07, - "loss": 3.9315, - "step": 2211 - }, - { - "epoch": 9.92, - "learning_rate": 3.8150289017341043e-07, - "loss": 3.7539, - "step": 2212 + "learning_rate": 5.901639344262295e-06, + "loss": 0.0564, + "step": 1101 }, { "epoch": 9.92, - "learning_rate": 3.6416184971098266e-07, - "loss": 3.6727, - "step": 2213 - }, - { - "epoch": 9.93, - "learning_rate": 3.468208092485549e-07, - "loss": 3.5854, - "step": 2214 + "learning_rate": 5.40983606557377e-06, + "loss": 0.0421, + "step": 1102 }, { "epoch": 9.93, - "learning_rate": 3.2947976878612716e-07, - "loss": 3.4982, - "step": 2215 - }, - { - "epoch": 9.94, - "learning_rate": 3.1213872832369944e-07, - "loss": 3.513, - "step": 2216 + "learning_rate": 4.9180327868852455e-06, + "loss": 0.0421, + "step": 1103 }, { "epoch": 9.94, - "learning_rate": 2.9479768786127167e-07, - "loss": 3.4476, - "step": 2217 - }, - { - "epoch": 9.95, - "learning_rate": 2.7745664739884395e-07, - "loss": 3.401, - "step": 2218 + "learning_rate": 4.426229508196721e-06, + "loss": 0.0357, + "step": 1104 }, { "epoch": 9.95, - "learning_rate": 2.601156069364162e-07, - "loss": 3.441, - "step": 2219 - }, - { - "epoch": 9.96, - "learning_rate": 2.427745664739884e-07, - "loss": 3.4043, - "step": 2220 - }, - { - "epoch": 9.96, - "learning_rate": 2.2543352601156068e-07, - "loss": 3.3952, - "step": 2221 + "learning_rate": 3.934426229508196e-06, + "loss": 0.0404, + "step": 1105 }, { "epoch": 9.96, - "learning_rate": 2.0809248554913296e-07, - "loss": 3.2021, - "step": 2222 - }, - { - "epoch": 9.97, - "learning_rate": 1.9075144508670522e-07, - "loss": 3.2441, - "step": 2223 + "learning_rate": 3.4426229508196716e-06, + "loss": 0.031, + "step": 1106 }, { "epoch": 9.97, - "learning_rate": 1.7341040462427744e-07, - "loss": 3.316, - "step": 2224 - }, - { - "epoch": 9.98, - "learning_rate": 1.5606936416184972e-07, - "loss": 3.2267, - "step": 2225 + "learning_rate": 2.9508196721311474e-06, + "loss": 0.0362, + "step": 1107 }, { "epoch": 9.98, - "learning_rate": 1.3872832369942197e-07, - "loss": 3.2223, - "step": 2226 - }, - { - "epoch": 9.99, - "learning_rate": 1.213872832369942e-07, - "loss": 3.2844, - "step": 2227 + "learning_rate": 2.4590163934426227e-06, + "loss": 0.0295, + "step": 1108 }, { "epoch": 9.99, - "learning_rate": 1.0404624277456648e-07, - "loss": 3.291, - "step": 2228 - }, - { - "epoch": 10.0, - "learning_rate": 8.670520231213872e-08, - "loss": 3.2634, - "step": 2229 + "learning_rate": 1.967213114754098e-06, + "loss": 0.0268, + "step": 1109 }, { "epoch": 10.0, - "learning_rate": 6.936416184971099e-08, - "loss": 3.1966, - "step": 2230 + "learning_rate": 1.4754098360655737e-06, + "loss": 0.0308, + "step": 1110 }, { "epoch": 10.0, - "step": 2230, + "step": 1110, "total_flos": 0.0, - "train_loss": 4.09313003856505, - "train_runtime": 51677.5666, - "train_samples_per_second": 5.522, + "train_loss": 2.1241667401347613, + "train_runtime": 25891.0003, + "train_samples_per_second": 11.022, "train_steps_per_second": 0.043 } ], - "max_steps": 2230, + "max_steps": 1110, "num_train_epochs": 10, "total_flos": 0.0, "trial_name": null,