diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,32227 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "global_step": 5352, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 10.9943, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 10.9584, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 11.4022, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 10.9768, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 11.0889, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 11.0464, + "step": 6 + }, + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 11.2208, + "step": 7 + }, + { + "epoch": 0.0, + "learning_rate": 2.0000000000000002e-07, + "loss": 11.0702, + "step": 8 + }, + { + "epoch": 0.01, + "learning_rate": 4.0000000000000003e-07, + "loss": 11.4155, + "step": 9 + }, + { + "epoch": 0.01, + "learning_rate": 4.0000000000000003e-07, + "loss": 11.0631, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 6.000000000000001e-07, + "loss": 11.197, + "step": 11 + }, + { + "epoch": 0.01, + "learning_rate": 8.000000000000001e-07, + "loss": 11.056, + "step": 12 + }, + { + "epoch": 0.01, + "learning_rate": 1.0000000000000002e-06, + "loss": 11.1092, + "step": 13 + }, + { + "epoch": 0.01, + "learning_rate": 1.2000000000000002e-06, + "loss": 10.8994, + "step": 14 + }, + { + "epoch": 0.01, + "learning_rate": 1.4000000000000001e-06, + "loss": 11.103, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 1.6000000000000001e-06, + "loss": 11.1372, + "step": 16 + }, + { + "epoch": 0.01, + "learning_rate": 1.8e-06, + "loss": 11.1003, + "step": 17 + }, + { + "epoch": 0.01, + "learning_rate": 2.0000000000000003e-06, + "loss": 10.8299, + "step": 18 + }, + { + "epoch": 0.01, + "learning_rate": 2.2e-06, + "loss": 10.2585, + "step": 19 + }, + { + "epoch": 0.01, + "learning_rate": 2.4000000000000003e-06, + "loss": 10.3535, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 2.6e-06, + "loss": 10.0379, + "step": 21 + }, + { + "epoch": 0.01, + "learning_rate": 2.8000000000000003e-06, + "loss": 10.0975, + "step": 22 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 9.8731, + "step": 23 + }, + { + "epoch": 0.01, + "learning_rate": 3.2000000000000003e-06, + "loss": 9.2341, + "step": 24 + }, + { + "epoch": 0.01, + "learning_rate": 3.4000000000000005e-06, + "loss": 9.0927, + "step": 25 + }, + { + "epoch": 0.01, + "learning_rate": 3.6e-06, + "loss": 8.9826, + "step": 26 + }, + { + "epoch": 0.02, + "learning_rate": 3.8e-06, + "loss": 8.9649, + "step": 27 + }, + { + "epoch": 0.02, + "learning_rate": 4.000000000000001e-06, + "loss": 8.6652, + "step": 28 + }, + { + "epoch": 0.02, + "learning_rate": 4.2000000000000004e-06, + "loss": 8.7757, + "step": 29 + }, + { + "epoch": 0.02, + "learning_rate": 4.4e-06, + "loss": 8.5096, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.6e-06, + "loss": 8.6883, + "step": 31 + }, + { + "epoch": 0.02, + "learning_rate": 4.800000000000001e-06, + "loss": 8.2975, + "step": 32 + }, + { + "epoch": 0.02, + "learning_rate": 5e-06, + "loss": 8.285, + "step": 33 + }, + { + "epoch": 0.02, + "learning_rate": 5.2e-06, + "loss": 7.9462, + "step": 34 + }, + { + "epoch": 0.02, + "learning_rate": 5.4e-06, + "loss": 8.0024, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 5.600000000000001e-06, + "loss": 8.2322, + "step": 36 + }, + { + "epoch": 0.02, + "learning_rate": 5.8e-06, + "loss": 7.8988, + "step": 37 + }, + { + "epoch": 0.02, + "learning_rate": 6e-06, + "loss": 8.1555, + "step": 38 + }, + { + "epoch": 0.02, + "learning_rate": 6.2e-06, + "loss": 7.9796, + "step": 39 + }, + { + "epoch": 0.02, + "learning_rate": 6.4000000000000006e-06, + "loss": 7.8011, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 6.6e-06, + "loss": 7.8403, + "step": 41 + }, + { + "epoch": 0.02, + "learning_rate": 6.800000000000001e-06, + "loss": 7.6365, + "step": 42 + }, + { + "epoch": 0.02, + "learning_rate": 7.000000000000001e-06, + "loss": 7.8086, + "step": 43 + }, + { + "epoch": 0.02, + "learning_rate": 7.2e-06, + "loss": 7.5116, + "step": 44 + }, + { + "epoch": 0.03, + "learning_rate": 7.4e-06, + "loss": 7.8187, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 7.6e-06, + "loss": 7.5173, + "step": 46 + }, + { + "epoch": 0.03, + "learning_rate": 7.8e-06, + "loss": 7.5973, + "step": 47 + }, + { + "epoch": 0.03, + "learning_rate": 8.000000000000001e-06, + "loss": 7.8332, + "step": 48 + }, + { + "epoch": 0.03, + "learning_rate": 8.200000000000001e-06, + "loss": 7.487, + "step": 49 + }, + { + "epoch": 0.03, + "learning_rate": 8.400000000000001e-06, + "loss": 7.462, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 8.599999999999999e-06, + "loss": 7.4334, + "step": 51 + }, + { + "epoch": 0.03, + "learning_rate": 8.8e-06, + "loss": 7.4941, + "step": 52 + }, + { + "epoch": 0.03, + "learning_rate": 9e-06, + "loss": 7.2823, + "step": 53 + }, + { + "epoch": 0.03, + "learning_rate": 9.2e-06, + "loss": 7.4285, + "step": 54 + }, + { + "epoch": 0.03, + "learning_rate": 9.4e-06, + "loss": 7.5708, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 9.600000000000001e-06, + "loss": 7.306, + "step": 56 + }, + { + "epoch": 0.03, + "learning_rate": 9.800000000000001e-06, + "loss": 7.2824, + "step": 57 + }, + { + "epoch": 0.03, + "learning_rate": 1e-05, + "loss": 7.1456, + "step": 58 + }, + { + "epoch": 0.03, + "learning_rate": 1.02e-05, + "loss": 7.1856, + "step": 59 + }, + { + "epoch": 0.03, + "learning_rate": 1.04e-05, + "loss": 7.2322, + "step": 60 + }, + { + "epoch": 0.03, + "learning_rate": 1.06e-05, + "loss": 7.3762, + "step": 61 + }, + { + "epoch": 0.03, + "learning_rate": 1.08e-05, + "loss": 7.3175, + "step": 62 + }, + { + "epoch": 0.04, + "learning_rate": 1.1000000000000001e-05, + "loss": 7.358, + "step": 63 + }, + { + "epoch": 0.04, + "learning_rate": 1.1200000000000001e-05, + "loss": 7.2088, + "step": 64 + }, + { + "epoch": 0.04, + "learning_rate": 1.1400000000000001e-05, + "loss": 7.1944, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 1.16e-05, + "loss": 7.1452, + "step": 66 + }, + { + "epoch": 0.04, + "learning_rate": 1.18e-05, + "loss": 7.2423, + "step": 67 + }, + { + "epoch": 0.04, + "learning_rate": 1.2e-05, + "loss": 7.2724, + "step": 68 + }, + { + "epoch": 0.04, + "learning_rate": 1.22e-05, + "loss": 7.2636, + "step": 69 + }, + { + "epoch": 0.04, + "learning_rate": 1.24e-05, + "loss": 7.1627, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 1.2600000000000001e-05, + "loss": 7.2538, + "step": 71 + }, + { + "epoch": 0.04, + "learning_rate": 1.2800000000000001e-05, + "loss": 7.1301, + "step": 72 + }, + { + "epoch": 0.04, + "learning_rate": 1.3000000000000001e-05, + "loss": 7.2295, + "step": 73 + }, + { + "epoch": 0.04, + "learning_rate": 1.32e-05, + "loss": 7.0985, + "step": 74 + }, + { + "epoch": 0.04, + "learning_rate": 1.3400000000000002e-05, + "loss": 7.3331, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 1.3600000000000002e-05, + "loss": 7.2283, + "step": 76 + }, + { + "epoch": 0.04, + "learning_rate": 1.3800000000000002e-05, + "loss": 7.2221, + "step": 77 + }, + { + "epoch": 0.04, + "learning_rate": 1.4000000000000001e-05, + "loss": 7.1114, + "step": 78 + }, + { + "epoch": 0.04, + "learning_rate": 1.42e-05, + "loss": 7.0985, + "step": 79 + }, + { + "epoch": 0.04, + "learning_rate": 1.44e-05, + "loss": 7.1191, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 1.4599999999999999e-05, + "loss": 7.2704, + "step": 81 + }, + { + "epoch": 0.05, + "learning_rate": 1.48e-05, + "loss": 7.3029, + "step": 82 + }, + { + "epoch": 0.05, + "learning_rate": 1.5e-05, + "loss": 7.0338, + "step": 83 + }, + { + "epoch": 0.05, + "learning_rate": 1.52e-05, + "loss": 7.1617, + "step": 84 + }, + { + "epoch": 0.05, + "learning_rate": 1.54e-05, + "loss": 7.1962, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 1.56e-05, + "loss": 7.2343, + "step": 86 + }, + { + "epoch": 0.05, + "learning_rate": 1.58e-05, + "loss": 7.2556, + "step": 87 + }, + { + "epoch": 0.05, + "learning_rate": 1.6000000000000003e-05, + "loss": 7.2534, + "step": 88 + }, + { + "epoch": 0.05, + "learning_rate": 1.62e-05, + "loss": 7.2853, + "step": 89 + }, + { + "epoch": 0.05, + "learning_rate": 1.6400000000000002e-05, + "loss": 7.0127, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 1.66e-05, + "loss": 7.329, + "step": 91 + }, + { + "epoch": 0.05, + "learning_rate": 1.6800000000000002e-05, + "loss": 6.9507, + "step": 92 + }, + { + "epoch": 0.05, + "learning_rate": 1.7000000000000003e-05, + "loss": 7.0706, + "step": 93 + }, + { + "epoch": 0.05, + "learning_rate": 1.7199999999999998e-05, + "loss": 7.3435, + "step": 94 + }, + { + "epoch": 0.05, + "learning_rate": 1.74e-05, + "loss": 7.3304, + "step": 95 + }, + { + "epoch": 0.05, + "learning_rate": 1.76e-05, + "loss": 7.1647, + "step": 96 + }, + { + "epoch": 0.05, + "learning_rate": 1.78e-05, + "loss": 7.0002, + "step": 97 + }, + { + "epoch": 0.05, + "learning_rate": 1.8e-05, + "loss": 6.8276, + "step": 98 + }, + { + "epoch": 0.06, + "learning_rate": 1.8200000000000002e-05, + "loss": 7.1761, + "step": 99 + }, + { + "epoch": 0.06, + "learning_rate": 1.84e-05, + "loss": 7.0169, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 1.86e-05, + "loss": 7.3488, + "step": 101 + }, + { + "epoch": 0.06, + "learning_rate": 1.88e-05, + "loss": 7.2829, + "step": 102 + }, + { + "epoch": 0.06, + "learning_rate": 1.9e-05, + "loss": 7.2905, + "step": 103 + }, + { + "epoch": 0.06, + "learning_rate": 1.9200000000000003e-05, + "loss": 7.176, + "step": 104 + }, + { + "epoch": 0.06, + "learning_rate": 1.94e-05, + "loss": 7.1516, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 1.9600000000000002e-05, + "loss": 7.0835, + "step": 106 + }, + { + "epoch": 0.06, + "learning_rate": 1.9800000000000004e-05, + "loss": 7.573, + "step": 107 + }, + { + "epoch": 0.06, + "learning_rate": 2e-05, + "loss": 7.1754, + "step": 108 + }, + { + "epoch": 0.06, + "learning_rate": 2.0200000000000003e-05, + "loss": 7.0634, + "step": 109 + }, + { + "epoch": 0.06, + "learning_rate": 2.04e-05, + "loss": 7.1904, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 2.06e-05, + "loss": 7.0143, + "step": 111 + }, + { + "epoch": 0.06, + "learning_rate": 2.08e-05, + "loss": 6.9704, + "step": 112 + }, + { + "epoch": 0.06, + "learning_rate": 2.1e-05, + "loss": 7.1983, + "step": 113 + }, + { + "epoch": 0.06, + "learning_rate": 2.12e-05, + "loss": 7.2388, + "step": 114 + }, + { + "epoch": 0.06, + "learning_rate": 2.1400000000000002e-05, + "loss": 7.1558, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 2.16e-05, + "loss": 7.1881, + "step": 116 + }, + { + "epoch": 0.07, + "learning_rate": 2.18e-05, + "loss": 7.3307, + "step": 117 + }, + { + "epoch": 0.07, + "learning_rate": 2.2000000000000003e-05, + "loss": 7.0756, + "step": 118 + }, + { + "epoch": 0.07, + "learning_rate": 2.22e-05, + "loss": 7.1239, + "step": 119 + }, + { + "epoch": 0.07, + "learning_rate": 2.2400000000000002e-05, + "loss": 7.1285, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 2.26e-05, + "loss": 7.3267, + "step": 121 + }, + { + "epoch": 0.07, + "learning_rate": 2.2800000000000002e-05, + "loss": 7.2647, + "step": 122 + }, + { + "epoch": 0.07, + "learning_rate": 2.3000000000000003e-05, + "loss": 7.2048, + "step": 123 + }, + { + "epoch": 0.07, + "learning_rate": 2.32e-05, + "loss": 7.3158, + "step": 124 + }, + { + "epoch": 0.07, + "learning_rate": 2.3400000000000003e-05, + "loss": 6.9004, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 2.36e-05, + "loss": 7.3811, + "step": 126 + }, + { + "epoch": 0.07, + "learning_rate": 2.38e-05, + "loss": 7.1474, + "step": 127 + }, + { + "epoch": 0.07, + "learning_rate": 2.4e-05, + "loss": 7.1854, + "step": 128 + }, + { + "epoch": 0.07, + "learning_rate": 2.4200000000000002e-05, + "loss": 7.0567, + "step": 129 + }, + { + "epoch": 0.07, + "learning_rate": 2.44e-05, + "loss": 7.0879, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 2.46e-05, + "loss": 7.3049, + "step": 131 + }, + { + "epoch": 0.07, + "learning_rate": 2.48e-05, + "loss": 7.0845, + "step": 132 + }, + { + "epoch": 0.07, + "learning_rate": 2.5e-05, + "loss": 7.2161, + "step": 133 + }, + { + "epoch": 0.08, + "learning_rate": 2.5200000000000003e-05, + "loss": 7.0246, + "step": 134 + }, + { + "epoch": 0.08, + "learning_rate": 2.54e-05, + "loss": 7.0001, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 2.5600000000000002e-05, + "loss": 7.2634, + "step": 136 + }, + { + "epoch": 0.08, + "learning_rate": 2.58e-05, + "loss": 7.1607, + "step": 137 + }, + { + "epoch": 0.08, + "learning_rate": 2.6000000000000002e-05, + "loss": 7.0718, + "step": 138 + }, + { + "epoch": 0.08, + "learning_rate": 2.6200000000000003e-05, + "loss": 7.2687, + "step": 139 + }, + { + "epoch": 0.08, + "learning_rate": 2.64e-05, + "loss": 7.1654, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 2.6600000000000003e-05, + "loss": 7.3757, + "step": 141 + }, + { + "epoch": 0.08, + "learning_rate": 2.6800000000000004e-05, + "loss": 7.3598, + "step": 142 + }, + { + "epoch": 0.08, + "learning_rate": 2.7000000000000002e-05, + "loss": 6.98, + "step": 143 + }, + { + "epoch": 0.08, + "learning_rate": 2.7200000000000004e-05, + "loss": 7.1408, + "step": 144 + }, + { + "epoch": 0.08, + "learning_rate": 2.7400000000000002e-05, + "loss": 7.2928, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 2.7600000000000003e-05, + "loss": 7.1745, + "step": 146 + }, + { + "epoch": 0.08, + "learning_rate": 2.7800000000000005e-05, + "loss": 7.2032, + "step": 147 + }, + { + "epoch": 0.08, + "learning_rate": 2.8000000000000003e-05, + "loss": 7.1541, + "step": 148 + }, + { + "epoch": 0.08, + "learning_rate": 2.8199999999999998e-05, + "loss": 6.9952, + "step": 149 + }, + { + "epoch": 0.08, + "learning_rate": 2.84e-05, + "loss": 6.344, + "step": 150 + }, + { + "epoch": 0.08, + "learning_rate": 2.86e-05, + "loss": 7.1416, + "step": 151 + }, + { + "epoch": 0.09, + "learning_rate": 2.88e-05, + "loss": 7.2178, + "step": 152 + }, + { + "epoch": 0.09, + "learning_rate": 2.9e-05, + "loss": 6.9869, + "step": 153 + }, + { + "epoch": 0.09, + "learning_rate": 2.9199999999999998e-05, + "loss": 7.2288, + "step": 154 + }, + { + "epoch": 0.09, + "learning_rate": 2.94e-05, + "loss": 6.9245, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 2.96e-05, + "loss": 7.2831, + "step": 156 + }, + { + "epoch": 0.09, + "learning_rate": 2.98e-05, + "loss": 7.0794, + "step": 157 + }, + { + "epoch": 0.09, + "learning_rate": 3e-05, + "loss": 7.1408, + "step": 158 + }, + { + "epoch": 0.09, + "learning_rate": 3.02e-05, + "loss": 7.0602, + "step": 159 + }, + { + "epoch": 0.09, + "learning_rate": 3.04e-05, + "loss": 7.2424, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 3.06e-05, + "loss": 7.2148, + "step": 161 + }, + { + "epoch": 0.09, + "learning_rate": 3.08e-05, + "loss": 7.3888, + "step": 162 + }, + { + "epoch": 0.09, + "learning_rate": 3.1e-05, + "loss": 7.2205, + "step": 163 + }, + { + "epoch": 0.09, + "learning_rate": 3.12e-05, + "loss": 7.1783, + "step": 164 + }, + { + "epoch": 0.09, + "learning_rate": 3.1400000000000004e-05, + "loss": 7.2127, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 3.16e-05, + "loss": 7.201, + "step": 166 + }, + { + "epoch": 0.09, + "learning_rate": 3.18e-05, + "loss": 7.1274, + "step": 167 + }, + { + "epoch": 0.09, + "learning_rate": 3.2000000000000005e-05, + "loss": 6.9574, + "step": 168 + }, + { + "epoch": 0.09, + "learning_rate": 3.2200000000000003e-05, + "loss": 6.9177, + "step": 169 + }, + { + "epoch": 0.1, + "learning_rate": 3.24e-05, + "loss": 7.2344, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 3.26e-05, + "loss": 7.1829, + "step": 171 + }, + { + "epoch": 0.1, + "learning_rate": 3.2800000000000004e-05, + "loss": 7.106, + "step": 172 + }, + { + "epoch": 0.1, + "learning_rate": 3.3e-05, + "loss": 7.1077, + "step": 173 + }, + { + "epoch": 0.1, + "learning_rate": 3.32e-05, + "loss": 6.979, + "step": 174 + }, + { + "epoch": 0.1, + "learning_rate": 3.3400000000000005e-05, + "loss": 7.1942, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 3.3600000000000004e-05, + "loss": 6.9882, + "step": 176 + }, + { + "epoch": 0.1, + "learning_rate": 3.38e-05, + "loss": 7.0006, + "step": 177 + }, + { + "epoch": 0.1, + "learning_rate": 3.4000000000000007e-05, + "loss": 7.077, + "step": 178 + }, + { + "epoch": 0.1, + "learning_rate": 3.4200000000000005e-05, + "loss": 6.9483, + "step": 179 + }, + { + "epoch": 0.1, + "learning_rate": 3.4399999999999996e-05, + "loss": 7.1153, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 3.46e-05, + "loss": 6.9924, + "step": 181 + }, + { + "epoch": 0.1, + "learning_rate": 3.48e-05, + "loss": 6.9844, + "step": 182 + }, + { + "epoch": 0.1, + "learning_rate": 3.5e-05, + "loss": 7.2299, + "step": 183 + }, + { + "epoch": 0.1, + "learning_rate": 3.52e-05, + "loss": 6.8925, + "step": 184 + }, + { + "epoch": 0.1, + "learning_rate": 3.54e-05, + "loss": 7.1434, + "step": 185 + }, + { + "epoch": 0.1, + "learning_rate": 3.56e-05, + "loss": 6.9794, + "step": 186 + }, + { + "epoch": 0.1, + "learning_rate": 3.58e-05, + "loss": 6.8796, + "step": 187 + }, + { + "epoch": 0.11, + "learning_rate": 3.6e-05, + "loss": 7.0778, + "step": 188 + }, + { + "epoch": 0.11, + "learning_rate": 3.62e-05, + "loss": 7.0181, + "step": 189 + }, + { + "epoch": 0.11, + "learning_rate": 3.6400000000000004e-05, + "loss": 7.0357, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 3.66e-05, + "loss": 7.1621, + "step": 191 + }, + { + "epoch": 0.11, + "learning_rate": 3.68e-05, + "loss": 6.8678, + "step": 192 + }, + { + "epoch": 0.11, + "learning_rate": 3.7e-05, + "loss": 7.0031, + "step": 193 + }, + { + "epoch": 0.11, + "learning_rate": 3.72e-05, + "loss": 7.0492, + "step": 194 + }, + { + "epoch": 0.11, + "learning_rate": 3.74e-05, + "loss": 7.0843, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 3.76e-05, + "loss": 7.044, + "step": 196 + }, + { + "epoch": 0.11, + "learning_rate": 3.7800000000000004e-05, + "loss": 7.0957, + "step": 197 + }, + { + "epoch": 0.11, + "learning_rate": 3.8e-05, + "loss": 6.8913, + "step": 198 + }, + { + "epoch": 0.11, + "learning_rate": 3.82e-05, + "loss": 6.7978, + "step": 199 + }, + { + "epoch": 0.11, + "learning_rate": 3.8400000000000005e-05, + "loss": 6.443, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 3.86e-05, + "loss": 7.2658, + "step": 201 + }, + { + "epoch": 0.11, + "learning_rate": 3.88e-05, + "loss": 7.0871, + "step": 202 + }, + { + "epoch": 0.11, + "learning_rate": 3.9000000000000006e-05, + "loss": 7.3244, + "step": 203 + }, + { + "epoch": 0.11, + "learning_rate": 3.9200000000000004e-05, + "loss": 6.966, + "step": 204 + }, + { + "epoch": 0.11, + "learning_rate": 3.94e-05, + "loss": 6.9823, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 3.960000000000001e-05, + "loss": 7.1579, + "step": 206 + }, + { + "epoch": 0.12, + "learning_rate": 3.9800000000000005e-05, + "loss": 7.1491, + "step": 207 + }, + { + "epoch": 0.12, + "learning_rate": 4e-05, + "loss": 7.0796, + "step": 208 + }, + { + "epoch": 0.12, + "learning_rate": 4.02e-05, + "loss": 7.118, + "step": 209 + }, + { + "epoch": 0.12, + "learning_rate": 4.0400000000000006e-05, + "loss": 7.2473, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.0600000000000004e-05, + "loss": 7.0947, + "step": 211 + }, + { + "epoch": 0.12, + "learning_rate": 4.08e-05, + "loss": 7.2109, + "step": 212 + }, + { + "epoch": 0.12, + "learning_rate": 4.1e-05, + "loss": 7.1369, + "step": 213 + }, + { + "epoch": 0.12, + "learning_rate": 4.12e-05, + "loss": 6.9616, + "step": 214 + }, + { + "epoch": 0.12, + "learning_rate": 4.14e-05, + "loss": 7.2364, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.16e-05, + "loss": 6.8616, + "step": 216 + }, + { + "epoch": 0.12, + "learning_rate": 4.18e-05, + "loss": 7.0692, + "step": 217 + }, + { + "epoch": 0.12, + "learning_rate": 4.2e-05, + "loss": 6.9306, + "step": 218 + }, + { + "epoch": 0.12, + "learning_rate": 4.22e-05, + "loss": 7.0661, + "step": 219 + }, + { + "epoch": 0.12, + "learning_rate": 4.24e-05, + "loss": 7.1495, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.26e-05, + "loss": 7.1552, + "step": 221 + }, + { + "epoch": 0.12, + "learning_rate": 4.2800000000000004e-05, + "loss": 7.2373, + "step": 222 + }, + { + "epoch": 0.12, + "learning_rate": 4.3e-05, + "loss": 6.8844, + "step": 223 + }, + { + "epoch": 0.13, + "learning_rate": 4.32e-05, + "loss": 7.1065, + "step": 224 + }, + { + "epoch": 0.13, + "learning_rate": 4.3400000000000005e-05, + "loss": 7.201, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.36e-05, + "loss": 7.4243, + "step": 226 + }, + { + "epoch": 0.13, + "learning_rate": 4.38e-05, + "loss": 7.2527, + "step": 227 + }, + { + "epoch": 0.13, + "learning_rate": 4.4000000000000006e-05, + "loss": 7.2063, + "step": 228 + }, + { + "epoch": 0.13, + "learning_rate": 4.4200000000000004e-05, + "loss": 7.0557, + "step": 229 + }, + { + "epoch": 0.13, + "learning_rate": 4.44e-05, + "loss": 7.0629, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.46e-05, + "loss": 7.0912, + "step": 231 + }, + { + "epoch": 0.13, + "learning_rate": 4.4800000000000005e-05, + "loss": 7.1477, + "step": 232 + }, + { + "epoch": 0.13, + "learning_rate": 4.5e-05, + "loss": 7.3119, + "step": 233 + }, + { + "epoch": 0.13, + "learning_rate": 4.52e-05, + "loss": 7.0351, + "step": 234 + }, + { + "epoch": 0.13, + "learning_rate": 4.5400000000000006e-05, + "loss": 6.9773, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.5600000000000004e-05, + "loss": 7.3856, + "step": 236 + }, + { + "epoch": 0.13, + "learning_rate": 4.58e-05, + "loss": 7.1324, + "step": 237 + }, + { + "epoch": 0.13, + "learning_rate": 4.600000000000001e-05, + "loss": 7.0901, + "step": 238 + }, + { + "epoch": 0.13, + "learning_rate": 4.6200000000000005e-05, + "loss": 7.0031, + "step": 239 + }, + { + "epoch": 0.13, + "learning_rate": 4.64e-05, + "loss": 7.06, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.660000000000001e-05, + "loss": 7.0717, + "step": 241 + }, + { + "epoch": 0.14, + "learning_rate": 4.6800000000000006e-05, + "loss": 7.0482, + "step": 242 + }, + { + "epoch": 0.14, + "learning_rate": 4.7e-05, + "loss": 6.9197, + "step": 243 + }, + { + "epoch": 0.14, + "learning_rate": 4.72e-05, + "loss": 7.1217, + "step": 244 + }, + { + "epoch": 0.14, + "learning_rate": 4.74e-05, + "loss": 7.0378, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.76e-05, + "loss": 6.9547, + "step": 246 + }, + { + "epoch": 0.14, + "learning_rate": 4.78e-05, + "loss": 6.61, + "step": 247 + }, + { + "epoch": 0.14, + "learning_rate": 4.8e-05, + "loss": 6.9574, + "step": 248 + }, + { + "epoch": 0.14, + "learning_rate": 4.82e-05, + "loss": 6.6961, + "step": 249 + }, + { + "epoch": 0.14, + "learning_rate": 4.8400000000000004e-05, + "loss": 6.4984, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.86e-05, + "loss": 7.2069, + "step": 251 + }, + { + "epoch": 0.14, + "learning_rate": 4.88e-05, + "loss": 7.4438, + "step": 252 + }, + { + "epoch": 0.14, + "learning_rate": 4.9e-05, + "loss": 7.1281, + "step": 253 + }, + { + "epoch": 0.14, + "learning_rate": 4.92e-05, + "loss": 7.264, + "step": 254 + }, + { + "epoch": 0.14, + "learning_rate": 4.94e-05, + "loss": 7.1284, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.96e-05, + "loss": 7.1746, + "step": 256 + }, + { + "epoch": 0.14, + "learning_rate": 4.9800000000000004e-05, + "loss": 7.1866, + "step": 257 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 7.1018, + "step": 258 + }, + { + "epoch": 0.15, + "learning_rate": 5.02e-05, + "loss": 7.3555, + "step": 259 + }, + { + "epoch": 0.15, + "learning_rate": 5.0400000000000005e-05, + "loss": 6.7875, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 5.0600000000000003e-05, + "loss": 6.9759, + "step": 261 + }, + { + "epoch": 0.15, + "learning_rate": 5.08e-05, + "loss": 7.0293, + "step": 262 + }, + { + "epoch": 0.15, + "learning_rate": 5.1000000000000006e-05, + "loss": 7.2576, + "step": 263 + }, + { + "epoch": 0.15, + "learning_rate": 5.1200000000000004e-05, + "loss": 7.1699, + "step": 264 + }, + { + "epoch": 0.15, + "learning_rate": 5.14e-05, + "loss": 6.9988, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 5.16e-05, + "loss": 6.9783, + "step": 266 + }, + { + "epoch": 0.15, + "learning_rate": 5.1800000000000005e-05, + "loss": 7.1326, + "step": 267 + }, + { + "epoch": 0.15, + "learning_rate": 5.2000000000000004e-05, + "loss": 7.0865, + "step": 268 + }, + { + "epoch": 0.15, + "learning_rate": 5.22e-05, + "loss": 7.1328, + "step": 269 + }, + { + "epoch": 0.15, + "learning_rate": 5.2400000000000007e-05, + "loss": 6.9455, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 5.2600000000000005e-05, + "loss": 6.9803, + "step": 271 + }, + { + "epoch": 0.15, + "learning_rate": 5.28e-05, + "loss": 7.0873, + "step": 272 + }, + { + "epoch": 0.15, + "learning_rate": 5.300000000000001e-05, + "loss": 6.9735, + "step": 273 + }, + { + "epoch": 0.15, + "learning_rate": 5.3200000000000006e-05, + "loss": 7.1154, + "step": 274 + }, + { + "epoch": 0.15, + "learning_rate": 5.3400000000000004e-05, + "loss": 7.2227, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 5.360000000000001e-05, + "loss": 7.2743, + "step": 276 + }, + { + "epoch": 0.16, + "learning_rate": 5.380000000000001e-05, + "loss": 7.1936, + "step": 277 + }, + { + "epoch": 0.16, + "learning_rate": 5.4000000000000005e-05, + "loss": 7.2583, + "step": 278 + }, + { + "epoch": 0.16, + "learning_rate": 5.420000000000001e-05, + "loss": 7.0363, + "step": 279 + }, + { + "epoch": 0.16, + "learning_rate": 5.440000000000001e-05, + "loss": 7.0358, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 5.4600000000000006e-05, + "loss": 7.2046, + "step": 281 + }, + { + "epoch": 0.16, + "learning_rate": 5.4800000000000004e-05, + "loss": 7.2714, + "step": 282 + }, + { + "epoch": 0.16, + "learning_rate": 5.500000000000001e-05, + "loss": 7.2108, + "step": 283 + }, + { + "epoch": 0.16, + "learning_rate": 5.520000000000001e-05, + "loss": 6.8612, + "step": 284 + }, + { + "epoch": 0.16, + "learning_rate": 5.5400000000000005e-05, + "loss": 7.1636, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 5.560000000000001e-05, + "loss": 6.923, + "step": 286 + }, + { + "epoch": 0.16, + "learning_rate": 5.580000000000001e-05, + "loss": 6.9685, + "step": 287 + }, + { + "epoch": 0.16, + "learning_rate": 5.6000000000000006e-05, + "loss": 7.2593, + "step": 288 + }, + { + "epoch": 0.16, + "learning_rate": 5.620000000000001e-05, + "loss": 6.9878, + "step": 289 + }, + { + "epoch": 0.16, + "learning_rate": 5.6399999999999995e-05, + "loss": 7.1322, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 5.66e-05, + "loss": 7.0868, + "step": 291 + }, + { + "epoch": 0.16, + "learning_rate": 5.68e-05, + "loss": 7.2568, + "step": 292 + }, + { + "epoch": 0.16, + "learning_rate": 5.6999999999999996e-05, + "loss": 7.0662, + "step": 293 + }, + { + "epoch": 0.16, + "learning_rate": 5.72e-05, + "loss": 7.0929, + "step": 294 + }, + { + "epoch": 0.17, + "learning_rate": 5.74e-05, + "loss": 6.9213, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 5.76e-05, + "loss": 6.7823, + "step": 296 + }, + { + "epoch": 0.17, + "learning_rate": 5.7799999999999995e-05, + "loss": 6.8251, + "step": 297 + }, + { + "epoch": 0.17, + "learning_rate": 5.8e-05, + "loss": 7.1662, + "step": 298 + }, + { + "epoch": 0.17, + "learning_rate": 5.82e-05, + "loss": 6.7, + "step": 299 + }, + { + "epoch": 0.17, + "learning_rate": 5.8399999999999997e-05, + "loss": 6.5886, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 5.86e-05, + "loss": 7.1817, + "step": 301 + }, + { + "epoch": 0.17, + "learning_rate": 5.88e-05, + "loss": 7.0857, + "step": 302 + }, + { + "epoch": 0.17, + "learning_rate": 5.9e-05, + "loss": 7.0472, + "step": 303 + }, + { + "epoch": 0.17, + "learning_rate": 5.92e-05, + "loss": 6.9869, + "step": 304 + }, + { + "epoch": 0.17, + "learning_rate": 5.94e-05, + "loss": 7.0928, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 5.96e-05, + "loss": 7.1116, + "step": 306 + }, + { + "epoch": 0.17, + "learning_rate": 5.9800000000000003e-05, + "loss": 7.17, + "step": 307 + }, + { + "epoch": 0.17, + "learning_rate": 6e-05, + "loss": 7.2483, + "step": 308 + }, + { + "epoch": 0.17, + "learning_rate": 6.02e-05, + "loss": 7.1166, + "step": 309 + }, + { + "epoch": 0.17, + "learning_rate": 6.04e-05, + "loss": 7.3166, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 6.06e-05, + "loss": 7.0532, + "step": 311 + }, + { + "epoch": 0.17, + "learning_rate": 6.08e-05, + "loss": 7.1509, + "step": 312 + }, + { + "epoch": 0.18, + "learning_rate": 6.1e-05, + "loss": 7.0713, + "step": 313 + }, + { + "epoch": 0.18, + "learning_rate": 6.12e-05, + "loss": 7.1442, + "step": 314 + }, + { + "epoch": 0.18, + "learning_rate": 6.14e-05, + "loss": 7.1455, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 6.16e-05, + "loss": 7.0402, + "step": 316 + }, + { + "epoch": 0.18, + "learning_rate": 6.18e-05, + "loss": 6.969, + "step": 317 + }, + { + "epoch": 0.18, + "learning_rate": 6.2e-05, + "loss": 7.0087, + "step": 318 + }, + { + "epoch": 0.18, + "learning_rate": 6.220000000000001e-05, + "loss": 7.1762, + "step": 319 + }, + { + "epoch": 0.18, + "learning_rate": 6.24e-05, + "loss": 7.1858, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 6.26e-05, + "loss": 7.3863, + "step": 321 + }, + { + "epoch": 0.18, + "learning_rate": 6.280000000000001e-05, + "loss": 7.2937, + "step": 322 + }, + { + "epoch": 0.18, + "learning_rate": 6.3e-05, + "loss": 7.2723, + "step": 323 + }, + { + "epoch": 0.18, + "learning_rate": 6.32e-05, + "loss": 7.0623, + "step": 324 + }, + { + "epoch": 0.18, + "learning_rate": 6.340000000000001e-05, + "loss": 6.9822, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 6.36e-05, + "loss": 7.1546, + "step": 326 + }, + { + "epoch": 0.18, + "learning_rate": 6.38e-05, + "loss": 7.0314, + "step": 327 + }, + { + "epoch": 0.18, + "learning_rate": 6.400000000000001e-05, + "loss": 7.133, + "step": 328 + }, + { + "epoch": 0.18, + "learning_rate": 6.42e-05, + "loss": 7.0376, + "step": 329 + }, + { + "epoch": 0.18, + "learning_rate": 6.440000000000001e-05, + "loss": 7.1706, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 6.460000000000001e-05, + "loss": 7.1314, + "step": 331 + }, + { + "epoch": 0.19, + "learning_rate": 6.48e-05, + "loss": 7.1215, + "step": 332 + }, + { + "epoch": 0.19, + "learning_rate": 6.500000000000001e-05, + "loss": 7.2526, + "step": 333 + }, + { + "epoch": 0.19, + "learning_rate": 6.52e-05, + "loss": 7.2246, + "step": 334 + }, + { + "epoch": 0.19, + "learning_rate": 6.54e-05, + "loss": 7.0868, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 6.560000000000001e-05, + "loss": 7.0976, + "step": 336 + }, + { + "epoch": 0.19, + "learning_rate": 6.58e-05, + "loss": 7.1152, + "step": 337 + }, + { + "epoch": 0.19, + "learning_rate": 6.6e-05, + "loss": 6.9849, + "step": 338 + }, + { + "epoch": 0.19, + "learning_rate": 6.620000000000001e-05, + "loss": 6.9359, + "step": 339 + }, + { + "epoch": 0.19, + "learning_rate": 6.64e-05, + "loss": 7.0532, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 6.66e-05, + "loss": 6.955, + "step": 341 + }, + { + "epoch": 0.19, + "learning_rate": 6.680000000000001e-05, + "loss": 7.2644, + "step": 342 + }, + { + "epoch": 0.19, + "learning_rate": 6.7e-05, + "loss": 7.1705, + "step": 343 + }, + { + "epoch": 0.19, + "learning_rate": 6.720000000000001e-05, + "loss": 7.1836, + "step": 344 + }, + { + "epoch": 0.19, + "learning_rate": 6.740000000000001e-05, + "loss": 6.9061, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 6.76e-05, + "loss": 6.9669, + "step": 346 + }, + { + "epoch": 0.19, + "learning_rate": 6.780000000000001e-05, + "loss": 7.4457, + "step": 347 + }, + { + "epoch": 0.2, + "learning_rate": 6.800000000000001e-05, + "loss": 7.0827, + "step": 348 + }, + { + "epoch": 0.2, + "learning_rate": 6.82e-05, + "loss": 6.5416, + "step": 349 + }, + { + "epoch": 0.2, + "learning_rate": 6.840000000000001e-05, + "loss": 6.1589, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 6.860000000000001e-05, + "loss": 7.2102, + "step": 351 + }, + { + "epoch": 0.2, + "learning_rate": 6.879999999999999e-05, + "loss": 7.2278, + "step": 352 + }, + { + "epoch": 0.2, + "learning_rate": 6.9e-05, + "loss": 7.1306, + "step": 353 + }, + { + "epoch": 0.2, + "learning_rate": 6.92e-05, + "loss": 7.0525, + "step": 354 + }, + { + "epoch": 0.2, + "learning_rate": 6.939999999999999e-05, + "loss": 7.0724, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 6.96e-05, + "loss": 7.0648, + "step": 356 + }, + { + "epoch": 0.2, + "learning_rate": 6.98e-05, + "loss": 7.2456, + "step": 357 + }, + { + "epoch": 0.2, + "learning_rate": 7e-05, + "loss": 7.1056, + "step": 358 + }, + { + "epoch": 0.2, + "learning_rate": 7.02e-05, + "loss": 7.2209, + "step": 359 + }, + { + "epoch": 0.2, + "learning_rate": 7.04e-05, + "loss": 7.0958, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 7.06e-05, + "loss": 6.8967, + "step": 361 + }, + { + "epoch": 0.2, + "learning_rate": 7.08e-05, + "loss": 6.7813, + "step": 362 + }, + { + "epoch": 0.2, + "learning_rate": 7.1e-05, + "loss": 7.0357, + "step": 363 + }, + { + "epoch": 0.2, + "learning_rate": 7.12e-05, + "loss": 6.924, + "step": 364 + }, + { + "epoch": 0.2, + "learning_rate": 7.14e-05, + "loss": 7.1054, + "step": 365 + }, + { + "epoch": 0.21, + "learning_rate": 7.16e-05, + "loss": 7.1807, + "step": 366 + }, + { + "epoch": 0.21, + "learning_rate": 7.18e-05, + "loss": 6.9746, + "step": 367 + }, + { + "epoch": 0.21, + "learning_rate": 7.2e-05, + "loss": 7.0519, + "step": 368 + }, + { + "epoch": 0.21, + "learning_rate": 7.22e-05, + "loss": 7.0763, + "step": 369 + }, + { + "epoch": 0.21, + "learning_rate": 7.24e-05, + "loss": 6.9779, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 7.26e-05, + "loss": 6.9394, + "step": 371 + }, + { + "epoch": 0.21, + "learning_rate": 7.280000000000001e-05, + "loss": 7.2521, + "step": 372 + }, + { + "epoch": 0.21, + "learning_rate": 7.3e-05, + "loss": 7.136, + "step": 373 + }, + { + "epoch": 0.21, + "learning_rate": 7.32e-05, + "loss": 6.9745, + "step": 374 + }, + { + "epoch": 0.21, + "learning_rate": 7.340000000000001e-05, + "loss": 6.7593, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 7.36e-05, + "loss": 7.0613, + "step": 376 + }, + { + "epoch": 0.21, + "learning_rate": 7.38e-05, + "loss": 7.0142, + "step": 377 + }, + { + "epoch": 0.21, + "learning_rate": 7.4e-05, + "loss": 6.9591, + "step": 378 + }, + { + "epoch": 0.21, + "learning_rate": 7.42e-05, + "loss": 6.9696, + "step": 379 + }, + { + "epoch": 0.21, + "learning_rate": 7.44e-05, + "loss": 7.2282, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 7.46e-05, + "loss": 6.9052, + "step": 381 + }, + { + "epoch": 0.21, + "learning_rate": 7.48e-05, + "loss": 6.8876, + "step": 382 + }, + { + "epoch": 0.21, + "learning_rate": 7.500000000000001e-05, + "loss": 7.0237, + "step": 383 + }, + { + "epoch": 0.22, + "learning_rate": 7.52e-05, + "loss": 7.0708, + "step": 384 + }, + { + "epoch": 0.22, + "learning_rate": 7.54e-05, + "loss": 7.1279, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 7.560000000000001e-05, + "loss": 7.2487, + "step": 386 + }, + { + "epoch": 0.22, + "learning_rate": 7.58e-05, + "loss": 7.0603, + "step": 387 + }, + { + "epoch": 0.22, + "learning_rate": 7.6e-05, + "loss": 6.8195, + "step": 388 + }, + { + "epoch": 0.22, + "learning_rate": 7.620000000000001e-05, + "loss": 7.0025, + "step": 389 + }, + { + "epoch": 0.22, + "learning_rate": 7.64e-05, + "loss": 7.0806, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 7.66e-05, + "loss": 6.8648, + "step": 391 + }, + { + "epoch": 0.22, + "learning_rate": 7.680000000000001e-05, + "loss": 7.4388, + "step": 392 + }, + { + "epoch": 0.22, + "learning_rate": 7.7e-05, + "loss": 7.0427, + "step": 393 + }, + { + "epoch": 0.22, + "learning_rate": 7.72e-05, + "loss": 6.9174, + "step": 394 + }, + { + "epoch": 0.22, + "learning_rate": 7.740000000000001e-05, + "loss": 6.9422, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 7.76e-05, + "loss": 6.7755, + "step": 396 + }, + { + "epoch": 0.22, + "learning_rate": 7.780000000000001e-05, + "loss": 6.5908, + "step": 397 + }, + { + "epoch": 0.22, + "learning_rate": 7.800000000000001e-05, + "loss": 6.7942, + "step": 398 + }, + { + "epoch": 0.22, + "learning_rate": 7.82e-05, + "loss": 6.9399, + "step": 399 + }, + { + "epoch": 0.22, + "learning_rate": 7.840000000000001e-05, + "loss": 6.5178, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 7.860000000000001e-05, + "loss": 7.6662, + "step": 401 + }, + { + "epoch": 0.23, + "learning_rate": 7.88e-05, + "loss": 7.4403, + "step": 402 + }, + { + "epoch": 0.23, + "learning_rate": 7.900000000000001e-05, + "loss": 7.1939, + "step": 403 + }, + { + "epoch": 0.23, + "learning_rate": 7.920000000000001e-05, + "loss": 7.3696, + "step": 404 + }, + { + "epoch": 0.23, + "learning_rate": 7.94e-05, + "loss": 6.994, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 7.960000000000001e-05, + "loss": 7.3248, + "step": 406 + }, + { + "epoch": 0.23, + "learning_rate": 7.98e-05, + "loss": 7.0381, + "step": 407 + }, + { + "epoch": 0.23, + "learning_rate": 8e-05, + "loss": 7.0852, + "step": 408 + }, + { + "epoch": 0.23, + "learning_rate": 8.020000000000001e-05, + "loss": 7.0731, + "step": 409 + }, + { + "epoch": 0.23, + "learning_rate": 8.04e-05, + "loss": 7.0279, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 8.060000000000001e-05, + "loss": 7.0357, + "step": 411 + }, + { + "epoch": 0.23, + "learning_rate": 8.080000000000001e-05, + "loss": 6.9296, + "step": 412 + }, + { + "epoch": 0.23, + "learning_rate": 8.1e-05, + "loss": 7.1563, + "step": 413 + }, + { + "epoch": 0.23, + "learning_rate": 8.120000000000001e-05, + "loss": 6.9103, + "step": 414 + }, + { + "epoch": 0.23, + "learning_rate": 8.14e-05, + "loss": 7.1143, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 8.16e-05, + "loss": 7.0693, + "step": 416 + }, + { + "epoch": 0.23, + "learning_rate": 8.18e-05, + "loss": 7.042, + "step": 417 + }, + { + "epoch": 0.23, + "learning_rate": 8.2e-05, + "loss": 6.9777, + "step": 418 + }, + { + "epoch": 0.23, + "learning_rate": 8.22e-05, + "loss": 6.9689, + "step": 419 + }, + { + "epoch": 0.24, + "learning_rate": 8.24e-05, + "loss": 7.1131, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 8.26e-05, + "loss": 7.0906, + "step": 421 + }, + { + "epoch": 0.24, + "learning_rate": 8.28e-05, + "loss": 7.0861, + "step": 422 + }, + { + "epoch": 0.24, + "learning_rate": 8.3e-05, + "loss": 6.953, + "step": 423 + }, + { + "epoch": 0.24, + "learning_rate": 8.32e-05, + "loss": 7.2407, + "step": 424 + }, + { + "epoch": 0.24, + "learning_rate": 8.34e-05, + "loss": 7.1928, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 8.36e-05, + "loss": 7.0813, + "step": 426 + }, + { + "epoch": 0.24, + "learning_rate": 8.38e-05, + "loss": 7.0063, + "step": 427 + }, + { + "epoch": 0.24, + "learning_rate": 8.4e-05, + "loss": 6.9934, + "step": 428 + }, + { + "epoch": 0.24, + "learning_rate": 8.42e-05, + "loss": 7.0596, + "step": 429 + }, + { + "epoch": 0.24, + "learning_rate": 8.44e-05, + "loss": 7.0651, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 8.46e-05, + "loss": 6.9085, + "step": 431 + }, + { + "epoch": 0.24, + "learning_rate": 8.48e-05, + "loss": 7.0998, + "step": 432 + }, + { + "epoch": 0.24, + "learning_rate": 8.5e-05, + "loss": 6.9893, + "step": 433 + }, + { + "epoch": 0.24, + "learning_rate": 8.52e-05, + "loss": 7.0977, + "step": 434 + }, + { + "epoch": 0.24, + "learning_rate": 8.54e-05, + "loss": 6.9914, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 8.560000000000001e-05, + "loss": 6.9715, + "step": 436 + }, + { + "epoch": 0.24, + "learning_rate": 8.58e-05, + "loss": 6.9418, + "step": 437 + }, + { + "epoch": 0.25, + "learning_rate": 8.6e-05, + "loss": 7.0783, + "step": 438 + }, + { + "epoch": 0.25, + "learning_rate": 8.620000000000001e-05, + "loss": 6.8824, + "step": 439 + }, + { + "epoch": 0.25, + "learning_rate": 8.64e-05, + "loss": 7.0227, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 8.66e-05, + "loss": 6.9784, + "step": 441 + }, + { + "epoch": 0.25, + "learning_rate": 8.680000000000001e-05, + "loss": 6.9761, + "step": 442 + }, + { + "epoch": 0.25, + "learning_rate": 8.7e-05, + "loss": 6.7901, + "step": 443 + }, + { + "epoch": 0.25, + "learning_rate": 8.72e-05, + "loss": 6.8864, + "step": 444 + }, + { + "epoch": 0.25, + "learning_rate": 8.740000000000001e-05, + "loss": 6.8672, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 8.76e-05, + "loss": 7.1435, + "step": 446 + }, + { + "epoch": 0.25, + "learning_rate": 8.78e-05, + "loss": 6.8382, + "step": 447 + }, + { + "epoch": 0.25, + "learning_rate": 8.800000000000001e-05, + "loss": 6.9071, + "step": 448 + }, + { + "epoch": 0.25, + "learning_rate": 8.82e-05, + "loss": 6.9615, + "step": 449 + }, + { + "epoch": 0.25, + "learning_rate": 8.840000000000001e-05, + "loss": 6.3838, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 8.86e-05, + "loss": 7.0146, + "step": 451 + }, + { + "epoch": 0.25, + "learning_rate": 8.88e-05, + "loss": 7.0833, + "step": 452 + }, + { + "epoch": 0.25, + "learning_rate": 8.900000000000001e-05, + "loss": 7.2134, + "step": 453 + }, + { + "epoch": 0.25, + "learning_rate": 8.92e-05, + "loss": 7.107, + "step": 454 + }, + { + "epoch": 0.26, + "learning_rate": 8.94e-05, + "loss": 6.9886, + "step": 455 + }, + { + "epoch": 0.26, + "learning_rate": 8.960000000000001e-05, + "loss": 7.0828, + "step": 456 + }, + { + "epoch": 0.26, + "learning_rate": 8.98e-05, + "loss": 6.8274, + "step": 457 + }, + { + "epoch": 0.26, + "learning_rate": 9e-05, + "loss": 7.0311, + "step": 458 + }, + { + "epoch": 0.26, + "learning_rate": 9.020000000000001e-05, + "loss": 6.8049, + "step": 459 + }, + { + "epoch": 0.26, + "learning_rate": 9.04e-05, + "loss": 7.1272, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 9.06e-05, + "loss": 6.8705, + "step": 461 + }, + { + "epoch": 0.26, + "learning_rate": 9.080000000000001e-05, + "loss": 6.8506, + "step": 462 + }, + { + "epoch": 0.26, + "learning_rate": 9.1e-05, + "loss": 6.8942, + "step": 463 + }, + { + "epoch": 0.26, + "learning_rate": 9.120000000000001e-05, + "loss": 7.0391, + "step": 464 + }, + { + "epoch": 0.26, + "learning_rate": 9.140000000000001e-05, + "loss": 6.8398, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 9.16e-05, + "loss": 7.1005, + "step": 466 + }, + { + "epoch": 0.26, + "learning_rate": 9.180000000000001e-05, + "loss": 6.9834, + "step": 467 + }, + { + "epoch": 0.26, + "learning_rate": 9.200000000000001e-05, + "loss": 7.0702, + "step": 468 + }, + { + "epoch": 0.26, + "learning_rate": 9.22e-05, + "loss": 6.7883, + "step": 469 + }, + { + "epoch": 0.26, + "learning_rate": 9.240000000000001e-05, + "loss": 7.0088, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 9.260000000000001e-05, + "loss": 7.033, + "step": 471 + }, + { + "epoch": 0.26, + "learning_rate": 9.28e-05, + "loss": 6.848, + "step": 472 + }, + { + "epoch": 0.27, + "learning_rate": 9.300000000000001e-05, + "loss": 6.8449, + "step": 473 + }, + { + "epoch": 0.27, + "learning_rate": 9.320000000000002e-05, + "loss": 7.0345, + "step": 474 + }, + { + "epoch": 0.27, + "learning_rate": 9.340000000000001e-05, + "loss": 6.8402, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 9.360000000000001e-05, + "loss": 6.9558, + "step": 476 + }, + { + "epoch": 0.27, + "learning_rate": 9.38e-05, + "loss": 7.0038, + "step": 477 + }, + { + "epoch": 0.27, + "learning_rate": 9.4e-05, + "loss": 6.9085, + "step": 478 + }, + { + "epoch": 0.27, + "learning_rate": 9.42e-05, + "loss": 7.0282, + "step": 479 + }, + { + "epoch": 0.27, + "learning_rate": 9.44e-05, + "loss": 7.0454, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 9.46e-05, + "loss": 7.155, + "step": 481 + }, + { + "epoch": 0.27, + "learning_rate": 9.48e-05, + "loss": 6.991, + "step": 482 + }, + { + "epoch": 0.27, + "learning_rate": 9.5e-05, + "loss": 7.3644, + "step": 483 + }, + { + "epoch": 0.27, + "learning_rate": 9.52e-05, + "loss": 6.9936, + "step": 484 + }, + { + "epoch": 0.27, + "learning_rate": 9.54e-05, + "loss": 7.0514, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 9.56e-05, + "loss": 7.0196, + "step": 486 + }, + { + "epoch": 0.27, + "learning_rate": 9.58e-05, + "loss": 6.9094, + "step": 487 + }, + { + "epoch": 0.27, + "learning_rate": 9.6e-05, + "loss": 7.1765, + "step": 488 + }, + { + "epoch": 0.27, + "learning_rate": 9.620000000000001e-05, + "loss": 6.9902, + "step": 489 + }, + { + "epoch": 0.27, + "learning_rate": 9.64e-05, + "loss": 6.9647, + "step": 490 + }, + { + "epoch": 0.28, + "learning_rate": 9.66e-05, + "loss": 6.8088, + "step": 491 + }, + { + "epoch": 0.28, + "learning_rate": 9.680000000000001e-05, + "loss": 7.1804, + "step": 492 + }, + { + "epoch": 0.28, + "learning_rate": 9.7e-05, + "loss": 7.0074, + "step": 493 + }, + { + "epoch": 0.28, + "learning_rate": 9.72e-05, + "loss": 7.0812, + "step": 494 + }, + { + "epoch": 0.28, + "learning_rate": 9.74e-05, + "loss": 6.8391, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 9.76e-05, + "loss": 6.9229, + "step": 496 + }, + { + "epoch": 0.28, + "learning_rate": 9.78e-05, + "loss": 6.7293, + "step": 497 + }, + { + "epoch": 0.28, + "learning_rate": 9.8e-05, + "loss": 6.917, + "step": 498 + }, + { + "epoch": 0.28, + "learning_rate": 9.82e-05, + "loss": 6.6942, + "step": 499 + }, + { + "epoch": 0.28, + "learning_rate": 9.84e-05, + "loss": 6.4796, + "step": 500 + }, + { + "epoch": 0.28, + "eval_loss": 10.769026756286621, + "eval_runtime": 1321.3649, + "eval_samples_per_second": 1.999, + "eval_steps_per_second": 0.25, + "eval_wer": 1.0, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 9.86e-05, + "loss": 7.1292, + "step": 501 + }, + { + "epoch": 0.28, + "learning_rate": 9.88e-05, + "loss": 7.1513, + "step": 502 + }, + { + "epoch": 0.28, + "learning_rate": 9.900000000000001e-05, + "loss": 6.953, + "step": 503 + }, + { + "epoch": 0.28, + "learning_rate": 9.92e-05, + "loss": 7.0816, + "step": 504 + }, + { + "epoch": 0.28, + "learning_rate": 9.94e-05, + "loss": 6.9108, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 9.960000000000001e-05, + "loss": 7.0561, + "step": 506 + }, + { + "epoch": 0.28, + "learning_rate": 9.98e-05, + "loss": 6.8647, + "step": 507 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001, + "loss": 7.1634, + "step": 508 + }, + { + "epoch": 0.29, + "learning_rate": 9.997938994229185e-05, + "loss": 7.1713, + "step": 509 + }, + { + "epoch": 0.29, + "learning_rate": 9.995877988458369e-05, + "loss": 7.1024, + "step": 510 + }, + { + "epoch": 0.29, + "learning_rate": 9.993816982687552e-05, + "loss": 7.0799, + "step": 511 + }, + { + "epoch": 0.29, + "learning_rate": 9.991755976916735e-05, + "loss": 6.9018, + "step": 512 + }, + { + "epoch": 0.29, + "learning_rate": 9.98969497114592e-05, + "loss": 6.9022, + "step": 513 + }, + { + "epoch": 0.29, + "learning_rate": 9.987633965375104e-05, + "loss": 6.8078, + "step": 514 + }, + { + "epoch": 0.29, + "learning_rate": 9.985572959604287e-05, + "loss": 7.0772, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 9.983511953833471e-05, + "loss": 7.0942, + "step": 516 + }, + { + "epoch": 0.29, + "learning_rate": 9.981450948062655e-05, + "loss": 7.0663, + "step": 517 + }, + { + "epoch": 0.29, + "learning_rate": 9.979389942291838e-05, + "loss": 7.1295, + "step": 518 + }, + { + "epoch": 0.29, + "learning_rate": 9.977328936521023e-05, + "loss": 6.983, + "step": 519 + }, + { + "epoch": 0.29, + "learning_rate": 9.975267930750206e-05, + "loss": 6.9278, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 9.97320692497939e-05, + "loss": 6.9215, + "step": 521 + }, + { + "epoch": 0.29, + "learning_rate": 9.971145919208574e-05, + "loss": 7.0512, + "step": 522 + }, + { + "epoch": 0.29, + "learning_rate": 9.969084913437759e-05, + "loss": 6.8983, + "step": 523 + }, + { + "epoch": 0.29, + "learning_rate": 9.967023907666943e-05, + "loss": 6.9866, + "step": 524 + }, + { + "epoch": 0.29, + "learning_rate": 9.964962901896126e-05, + "loss": 7.0045, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 9.962901896125309e-05, + "loss": 7.0079, + "step": 526 + }, + { + "epoch": 0.3, + "learning_rate": 9.960840890354493e-05, + "loss": 7.2007, + "step": 527 + }, + { + "epoch": 0.3, + "learning_rate": 9.958779884583678e-05, + "loss": 6.841, + "step": 528 + }, + { + "epoch": 0.3, + "learning_rate": 9.956718878812861e-05, + "loss": 6.9953, + "step": 529 + }, + { + "epoch": 0.3, + "learning_rate": 9.954657873042045e-05, + "loss": 7.1213, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 9.95259686727123e-05, + "loss": 6.7855, + "step": 531 + }, + { + "epoch": 0.3, + "learning_rate": 9.950535861500412e-05, + "loss": 6.8574, + "step": 532 + }, + { + "epoch": 0.3, + "learning_rate": 9.948474855729597e-05, + "loss": 6.9793, + "step": 533 + }, + { + "epoch": 0.3, + "learning_rate": 9.94641384995878e-05, + "loss": 6.82, + "step": 534 + }, + { + "epoch": 0.3, + "learning_rate": 9.944352844187964e-05, + "loss": 7.2106, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 9.942291838417148e-05, + "loss": 7.0077, + "step": 536 + }, + { + "epoch": 0.3, + "learning_rate": 9.940230832646333e-05, + "loss": 6.8816, + "step": 537 + }, + { + "epoch": 0.3, + "learning_rate": 9.938169826875516e-05, + "loss": 6.896, + "step": 538 + }, + { + "epoch": 0.3, + "learning_rate": 9.936108821104699e-05, + "loss": 6.9621, + "step": 539 + }, + { + "epoch": 0.3, + "learning_rate": 9.934047815333883e-05, + "loss": 7.1627, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 9.931986809563067e-05, + "loss": 6.9173, + "step": 541 + }, + { + "epoch": 0.3, + "learning_rate": 9.929925803792252e-05, + "loss": 7.1056, + "step": 542 + }, + { + "epoch": 0.3, + "learning_rate": 9.927864798021435e-05, + "loss": 6.9196, + "step": 543 + }, + { + "epoch": 0.3, + "learning_rate": 9.925803792250619e-05, + "loss": 6.9014, + "step": 544 + }, + { + "epoch": 0.31, + "learning_rate": 9.923742786479803e-05, + "loss": 6.6504, + "step": 545 + }, + { + "epoch": 0.31, + "learning_rate": 9.921681780708986e-05, + "loss": 6.7571, + "step": 546 + }, + { + "epoch": 0.31, + "learning_rate": 9.91962077493817e-05, + "loss": 7.0316, + "step": 547 + }, + { + "epoch": 0.31, + "learning_rate": 9.917559769167354e-05, + "loss": 6.7477, + "step": 548 + }, + { + "epoch": 0.31, + "learning_rate": 9.915498763396538e-05, + "loss": 6.3523, + "step": 549 + }, + { + "epoch": 0.31, + "learning_rate": 9.913437757625722e-05, + "loss": 6.1208, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 9.911376751854907e-05, + "loss": 7.3993, + "step": 551 + }, + { + "epoch": 0.31, + "learning_rate": 9.90931574608409e-05, + "loss": 7.4291, + "step": 552 + }, + { + "epoch": 0.31, + "learning_rate": 9.907254740313273e-05, + "loss": 7.1349, + "step": 553 + }, + { + "epoch": 0.31, + "learning_rate": 9.905193734542457e-05, + "loss": 7.1203, + "step": 554 + }, + { + "epoch": 0.31, + "learning_rate": 9.903132728771641e-05, + "loss": 7.0658, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 9.901071723000826e-05, + "loss": 7.1187, + "step": 556 + }, + { + "epoch": 0.31, + "learning_rate": 9.899010717230009e-05, + "loss": 7.1719, + "step": 557 + }, + { + "epoch": 0.31, + "learning_rate": 9.896949711459193e-05, + "loss": 7.1313, + "step": 558 + }, + { + "epoch": 0.31, + "learning_rate": 9.894888705688376e-05, + "loss": 6.8195, + "step": 559 + }, + { + "epoch": 0.31, + "learning_rate": 9.89282769991756e-05, + "loss": 7.0463, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 9.890766694146743e-05, + "loss": 7.0163, + "step": 561 + }, + { + "epoch": 0.32, + "learning_rate": 9.888705688375928e-05, + "loss": 6.9171, + "step": 562 + }, + { + "epoch": 0.32, + "learning_rate": 9.886644682605112e-05, + "loss": 7.0382, + "step": 563 + }, + { + "epoch": 0.32, + "learning_rate": 9.884583676834296e-05, + "loss": 7.2258, + "step": 564 + }, + { + "epoch": 0.32, + "learning_rate": 9.882522671063479e-05, + "loss": 7.0302, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 9.880461665292662e-05, + "loss": 6.9996, + "step": 566 + }, + { + "epoch": 0.32, + "learning_rate": 9.878400659521847e-05, + "loss": 7.2168, + "step": 567 + }, + { + "epoch": 0.32, + "learning_rate": 9.876339653751031e-05, + "loss": 7.0449, + "step": 568 + }, + { + "epoch": 0.32, + "learning_rate": 9.874278647980215e-05, + "loss": 7.135, + "step": 569 + }, + { + "epoch": 0.32, + "learning_rate": 9.872217642209398e-05, + "loss": 7.0256, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 9.870156636438583e-05, + "loss": 6.946, + "step": 571 + }, + { + "epoch": 0.32, + "learning_rate": 9.868095630667767e-05, + "loss": 6.9494, + "step": 572 + }, + { + "epoch": 0.32, + "learning_rate": 9.86603462489695e-05, + "loss": 6.9072, + "step": 573 + }, + { + "epoch": 0.32, + "learning_rate": 9.863973619126134e-05, + "loss": 6.8277, + "step": 574 + }, + { + "epoch": 0.32, + "learning_rate": 9.861912613355317e-05, + "loss": 6.9807, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 9.859851607584502e-05, + "loss": 6.9837, + "step": 576 + }, + { + "epoch": 0.32, + "learning_rate": 9.857790601813686e-05, + "loss": 7.0911, + "step": 577 + }, + { + "epoch": 0.32, + "learning_rate": 9.85572959604287e-05, + "loss": 6.8185, + "step": 578 + }, + { + "epoch": 0.32, + "learning_rate": 9.853668590272053e-05, + "loss": 7.0473, + "step": 579 + }, + { + "epoch": 0.33, + "learning_rate": 9.851607584501236e-05, + "loss": 7.2018, + "step": 580 + }, + { + "epoch": 0.33, + "learning_rate": 9.84954657873042e-05, + "loss": 6.9656, + "step": 581 + }, + { + "epoch": 0.33, + "learning_rate": 9.847485572959605e-05, + "loss": 6.9458, + "step": 582 + }, + { + "epoch": 0.33, + "learning_rate": 9.845424567188789e-05, + "loss": 6.8463, + "step": 583 + }, + { + "epoch": 0.33, + "learning_rate": 9.843363561417972e-05, + "loss": 7.2183, + "step": 584 + }, + { + "epoch": 0.33, + "learning_rate": 9.841302555647157e-05, + "loss": 6.992, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 9.83924154987634e-05, + "loss": 6.8293, + "step": 586 + }, + { + "epoch": 0.33, + "learning_rate": 9.837180544105524e-05, + "loss": 7.114, + "step": 587 + }, + { + "epoch": 0.33, + "learning_rate": 9.835119538334708e-05, + "loss": 6.9653, + "step": 588 + }, + { + "epoch": 0.33, + "learning_rate": 9.833058532563891e-05, + "loss": 7.1877, + "step": 589 + }, + { + "epoch": 0.33, + "learning_rate": 9.830997526793076e-05, + "loss": 7.2924, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 9.82893652102226e-05, + "loss": 6.9767, + "step": 591 + }, + { + "epoch": 0.33, + "learning_rate": 9.826875515251444e-05, + "loss": 6.9955, + "step": 592 + }, + { + "epoch": 0.33, + "learning_rate": 9.824814509480627e-05, + "loss": 6.7293, + "step": 593 + }, + { + "epoch": 0.33, + "learning_rate": 9.82275350370981e-05, + "loss": 6.9697, + "step": 594 + }, + { + "epoch": 0.33, + "learning_rate": 9.820692497938995e-05, + "loss": 6.7856, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 9.818631492168179e-05, + "loss": 6.6475, + "step": 596 + }, + { + "epoch": 0.33, + "learning_rate": 9.816570486397363e-05, + "loss": 6.6353, + "step": 597 + }, + { + "epoch": 0.34, + "learning_rate": 9.814509480626546e-05, + "loss": 6.6394, + "step": 598 + }, + { + "epoch": 0.34, + "learning_rate": 9.81244847485573e-05, + "loss": 6.7021, + "step": 599 + }, + { + "epoch": 0.34, + "learning_rate": 9.810387469084913e-05, + "loss": 6.5174, + "step": 600 + }, + { + "epoch": 0.34, + "learning_rate": 9.808326463314098e-05, + "loss": 7.0698, + "step": 601 + }, + { + "epoch": 0.34, + "learning_rate": 9.806265457543281e-05, + "loss": 7.1515, + "step": 602 + }, + { + "epoch": 0.34, + "learning_rate": 9.804204451772465e-05, + "loss": 6.932, + "step": 603 + }, + { + "epoch": 0.34, + "learning_rate": 9.80214344600165e-05, + "loss": 7.0276, + "step": 604 + }, + { + "epoch": 0.34, + "learning_rate": 9.800082440230834e-05, + "loss": 6.8156, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 9.798021434460017e-05, + "loss": 7.0082, + "step": 606 + }, + { + "epoch": 0.34, + "learning_rate": 9.7959604286892e-05, + "loss": 7.1666, + "step": 607 + }, + { + "epoch": 0.34, + "learning_rate": 9.793899422918384e-05, + "loss": 6.9138, + "step": 608 + }, + { + "epoch": 0.34, + "learning_rate": 9.791838417147568e-05, + "loss": 7.0705, + "step": 609 + }, + { + "epoch": 0.34, + "learning_rate": 9.789777411376753e-05, + "loss": 6.8852, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 9.787716405605936e-05, + "loss": 7.1828, + "step": 611 + }, + { + "epoch": 0.34, + "learning_rate": 9.78565539983512e-05, + "loss": 7.0118, + "step": 612 + }, + { + "epoch": 0.34, + "learning_rate": 9.783594394064303e-05, + "loss": 7.047, + "step": 613 + }, + { + "epoch": 0.34, + "learning_rate": 9.781533388293487e-05, + "loss": 7.1401, + "step": 614 + }, + { + "epoch": 0.34, + "learning_rate": 9.779472382522672e-05, + "loss": 7.0227, + "step": 615 + }, + { + "epoch": 0.35, + "learning_rate": 9.777411376751855e-05, + "loss": 7.0281, + "step": 616 + }, + { + "epoch": 0.35, + "learning_rate": 9.775350370981039e-05, + "loss": 7.0143, + "step": 617 + }, + { + "epoch": 0.35, + "learning_rate": 9.773289365210223e-05, + "loss": 6.7438, + "step": 618 + }, + { + "epoch": 0.35, + "learning_rate": 9.771228359439408e-05, + "loss": 7.0274, + "step": 619 + }, + { + "epoch": 0.35, + "learning_rate": 9.769167353668591e-05, + "loss": 6.9697, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 9.767106347897774e-05, + "loss": 6.991, + "step": 621 + }, + { + "epoch": 0.35, + "learning_rate": 9.765045342126958e-05, + "loss": 6.9967, + "step": 622 + }, + { + "epoch": 0.35, + "learning_rate": 9.762984336356142e-05, + "loss": 7.0565, + "step": 623 + }, + { + "epoch": 0.35, + "learning_rate": 9.760923330585327e-05, + "loss": 7.2098, + "step": 624 + }, + { + "epoch": 0.35, + "learning_rate": 9.75886232481451e-05, + "loss": 6.9764, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 9.756801319043694e-05, + "loss": 7.0403, + "step": 626 + }, + { + "epoch": 0.35, + "learning_rate": 9.754740313272877e-05, + "loss": 6.8805, + "step": 627 + }, + { + "epoch": 0.35, + "learning_rate": 9.752679307502061e-05, + "loss": 7.0653, + "step": 628 + }, + { + "epoch": 0.35, + "learning_rate": 9.750618301731246e-05, + "loss": 7.1583, + "step": 629 + }, + { + "epoch": 0.35, + "learning_rate": 9.748557295960429e-05, + "loss": 6.8649, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 9.746496290189613e-05, + "loss": 6.8806, + "step": 631 + }, + { + "epoch": 0.35, + "learning_rate": 9.744435284418797e-05, + "loss": 6.8198, + "step": 632 + }, + { + "epoch": 0.35, + "learning_rate": 9.74237427864798e-05, + "loss": 6.8546, + "step": 633 + }, + { + "epoch": 0.36, + "learning_rate": 9.740313272877163e-05, + "loss": 7.0996, + "step": 634 + }, + { + "epoch": 0.36, + "learning_rate": 9.738252267106348e-05, + "loss": 6.9703, + "step": 635 + }, + { + "epoch": 0.36, + "learning_rate": 9.736191261335532e-05, + "loss": 6.8964, + "step": 636 + }, + { + "epoch": 0.36, + "learning_rate": 9.734130255564716e-05, + "loss": 6.9181, + "step": 637 + }, + { + "epoch": 0.36, + "learning_rate": 9.732069249793901e-05, + "loss": 6.7736, + "step": 638 + }, + { + "epoch": 0.36, + "learning_rate": 9.730008244023084e-05, + "loss": 7.0206, + "step": 639 + }, + { + "epoch": 0.36, + "learning_rate": 9.727947238252268e-05, + "loss": 7.0819, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 9.725886232481451e-05, + "loss": 6.859, + "step": 641 + }, + { + "epoch": 0.36, + "learning_rate": 9.723825226710635e-05, + "loss": 7.0651, + "step": 642 + }, + { + "epoch": 0.36, + "learning_rate": 9.72176422093982e-05, + "loss": 6.8661, + "step": 643 + }, + { + "epoch": 0.36, + "learning_rate": 9.719703215169003e-05, + "loss": 6.8842, + "step": 644 + }, + { + "epoch": 0.36, + "learning_rate": 9.717642209398187e-05, + "loss": 6.4939, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 9.715581203627371e-05, + "loss": 6.7665, + "step": 646 + }, + { + "epoch": 0.36, + "learning_rate": 9.713520197856554e-05, + "loss": 6.7676, + "step": 647 + }, + { + "epoch": 0.36, + "learning_rate": 9.711459192085737e-05, + "loss": 6.934, + "step": 648 + }, + { + "epoch": 0.36, + "learning_rate": 9.709398186314922e-05, + "loss": 7.0966, + "step": 649 + }, + { + "epoch": 0.36, + "learning_rate": 9.707337180544106e-05, + "loss": 6.5484, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 9.70527617477329e-05, + "loss": 7.1918, + "step": 651 + }, + { + "epoch": 0.37, + "learning_rate": 9.703215169002475e-05, + "loss": 7.0346, + "step": 652 + }, + { + "epoch": 0.37, + "learning_rate": 9.701154163231658e-05, + "loss": 7.1564, + "step": 653 + }, + { + "epoch": 0.37, + "learning_rate": 9.69909315746084e-05, + "loss": 7.1137, + "step": 654 + }, + { + "epoch": 0.37, + "learning_rate": 9.697032151690025e-05, + "loss": 6.9553, + "step": 655 + }, + { + "epoch": 0.37, + "learning_rate": 9.694971145919209e-05, + "loss": 6.9581, + "step": 656 + }, + { + "epoch": 0.37, + "learning_rate": 9.692910140148392e-05, + "loss": 6.8835, + "step": 657 + }, + { + "epoch": 0.37, + "learning_rate": 9.690849134377577e-05, + "loss": 7.0785, + "step": 658 + }, + { + "epoch": 0.37, + "learning_rate": 9.688788128606761e-05, + "loss": 7.0144, + "step": 659 + }, + { + "epoch": 0.37, + "learning_rate": 9.686727122835945e-05, + "loss": 6.9955, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 9.684666117065128e-05, + "loss": 7.0173, + "step": 661 + }, + { + "epoch": 0.37, + "learning_rate": 9.682605111294311e-05, + "loss": 6.9422, + "step": 662 + }, + { + "epoch": 0.37, + "learning_rate": 9.680544105523496e-05, + "loss": 6.9396, + "step": 663 + }, + { + "epoch": 0.37, + "learning_rate": 9.67848309975268e-05, + "loss": 7.0642, + "step": 664 + }, + { + "epoch": 0.37, + "learning_rate": 9.676422093981864e-05, + "loss": 6.9206, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 9.674361088211047e-05, + "loss": 6.9842, + "step": 666 + }, + { + "epoch": 0.37, + "learning_rate": 9.672300082440232e-05, + "loss": 6.9922, + "step": 667 + }, + { + "epoch": 0.37, + "learning_rate": 9.670239076669415e-05, + "loss": 6.932, + "step": 668 + }, + { + "epoch": 0.38, + "learning_rate": 9.668178070898599e-05, + "loss": 7.0071, + "step": 669 + }, + { + "epoch": 0.38, + "learning_rate": 9.666117065127783e-05, + "loss": 6.8581, + "step": 670 + }, + { + "epoch": 0.38, + "learning_rate": 9.664056059356966e-05, + "loss": 7.2524, + "step": 671 + }, + { + "epoch": 0.38, + "learning_rate": 9.66199505358615e-05, + "loss": 7.0018, + "step": 672 + }, + { + "epoch": 0.38, + "learning_rate": 9.659934047815335e-05, + "loss": 6.9941, + "step": 673 + }, + { + "epoch": 0.38, + "learning_rate": 9.657873042044518e-05, + "loss": 6.9293, + "step": 674 + }, + { + "epoch": 0.38, + "learning_rate": 9.655812036273702e-05, + "loss": 6.9698, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 9.653751030502885e-05, + "loss": 7.0254, + "step": 676 + }, + { + "epoch": 0.38, + "learning_rate": 9.65169002473207e-05, + "loss": 6.7698, + "step": 677 + }, + { + "epoch": 0.38, + "learning_rate": 9.649629018961254e-05, + "loss": 6.978, + "step": 678 + }, + { + "epoch": 0.38, + "learning_rate": 9.647568013190438e-05, + "loss": 6.9172, + "step": 679 + }, + { + "epoch": 0.38, + "learning_rate": 9.645507007419621e-05, + "loss": 7.0378, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 9.643446001648804e-05, + "loss": 7.114, + "step": 681 + }, + { + "epoch": 0.38, + "learning_rate": 9.641384995877989e-05, + "loss": 6.9028, + "step": 682 + }, + { + "epoch": 0.38, + "learning_rate": 9.639323990107173e-05, + "loss": 6.9735, + "step": 683 + }, + { + "epoch": 0.38, + "learning_rate": 9.637262984336357e-05, + "loss": 6.9267, + "step": 684 + }, + { + "epoch": 0.38, + "learning_rate": 9.63520197856554e-05, + "loss": 7.1803, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 9.633140972794725e-05, + "loss": 6.9781, + "step": 686 + }, + { + "epoch": 0.39, + "learning_rate": 9.631079967023909e-05, + "loss": 6.8014, + "step": 687 + }, + { + "epoch": 0.39, + "learning_rate": 9.629018961253092e-05, + "loss": 6.9893, + "step": 688 + }, + { + "epoch": 0.39, + "learning_rate": 9.626957955482275e-05, + "loss": 7.2205, + "step": 689 + }, + { + "epoch": 0.39, + "learning_rate": 9.624896949711459e-05, + "loss": 6.9157, + "step": 690 + }, + { + "epoch": 0.39, + "learning_rate": 9.622835943940644e-05, + "loss": 6.9495, + "step": 691 + }, + { + "epoch": 0.39, + "learning_rate": 9.620774938169828e-05, + "loss": 7.0654, + "step": 692 + }, + { + "epoch": 0.39, + "learning_rate": 9.618713932399012e-05, + "loss": 6.9128, + "step": 693 + }, + { + "epoch": 0.39, + "learning_rate": 9.616652926628195e-05, + "loss": 6.9559, + "step": 694 + }, + { + "epoch": 0.39, + "learning_rate": 9.614591920857378e-05, + "loss": 6.8798, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 9.612530915086563e-05, + "loss": 6.9558, + "step": 696 + }, + { + "epoch": 0.39, + "learning_rate": 9.610469909315747e-05, + "loss": 6.8029, + "step": 697 + }, + { + "epoch": 0.39, + "learning_rate": 9.60840890354493e-05, + "loss": 6.8157, + "step": 698 + }, + { + "epoch": 0.39, + "learning_rate": 9.606347897774114e-05, + "loss": 6.6476, + "step": 699 + }, + { + "epoch": 0.39, + "learning_rate": 9.604286892003299e-05, + "loss": 6.6171, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 9.602225886232481e-05, + "loss": 6.9615, + "step": 701 + }, + { + "epoch": 0.39, + "learning_rate": 9.600164880461666e-05, + "loss": 7.0158, + "step": 702 + }, + { + "epoch": 0.39, + "learning_rate": 9.598103874690849e-05, + "loss": 7.0117, + "step": 703 + }, + { + "epoch": 0.39, + "learning_rate": 9.596042868920033e-05, + "loss": 7.2157, + "step": 704 + }, + { + "epoch": 0.4, + "learning_rate": 9.593981863149217e-05, + "loss": 6.9422, + "step": 705 + }, + { + "epoch": 0.4, + "learning_rate": 9.591920857378402e-05, + "loss": 7.1864, + "step": 706 + }, + { + "epoch": 0.4, + "learning_rate": 9.589859851607586e-05, + "loss": 7.0463, + "step": 707 + }, + { + "epoch": 0.4, + "learning_rate": 9.587798845836769e-05, + "loss": 7.1544, + "step": 708 + }, + { + "epoch": 0.4, + "learning_rate": 9.585737840065952e-05, + "loss": 6.8326, + "step": 709 + }, + { + "epoch": 0.4, + "learning_rate": 9.583676834295136e-05, + "loss": 7.0521, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 9.581615828524321e-05, + "loss": 6.981, + "step": 711 + }, + { + "epoch": 0.4, + "learning_rate": 9.579554822753504e-05, + "loss": 6.9879, + "step": 712 + }, + { + "epoch": 0.4, + "learning_rate": 9.577493816982688e-05, + "loss": 6.909, + "step": 713 + }, + { + "epoch": 0.4, + "learning_rate": 9.575432811211872e-05, + "loss": 7.1222, + "step": 714 + }, + { + "epoch": 0.4, + "learning_rate": 9.573371805441055e-05, + "loss": 7.0187, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 9.57131079967024e-05, + "loss": 6.8798, + "step": 716 + }, + { + "epoch": 0.4, + "learning_rate": 9.569249793899423e-05, + "loss": 6.8435, + "step": 717 + }, + { + "epoch": 0.4, + "learning_rate": 9.567188788128607e-05, + "loss": 7.0416, + "step": 718 + }, + { + "epoch": 0.4, + "learning_rate": 9.565127782357791e-05, + "loss": 6.789, + "step": 719 + }, + { + "epoch": 0.4, + "learning_rate": 9.563066776586976e-05, + "loss": 6.9611, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 9.561005770816159e-05, + "loss": 7.0588, + "step": 721 + }, + { + "epoch": 0.4, + "learning_rate": 9.558944765045342e-05, + "loss": 7.1861, + "step": 722 + }, + { + "epoch": 0.41, + "learning_rate": 9.556883759274526e-05, + "loss": 7.0159, + "step": 723 + }, + { + "epoch": 0.41, + "learning_rate": 9.55482275350371e-05, + "loss": 6.9765, + "step": 724 + }, + { + "epoch": 0.41, + "learning_rate": 9.552761747732895e-05, + "loss": 6.8697, + "step": 725 + }, + { + "epoch": 0.41, + "learning_rate": 9.550700741962078e-05, + "loss": 6.7586, + "step": 726 + }, + { + "epoch": 0.41, + "learning_rate": 9.548639736191262e-05, + "loss": 6.5953, + "step": 727 + }, + { + "epoch": 0.41, + "learning_rate": 9.546578730420445e-05, + "loss": 6.9291, + "step": 728 + }, + { + "epoch": 0.41, + "learning_rate": 9.54451772464963e-05, + "loss": 6.7768, + "step": 729 + }, + { + "epoch": 0.41, + "learning_rate": 9.542456718878814e-05, + "loss": 7.0097, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 9.540395713107997e-05, + "loss": 6.8871, + "step": 731 + }, + { + "epoch": 0.41, + "learning_rate": 9.538334707337181e-05, + "loss": 6.7709, + "step": 732 + }, + { + "epoch": 0.41, + "learning_rate": 9.536273701566365e-05, + "loss": 6.9573, + "step": 733 + }, + { + "epoch": 0.41, + "learning_rate": 9.53421269579555e-05, + "loss": 6.9672, + "step": 734 + }, + { + "epoch": 0.41, + "learning_rate": 9.532151690024733e-05, + "loss": 6.8999, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 9.530090684253916e-05, + "loss": 6.8073, + "step": 736 + }, + { + "epoch": 0.41, + "learning_rate": 9.5280296784831e-05, + "loss": 6.8856, + "step": 737 + }, + { + "epoch": 0.41, + "learning_rate": 9.525968672712284e-05, + "loss": 6.8825, + "step": 738 + }, + { + "epoch": 0.41, + "learning_rate": 9.523907666941469e-05, + "loss": 6.9071, + "step": 739 + }, + { + "epoch": 0.41, + "learning_rate": 9.521846661170652e-05, + "loss": 6.9184, + "step": 740 + }, + { + "epoch": 0.42, + "learning_rate": 9.519785655399836e-05, + "loss": 7.0114, + "step": 741 + }, + { + "epoch": 0.42, + "learning_rate": 9.517724649629019e-05, + "loss": 6.982, + "step": 742 + }, + { + "epoch": 0.42, + "learning_rate": 9.515663643858203e-05, + "loss": 6.963, + "step": 743 + }, + { + "epoch": 0.42, + "learning_rate": 9.513602638087386e-05, + "loss": 7.0866, + "step": 744 + }, + { + "epoch": 0.42, + "learning_rate": 9.51154163231657e-05, + "loss": 6.9062, + "step": 745 + }, + { + "epoch": 0.42, + "learning_rate": 9.509480626545755e-05, + "loss": 6.5833, + "step": 746 + }, + { + "epoch": 0.42, + "learning_rate": 9.50741962077494e-05, + "loss": 6.6821, + "step": 747 + }, + { + "epoch": 0.42, + "learning_rate": 9.505358615004122e-05, + "loss": 6.6342, + "step": 748 + }, + { + "epoch": 0.42, + "learning_rate": 9.503297609233305e-05, + "loss": 6.4835, + "step": 749 + }, + { + "epoch": 0.42, + "learning_rate": 9.50123660346249e-05, + "loss": 6.368, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 9.499175597691674e-05, + "loss": 7.1185, + "step": 751 + }, + { + "epoch": 0.42, + "learning_rate": 9.497114591920858e-05, + "loss": 7.0435, + "step": 752 + }, + { + "epoch": 0.42, + "learning_rate": 9.495053586150041e-05, + "loss": 6.9648, + "step": 753 + }, + { + "epoch": 0.42, + "learning_rate": 9.492992580379226e-05, + "loss": 6.9231, + "step": 754 + }, + { + "epoch": 0.42, + "learning_rate": 9.49093157460841e-05, + "loss": 6.9011, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 9.488870568837593e-05, + "loss": 7.0139, + "step": 756 + }, + { + "epoch": 0.42, + "learning_rate": 9.486809563066777e-05, + "loss": 6.9894, + "step": 757 + }, + { + "epoch": 0.42, + "learning_rate": 9.48474855729596e-05, + "loss": 7.069, + "step": 758 + }, + { + "epoch": 0.43, + "learning_rate": 9.482687551525145e-05, + "loss": 6.68, + "step": 759 + }, + { + "epoch": 0.43, + "learning_rate": 9.480626545754329e-05, + "loss": 6.7537, + "step": 760 + }, + { + "epoch": 0.43, + "learning_rate": 9.478565539983513e-05, + "loss": 6.8721, + "step": 761 + }, + { + "epoch": 0.43, + "learning_rate": 9.476504534212696e-05, + "loss": 6.8874, + "step": 762 + }, + { + "epoch": 0.43, + "learning_rate": 9.474443528441879e-05, + "loss": 6.9849, + "step": 763 + }, + { + "epoch": 0.43, + "learning_rate": 9.472382522671064e-05, + "loss": 6.8239, + "step": 764 + }, + { + "epoch": 0.43, + "learning_rate": 9.470321516900248e-05, + "loss": 6.8274, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 9.468260511129432e-05, + "loss": 6.6282, + "step": 766 + }, + { + "epoch": 0.43, + "learning_rate": 9.466199505358615e-05, + "loss": 6.9672, + "step": 767 + }, + { + "epoch": 0.43, + "learning_rate": 9.4641384995878e-05, + "loss": 6.8185, + "step": 768 + }, + { + "epoch": 0.43, + "learning_rate": 9.462077493816983e-05, + "loss": 7.0341, + "step": 769 + }, + { + "epoch": 0.43, + "learning_rate": 9.460016488046167e-05, + "loss": 6.7661, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 9.457955482275351e-05, + "loss": 6.9149, + "step": 771 + }, + { + "epoch": 0.43, + "learning_rate": 9.455894476504534e-05, + "loss": 6.9028, + "step": 772 + }, + { + "epoch": 0.43, + "learning_rate": 9.453833470733719e-05, + "loss": 6.7691, + "step": 773 + }, + { + "epoch": 0.43, + "learning_rate": 9.451772464962903e-05, + "loss": 6.9149, + "step": 774 + }, + { + "epoch": 0.43, + "learning_rate": 9.449711459192086e-05, + "loss": 6.7891, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 9.447650453421269e-05, + "loss": 6.9535, + "step": 776 + }, + { + "epoch": 0.44, + "learning_rate": 9.445589447650453e-05, + "loss": 6.809, + "step": 777 + }, + { + "epoch": 0.44, + "learning_rate": 9.443528441879638e-05, + "loss": 6.8816, + "step": 778 + }, + { + "epoch": 0.44, + "learning_rate": 9.441467436108822e-05, + "loss": 6.9042, + "step": 779 + }, + { + "epoch": 0.44, + "learning_rate": 9.439406430338006e-05, + "loss": 6.8693, + "step": 780 + }, + { + "epoch": 0.44, + "learning_rate": 9.437345424567189e-05, + "loss": 6.9413, + "step": 781 + }, + { + "epoch": 0.44, + "learning_rate": 9.435284418796374e-05, + "loss": 6.8283, + "step": 782 + }, + { + "epoch": 0.44, + "learning_rate": 9.433223413025557e-05, + "loss": 6.9375, + "step": 783 + }, + { + "epoch": 0.44, + "learning_rate": 9.431162407254741e-05, + "loss": 6.936, + "step": 784 + }, + { + "epoch": 0.44, + "learning_rate": 9.429101401483924e-05, + "loss": 6.7873, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 9.427040395713108e-05, + "loss": 6.8551, + "step": 786 + }, + { + "epoch": 0.44, + "learning_rate": 9.424979389942293e-05, + "loss": 6.6501, + "step": 787 + }, + { + "epoch": 0.44, + "learning_rate": 9.422918384171477e-05, + "loss": 7.0524, + "step": 788 + }, + { + "epoch": 0.44, + "learning_rate": 9.42085737840066e-05, + "loss": 6.8138, + "step": 789 + }, + { + "epoch": 0.44, + "learning_rate": 9.418796372629843e-05, + "loss": 6.9682, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 9.416735366859027e-05, + "loss": 6.8771, + "step": 791 + }, + { + "epoch": 0.44, + "learning_rate": 9.414674361088212e-05, + "loss": 6.5622, + "step": 792 + }, + { + "epoch": 0.44, + "learning_rate": 9.412613355317396e-05, + "loss": 6.9002, + "step": 793 + }, + { + "epoch": 0.45, + "learning_rate": 9.41055234954658e-05, + "loss": 6.9118, + "step": 794 + }, + { + "epoch": 0.45, + "learning_rate": 9.408491343775763e-05, + "loss": 6.7161, + "step": 795 + }, + { + "epoch": 0.45, + "learning_rate": 9.406430338004946e-05, + "loss": 6.7147, + "step": 796 + }, + { + "epoch": 0.45, + "learning_rate": 9.40436933223413e-05, + "loss": 6.3663, + "step": 797 + }, + { + "epoch": 0.45, + "learning_rate": 9.402308326463315e-05, + "loss": 6.6257, + "step": 798 + }, + { + "epoch": 0.45, + "learning_rate": 9.400247320692498e-05, + "loss": 6.6646, + "step": 799 + }, + { + "epoch": 0.45, + "learning_rate": 9.398186314921682e-05, + "loss": 6.5045, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 9.396125309150866e-05, + "loss": 7.2057, + "step": 801 + }, + { + "epoch": 0.45, + "learning_rate": 9.394064303380051e-05, + "loss": 7.0751, + "step": 802 + }, + { + "epoch": 0.45, + "learning_rate": 9.392003297609234e-05, + "loss": 7.1096, + "step": 803 + }, + { + "epoch": 0.45, + "learning_rate": 9.389942291838417e-05, + "loss": 6.9131, + "step": 804 + }, + { + "epoch": 0.45, + "learning_rate": 9.387881286067601e-05, + "loss": 7.1441, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 9.385820280296785e-05, + "loss": 7.019, + "step": 806 + }, + { + "epoch": 0.45, + "learning_rate": 9.38375927452597e-05, + "loss": 6.9014, + "step": 807 + }, + { + "epoch": 0.45, + "learning_rate": 9.381698268755153e-05, + "loss": 6.8237, + "step": 808 + }, + { + "epoch": 0.45, + "learning_rate": 9.379637262984337e-05, + "loss": 7.0874, + "step": 809 + }, + { + "epoch": 0.45, + "learning_rate": 9.37757625721352e-05, + "loss": 6.9981, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 9.375515251442704e-05, + "loss": 6.9141, + "step": 811 + }, + { + "epoch": 0.46, + "learning_rate": 9.373454245671889e-05, + "loss": 7.0097, + "step": 812 + }, + { + "epoch": 0.46, + "learning_rate": 9.371393239901072e-05, + "loss": 6.8919, + "step": 813 + }, + { + "epoch": 0.46, + "learning_rate": 9.369332234130256e-05, + "loss": 6.8607, + "step": 814 + }, + { + "epoch": 0.46, + "learning_rate": 9.36727122835944e-05, + "loss": 6.8775, + "step": 815 + }, + { + "epoch": 0.46, + "learning_rate": 9.365210222588623e-05, + "loss": 6.9519, + "step": 816 + }, + { + "epoch": 0.46, + "learning_rate": 9.363149216817806e-05, + "loss": 6.7872, + "step": 817 + }, + { + "epoch": 0.46, + "learning_rate": 9.361088211046991e-05, + "loss": 6.9194, + "step": 818 + }, + { + "epoch": 0.46, + "learning_rate": 9.359027205276175e-05, + "loss": 7.0224, + "step": 819 + }, + { + "epoch": 0.46, + "learning_rate": 9.35696619950536e-05, + "loss": 6.8667, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 9.354905193734544e-05, + "loss": 6.9883, + "step": 821 + }, + { + "epoch": 0.46, + "learning_rate": 9.352844187963727e-05, + "loss": 6.875, + "step": 822 + }, + { + "epoch": 0.46, + "learning_rate": 9.35078318219291e-05, + "loss": 7.054, + "step": 823 + }, + { + "epoch": 0.46, + "learning_rate": 9.348722176422094e-05, + "loss": 6.9536, + "step": 824 + }, + { + "epoch": 0.46, + "learning_rate": 9.346661170651278e-05, + "loss": 6.953, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 9.344600164880463e-05, + "loss": 6.7752, + "step": 826 + }, + { + "epoch": 0.46, + "learning_rate": 9.342539159109646e-05, + "loss": 6.9207, + "step": 827 + }, + { + "epoch": 0.46, + "learning_rate": 9.34047815333883e-05, + "loss": 6.76, + "step": 828 + }, + { + "epoch": 0.46, + "learning_rate": 9.338417147568014e-05, + "loss": 6.9268, + "step": 829 + }, + { + "epoch": 0.47, + "learning_rate": 9.336356141797197e-05, + "loss": 6.906, + "step": 830 + }, + { + "epoch": 0.47, + "learning_rate": 9.33429513602638e-05, + "loss": 7.0418, + "step": 831 + }, + { + "epoch": 0.47, + "learning_rate": 9.332234130255565e-05, + "loss": 6.9929, + "step": 832 + }, + { + "epoch": 0.47, + "learning_rate": 9.330173124484749e-05, + "loss": 6.874, + "step": 833 + }, + { + "epoch": 0.47, + "learning_rate": 9.328112118713933e-05, + "loss": 6.5568, + "step": 834 + }, + { + "epoch": 0.47, + "learning_rate": 9.326051112943118e-05, + "loss": 6.6961, + "step": 835 + }, + { + "epoch": 0.47, + "learning_rate": 9.323990107172301e-05, + "loss": 6.851, + "step": 836 + }, + { + "epoch": 0.47, + "learning_rate": 9.321929101401484e-05, + "loss": 6.9095, + "step": 837 + }, + { + "epoch": 0.47, + "learning_rate": 9.319868095630668e-05, + "loss": 6.7053, + "step": 838 + }, + { + "epoch": 0.47, + "learning_rate": 9.317807089859852e-05, + "loss": 6.711, + "step": 839 + }, + { + "epoch": 0.47, + "learning_rate": 9.315746084089035e-05, + "loss": 6.8332, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 9.31368507831822e-05, + "loss": 7.0099, + "step": 841 + }, + { + "epoch": 0.47, + "learning_rate": 9.311624072547404e-05, + "loss": 6.9028, + "step": 842 + }, + { + "epoch": 0.47, + "learning_rate": 9.309563066776587e-05, + "loss": 6.6767, + "step": 843 + }, + { + "epoch": 0.47, + "learning_rate": 9.307502061005771e-05, + "loss": 7.0714, + "step": 844 + }, + { + "epoch": 0.47, + "learning_rate": 9.305441055234954e-05, + "loss": 6.5427, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 9.303380049464139e-05, + "loss": 6.5091, + "step": 846 + }, + { + "epoch": 0.47, + "learning_rate": 9.301319043693323e-05, + "loss": 6.7675, + "step": 847 + }, + { + "epoch": 0.48, + "learning_rate": 9.299258037922507e-05, + "loss": 6.8377, + "step": 848 + }, + { + "epoch": 0.48, + "learning_rate": 9.297197032151692e-05, + "loss": 6.4041, + "step": 849 + }, + { + "epoch": 0.48, + "learning_rate": 9.295136026380875e-05, + "loss": 6.3575, + "step": 850 + }, + { + "epoch": 0.48, + "learning_rate": 9.293075020610058e-05, + "loss": 6.9923, + "step": 851 + }, + { + "epoch": 0.48, + "learning_rate": 9.291014014839242e-05, + "loss": 7.0345, + "step": 852 + }, + { + "epoch": 0.48, + "learning_rate": 9.288953009068426e-05, + "loss": 6.8224, + "step": 853 + }, + { + "epoch": 0.48, + "learning_rate": 9.286892003297609e-05, + "loss": 7.0084, + "step": 854 + }, + { + "epoch": 0.48, + "learning_rate": 9.284830997526794e-05, + "loss": 6.9961, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 9.282769991755978e-05, + "loss": 6.8788, + "step": 856 + }, + { + "epoch": 0.48, + "learning_rate": 9.280708985985161e-05, + "loss": 6.8649, + "step": 857 + }, + { + "epoch": 0.48, + "learning_rate": 9.278647980214345e-05, + "loss": 6.946, + "step": 858 + }, + { + "epoch": 0.48, + "learning_rate": 9.276586974443528e-05, + "loss": 6.8718, + "step": 859 + }, + { + "epoch": 0.48, + "learning_rate": 9.274525968672713e-05, + "loss": 7.0052, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 9.272464962901897e-05, + "loss": 6.7953, + "step": 861 + }, + { + "epoch": 0.48, + "learning_rate": 9.270403957131081e-05, + "loss": 6.7643, + "step": 862 + }, + { + "epoch": 0.48, + "learning_rate": 9.268342951360264e-05, + "loss": 6.6812, + "step": 863 + }, + { + "epoch": 0.48, + "learning_rate": 9.266281945589447e-05, + "loss": 6.7101, + "step": 864 + }, + { + "epoch": 0.48, + "learning_rate": 9.264220939818632e-05, + "loss": 6.8645, + "step": 865 + }, + { + "epoch": 0.49, + "learning_rate": 9.262159934047816e-05, + "loss": 6.9544, + "step": 866 + }, + { + "epoch": 0.49, + "learning_rate": 9.260098928277e-05, + "loss": 6.7718, + "step": 867 + }, + { + "epoch": 0.49, + "learning_rate": 9.258037922506183e-05, + "loss": 6.7881, + "step": 868 + }, + { + "epoch": 0.49, + "learning_rate": 9.255976916735368e-05, + "loss": 6.852, + "step": 869 + }, + { + "epoch": 0.49, + "learning_rate": 9.25391591096455e-05, + "loss": 6.8457, + "step": 870 + }, + { + "epoch": 0.49, + "learning_rate": 9.251854905193735e-05, + "loss": 6.8808, + "step": 871 + }, + { + "epoch": 0.49, + "learning_rate": 9.249793899422918e-05, + "loss": 6.8655, + "step": 872 + }, + { + "epoch": 0.49, + "learning_rate": 9.247732893652102e-05, + "loss": 6.8937, + "step": 873 + }, + { + "epoch": 0.49, + "learning_rate": 9.245671887881287e-05, + "loss": 6.8309, + "step": 874 + }, + { + "epoch": 0.49, + "learning_rate": 9.243610882110471e-05, + "loss": 7.1575, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 9.241549876339655e-05, + "loss": 6.8417, + "step": 876 + }, + { + "epoch": 0.49, + "learning_rate": 9.239488870568838e-05, + "loss": 7.055, + "step": 877 + }, + { + "epoch": 0.49, + "learning_rate": 9.237427864798021e-05, + "loss": 6.9065, + "step": 878 + }, + { + "epoch": 0.49, + "learning_rate": 9.235366859027206e-05, + "loss": 6.7328, + "step": 879 + }, + { + "epoch": 0.49, + "learning_rate": 9.23330585325639e-05, + "loss": 6.912, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 9.231244847485574e-05, + "loss": 6.945, + "step": 881 + }, + { + "epoch": 0.49, + "learning_rate": 9.229183841714757e-05, + "loss": 6.6611, + "step": 882 + }, + { + "epoch": 0.49, + "learning_rate": 9.227122835943942e-05, + "loss": 6.911, + "step": 883 + }, + { + "epoch": 0.5, + "learning_rate": 9.225061830173125e-05, + "loss": 6.8797, + "step": 884 + }, + { + "epoch": 0.5, + "learning_rate": 9.223000824402309e-05, + "loss": 6.9586, + "step": 885 + }, + { + "epoch": 0.5, + "learning_rate": 9.220939818631492e-05, + "loss": 7.1094, + "step": 886 + }, + { + "epoch": 0.5, + "learning_rate": 9.218878812860676e-05, + "loss": 6.9738, + "step": 887 + }, + { + "epoch": 0.5, + "learning_rate": 9.21681780708986e-05, + "loss": 6.7878, + "step": 888 + }, + { + "epoch": 0.5, + "learning_rate": 9.214756801319045e-05, + "loss": 6.7673, + "step": 889 + }, + { + "epoch": 0.5, + "learning_rate": 9.212695795548228e-05, + "loss": 6.7735, + "step": 890 + }, + { + "epoch": 0.5, + "learning_rate": 9.210634789777411e-05, + "loss": 6.7932, + "step": 891 + }, + { + "epoch": 0.5, + "learning_rate": 9.208573784006595e-05, + "loss": 6.9499, + "step": 892 + }, + { + "epoch": 0.5, + "learning_rate": 9.20651277823578e-05, + "loss": 6.8664, + "step": 893 + }, + { + "epoch": 0.5, + "learning_rate": 9.204451772464964e-05, + "loss": 6.6964, + "step": 894 + }, + { + "epoch": 0.5, + "learning_rate": 9.202390766694147e-05, + "loss": 6.6728, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 9.200329760923331e-05, + "loss": 6.5146, + "step": 896 + }, + { + "epoch": 0.5, + "learning_rate": 9.198268755152516e-05, + "loss": 6.437, + "step": 897 + }, + { + "epoch": 0.5, + "learning_rate": 9.196207749381698e-05, + "loss": 6.8636, + "step": 898 + }, + { + "epoch": 0.5, + "learning_rate": 9.194146743610883e-05, + "loss": 6.2736, + "step": 899 + }, + { + "epoch": 0.5, + "learning_rate": 9.192085737840066e-05, + "loss": 6.2022, + "step": 900 + }, + { + "epoch": 0.51, + "learning_rate": 9.19002473206925e-05, + "loss": 6.8252, + "step": 901 + }, + { + "epoch": 0.51, + "learning_rate": 9.187963726298434e-05, + "loss": 7.0397, + "step": 902 + }, + { + "epoch": 0.51, + "learning_rate": 9.185902720527619e-05, + "loss": 7.0932, + "step": 903 + }, + { + "epoch": 0.51, + "learning_rate": 9.183841714756802e-05, + "loss": 6.8398, + "step": 904 + }, + { + "epoch": 0.51, + "learning_rate": 9.181780708985985e-05, + "loss": 7.1522, + "step": 905 + }, + { + "epoch": 0.51, + "learning_rate": 9.179719703215169e-05, + "loss": 7.1035, + "step": 906 + }, + { + "epoch": 0.51, + "learning_rate": 9.177658697444353e-05, + "loss": 6.8955, + "step": 907 + }, + { + "epoch": 0.51, + "learning_rate": 9.175597691673538e-05, + "loss": 6.775, + "step": 908 + }, + { + "epoch": 0.51, + "learning_rate": 9.173536685902721e-05, + "loss": 6.939, + "step": 909 + }, + { + "epoch": 0.51, + "learning_rate": 9.171475680131905e-05, + "loss": 6.9065, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 9.169414674361088e-05, + "loss": 7.0268, + "step": 911 + }, + { + "epoch": 0.51, + "learning_rate": 9.167353668590272e-05, + "loss": 6.8254, + "step": 912 + }, + { + "epoch": 0.51, + "learning_rate": 9.165292662819457e-05, + "loss": 6.7227, + "step": 913 + }, + { + "epoch": 0.51, + "learning_rate": 9.16323165704864e-05, + "loss": 7.0315, + "step": 914 + }, + { + "epoch": 0.51, + "learning_rate": 9.161170651277824e-05, + "loss": 6.8389, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 9.159109645507008e-05, + "loss": 6.7625, + "step": 916 + }, + { + "epoch": 0.51, + "learning_rate": 9.157048639736191e-05, + "loss": 6.7653, + "step": 917 + }, + { + "epoch": 0.51, + "learning_rate": 9.154987633965374e-05, + "loss": 6.8669, + "step": 918 + }, + { + "epoch": 0.52, + "learning_rate": 9.152926628194559e-05, + "loss": 6.8483, + "step": 919 + }, + { + "epoch": 0.52, + "learning_rate": 9.150865622423743e-05, + "loss": 6.6915, + "step": 920 + }, + { + "epoch": 0.52, + "learning_rate": 9.148804616652927e-05, + "loss": 6.8683, + "step": 921 + }, + { + "epoch": 0.52, + "learning_rate": 9.146743610882112e-05, + "loss": 6.6229, + "step": 922 + }, + { + "epoch": 0.52, + "learning_rate": 9.144682605111295e-05, + "loss": 6.7383, + "step": 923 + }, + { + "epoch": 0.52, + "learning_rate": 9.142621599340479e-05, + "loss": 6.9092, + "step": 924 + }, + { + "epoch": 0.52, + "learning_rate": 9.140560593569662e-05, + "loss": 6.907, + "step": 925 + }, + { + "epoch": 0.52, + "learning_rate": 9.138499587798846e-05, + "loss": 6.9491, + "step": 926 + }, + { + "epoch": 0.52, + "learning_rate": 9.13643858202803e-05, + "loss": 6.7509, + "step": 927 + }, + { + "epoch": 0.52, + "learning_rate": 9.134377576257214e-05, + "loss": 6.7511, + "step": 928 + }, + { + "epoch": 0.52, + "learning_rate": 9.132316570486398e-05, + "loss": 6.7765, + "step": 929 + }, + { + "epoch": 0.52, + "learning_rate": 9.130255564715582e-05, + "loss": 6.8292, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 9.128194558944765e-05, + "loss": 6.9093, + "step": 931 + }, + { + "epoch": 0.52, + "learning_rate": 9.126133553173948e-05, + "loss": 7.0032, + "step": 932 + }, + { + "epoch": 0.52, + "learning_rate": 9.124072547403133e-05, + "loss": 6.6126, + "step": 933 + }, + { + "epoch": 0.52, + "learning_rate": 9.122011541632317e-05, + "loss": 6.9268, + "step": 934 + }, + { + "epoch": 0.52, + "learning_rate": 9.119950535861501e-05, + "loss": 6.8153, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 9.117889530090684e-05, + "loss": 6.9395, + "step": 936 + }, + { + "epoch": 0.53, + "learning_rate": 9.115828524319869e-05, + "loss": 6.8741, + "step": 937 + }, + { + "epoch": 0.53, + "learning_rate": 9.113767518549052e-05, + "loss": 6.7412, + "step": 938 + }, + { + "epoch": 0.53, + "learning_rate": 9.111706512778236e-05, + "loss": 6.539, + "step": 939 + }, + { + "epoch": 0.53, + "learning_rate": 9.10964550700742e-05, + "loss": 6.819, + "step": 940 + }, + { + "epoch": 0.53, + "learning_rate": 9.107584501236603e-05, + "loss": 6.8212, + "step": 941 + }, + { + "epoch": 0.53, + "learning_rate": 9.105523495465788e-05, + "loss": 6.6822, + "step": 942 + }, + { + "epoch": 0.53, + "learning_rate": 9.103462489694972e-05, + "loss": 6.9584, + "step": 943 + }, + { + "epoch": 0.53, + "learning_rate": 9.101401483924156e-05, + "loss": 6.6507, + "step": 944 + }, + { + "epoch": 0.53, + "learning_rate": 9.09934047815334e-05, + "loss": 6.934, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 9.097279472382522e-05, + "loss": 6.602, + "step": 946 + }, + { + "epoch": 0.53, + "learning_rate": 9.095218466611707e-05, + "loss": 6.2367, + "step": 947 + }, + { + "epoch": 0.53, + "learning_rate": 9.093157460840891e-05, + "loss": 6.725, + "step": 948 + }, + { + "epoch": 0.53, + "learning_rate": 9.091096455070075e-05, + "loss": 6.7638, + "step": 949 + }, + { + "epoch": 0.53, + "learning_rate": 9.089035449299258e-05, + "loss": 5.8387, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 9.086974443528443e-05, + "loss": 6.9744, + "step": 951 + }, + { + "epoch": 0.53, + "learning_rate": 9.084913437757626e-05, + "loss": 7.032, + "step": 952 + }, + { + "epoch": 0.53, + "learning_rate": 9.08285243198681e-05, + "loss": 7.0119, + "step": 953 + }, + { + "epoch": 0.53, + "learning_rate": 9.080791426215994e-05, + "loss": 6.8657, + "step": 954 + }, + { + "epoch": 0.54, + "learning_rate": 9.078730420445177e-05, + "loss": 6.8281, + "step": 955 + }, + { + "epoch": 0.54, + "learning_rate": 9.076669414674362e-05, + "loss": 6.8987, + "step": 956 + }, + { + "epoch": 0.54, + "learning_rate": 9.074608408903546e-05, + "loss": 6.9349, + "step": 957 + }, + { + "epoch": 0.54, + "learning_rate": 9.072547403132729e-05, + "loss": 6.7392, + "step": 958 + }, + { + "epoch": 0.54, + "learning_rate": 9.070486397361912e-05, + "loss": 6.9836, + "step": 959 + }, + { + "epoch": 0.54, + "learning_rate": 9.068425391591096e-05, + "loss": 6.7774, + "step": 960 + }, + { + "epoch": 0.54, + "learning_rate": 9.06636438582028e-05, + "loss": 6.6995, + "step": 961 + }, + { + "epoch": 0.54, + "learning_rate": 9.064303380049465e-05, + "loss": 6.8346, + "step": 962 + }, + { + "epoch": 0.54, + "learning_rate": 9.062242374278649e-05, + "loss": 6.7985, + "step": 963 + }, + { + "epoch": 0.54, + "learning_rate": 9.060181368507832e-05, + "loss": 6.7641, + "step": 964 + }, + { + "epoch": 0.54, + "learning_rate": 9.058120362737015e-05, + "loss": 6.7869, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 9.0560593569662e-05, + "loss": 6.6657, + "step": 966 + }, + { + "epoch": 0.54, + "learning_rate": 9.053998351195384e-05, + "loss": 6.8386, + "step": 967 + }, + { + "epoch": 0.54, + "learning_rate": 9.051937345424568e-05, + "loss": 6.7846, + "step": 968 + }, + { + "epoch": 0.54, + "learning_rate": 9.049876339653751e-05, + "loss": 6.9469, + "step": 969 + }, + { + "epoch": 0.54, + "learning_rate": 9.047815333882936e-05, + "loss": 6.6813, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 9.04575432811212e-05, + "loss": 6.8754, + "step": 971 + }, + { + "epoch": 0.54, + "learning_rate": 9.043693322341303e-05, + "loss": 6.8385, + "step": 972 + }, + { + "epoch": 0.55, + "learning_rate": 9.041632316570486e-05, + "loss": 6.8949, + "step": 973 + }, + { + "epoch": 0.55, + "learning_rate": 9.03957131079967e-05, + "loss": 7.0168, + "step": 974 + }, + { + "epoch": 0.55, + "learning_rate": 9.037510305028855e-05, + "loss": 6.8606, + "step": 975 + }, + { + "epoch": 0.55, + "learning_rate": 9.035449299258039e-05, + "loss": 6.7158, + "step": 976 + }, + { + "epoch": 0.55, + "learning_rate": 9.033388293487223e-05, + "loss": 7.1046, + "step": 977 + }, + { + "epoch": 0.55, + "learning_rate": 9.031327287716406e-05, + "loss": 6.7823, + "step": 978 + }, + { + "epoch": 0.55, + "learning_rate": 9.029266281945589e-05, + "loss": 6.7202, + "step": 979 + }, + { + "epoch": 0.55, + "learning_rate": 9.027205276174774e-05, + "loss": 6.6599, + "step": 980 + }, + { + "epoch": 0.55, + "learning_rate": 9.025144270403958e-05, + "loss": 6.6146, + "step": 981 + }, + { + "epoch": 0.55, + "learning_rate": 9.023083264633141e-05, + "loss": 6.6924, + "step": 982 + }, + { + "epoch": 0.55, + "learning_rate": 9.021022258862325e-05, + "loss": 6.9321, + "step": 983 + }, + { + "epoch": 0.55, + "learning_rate": 9.01896125309151e-05, + "loss": 6.8577, + "step": 984 + }, + { + "epoch": 0.55, + "learning_rate": 9.016900247320693e-05, + "loss": 6.695, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 9.014839241549877e-05, + "loss": 6.8493, + "step": 986 + }, + { + "epoch": 0.55, + "learning_rate": 9.01277823577906e-05, + "loss": 6.8749, + "step": 987 + }, + { + "epoch": 0.55, + "learning_rate": 9.010717230008244e-05, + "loss": 6.7436, + "step": 988 + }, + { + "epoch": 0.55, + "learning_rate": 9.008656224237429e-05, + "loss": 7.0578, + "step": 989 + }, + { + "epoch": 0.55, + "learning_rate": 9.006595218466613e-05, + "loss": 6.6429, + "step": 990 + }, + { + "epoch": 0.56, + "learning_rate": 9.004534212695796e-05, + "loss": 6.5682, + "step": 991 + }, + { + "epoch": 0.56, + "learning_rate": 9.00247320692498e-05, + "loss": 6.9671, + "step": 992 + }, + { + "epoch": 0.56, + "learning_rate": 9.000412201154163e-05, + "loss": 6.6607, + "step": 993 + }, + { + "epoch": 0.56, + "learning_rate": 8.998351195383347e-05, + "loss": 6.6917, + "step": 994 + }, + { + "epoch": 0.56, + "learning_rate": 8.996290189612532e-05, + "loss": 6.6738, + "step": 995 + }, + { + "epoch": 0.56, + "learning_rate": 8.994229183841715e-05, + "loss": 6.2442, + "step": 996 + }, + { + "epoch": 0.56, + "learning_rate": 8.992168178070899e-05, + "loss": 6.3304, + "step": 997 + }, + { + "epoch": 0.56, + "learning_rate": 8.990107172300083e-05, + "loss": 6.9551, + "step": 998 + }, + { + "epoch": 0.56, + "learning_rate": 8.988046166529266e-05, + "loss": 6.5172, + "step": 999 + }, + { + "epoch": 0.56, + "learning_rate": 8.985985160758451e-05, + "loss": 6.2294, + "step": 1000 + }, + { + "epoch": 0.56, + "eval_loss": 10.509584426879883, + "eval_runtime": 1319.6205, + "eval_samples_per_second": 2.002, + "eval_steps_per_second": 0.251, + "eval_wer": 1.0, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 8.983924154987634e-05, + "loss": 7.1895, + "step": 1001 + }, + { + "epoch": 0.56, + "learning_rate": 8.981863149216818e-05, + "loss": 6.9751, + "step": 1002 + }, + { + "epoch": 0.56, + "learning_rate": 8.979802143446002e-05, + "loss": 6.9608, + "step": 1003 + }, + { + "epoch": 0.56, + "learning_rate": 8.977741137675187e-05, + "loss": 7.0492, + "step": 1004 + }, + { + "epoch": 0.56, + "learning_rate": 8.97568013190437e-05, + "loss": 7.1122, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 8.973619126133553e-05, + "loss": 6.9864, + "step": 1006 + }, + { + "epoch": 0.56, + "learning_rate": 8.971558120362737e-05, + "loss": 6.8662, + "step": 1007 + }, + { + "epoch": 0.57, + "learning_rate": 8.969497114591921e-05, + "loss": 6.8473, + "step": 1008 + }, + { + "epoch": 0.57, + "learning_rate": 8.967436108821106e-05, + "loss": 6.8182, + "step": 1009 + }, + { + "epoch": 0.57, + "learning_rate": 8.965375103050289e-05, + "loss": 6.7838, + "step": 1010 + }, + { + "epoch": 0.57, + "learning_rate": 8.963314097279473e-05, + "loss": 6.8163, + "step": 1011 + }, + { + "epoch": 0.57, + "learning_rate": 8.961253091508657e-05, + "loss": 6.8971, + "step": 1012 + }, + { + "epoch": 0.57, + "learning_rate": 8.95919208573784e-05, + "loss": 6.7517, + "step": 1013 + }, + { + "epoch": 0.57, + "learning_rate": 8.957131079967023e-05, + "loss": 7.0059, + "step": 1014 + }, + { + "epoch": 0.57, + "learning_rate": 8.955070074196208e-05, + "loss": 6.8824, + "step": 1015 + }, + { + "epoch": 0.57, + "learning_rate": 8.953009068425392e-05, + "loss": 6.7466, + "step": 1016 + }, + { + "epoch": 0.57, + "learning_rate": 8.950948062654576e-05, + "loss": 6.8158, + "step": 1017 + }, + { + "epoch": 0.57, + "learning_rate": 8.948887056883761e-05, + "loss": 6.7501, + "step": 1018 + }, + { + "epoch": 0.57, + "learning_rate": 8.946826051112944e-05, + "loss": 7.0113, + "step": 1019 + }, + { + "epoch": 0.57, + "learning_rate": 8.944765045342127e-05, + "loss": 7.001, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 8.942704039571311e-05, + "loss": 6.8119, + "step": 1021 + }, + { + "epoch": 0.57, + "learning_rate": 8.940643033800495e-05, + "loss": 6.6904, + "step": 1022 + }, + { + "epoch": 0.57, + "learning_rate": 8.938582028029678e-05, + "loss": 6.638, + "step": 1023 + }, + { + "epoch": 0.57, + "learning_rate": 8.936521022258863e-05, + "loss": 6.8542, + "step": 1024 + }, + { + "epoch": 0.57, + "learning_rate": 8.934460016488047e-05, + "loss": 6.8893, + "step": 1025 + }, + { + "epoch": 0.58, + "learning_rate": 8.93239901071723e-05, + "loss": 6.7111, + "step": 1026 + }, + { + "epoch": 0.58, + "learning_rate": 8.930338004946414e-05, + "loss": 6.8946, + "step": 1027 + }, + { + "epoch": 0.58, + "learning_rate": 8.928276999175597e-05, + "loss": 6.906, + "step": 1028 + }, + { + "epoch": 0.58, + "learning_rate": 8.926215993404782e-05, + "loss": 6.7917, + "step": 1029 + }, + { + "epoch": 0.58, + "learning_rate": 8.924154987633966e-05, + "loss": 6.8436, + "step": 1030 + }, + { + "epoch": 0.58, + "learning_rate": 8.92209398186315e-05, + "loss": 6.7664, + "step": 1031 + }, + { + "epoch": 0.58, + "learning_rate": 8.920032976092333e-05, + "loss": 6.8412, + "step": 1032 + }, + { + "epoch": 0.58, + "learning_rate": 8.917971970321516e-05, + "loss": 6.7491, + "step": 1033 + }, + { + "epoch": 0.58, + "learning_rate": 8.915910964550701e-05, + "loss": 6.5337, + "step": 1034 + }, + { + "epoch": 0.58, + "learning_rate": 8.913849958779885e-05, + "loss": 6.7558, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 8.91178895300907e-05, + "loss": 6.8304, + "step": 1036 + }, + { + "epoch": 0.58, + "learning_rate": 8.909727947238252e-05, + "loss": 6.5428, + "step": 1037 + }, + { + "epoch": 0.58, + "learning_rate": 8.907666941467437e-05, + "loss": 6.7771, + "step": 1038 + }, + { + "epoch": 0.58, + "learning_rate": 8.905605935696621e-05, + "loss": 6.7969, + "step": 1039 + }, + { + "epoch": 0.58, + "learning_rate": 8.903544929925804e-05, + "loss": 6.7782, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 8.901483924154988e-05, + "loss": 6.7926, + "step": 1041 + }, + { + "epoch": 0.58, + "learning_rate": 8.899422918384171e-05, + "loss": 6.7056, + "step": 1042 + }, + { + "epoch": 0.58, + "learning_rate": 8.897361912613356e-05, + "loss": 6.7019, + "step": 1043 + }, + { + "epoch": 0.59, + "learning_rate": 8.89530090684254e-05, + "loss": 6.6115, + "step": 1044 + }, + { + "epoch": 0.59, + "learning_rate": 8.893239901071724e-05, + "loss": 6.5894, + "step": 1045 + }, + { + "epoch": 0.59, + "learning_rate": 8.891178895300907e-05, + "loss": 6.7535, + "step": 1046 + }, + { + "epoch": 0.59, + "learning_rate": 8.88911788953009e-05, + "loss": 6.6846, + "step": 1047 + }, + { + "epoch": 0.59, + "learning_rate": 8.887056883759275e-05, + "loss": 6.1387, + "step": 1048 + }, + { + "epoch": 0.59, + "learning_rate": 8.884995877988459e-05, + "loss": 6.2811, + "step": 1049 + }, + { + "epoch": 0.59, + "learning_rate": 8.882934872217643e-05, + "loss": 6.5854, + "step": 1050 + }, + { + "epoch": 0.59, + "learning_rate": 8.880873866446826e-05, + "loss": 7.3416, + "step": 1051 + }, + { + "epoch": 0.59, + "learning_rate": 8.87881286067601e-05, + "loss": 6.8758, + "step": 1052 + }, + { + "epoch": 0.59, + "learning_rate": 8.876751854905194e-05, + "loss": 6.9227, + "step": 1053 + }, + { + "epoch": 0.59, + "learning_rate": 8.874690849134378e-05, + "loss": 6.8716, + "step": 1054 + }, + { + "epoch": 0.59, + "learning_rate": 8.872629843363561e-05, + "loss": 6.9722, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 8.870568837592745e-05, + "loss": 6.9472, + "step": 1056 + }, + { + "epoch": 0.59, + "learning_rate": 8.86850783182193e-05, + "loss": 6.993, + "step": 1057 + }, + { + "epoch": 0.59, + "learning_rate": 8.866446826051114e-05, + "loss": 6.8096, + "step": 1058 + }, + { + "epoch": 0.59, + "learning_rate": 8.864385820280298e-05, + "loss": 6.8049, + "step": 1059 + }, + { + "epoch": 0.59, + "learning_rate": 8.862324814509481e-05, + "loss": 6.8168, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 8.860263808738664e-05, + "loss": 6.8581, + "step": 1061 + }, + { + "epoch": 0.6, + "learning_rate": 8.858202802967849e-05, + "loss": 6.8689, + "step": 1062 + }, + { + "epoch": 0.6, + "learning_rate": 8.856141797197033e-05, + "loss": 7.0835, + "step": 1063 + }, + { + "epoch": 0.6, + "learning_rate": 8.854080791426217e-05, + "loss": 6.7236, + "step": 1064 + }, + { + "epoch": 0.6, + "learning_rate": 8.8520197856554e-05, + "loss": 6.9696, + "step": 1065 + }, + { + "epoch": 0.6, + "learning_rate": 8.849958779884585e-05, + "loss": 6.5983, + "step": 1066 + }, + { + "epoch": 0.6, + "learning_rate": 8.847897774113768e-05, + "loss": 7.0441, + "step": 1067 + }, + { + "epoch": 0.6, + "learning_rate": 8.845836768342952e-05, + "loss": 6.803, + "step": 1068 + }, + { + "epoch": 0.6, + "learning_rate": 8.843775762572135e-05, + "loss": 6.7793, + "step": 1069 + }, + { + "epoch": 0.6, + "learning_rate": 8.841714756801319e-05, + "loss": 6.8664, + "step": 1070 + }, + { + "epoch": 0.6, + "learning_rate": 8.839653751030504e-05, + "loss": 6.7362, + "step": 1071 + }, + { + "epoch": 0.6, + "learning_rate": 8.837592745259688e-05, + "loss": 6.6317, + "step": 1072 + }, + { + "epoch": 0.6, + "learning_rate": 8.835531739488871e-05, + "loss": 6.8396, + "step": 1073 + }, + { + "epoch": 0.6, + "learning_rate": 8.833470733718054e-05, + "loss": 6.7941, + "step": 1074 + }, + { + "epoch": 0.6, + "learning_rate": 8.831409727947238e-05, + "loss": 6.7505, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 8.829348722176423e-05, + "loss": 6.8142, + "step": 1076 + }, + { + "epoch": 0.6, + "learning_rate": 8.827287716405607e-05, + "loss": 6.7486, + "step": 1077 + }, + { + "epoch": 0.6, + "learning_rate": 8.82522671063479e-05, + "loss": 6.6813, + "step": 1078 + }, + { + "epoch": 0.6, + "learning_rate": 8.823165704863974e-05, + "loss": 6.9219, + "step": 1079 + }, + { + "epoch": 0.61, + "learning_rate": 8.821104699093157e-05, + "loss": 6.8199, + "step": 1080 + }, + { + "epoch": 0.61, + "learning_rate": 8.819043693322342e-05, + "loss": 6.8333, + "step": 1081 + }, + { + "epoch": 0.61, + "learning_rate": 8.816982687551526e-05, + "loss": 6.7589, + "step": 1082 + }, + { + "epoch": 0.61, + "learning_rate": 8.814921681780709e-05, + "loss": 6.6839, + "step": 1083 + }, + { + "epoch": 0.61, + "learning_rate": 8.812860676009893e-05, + "loss": 6.8829, + "step": 1084 + }, + { + "epoch": 0.61, + "learning_rate": 8.810799670239078e-05, + "loss": 6.6602, + "step": 1085 + }, + { + "epoch": 0.61, + "learning_rate": 8.808738664468262e-05, + "loss": 6.8833, + "step": 1086 + }, + { + "epoch": 0.61, + "learning_rate": 8.806677658697445e-05, + "loss": 6.9797, + "step": 1087 + }, + { + "epoch": 0.61, + "learning_rate": 8.804616652926628e-05, + "loss": 6.8678, + "step": 1088 + }, + { + "epoch": 0.61, + "learning_rate": 8.802555647155812e-05, + "loss": 6.8566, + "step": 1089 + }, + { + "epoch": 0.61, + "learning_rate": 8.800494641384997e-05, + "loss": 6.6994, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 8.798433635614181e-05, + "loss": 6.8069, + "step": 1091 + }, + { + "epoch": 0.61, + "learning_rate": 8.796372629843364e-05, + "loss": 6.8855, + "step": 1092 + }, + { + "epoch": 0.61, + "learning_rate": 8.794311624072548e-05, + "loss": 6.6551, + "step": 1093 + }, + { + "epoch": 0.61, + "learning_rate": 8.792250618301731e-05, + "loss": 6.3406, + "step": 1094 + }, + { + "epoch": 0.61, + "learning_rate": 8.790189612530915e-05, + "loss": 6.644, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 8.7881286067601e-05, + "loss": 6.4605, + "step": 1096 + }, + { + "epoch": 0.61, + "learning_rate": 8.786067600989283e-05, + "loss": 6.5818, + "step": 1097 + }, + { + "epoch": 0.62, + "learning_rate": 8.784006595218467e-05, + "loss": 6.6784, + "step": 1098 + }, + { + "epoch": 0.62, + "learning_rate": 8.781945589447651e-05, + "loss": 6.4825, + "step": 1099 + }, + { + "epoch": 0.62, + "learning_rate": 8.779884583676834e-05, + "loss": 6.1243, + "step": 1100 + }, + { + "epoch": 0.62, + "learning_rate": 8.777823577906017e-05, + "loss": 6.7527, + "step": 1101 + }, + { + "epoch": 0.62, + "learning_rate": 8.775762572135202e-05, + "loss": 7.0371, + "step": 1102 + }, + { + "epoch": 0.62, + "learning_rate": 8.773701566364386e-05, + "loss": 6.9179, + "step": 1103 + }, + { + "epoch": 0.62, + "learning_rate": 8.77164056059357e-05, + "loss": 6.8311, + "step": 1104 + }, + { + "epoch": 0.62, + "learning_rate": 8.769579554822755e-05, + "loss": 6.7603, + "step": 1105 + }, + { + "epoch": 0.62, + "learning_rate": 8.767518549051938e-05, + "loss": 6.7055, + "step": 1106 + }, + { + "epoch": 0.62, + "learning_rate": 8.765457543281122e-05, + "loss": 6.8738, + "step": 1107 + }, + { + "epoch": 0.62, + "learning_rate": 8.763396537510305e-05, + "loss": 6.7932, + "step": 1108 + }, + { + "epoch": 0.62, + "learning_rate": 8.76133553173949e-05, + "loss": 6.922, + "step": 1109 + }, + { + "epoch": 0.62, + "learning_rate": 8.759274525968672e-05, + "loss": 6.8378, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 8.757213520197857e-05, + "loss": 6.6728, + "step": 1111 + }, + { + "epoch": 0.62, + "learning_rate": 8.755152514427041e-05, + "loss": 6.87, + "step": 1112 + }, + { + "epoch": 0.62, + "learning_rate": 8.753091508656225e-05, + "loss": 6.7664, + "step": 1113 + }, + { + "epoch": 0.62, + "learning_rate": 8.751030502885408e-05, + "loss": 6.786, + "step": 1114 + }, + { + "epoch": 0.62, + "learning_rate": 8.748969497114591e-05, + "loss": 6.834, + "step": 1115 + }, + { + "epoch": 0.63, + "learning_rate": 8.746908491343776e-05, + "loss": 6.5574, + "step": 1116 + }, + { + "epoch": 0.63, + "learning_rate": 8.74484748557296e-05, + "loss": 6.6804, + "step": 1117 + }, + { + "epoch": 0.63, + "learning_rate": 8.742786479802144e-05, + "loss": 6.871, + "step": 1118 + }, + { + "epoch": 0.63, + "learning_rate": 8.740725474031329e-05, + "loss": 6.8164, + "step": 1119 + }, + { + "epoch": 0.63, + "learning_rate": 8.738664468260512e-05, + "loss": 6.6891, + "step": 1120 + }, + { + "epoch": 0.63, + "learning_rate": 8.736603462489695e-05, + "loss": 6.7652, + "step": 1121 + }, + { + "epoch": 0.63, + "learning_rate": 8.734542456718879e-05, + "loss": 6.7018, + "step": 1122 + }, + { + "epoch": 0.63, + "learning_rate": 8.732481450948063e-05, + "loss": 6.5966, + "step": 1123 + }, + { + "epoch": 0.63, + "learning_rate": 8.730420445177246e-05, + "loss": 6.7043, + "step": 1124 + }, + { + "epoch": 0.63, + "learning_rate": 8.728359439406431e-05, + "loss": 6.6884, + "step": 1125 + }, + { + "epoch": 0.63, + "learning_rate": 8.726298433635615e-05, + "loss": 6.5879, + "step": 1126 + }, + { + "epoch": 0.63, + "learning_rate": 8.724237427864798e-05, + "loss": 6.6834, + "step": 1127 + }, + { + "epoch": 0.63, + "learning_rate": 8.722176422093982e-05, + "loss": 7.0349, + "step": 1128 + }, + { + "epoch": 0.63, + "learning_rate": 8.720115416323165e-05, + "loss": 6.7407, + "step": 1129 + }, + { + "epoch": 0.63, + "learning_rate": 8.71805441055235e-05, + "loss": 6.8786, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 8.715993404781534e-05, + "loss": 6.799, + "step": 1131 + }, + { + "epoch": 0.63, + "learning_rate": 8.713932399010718e-05, + "loss": 6.7843, + "step": 1132 + }, + { + "epoch": 0.64, + "learning_rate": 8.711871393239901e-05, + "loss": 6.6275, + "step": 1133 + }, + { + "epoch": 0.64, + "learning_rate": 8.709810387469086e-05, + "loss": 6.6047, + "step": 1134 + }, + { + "epoch": 0.64, + "learning_rate": 8.707749381698269e-05, + "loss": 6.7882, + "step": 1135 + }, + { + "epoch": 0.64, + "learning_rate": 8.705688375927453e-05, + "loss": 6.7289, + "step": 1136 + }, + { + "epoch": 0.64, + "learning_rate": 8.703627370156637e-05, + "loss": 6.7386, + "step": 1137 + }, + { + "epoch": 0.64, + "learning_rate": 8.70156636438582e-05, + "loss": 6.7633, + "step": 1138 + }, + { + "epoch": 0.64, + "learning_rate": 8.699505358615005e-05, + "loss": 6.505, + "step": 1139 + }, + { + "epoch": 0.64, + "learning_rate": 8.697444352844189e-05, + "loss": 6.7294, + "step": 1140 + }, + { + "epoch": 0.64, + "learning_rate": 8.695383347073372e-05, + "loss": 6.7089, + "step": 1141 + }, + { + "epoch": 0.64, + "learning_rate": 8.693322341302555e-05, + "loss": 7.0219, + "step": 1142 + }, + { + "epoch": 0.64, + "learning_rate": 8.691261335531739e-05, + "loss": 6.78, + "step": 1143 + }, + { + "epoch": 0.64, + "learning_rate": 8.689200329760924e-05, + "loss": 6.7707, + "step": 1144 + }, + { + "epoch": 0.64, + "learning_rate": 8.687139323990108e-05, + "loss": 6.5602, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 8.685078318219292e-05, + "loss": 6.5457, + "step": 1146 + }, + { + "epoch": 0.64, + "learning_rate": 8.683017312448475e-05, + "loss": 6.5214, + "step": 1147 + }, + { + "epoch": 0.64, + "learning_rate": 8.680956306677658e-05, + "loss": 6.4071, + "step": 1148 + }, + { + "epoch": 0.64, + "learning_rate": 8.678895300906843e-05, + "loss": 6.2183, + "step": 1149 + }, + { + "epoch": 0.64, + "learning_rate": 8.676834295136027e-05, + "loss": 5.9782, + "step": 1150 + }, + { + "epoch": 0.65, + "learning_rate": 8.674773289365211e-05, + "loss": 6.8632, + "step": 1151 + }, + { + "epoch": 0.65, + "learning_rate": 8.672712283594394e-05, + "loss": 7.0484, + "step": 1152 + }, + { + "epoch": 0.65, + "learning_rate": 8.670651277823579e-05, + "loss": 6.7587, + "step": 1153 + }, + { + "epoch": 0.65, + "learning_rate": 8.668590272052763e-05, + "loss": 6.9431, + "step": 1154 + }, + { + "epoch": 0.65, + "learning_rate": 8.666529266281946e-05, + "loss": 6.9344, + "step": 1155 + }, + { + "epoch": 0.65, + "learning_rate": 8.664468260511129e-05, + "loss": 6.8539, + "step": 1156 + }, + { + "epoch": 0.65, + "learning_rate": 8.662407254740313e-05, + "loss": 6.7283, + "step": 1157 + }, + { + "epoch": 0.65, + "learning_rate": 8.660346248969498e-05, + "loss": 6.9349, + "step": 1158 + }, + { + "epoch": 0.65, + "learning_rate": 8.658285243198682e-05, + "loss": 6.5559, + "step": 1159 + }, + { + "epoch": 0.65, + "learning_rate": 8.656224237427866e-05, + "loss": 6.6893, + "step": 1160 + }, + { + "epoch": 0.65, + "learning_rate": 8.654163231657049e-05, + "loss": 6.8265, + "step": 1161 + }, + { + "epoch": 0.65, + "learning_rate": 8.652102225886232e-05, + "loss": 6.6181, + "step": 1162 + }, + { + "epoch": 0.65, + "learning_rate": 8.650041220115417e-05, + "loss": 6.6978, + "step": 1163 + }, + { + "epoch": 0.65, + "learning_rate": 8.647980214344601e-05, + "loss": 6.6792, + "step": 1164 + }, + { + "epoch": 0.65, + "learning_rate": 8.645919208573784e-05, + "loss": 6.8079, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 8.643858202802968e-05, + "loss": 6.804, + "step": 1166 + }, + { + "epoch": 0.65, + "learning_rate": 8.641797197032153e-05, + "loss": 6.8039, + "step": 1167 + }, + { + "epoch": 0.65, + "learning_rate": 8.639736191261336e-05, + "loss": 6.4179, + "step": 1168 + }, + { + "epoch": 0.66, + "learning_rate": 8.63767518549052e-05, + "loss": 6.7071, + "step": 1169 + }, + { + "epoch": 0.66, + "learning_rate": 8.635614179719703e-05, + "loss": 6.784, + "step": 1170 + }, + { + "epoch": 0.66, + "learning_rate": 8.633553173948887e-05, + "loss": 6.7009, + "step": 1171 + }, + { + "epoch": 0.66, + "learning_rate": 8.631492168178072e-05, + "loss": 6.5875, + "step": 1172 + }, + { + "epoch": 0.66, + "learning_rate": 8.629431162407256e-05, + "loss": 6.9206, + "step": 1173 + }, + { + "epoch": 0.66, + "learning_rate": 8.627370156636439e-05, + "loss": 6.6054, + "step": 1174 + }, + { + "epoch": 0.66, + "learning_rate": 8.625309150865622e-05, + "loss": 6.5818, + "step": 1175 + }, + { + "epoch": 0.66, + "learning_rate": 8.623248145094806e-05, + "loss": 6.6984, + "step": 1176 + }, + { + "epoch": 0.66, + "learning_rate": 8.62118713932399e-05, + "loss": 6.6868, + "step": 1177 + }, + { + "epoch": 0.66, + "learning_rate": 8.619126133553175e-05, + "loss": 6.6141, + "step": 1178 + }, + { + "epoch": 0.66, + "learning_rate": 8.617065127782358e-05, + "loss": 6.4755, + "step": 1179 + }, + { + "epoch": 0.66, + "learning_rate": 8.615004122011542e-05, + "loss": 6.8743, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 8.612943116240727e-05, + "loss": 6.6367, + "step": 1181 + }, + { + "epoch": 0.66, + "learning_rate": 8.61088211046991e-05, + "loss": 6.6419, + "step": 1182 + }, + { + "epoch": 0.66, + "learning_rate": 8.608821104699094e-05, + "loss": 6.6622, + "step": 1183 + }, + { + "epoch": 0.66, + "learning_rate": 8.606760098928277e-05, + "loss": 6.5273, + "step": 1184 + }, + { + "epoch": 0.66, + "learning_rate": 8.604699093157461e-05, + "loss": 7.0137, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 8.602638087386646e-05, + "loss": 6.62, + "step": 1186 + }, + { + "epoch": 0.67, + "learning_rate": 8.60057708161583e-05, + "loss": 6.7784, + "step": 1187 + }, + { + "epoch": 0.67, + "learning_rate": 8.598516075845013e-05, + "loss": 6.6826, + "step": 1188 + }, + { + "epoch": 0.67, + "learning_rate": 8.596455070074196e-05, + "loss": 6.6295, + "step": 1189 + }, + { + "epoch": 0.67, + "learning_rate": 8.59439406430338e-05, + "loss": 6.615, + "step": 1190 + }, + { + "epoch": 0.67, + "learning_rate": 8.592333058532564e-05, + "loss": 6.7134, + "step": 1191 + }, + { + "epoch": 0.67, + "learning_rate": 8.590272052761749e-05, + "loss": 6.4659, + "step": 1192 + }, + { + "epoch": 0.67, + "learning_rate": 8.588211046990932e-05, + "loss": 6.4737, + "step": 1193 + }, + { + "epoch": 0.67, + "learning_rate": 8.586150041220116e-05, + "loss": 6.565, + "step": 1194 + }, + { + "epoch": 0.67, + "learning_rate": 8.584089035449299e-05, + "loss": 6.2785, + "step": 1195 + }, + { + "epoch": 0.67, + "learning_rate": 8.582028029678483e-05, + "loss": 6.4619, + "step": 1196 + }, + { + "epoch": 0.67, + "learning_rate": 8.579967023907666e-05, + "loss": 6.2607, + "step": 1197 + }, + { + "epoch": 0.67, + "learning_rate": 8.577906018136851e-05, + "loss": 6.2957, + "step": 1198 + }, + { + "epoch": 0.67, + "learning_rate": 8.575845012366035e-05, + "loss": 5.925, + "step": 1199 + }, + { + "epoch": 0.67, + "learning_rate": 8.57378400659522e-05, + "loss": 5.5422, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 8.571723000824404e-05, + "loss": 6.846, + "step": 1201 + }, + { + "epoch": 0.67, + "learning_rate": 8.569661995053587e-05, + "loss": 6.6924, + "step": 1202 + }, + { + "epoch": 0.67, + "learning_rate": 8.56760098928277e-05, + "loss": 6.8836, + "step": 1203 + }, + { + "epoch": 0.67, + "learning_rate": 8.565539983511954e-05, + "loss": 6.7017, + "step": 1204 + }, + { + "epoch": 0.68, + "learning_rate": 8.563478977741138e-05, + "loss": 6.7779, + "step": 1205 + }, + { + "epoch": 0.68, + "learning_rate": 8.561417971970323e-05, + "loss": 6.6439, + "step": 1206 + }, + { + "epoch": 0.68, + "learning_rate": 8.559356966199506e-05, + "loss": 6.8293, + "step": 1207 + }, + { + "epoch": 0.68, + "learning_rate": 8.55729596042869e-05, + "loss": 6.7646, + "step": 1208 + }, + { + "epoch": 0.68, + "learning_rate": 8.555234954657873e-05, + "loss": 6.6797, + "step": 1209 + }, + { + "epoch": 0.68, + "learning_rate": 8.553173948887057e-05, + "loss": 6.688, + "step": 1210 + }, + { + "epoch": 0.68, + "learning_rate": 8.55111294311624e-05, + "loss": 6.7483, + "step": 1211 + }, + { + "epoch": 0.68, + "learning_rate": 8.549051937345425e-05, + "loss": 6.4949, + "step": 1212 + }, + { + "epoch": 0.68, + "learning_rate": 8.546990931574609e-05, + "loss": 6.7115, + "step": 1213 + }, + { + "epoch": 0.68, + "learning_rate": 8.544929925803793e-05, + "loss": 6.9018, + "step": 1214 + }, + { + "epoch": 0.68, + "learning_rate": 8.542868920032976e-05, + "loss": 6.6845, + "step": 1215 + }, + { + "epoch": 0.68, + "learning_rate": 8.54080791426216e-05, + "loss": 6.6071, + "step": 1216 + }, + { + "epoch": 0.68, + "learning_rate": 8.538746908491344e-05, + "loss": 6.6471, + "step": 1217 + }, + { + "epoch": 0.68, + "learning_rate": 8.536685902720528e-05, + "loss": 6.373, + "step": 1218 + }, + { + "epoch": 0.68, + "learning_rate": 8.534624896949712e-05, + "loss": 6.6219, + "step": 1219 + }, + { + "epoch": 0.68, + "learning_rate": 8.532563891178895e-05, + "loss": 6.7948, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 8.53050288540808e-05, + "loss": 6.7108, + "step": 1221 + }, + { + "epoch": 0.68, + "learning_rate": 8.528441879637263e-05, + "loss": 6.7589, + "step": 1222 + }, + { + "epoch": 0.69, + "learning_rate": 8.526380873866447e-05, + "loss": 6.8153, + "step": 1223 + }, + { + "epoch": 0.69, + "learning_rate": 8.524319868095631e-05, + "loss": 6.6347, + "step": 1224 + }, + { + "epoch": 0.69, + "learning_rate": 8.522258862324814e-05, + "loss": 6.5844, + "step": 1225 + }, + { + "epoch": 0.69, + "learning_rate": 8.520197856553999e-05, + "loss": 6.6931, + "step": 1226 + }, + { + "epoch": 0.69, + "learning_rate": 8.518136850783183e-05, + "loss": 6.6448, + "step": 1227 + }, + { + "epoch": 0.69, + "learning_rate": 8.516075845012367e-05, + "loss": 6.6149, + "step": 1228 + }, + { + "epoch": 0.69, + "learning_rate": 8.51401483924155e-05, + "loss": 6.6538, + "step": 1229 + }, + { + "epoch": 0.69, + "learning_rate": 8.511953833470733e-05, + "loss": 6.966, + "step": 1230 + }, + { + "epoch": 0.69, + "learning_rate": 8.509892827699918e-05, + "loss": 6.7125, + "step": 1231 + }, + { + "epoch": 0.69, + "learning_rate": 8.507831821929102e-05, + "loss": 6.4244, + "step": 1232 + }, + { + "epoch": 0.69, + "learning_rate": 8.505770816158286e-05, + "loss": 6.6848, + "step": 1233 + }, + { + "epoch": 0.69, + "learning_rate": 8.50370981038747e-05, + "loss": 6.5554, + "step": 1234 + }, + { + "epoch": 0.69, + "learning_rate": 8.501648804616654e-05, + "loss": 6.7319, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 8.499587798845837e-05, + "loss": 6.6032, + "step": 1236 + }, + { + "epoch": 0.69, + "learning_rate": 8.497526793075021e-05, + "loss": 6.7213, + "step": 1237 + }, + { + "epoch": 0.69, + "learning_rate": 8.495465787304205e-05, + "loss": 6.4107, + "step": 1238 + }, + { + "epoch": 0.69, + "learning_rate": 8.493404781533388e-05, + "loss": 6.7667, + "step": 1239 + }, + { + "epoch": 0.7, + "learning_rate": 8.491343775762573e-05, + "loss": 6.6309, + "step": 1240 + }, + { + "epoch": 0.7, + "learning_rate": 8.489282769991757e-05, + "loss": 6.6216, + "step": 1241 + }, + { + "epoch": 0.7, + "learning_rate": 8.48722176422094e-05, + "loss": 6.7474, + "step": 1242 + }, + { + "epoch": 0.7, + "learning_rate": 8.485160758450123e-05, + "loss": 6.5319, + "step": 1243 + }, + { + "epoch": 0.7, + "learning_rate": 8.483099752679307e-05, + "loss": 6.4562, + "step": 1244 + }, + { + "epoch": 0.7, + "learning_rate": 8.481038746908492e-05, + "loss": 6.3578, + "step": 1245 + }, + { + "epoch": 0.7, + "learning_rate": 8.478977741137676e-05, + "loss": 6.4926, + "step": 1246 + }, + { + "epoch": 0.7, + "learning_rate": 8.47691673536686e-05, + "loss": 6.4763, + "step": 1247 + }, + { + "epoch": 0.7, + "learning_rate": 8.474855729596043e-05, + "loss": 6.183, + "step": 1248 + }, + { + "epoch": 0.7, + "learning_rate": 8.472794723825228e-05, + "loss": 6.0288, + "step": 1249 + }, + { + "epoch": 0.7, + "learning_rate": 8.47073371805441e-05, + "loss": 6.1746, + "step": 1250 + }, + { + "epoch": 0.7, + "learning_rate": 8.468672712283595e-05, + "loss": 6.8434, + "step": 1251 + }, + { + "epoch": 0.7, + "learning_rate": 8.466611706512778e-05, + "loss": 6.6436, + "step": 1252 + }, + { + "epoch": 0.7, + "learning_rate": 8.464550700741962e-05, + "loss": 6.7352, + "step": 1253 + }, + { + "epoch": 0.7, + "learning_rate": 8.462489694971147e-05, + "loss": 6.7205, + "step": 1254 + }, + { + "epoch": 0.7, + "learning_rate": 8.460428689200331e-05, + "loss": 6.9031, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 8.458367683429514e-05, + "loss": 6.6338, + "step": 1256 + }, + { + "epoch": 0.7, + "learning_rate": 8.456306677658697e-05, + "loss": 6.6341, + "step": 1257 + }, + { + "epoch": 0.71, + "learning_rate": 8.454245671887881e-05, + "loss": 6.7685, + "step": 1258 + }, + { + "epoch": 0.71, + "learning_rate": 8.452184666117066e-05, + "loss": 6.6786, + "step": 1259 + }, + { + "epoch": 0.71, + "learning_rate": 8.45012366034625e-05, + "loss": 6.8482, + "step": 1260 + }, + { + "epoch": 0.71, + "learning_rate": 8.448062654575433e-05, + "loss": 6.7403, + "step": 1261 + }, + { + "epoch": 0.71, + "learning_rate": 8.446001648804617e-05, + "loss": 6.6734, + "step": 1262 + }, + { + "epoch": 0.71, + "learning_rate": 8.4439406430338e-05, + "loss": 6.6826, + "step": 1263 + }, + { + "epoch": 0.71, + "learning_rate": 8.441879637262985e-05, + "loss": 6.5286, + "step": 1264 + }, + { + "epoch": 0.71, + "learning_rate": 8.439818631492169e-05, + "loss": 6.7194, + "step": 1265 + }, + { + "epoch": 0.71, + "learning_rate": 8.437757625721352e-05, + "loss": 6.7763, + "step": 1266 + }, + { + "epoch": 0.71, + "learning_rate": 8.435696619950536e-05, + "loss": 6.5541, + "step": 1267 + }, + { + "epoch": 0.71, + "learning_rate": 8.43363561417972e-05, + "loss": 6.6596, + "step": 1268 + }, + { + "epoch": 0.71, + "learning_rate": 8.431574608408904e-05, + "loss": 6.5588, + "step": 1269 + }, + { + "epoch": 0.71, + "learning_rate": 8.429513602638088e-05, + "loss": 6.5542, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 8.427452596867271e-05, + "loss": 6.4311, + "step": 1271 + }, + { + "epoch": 0.71, + "learning_rate": 8.425391591096455e-05, + "loss": 6.4367, + "step": 1272 + }, + { + "epoch": 0.71, + "learning_rate": 8.42333058532564e-05, + "loss": 6.7051, + "step": 1273 + }, + { + "epoch": 0.71, + "learning_rate": 8.421269579554824e-05, + "loss": 6.5136, + "step": 1274 + }, + { + "epoch": 0.71, + "learning_rate": 8.419208573784007e-05, + "loss": 6.6791, + "step": 1275 + }, + { + "epoch": 0.72, + "learning_rate": 8.417147568013191e-05, + "loss": 6.772, + "step": 1276 + }, + { + "epoch": 0.72, + "learning_rate": 8.415086562242374e-05, + "loss": 6.7937, + "step": 1277 + }, + { + "epoch": 0.72, + "learning_rate": 8.413025556471559e-05, + "loss": 6.6365, + "step": 1278 + }, + { + "epoch": 0.72, + "learning_rate": 8.410964550700743e-05, + "loss": 6.6913, + "step": 1279 + }, + { + "epoch": 0.72, + "learning_rate": 8.408903544929926e-05, + "loss": 6.498, + "step": 1280 + }, + { + "epoch": 0.72, + "learning_rate": 8.40684253915911e-05, + "loss": 6.4998, + "step": 1281 + }, + { + "epoch": 0.72, + "learning_rate": 8.404781533388295e-05, + "loss": 6.5633, + "step": 1282 + }, + { + "epoch": 0.72, + "learning_rate": 8.402720527617478e-05, + "loss": 6.6948, + "step": 1283 + }, + { + "epoch": 0.72, + "learning_rate": 8.40065952184666e-05, + "loss": 6.5341, + "step": 1284 + }, + { + "epoch": 0.72, + "learning_rate": 8.398598516075845e-05, + "loss": 6.5084, + "step": 1285 + }, + { + "epoch": 0.72, + "learning_rate": 8.396537510305029e-05, + "loss": 6.69, + "step": 1286 + }, + { + "epoch": 0.72, + "learning_rate": 8.394476504534214e-05, + "loss": 6.6173, + "step": 1287 + }, + { + "epoch": 0.72, + "learning_rate": 8.392415498763398e-05, + "loss": 6.4869, + "step": 1288 + }, + { + "epoch": 0.72, + "learning_rate": 8.390354492992581e-05, + "loss": 6.6442, + "step": 1289 + }, + { + "epoch": 0.72, + "learning_rate": 8.388293487221764e-05, + "loss": 6.3683, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 8.386232481450948e-05, + "loss": 6.5191, + "step": 1291 + }, + { + "epoch": 0.72, + "learning_rate": 8.384171475680132e-05, + "loss": 6.5717, + "step": 1292 + }, + { + "epoch": 0.72, + "learning_rate": 8.382110469909317e-05, + "loss": 6.5682, + "step": 1293 + }, + { + "epoch": 0.73, + "learning_rate": 8.3800494641385e-05, + "loss": 6.3313, + "step": 1294 + }, + { + "epoch": 0.73, + "learning_rate": 8.377988458367684e-05, + "loss": 6.3508, + "step": 1295 + }, + { + "epoch": 0.73, + "learning_rate": 8.375927452596868e-05, + "loss": 6.1617, + "step": 1296 + }, + { + "epoch": 0.73, + "learning_rate": 8.373866446826051e-05, + "loss": 6.1414, + "step": 1297 + }, + { + "epoch": 0.73, + "learning_rate": 8.371805441055234e-05, + "loss": 5.7855, + "step": 1298 + }, + { + "epoch": 0.73, + "learning_rate": 8.369744435284419e-05, + "loss": 6.3681, + "step": 1299 + }, + { + "epoch": 0.73, + "learning_rate": 8.367683429513603e-05, + "loss": 5.561, + "step": 1300 + }, + { + "epoch": 0.73, + "learning_rate": 8.365622423742787e-05, + "loss": 6.8089, + "step": 1301 + }, + { + "epoch": 0.73, + "learning_rate": 8.363561417971972e-05, + "loss": 6.8368, + "step": 1302 + }, + { + "epoch": 0.73, + "learning_rate": 8.361500412201155e-05, + "loss": 6.8361, + "step": 1303 + }, + { + "epoch": 0.73, + "learning_rate": 8.359439406430338e-05, + "loss": 6.7604, + "step": 1304 + }, + { + "epoch": 0.73, + "learning_rate": 8.357378400659522e-05, + "loss": 6.7128, + "step": 1305 + }, + { + "epoch": 0.73, + "learning_rate": 8.355317394888706e-05, + "loss": 6.7931, + "step": 1306 + }, + { + "epoch": 0.73, + "learning_rate": 8.35325638911789e-05, + "loss": 6.459, + "step": 1307 + }, + { + "epoch": 0.73, + "learning_rate": 8.351195383347074e-05, + "loss": 6.66, + "step": 1308 + }, + { + "epoch": 0.73, + "learning_rate": 8.349134377576258e-05, + "loss": 6.4563, + "step": 1309 + }, + { + "epoch": 0.73, + "learning_rate": 8.347073371805441e-05, + "loss": 6.4806, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 8.345012366034625e-05, + "loss": 6.65, + "step": 1311 + }, + { + "epoch": 0.74, + "learning_rate": 8.342951360263808e-05, + "loss": 6.6799, + "step": 1312 + }, + { + "epoch": 0.74, + "learning_rate": 8.340890354492993e-05, + "loss": 6.6662, + "step": 1313 + }, + { + "epoch": 0.74, + "learning_rate": 8.338829348722177e-05, + "loss": 6.4375, + "step": 1314 + }, + { + "epoch": 0.74, + "learning_rate": 8.336768342951361e-05, + "loss": 6.9393, + "step": 1315 + }, + { + "epoch": 0.74, + "learning_rate": 8.334707337180544e-05, + "loss": 6.5135, + "step": 1316 + }, + { + "epoch": 0.74, + "learning_rate": 8.332646331409727e-05, + "loss": 6.3682, + "step": 1317 + }, + { + "epoch": 0.74, + "learning_rate": 8.330585325638912e-05, + "loss": 6.6544, + "step": 1318 + }, + { + "epoch": 0.74, + "learning_rate": 8.328524319868096e-05, + "loss": 6.5103, + "step": 1319 + }, + { + "epoch": 0.74, + "learning_rate": 8.32646331409728e-05, + "loss": 6.5362, + "step": 1320 + }, + { + "epoch": 0.74, + "learning_rate": 8.324402308326463e-05, + "loss": 6.3246, + "step": 1321 + }, + { + "epoch": 0.74, + "learning_rate": 8.322341302555648e-05, + "loss": 6.5399, + "step": 1322 + }, + { + "epoch": 0.74, + "learning_rate": 8.320280296784832e-05, + "loss": 6.6631, + "step": 1323 + }, + { + "epoch": 0.74, + "learning_rate": 8.318219291014015e-05, + "loss": 6.681, + "step": 1324 + }, + { + "epoch": 0.74, + "learning_rate": 8.3161582852432e-05, + "loss": 6.6926, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 8.314097279472382e-05, + "loss": 6.4671, + "step": 1326 + }, + { + "epoch": 0.74, + "learning_rate": 8.312036273701567e-05, + "loss": 6.3436, + "step": 1327 + }, + { + "epoch": 0.74, + "learning_rate": 8.309975267930751e-05, + "loss": 6.5232, + "step": 1328 + }, + { + "epoch": 0.74, + "learning_rate": 8.307914262159935e-05, + "loss": 6.7289, + "step": 1329 + }, + { + "epoch": 0.75, + "learning_rate": 8.305853256389118e-05, + "loss": 6.5073, + "step": 1330 + }, + { + "epoch": 0.75, + "learning_rate": 8.303792250618301e-05, + "loss": 6.4028, + "step": 1331 + }, + { + "epoch": 0.75, + "learning_rate": 8.301731244847486e-05, + "loss": 6.4198, + "step": 1332 + }, + { + "epoch": 0.75, + "learning_rate": 8.29967023907667e-05, + "loss": 6.6198, + "step": 1333 + }, + { + "epoch": 0.75, + "learning_rate": 8.297609233305854e-05, + "loss": 6.6072, + "step": 1334 + }, + { + "epoch": 0.75, + "learning_rate": 8.295548227535037e-05, + "loss": 6.3152, + "step": 1335 + }, + { + "epoch": 0.75, + "learning_rate": 8.293487221764222e-05, + "loss": 6.4827, + "step": 1336 + }, + { + "epoch": 0.75, + "learning_rate": 8.291426215993405e-05, + "loss": 6.553, + "step": 1337 + }, + { + "epoch": 0.75, + "learning_rate": 8.289365210222589e-05, + "loss": 6.5539, + "step": 1338 + }, + { + "epoch": 0.75, + "learning_rate": 8.287304204451772e-05, + "loss": 6.5596, + "step": 1339 + }, + { + "epoch": 0.75, + "learning_rate": 8.285243198680956e-05, + "loss": 6.545, + "step": 1340 + }, + { + "epoch": 0.75, + "learning_rate": 8.28318219291014e-05, + "loss": 6.6858, + "step": 1341 + }, + { + "epoch": 0.75, + "learning_rate": 8.281121187139325e-05, + "loss": 6.3996, + "step": 1342 + }, + { + "epoch": 0.75, + "learning_rate": 8.27906018136851e-05, + "loss": 6.6223, + "step": 1343 + }, + { + "epoch": 0.75, + "learning_rate": 8.276999175597692e-05, + "loss": 6.2708, + "step": 1344 + }, + { + "epoch": 0.75, + "learning_rate": 8.274938169826875e-05, + "loss": 6.2858, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 8.27287716405606e-05, + "loss": 6.4188, + "step": 1346 + }, + { + "epoch": 0.76, + "learning_rate": 8.270816158285244e-05, + "loss": 6.2983, + "step": 1347 + }, + { + "epoch": 0.76, + "learning_rate": 8.268755152514427e-05, + "loss": 5.8298, + "step": 1348 + }, + { + "epoch": 0.76, + "learning_rate": 8.266694146743611e-05, + "loss": 6.1361, + "step": 1349 + }, + { + "epoch": 0.76, + "learning_rate": 8.264633140972796e-05, + "loss": 5.3409, + "step": 1350 + }, + { + "epoch": 0.76, + "learning_rate": 8.262572135201979e-05, + "loss": 6.8958, + "step": 1351 + }, + { + "epoch": 0.76, + "learning_rate": 8.260511129431163e-05, + "loss": 6.9611, + "step": 1352 + }, + { + "epoch": 0.76, + "learning_rate": 8.258450123660346e-05, + "loss": 6.8644, + "step": 1353 + }, + { + "epoch": 0.76, + "learning_rate": 8.25638911788953e-05, + "loss": 6.7143, + "step": 1354 + }, + { + "epoch": 0.76, + "learning_rate": 8.254328112118715e-05, + "loss": 6.7114, + "step": 1355 + }, + { + "epoch": 0.76, + "learning_rate": 8.252267106347899e-05, + "loss": 6.8363, + "step": 1356 + }, + { + "epoch": 0.76, + "learning_rate": 8.250206100577082e-05, + "loss": 6.5921, + "step": 1357 + }, + { + "epoch": 0.76, + "learning_rate": 8.248145094806265e-05, + "loss": 6.8289, + "step": 1358 + }, + { + "epoch": 0.76, + "learning_rate": 8.246084089035449e-05, + "loss": 6.6884, + "step": 1359 + }, + { + "epoch": 0.76, + "learning_rate": 8.244023083264634e-05, + "loss": 6.8144, + "step": 1360 + }, + { + "epoch": 0.76, + "learning_rate": 8.241962077493818e-05, + "loss": 6.6957, + "step": 1361 + }, + { + "epoch": 0.76, + "learning_rate": 8.239901071723001e-05, + "loss": 6.6271, + "step": 1362 + }, + { + "epoch": 0.76, + "learning_rate": 8.237840065952185e-05, + "loss": 6.5578, + "step": 1363 + }, + { + "epoch": 0.76, + "learning_rate": 8.23577906018137e-05, + "loss": 6.8864, + "step": 1364 + }, + { + "epoch": 0.77, + "learning_rate": 8.233718054410553e-05, + "loss": 6.536, + "step": 1365 + }, + { + "epoch": 0.77, + "learning_rate": 8.231657048639737e-05, + "loss": 6.5373, + "step": 1366 + }, + { + "epoch": 0.77, + "learning_rate": 8.22959604286892e-05, + "loss": 6.5517, + "step": 1367 + }, + { + "epoch": 0.77, + "learning_rate": 8.227535037098104e-05, + "loss": 6.4646, + "step": 1368 + }, + { + "epoch": 0.77, + "learning_rate": 8.225474031327289e-05, + "loss": 6.6211, + "step": 1369 + }, + { + "epoch": 0.77, + "learning_rate": 8.223413025556473e-05, + "loss": 6.6103, + "step": 1370 + }, + { + "epoch": 0.77, + "learning_rate": 8.221352019785656e-05, + "loss": 6.7559, + "step": 1371 + }, + { + "epoch": 0.77, + "learning_rate": 8.219291014014839e-05, + "loss": 6.4042, + "step": 1372 + }, + { + "epoch": 0.77, + "learning_rate": 8.217230008244023e-05, + "loss": 6.6626, + "step": 1373 + }, + { + "epoch": 0.77, + "learning_rate": 8.215169002473208e-05, + "loss": 6.6922, + "step": 1374 + }, + { + "epoch": 0.77, + "learning_rate": 8.213107996702392e-05, + "loss": 6.604, + "step": 1375 + }, + { + "epoch": 0.77, + "learning_rate": 8.211046990931575e-05, + "loss": 6.3454, + "step": 1376 + }, + { + "epoch": 0.77, + "learning_rate": 8.208985985160759e-05, + "loss": 6.5629, + "step": 1377 + }, + { + "epoch": 0.77, + "learning_rate": 8.206924979389942e-05, + "loss": 6.5585, + "step": 1378 + }, + { + "epoch": 0.77, + "learning_rate": 8.204863973619127e-05, + "loss": 6.4784, + "step": 1379 + }, + { + "epoch": 0.77, + "learning_rate": 8.20280296784831e-05, + "loss": 6.6428, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 8.200741962077494e-05, + "loss": 6.4031, + "step": 1381 + }, + { + "epoch": 0.77, + "learning_rate": 8.198680956306678e-05, + "loss": 6.6332, + "step": 1382 + }, + { + "epoch": 0.78, + "learning_rate": 8.196619950535863e-05, + "loss": 6.488, + "step": 1383 + }, + { + "epoch": 0.78, + "learning_rate": 8.194558944765046e-05, + "loss": 6.6822, + "step": 1384 + }, + { + "epoch": 0.78, + "learning_rate": 8.192497938994228e-05, + "loss": 6.5656, + "step": 1385 + }, + { + "epoch": 0.78, + "learning_rate": 8.190436933223413e-05, + "loss": 6.5676, + "step": 1386 + }, + { + "epoch": 0.78, + "learning_rate": 8.188375927452597e-05, + "loss": 6.5405, + "step": 1387 + }, + { + "epoch": 0.78, + "learning_rate": 8.186314921681781e-05, + "loss": 6.5056, + "step": 1388 + }, + { + "epoch": 0.78, + "learning_rate": 8.184253915910966e-05, + "loss": 6.5807, + "step": 1389 + }, + { + "epoch": 0.78, + "learning_rate": 8.182192910140149e-05, + "loss": 6.5002, + "step": 1390 + }, + { + "epoch": 0.78, + "learning_rate": 8.180131904369333e-05, + "loss": 6.1892, + "step": 1391 + }, + { + "epoch": 0.78, + "learning_rate": 8.178070898598516e-05, + "loss": 6.4367, + "step": 1392 + }, + { + "epoch": 0.78, + "learning_rate": 8.1760098928277e-05, + "loss": 6.1806, + "step": 1393 + }, + { + "epoch": 0.78, + "learning_rate": 8.173948887056883e-05, + "loss": 6.3434, + "step": 1394 + }, + { + "epoch": 0.78, + "learning_rate": 8.171887881286068e-05, + "loss": 6.3628, + "step": 1395 + }, + { + "epoch": 0.78, + "learning_rate": 8.169826875515252e-05, + "loss": 6.3835, + "step": 1396 + }, + { + "epoch": 0.78, + "learning_rate": 8.167765869744436e-05, + "loss": 6.1255, + "step": 1397 + }, + { + "epoch": 0.78, + "learning_rate": 8.16570486397362e-05, + "loss": 5.9603, + "step": 1398 + }, + { + "epoch": 0.78, + "learning_rate": 8.163643858202802e-05, + "loss": 5.651, + "step": 1399 + }, + { + "epoch": 0.78, + "learning_rate": 8.161582852431987e-05, + "loss": 5.6158, + "step": 1400 + }, + { + "epoch": 0.79, + "learning_rate": 8.159521846661171e-05, + "loss": 6.8158, + "step": 1401 + }, + { + "epoch": 0.79, + "learning_rate": 8.157460840890355e-05, + "loss": 6.6695, + "step": 1402 + }, + { + "epoch": 0.79, + "learning_rate": 8.155399835119538e-05, + "loss": 6.609, + "step": 1403 + }, + { + "epoch": 0.79, + "learning_rate": 8.153338829348723e-05, + "loss": 6.5046, + "step": 1404 + }, + { + "epoch": 0.79, + "learning_rate": 8.151277823577906e-05, + "loss": 6.5663, + "step": 1405 + }, + { + "epoch": 0.79, + "learning_rate": 8.14921681780709e-05, + "loss": 6.8443, + "step": 1406 + }, + { + "epoch": 0.79, + "learning_rate": 8.147155812036274e-05, + "loss": 6.557, + "step": 1407 + }, + { + "epoch": 0.79, + "learning_rate": 8.145094806265457e-05, + "loss": 6.519, + "step": 1408 + }, + { + "epoch": 0.79, + "learning_rate": 8.143033800494642e-05, + "loss": 6.5501, + "step": 1409 + }, + { + "epoch": 0.79, + "learning_rate": 8.140972794723826e-05, + "loss": 6.4199, + "step": 1410 + }, + { + "epoch": 0.79, + "learning_rate": 8.13891178895301e-05, + "loss": 6.4742, + "step": 1411 + }, + { + "epoch": 0.79, + "learning_rate": 8.136850783182193e-05, + "loss": 6.4243, + "step": 1412 + }, + { + "epoch": 0.79, + "learning_rate": 8.134789777411376e-05, + "loss": 6.6315, + "step": 1413 + }, + { + "epoch": 0.79, + "learning_rate": 8.132728771640561e-05, + "loss": 6.4863, + "step": 1414 + }, + { + "epoch": 0.79, + "learning_rate": 8.130667765869745e-05, + "loss": 6.6131, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 8.12860676009893e-05, + "loss": 6.7752, + "step": 1416 + }, + { + "epoch": 0.79, + "learning_rate": 8.126545754328112e-05, + "loss": 6.6978, + "step": 1417 + }, + { + "epoch": 0.79, + "learning_rate": 8.124484748557297e-05, + "loss": 6.632, + "step": 1418 + }, + { + "epoch": 0.8, + "learning_rate": 8.12242374278648e-05, + "loss": 6.4805, + "step": 1419 + }, + { + "epoch": 0.8, + "learning_rate": 8.120362737015664e-05, + "loss": 6.4896, + "step": 1420 + }, + { + "epoch": 0.8, + "learning_rate": 8.118301731244848e-05, + "loss": 6.6703, + "step": 1421 + }, + { + "epoch": 0.8, + "learning_rate": 8.116240725474031e-05, + "loss": 6.5425, + "step": 1422 + }, + { + "epoch": 0.8, + "learning_rate": 8.114179719703216e-05, + "loss": 6.4224, + "step": 1423 + }, + { + "epoch": 0.8, + "learning_rate": 8.1121187139324e-05, + "loss": 6.6223, + "step": 1424 + }, + { + "epoch": 0.8, + "learning_rate": 8.110057708161583e-05, + "loss": 6.5126, + "step": 1425 + }, + { + "epoch": 0.8, + "learning_rate": 8.107996702390766e-05, + "loss": 6.6098, + "step": 1426 + }, + { + "epoch": 0.8, + "learning_rate": 8.10593569661995e-05, + "loss": 6.4336, + "step": 1427 + }, + { + "epoch": 0.8, + "learning_rate": 8.103874690849135e-05, + "loss": 6.469, + "step": 1428 + }, + { + "epoch": 0.8, + "learning_rate": 8.101813685078319e-05, + "loss": 6.5586, + "step": 1429 + }, + { + "epoch": 0.8, + "learning_rate": 8.099752679307503e-05, + "loss": 6.6164, + "step": 1430 + }, + { + "epoch": 0.8, + "learning_rate": 8.097691673536686e-05, + "loss": 6.6007, + "step": 1431 + }, + { + "epoch": 0.8, + "learning_rate": 8.09563066776587e-05, + "loss": 6.6084, + "step": 1432 + }, + { + "epoch": 0.8, + "learning_rate": 8.093569661995054e-05, + "loss": 6.6805, + "step": 1433 + }, + { + "epoch": 0.8, + "learning_rate": 8.091508656224238e-05, + "loss": 6.4296, + "step": 1434 + }, + { + "epoch": 0.8, + "learning_rate": 8.089447650453421e-05, + "loss": 6.5496, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 8.087386644682605e-05, + "loss": 6.3642, + "step": 1436 + }, + { + "epoch": 0.81, + "learning_rate": 8.08532563891179e-05, + "loss": 6.5239, + "step": 1437 + }, + { + "epoch": 0.81, + "learning_rate": 8.083264633140974e-05, + "loss": 6.4075, + "step": 1438 + }, + { + "epoch": 0.81, + "learning_rate": 8.081203627370157e-05, + "loss": 6.317, + "step": 1439 + }, + { + "epoch": 0.81, + "learning_rate": 8.07914262159934e-05, + "loss": 6.4798, + "step": 1440 + }, + { + "epoch": 0.81, + "learning_rate": 8.077081615828524e-05, + "loss": 6.5507, + "step": 1441 + }, + { + "epoch": 0.81, + "learning_rate": 8.075020610057709e-05, + "loss": 6.3364, + "step": 1442 + }, + { + "epoch": 0.81, + "learning_rate": 8.072959604286893e-05, + "loss": 6.2062, + "step": 1443 + }, + { + "epoch": 0.81, + "learning_rate": 8.070898598516077e-05, + "loss": 6.2246, + "step": 1444 + }, + { + "epoch": 0.81, + "learning_rate": 8.06883759274526e-05, + "loss": 6.1041, + "step": 1445 + }, + { + "epoch": 0.81, + "learning_rate": 8.066776586974443e-05, + "loss": 6.2448, + "step": 1446 + }, + { + "epoch": 0.81, + "learning_rate": 8.064715581203628e-05, + "loss": 5.947, + "step": 1447 + }, + { + "epoch": 0.81, + "learning_rate": 8.062654575432812e-05, + "loss": 5.7385, + "step": 1448 + }, + { + "epoch": 0.81, + "learning_rate": 8.060593569661995e-05, + "loss": 5.7783, + "step": 1449 + }, + { + "epoch": 0.81, + "learning_rate": 8.058532563891179e-05, + "loss": 5.4841, + "step": 1450 + }, + { + "epoch": 0.81, + "learning_rate": 8.056471558120364e-05, + "loss": 6.9039, + "step": 1451 + }, + { + "epoch": 0.81, + "learning_rate": 8.054410552349547e-05, + "loss": 6.9061, + "step": 1452 + }, + { + "epoch": 0.81, + "learning_rate": 8.052349546578731e-05, + "loss": 6.5211, + "step": 1453 + }, + { + "epoch": 0.82, + "learning_rate": 8.050288540807914e-05, + "loss": 6.6925, + "step": 1454 + }, + { + "epoch": 0.82, + "learning_rate": 8.048227535037098e-05, + "loss": 6.6307, + "step": 1455 + }, + { + "epoch": 0.82, + "learning_rate": 8.046166529266283e-05, + "loss": 6.6721, + "step": 1456 + }, + { + "epoch": 0.82, + "learning_rate": 8.044105523495467e-05, + "loss": 6.7929, + "step": 1457 + }, + { + "epoch": 0.82, + "learning_rate": 8.04204451772465e-05, + "loss": 6.6712, + "step": 1458 + }, + { + "epoch": 0.82, + "learning_rate": 8.039983511953834e-05, + "loss": 6.502, + "step": 1459 + }, + { + "epoch": 0.82, + "learning_rate": 8.037922506183017e-05, + "loss": 6.6736, + "step": 1460 + }, + { + "epoch": 0.82, + "learning_rate": 8.035861500412202e-05, + "loss": 6.5812, + "step": 1461 + }, + { + "epoch": 0.82, + "learning_rate": 8.033800494641386e-05, + "loss": 6.5349, + "step": 1462 + }, + { + "epoch": 0.82, + "learning_rate": 8.031739488870569e-05, + "loss": 6.3841, + "step": 1463 + }, + { + "epoch": 0.82, + "learning_rate": 8.029678483099753e-05, + "loss": 6.4254, + "step": 1464 + }, + { + "epoch": 0.82, + "learning_rate": 8.027617477328938e-05, + "loss": 6.6366, + "step": 1465 + }, + { + "epoch": 0.82, + "learning_rate": 8.02555647155812e-05, + "loss": 6.4399, + "step": 1466 + }, + { + "epoch": 0.82, + "learning_rate": 8.023495465787304e-05, + "loss": 6.5797, + "step": 1467 + }, + { + "epoch": 0.82, + "learning_rate": 8.021434460016488e-05, + "loss": 6.4913, + "step": 1468 + }, + { + "epoch": 0.82, + "learning_rate": 8.019373454245672e-05, + "loss": 6.2793, + "step": 1469 + }, + { + "epoch": 0.82, + "learning_rate": 8.017312448474857e-05, + "loss": 6.3523, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 8.015251442704041e-05, + "loss": 6.4952, + "step": 1471 + }, + { + "epoch": 0.83, + "learning_rate": 8.013190436933224e-05, + "loss": 6.2869, + "step": 1472 + }, + { + "epoch": 0.83, + "learning_rate": 8.011129431162407e-05, + "loss": 6.4113, + "step": 1473 + }, + { + "epoch": 0.83, + "learning_rate": 8.009068425391591e-05, + "loss": 6.2499, + "step": 1474 + }, + { + "epoch": 0.83, + "learning_rate": 8.007007419620776e-05, + "loss": 6.5783, + "step": 1475 + }, + { + "epoch": 0.83, + "learning_rate": 8.00494641384996e-05, + "loss": 6.5573, + "step": 1476 + }, + { + "epoch": 0.83, + "learning_rate": 8.002885408079143e-05, + "loss": 6.667, + "step": 1477 + }, + { + "epoch": 0.83, + "learning_rate": 8.000824402308327e-05, + "loss": 6.2801, + "step": 1478 + }, + { + "epoch": 0.83, + "learning_rate": 7.99876339653751e-05, + "loss": 6.4518, + "step": 1479 + }, + { + "epoch": 0.83, + "learning_rate": 7.996702390766695e-05, + "loss": 6.2863, + "step": 1480 + }, + { + "epoch": 0.83, + "learning_rate": 7.994641384995877e-05, + "loss": 6.5258, + "step": 1481 + }, + { + "epoch": 0.83, + "learning_rate": 7.992580379225062e-05, + "loss": 6.5281, + "step": 1482 + }, + { + "epoch": 0.83, + "learning_rate": 7.990519373454246e-05, + "loss": 6.5639, + "step": 1483 + }, + { + "epoch": 0.83, + "learning_rate": 7.98845836768343e-05, + "loss": 6.1624, + "step": 1484 + }, + { + "epoch": 0.83, + "learning_rate": 7.986397361912615e-05, + "loss": 6.5808, + "step": 1485 + }, + { + "epoch": 0.83, + "learning_rate": 7.984336356141798e-05, + "loss": 6.4646, + "step": 1486 + }, + { + "epoch": 0.83, + "learning_rate": 7.982275350370981e-05, + "loss": 6.3639, + "step": 1487 + }, + { + "epoch": 0.83, + "learning_rate": 7.980214344600165e-05, + "loss": 6.4067, + "step": 1488 + }, + { + "epoch": 0.83, + "learning_rate": 7.97815333882935e-05, + "loss": 6.4335, + "step": 1489 + }, + { + "epoch": 0.84, + "learning_rate": 7.976092333058532e-05, + "loss": 6.1266, + "step": 1490 + }, + { + "epoch": 0.84, + "learning_rate": 7.974031327287717e-05, + "loss": 6.3239, + "step": 1491 + }, + { + "epoch": 0.84, + "learning_rate": 7.971970321516901e-05, + "loss": 6.4637, + "step": 1492 + }, + { + "epoch": 0.84, + "learning_rate": 7.969909315746084e-05, + "loss": 6.1175, + "step": 1493 + }, + { + "epoch": 0.84, + "learning_rate": 7.967848309975268e-05, + "loss": 6.4221, + "step": 1494 + }, + { + "epoch": 0.84, + "learning_rate": 7.965787304204451e-05, + "loss": 6.1656, + "step": 1495 + }, + { + "epoch": 0.84, + "learning_rate": 7.963726298433636e-05, + "loss": 6.4285, + "step": 1496 + }, + { + "epoch": 0.84, + "learning_rate": 7.96166529266282e-05, + "loss": 5.9578, + "step": 1497 + }, + { + "epoch": 0.84, + "learning_rate": 7.959604286892004e-05, + "loss": 5.9881, + "step": 1498 + }, + { + "epoch": 0.84, + "learning_rate": 7.957543281121187e-05, + "loss": 5.338, + "step": 1499 + }, + { + "epoch": 0.84, + "learning_rate": 7.95548227535037e-05, + "loss": 5.7859, + "step": 1500 + }, + { + "epoch": 0.84, + "eval_loss": 13.754671096801758, + "eval_runtime": 1322.0968, + "eval_samples_per_second": 1.998, + "eval_steps_per_second": 0.25, + "eval_wer": 1.0017453391511304, + "step": 1500 + }, + { + "epoch": 0.84, + "learning_rate": 7.953421269579555e-05, + "loss": 6.6723, + "step": 1501 + }, + { + "epoch": 0.84, + "learning_rate": 7.951360263808739e-05, + "loss": 6.8885, + "step": 1502 + }, + { + "epoch": 0.84, + "learning_rate": 7.949299258037923e-05, + "loss": 6.7283, + "step": 1503 + }, + { + "epoch": 0.84, + "learning_rate": 7.947238252267106e-05, + "loss": 6.6297, + "step": 1504 + }, + { + "epoch": 0.84, + "learning_rate": 7.945177246496291e-05, + "loss": 6.6581, + "step": 1505 + }, + { + "epoch": 0.84, + "learning_rate": 7.943116240725475e-05, + "loss": 6.6324, + "step": 1506 + }, + { + "epoch": 0.84, + "learning_rate": 7.941055234954658e-05, + "loss": 6.5038, + "step": 1507 + }, + { + "epoch": 0.85, + "learning_rate": 7.938994229183842e-05, + "loss": 6.5113, + "step": 1508 + }, + { + "epoch": 0.85, + "learning_rate": 7.936933223413025e-05, + "loss": 6.6097, + "step": 1509 + }, + { + "epoch": 0.85, + "learning_rate": 7.93487221764221e-05, + "loss": 6.4359, + "step": 1510 + }, + { + "epoch": 0.85, + "learning_rate": 7.932811211871394e-05, + "loss": 6.3489, + "step": 1511 + }, + { + "epoch": 0.85, + "learning_rate": 7.930750206100578e-05, + "loss": 6.4349, + "step": 1512 + }, + { + "epoch": 0.85, + "learning_rate": 7.928689200329761e-05, + "loss": 6.307, + "step": 1513 + }, + { + "epoch": 0.85, + "learning_rate": 7.926628194558944e-05, + "loss": 6.4262, + "step": 1514 + }, + { + "epoch": 0.85, + "learning_rate": 7.924567188788129e-05, + "loss": 6.3284, + "step": 1515 + }, + { + "epoch": 0.85, + "learning_rate": 7.922506183017313e-05, + "loss": 6.5092, + "step": 1516 + }, + { + "epoch": 0.85, + "learning_rate": 7.920445177246497e-05, + "loss": 6.3617, + "step": 1517 + }, + { + "epoch": 0.85, + "learning_rate": 7.91838417147568e-05, + "loss": 6.4496, + "step": 1518 + }, + { + "epoch": 0.85, + "learning_rate": 7.916323165704865e-05, + "loss": 6.3697, + "step": 1519 + }, + { + "epoch": 0.85, + "learning_rate": 7.914262159934048e-05, + "loss": 6.3962, + "step": 1520 + }, + { + "epoch": 0.85, + "learning_rate": 7.912201154163232e-05, + "loss": 6.4277, + "step": 1521 + }, + { + "epoch": 0.85, + "learning_rate": 7.910140148392415e-05, + "loss": 6.4263, + "step": 1522 + }, + { + "epoch": 0.85, + "learning_rate": 7.9080791426216e-05, + "loss": 6.5048, + "step": 1523 + }, + { + "epoch": 0.85, + "learning_rate": 7.906018136850784e-05, + "loss": 6.4714, + "step": 1524 + }, + { + "epoch": 0.85, + "learning_rate": 7.903957131079968e-05, + "loss": 6.2625, + "step": 1525 + }, + { + "epoch": 0.86, + "learning_rate": 7.901896125309151e-05, + "loss": 6.2747, + "step": 1526 + }, + { + "epoch": 0.86, + "learning_rate": 7.899835119538334e-05, + "loss": 6.38, + "step": 1527 + }, + { + "epoch": 0.86, + "learning_rate": 7.897774113767518e-05, + "loss": 6.2112, + "step": 1528 + }, + { + "epoch": 0.86, + "learning_rate": 7.895713107996703e-05, + "loss": 6.375, + "step": 1529 + }, + { + "epoch": 0.86, + "learning_rate": 7.893652102225887e-05, + "loss": 6.3159, + "step": 1530 + }, + { + "epoch": 0.86, + "learning_rate": 7.891591096455071e-05, + "loss": 6.2258, + "step": 1531 + }, + { + "epoch": 0.86, + "learning_rate": 7.889530090684254e-05, + "loss": 6.1093, + "step": 1532 + }, + { + "epoch": 0.86, + "learning_rate": 7.887469084913439e-05, + "loss": 6.4539, + "step": 1533 + }, + { + "epoch": 0.86, + "learning_rate": 7.885408079142622e-05, + "loss": 6.3002, + "step": 1534 + }, + { + "epoch": 0.86, + "learning_rate": 7.883347073371806e-05, + "loss": 6.3641, + "step": 1535 + }, + { + "epoch": 0.86, + "learning_rate": 7.881286067600989e-05, + "loss": 6.2894, + "step": 1536 + }, + { + "epoch": 0.86, + "learning_rate": 7.879225061830173e-05, + "loss": 6.2724, + "step": 1537 + }, + { + "epoch": 0.86, + "learning_rate": 7.877164056059358e-05, + "loss": 6.5049, + "step": 1538 + }, + { + "epoch": 0.86, + "learning_rate": 7.875103050288542e-05, + "loss": 6.2737, + "step": 1539 + }, + { + "epoch": 0.86, + "learning_rate": 7.873042044517725e-05, + "loss": 6.25, + "step": 1540 + }, + { + "epoch": 0.86, + "learning_rate": 7.870981038746908e-05, + "loss": 6.2985, + "step": 1541 + }, + { + "epoch": 0.86, + "learning_rate": 7.868920032976092e-05, + "loss": 6.2135, + "step": 1542 + }, + { + "epoch": 0.86, + "learning_rate": 7.866859027205277e-05, + "loss": 6.1809, + "step": 1543 + }, + { + "epoch": 0.87, + "learning_rate": 7.864798021434461e-05, + "loss": 6.1791, + "step": 1544 + }, + { + "epoch": 0.87, + "learning_rate": 7.862737015663644e-05, + "loss": 6.2036, + "step": 1545 + }, + { + "epoch": 0.87, + "learning_rate": 7.860676009892828e-05, + "loss": 5.8358, + "step": 1546 + }, + { + "epoch": 0.87, + "learning_rate": 7.858615004122011e-05, + "loss": 5.9255, + "step": 1547 + }, + { + "epoch": 0.87, + "learning_rate": 7.856553998351196e-05, + "loss": 5.5548, + "step": 1548 + }, + { + "epoch": 0.87, + "learning_rate": 7.85449299258038e-05, + "loss": 5.6425, + "step": 1549 + }, + { + "epoch": 0.87, + "learning_rate": 7.852431986809563e-05, + "loss": 5.5332, + "step": 1550 + }, + { + "epoch": 0.87, + "learning_rate": 7.850370981038747e-05, + "loss": 6.7436, + "step": 1551 + }, + { + "epoch": 0.87, + "learning_rate": 7.848309975267932e-05, + "loss": 6.7697, + "step": 1552 + }, + { + "epoch": 0.87, + "learning_rate": 7.846248969497116e-05, + "loss": 6.5713, + "step": 1553 + }, + { + "epoch": 0.87, + "learning_rate": 7.844187963726299e-05, + "loss": 6.3137, + "step": 1554 + }, + { + "epoch": 0.87, + "learning_rate": 7.842126957955482e-05, + "loss": 6.5256, + "step": 1555 + }, + { + "epoch": 0.87, + "learning_rate": 7.840065952184666e-05, + "loss": 6.6239, + "step": 1556 + }, + { + "epoch": 0.87, + "learning_rate": 7.83800494641385e-05, + "loss": 6.3652, + "step": 1557 + }, + { + "epoch": 0.87, + "learning_rate": 7.835943940643035e-05, + "loss": 6.3865, + "step": 1558 + }, + { + "epoch": 0.87, + "learning_rate": 7.833882934872218e-05, + "loss": 6.3597, + "step": 1559 + }, + { + "epoch": 0.87, + "learning_rate": 7.831821929101402e-05, + "loss": 6.4912, + "step": 1560 + }, + { + "epoch": 0.88, + "learning_rate": 7.829760923330585e-05, + "loss": 6.5519, + "step": 1561 + }, + { + "epoch": 0.88, + "learning_rate": 7.82769991755977e-05, + "loss": 6.4939, + "step": 1562 + }, + { + "epoch": 0.88, + "learning_rate": 7.825638911788954e-05, + "loss": 6.3752, + "step": 1563 + }, + { + "epoch": 0.88, + "learning_rate": 7.823577906018137e-05, + "loss": 6.2081, + "step": 1564 + }, + { + "epoch": 0.88, + "learning_rate": 7.821516900247321e-05, + "loss": 6.3505, + "step": 1565 + }, + { + "epoch": 0.88, + "learning_rate": 7.819455894476506e-05, + "loss": 6.3314, + "step": 1566 + }, + { + "epoch": 0.88, + "learning_rate": 7.817394888705689e-05, + "loss": 6.2007, + "step": 1567 + }, + { + "epoch": 0.88, + "learning_rate": 7.815333882934872e-05, + "loss": 6.3803, + "step": 1568 + }, + { + "epoch": 0.88, + "learning_rate": 7.813272877164056e-05, + "loss": 6.4836, + "step": 1569 + }, + { + "epoch": 0.88, + "learning_rate": 7.81121187139324e-05, + "loss": 6.3539, + "step": 1570 + }, + { + "epoch": 0.88, + "learning_rate": 7.809150865622425e-05, + "loss": 6.4897, + "step": 1571 + }, + { + "epoch": 0.88, + "learning_rate": 7.807089859851609e-05, + "loss": 6.581, + "step": 1572 + }, + { + "epoch": 0.88, + "learning_rate": 7.805028854080792e-05, + "loss": 6.3875, + "step": 1573 + }, + { + "epoch": 0.88, + "learning_rate": 7.802967848309975e-05, + "loss": 6.1355, + "step": 1574 + }, + { + "epoch": 0.88, + "learning_rate": 7.800906842539159e-05, + "loss": 6.0615, + "step": 1575 + }, + { + "epoch": 0.88, + "learning_rate": 7.798845836768344e-05, + "loss": 6.1832, + "step": 1576 + }, + { + "epoch": 0.88, + "learning_rate": 7.796784830997527e-05, + "loss": 6.125, + "step": 1577 + }, + { + "epoch": 0.88, + "learning_rate": 7.794723825226711e-05, + "loss": 6.3613, + "step": 1578 + }, + { + "epoch": 0.89, + "learning_rate": 7.792662819455895e-05, + "loss": 6.3064, + "step": 1579 + }, + { + "epoch": 0.89, + "learning_rate": 7.79060181368508e-05, + "loss": 6.3335, + "step": 1580 + }, + { + "epoch": 0.89, + "learning_rate": 7.788540807914263e-05, + "loss": 6.4822, + "step": 1581 + }, + { + "epoch": 0.89, + "learning_rate": 7.786479802143445e-05, + "loss": 6.5168, + "step": 1582 + }, + { + "epoch": 0.89, + "learning_rate": 7.78441879637263e-05, + "loss": 6.3182, + "step": 1583 + }, + { + "epoch": 0.89, + "learning_rate": 7.782357790601814e-05, + "loss": 6.1804, + "step": 1584 + }, + { + "epoch": 0.89, + "learning_rate": 7.780296784830998e-05, + "loss": 6.3152, + "step": 1585 + }, + { + "epoch": 0.89, + "learning_rate": 7.778235779060181e-05, + "loss": 6.2435, + "step": 1586 + }, + { + "epoch": 0.89, + "learning_rate": 7.776174773289366e-05, + "loss": 6.3649, + "step": 1587 + }, + { + "epoch": 0.89, + "learning_rate": 7.774113767518549e-05, + "loss": 6.2956, + "step": 1588 + }, + { + "epoch": 0.89, + "learning_rate": 7.772052761747733e-05, + "loss": 6.2906, + "step": 1589 + }, + { + "epoch": 0.89, + "learning_rate": 7.769991755976917e-05, + "loss": 6.2203, + "step": 1590 + }, + { + "epoch": 0.89, + "learning_rate": 7.7679307502061e-05, + "loss": 6.1462, + "step": 1591 + }, + { + "epoch": 0.89, + "learning_rate": 7.765869744435285e-05, + "loss": 6.1384, + "step": 1592 + }, + { + "epoch": 0.89, + "learning_rate": 7.763808738664469e-05, + "loss": 5.9677, + "step": 1593 + }, + { + "epoch": 0.89, + "learning_rate": 7.761747732893652e-05, + "loss": 5.8599, + "step": 1594 + }, + { + "epoch": 0.89, + "learning_rate": 7.759686727122836e-05, + "loss": 6.1369, + "step": 1595 + }, + { + "epoch": 0.89, + "learning_rate": 7.75762572135202e-05, + "loss": 5.9377, + "step": 1596 + }, + { + "epoch": 0.9, + "learning_rate": 7.755564715581204e-05, + "loss": 5.7642, + "step": 1597 + }, + { + "epoch": 0.9, + "learning_rate": 7.753503709810388e-05, + "loss": 5.8439, + "step": 1598 + }, + { + "epoch": 0.9, + "learning_rate": 7.751442704039572e-05, + "loss": 5.9778, + "step": 1599 + }, + { + "epoch": 0.9, + "learning_rate": 7.749381698268755e-05, + "loss": 5.4054, + "step": 1600 + }, + { + "epoch": 0.9, + "learning_rate": 7.74732069249794e-05, + "loss": 6.8132, + "step": 1601 + }, + { + "epoch": 0.9, + "learning_rate": 7.745259686727123e-05, + "loss": 6.4352, + "step": 1602 + }, + { + "epoch": 0.9, + "learning_rate": 7.743198680956307e-05, + "loss": 6.663, + "step": 1603 + }, + { + "epoch": 0.9, + "learning_rate": 7.741137675185491e-05, + "loss": 6.6237, + "step": 1604 + }, + { + "epoch": 0.9, + "learning_rate": 7.739076669414674e-05, + "loss": 6.2351, + "step": 1605 + }, + { + "epoch": 0.9, + "learning_rate": 7.737015663643859e-05, + "loss": 6.4498, + "step": 1606 + }, + { + "epoch": 0.9, + "learning_rate": 7.734954657873043e-05, + "loss": 6.6253, + "step": 1607 + }, + { + "epoch": 0.9, + "learning_rate": 7.732893652102226e-05, + "loss": 6.4016, + "step": 1608 + }, + { + "epoch": 0.9, + "learning_rate": 7.730832646331409e-05, + "loss": 6.6199, + "step": 1609 + }, + { + "epoch": 0.9, + "learning_rate": 7.728771640560593e-05, + "loss": 6.3524, + "step": 1610 + }, + { + "epoch": 0.9, + "learning_rate": 7.726710634789778e-05, + "loss": 6.5455, + "step": 1611 + }, + { + "epoch": 0.9, + "learning_rate": 7.724649629018962e-05, + "loss": 6.5292, + "step": 1612 + }, + { + "epoch": 0.9, + "learning_rate": 7.722588623248146e-05, + "loss": 6.5911, + "step": 1613 + }, + { + "epoch": 0.9, + "learning_rate": 7.72052761747733e-05, + "loss": 6.3152, + "step": 1614 + }, + { + "epoch": 0.91, + "learning_rate": 7.718466611706512e-05, + "loss": 6.3507, + "step": 1615 + }, + { + "epoch": 0.91, + "learning_rate": 7.716405605935697e-05, + "loss": 6.2601, + "step": 1616 + }, + { + "epoch": 0.91, + "learning_rate": 7.714344600164881e-05, + "loss": 6.358, + "step": 1617 + }, + { + "epoch": 0.91, + "learning_rate": 7.712283594394065e-05, + "loss": 6.2729, + "step": 1618 + }, + { + "epoch": 0.91, + "learning_rate": 7.710222588623248e-05, + "loss": 6.25, + "step": 1619 + }, + { + "epoch": 0.91, + "learning_rate": 7.708161582852433e-05, + "loss": 6.4666, + "step": 1620 + }, + { + "epoch": 0.91, + "learning_rate": 7.706100577081616e-05, + "loss": 6.496, + "step": 1621 + }, + { + "epoch": 0.91, + "learning_rate": 7.7040395713108e-05, + "loss": 6.2441, + "step": 1622 + }, + { + "epoch": 0.91, + "learning_rate": 7.701978565539983e-05, + "loss": 6.433, + "step": 1623 + }, + { + "epoch": 0.91, + "learning_rate": 7.699917559769167e-05, + "loss": 6.1261, + "step": 1624 + }, + { + "epoch": 0.91, + "learning_rate": 7.697856553998352e-05, + "loss": 6.445, + "step": 1625 + }, + { + "epoch": 0.91, + "learning_rate": 7.695795548227536e-05, + "loss": 6.487, + "step": 1626 + }, + { + "epoch": 0.91, + "learning_rate": 7.69373454245672e-05, + "loss": 6.3512, + "step": 1627 + }, + { + "epoch": 0.91, + "learning_rate": 7.691673536685903e-05, + "loss": 6.3751, + "step": 1628 + }, + { + "epoch": 0.91, + "learning_rate": 7.689612530915086e-05, + "loss": 6.2509, + "step": 1629 + }, + { + "epoch": 0.91, + "learning_rate": 7.68755152514427e-05, + "loss": 6.1044, + "step": 1630 + }, + { + "epoch": 0.91, + "learning_rate": 7.685490519373455e-05, + "loss": 6.1718, + "step": 1631 + }, + { + "epoch": 0.91, + "learning_rate": 7.683429513602638e-05, + "loss": 6.4578, + "step": 1632 + }, + { + "epoch": 0.92, + "learning_rate": 7.681368507831822e-05, + "loss": 6.1984, + "step": 1633 + }, + { + "epoch": 0.92, + "learning_rate": 7.679307502061007e-05, + "loss": 6.3056, + "step": 1634 + }, + { + "epoch": 0.92, + "learning_rate": 7.67724649629019e-05, + "loss": 6.1774, + "step": 1635 + }, + { + "epoch": 0.92, + "learning_rate": 7.675185490519374e-05, + "loss": 6.2339, + "step": 1636 + }, + { + "epoch": 0.92, + "learning_rate": 7.673124484748557e-05, + "loss": 6.0308, + "step": 1637 + }, + { + "epoch": 0.92, + "learning_rate": 7.671063478977741e-05, + "loss": 6.3409, + "step": 1638 + }, + { + "epoch": 0.92, + "learning_rate": 7.669002473206926e-05, + "loss": 6.0673, + "step": 1639 + }, + { + "epoch": 0.92, + "learning_rate": 7.66694146743611e-05, + "loss": 5.9582, + "step": 1640 + }, + { + "epoch": 0.92, + "learning_rate": 7.664880461665293e-05, + "loss": 6.1056, + "step": 1641 + }, + { + "epoch": 0.92, + "learning_rate": 7.662819455894476e-05, + "loss": 6.0144, + "step": 1642 + }, + { + "epoch": 0.92, + "learning_rate": 7.66075845012366e-05, + "loss": 6.0707, + "step": 1643 + }, + { + "epoch": 0.92, + "learning_rate": 7.658697444352845e-05, + "loss": 5.8554, + "step": 1644 + }, + { + "epoch": 0.92, + "learning_rate": 7.656636438582029e-05, + "loss": 5.969, + "step": 1645 + }, + { + "epoch": 0.92, + "learning_rate": 7.654575432811212e-05, + "loss": 5.866, + "step": 1646 + }, + { + "epoch": 0.92, + "learning_rate": 7.652514427040396e-05, + "loss": 5.3577, + "step": 1647 + }, + { + "epoch": 0.92, + "learning_rate": 7.65045342126958e-05, + "loss": 5.5293, + "step": 1648 + }, + { + "epoch": 0.92, + "learning_rate": 7.648392415498764e-05, + "loss": 5.8241, + "step": 1649 + }, + { + "epoch": 0.92, + "learning_rate": 7.646331409727948e-05, + "loss": 4.7029, + "step": 1650 + }, + { + "epoch": 0.93, + "learning_rate": 7.644270403957131e-05, + "loss": 6.8336, + "step": 1651 + }, + { + "epoch": 0.93, + "learning_rate": 7.642209398186315e-05, + "loss": 6.8522, + "step": 1652 + }, + { + "epoch": 0.93, + "learning_rate": 7.6401483924155e-05, + "loss": 6.8629, + "step": 1653 + }, + { + "epoch": 0.93, + "learning_rate": 7.638087386644684e-05, + "loss": 6.4754, + "step": 1654 + }, + { + "epoch": 0.93, + "learning_rate": 7.636026380873867e-05, + "loss": 6.3516, + "step": 1655 + }, + { + "epoch": 0.93, + "learning_rate": 7.63396537510305e-05, + "loss": 6.5487, + "step": 1656 + }, + { + "epoch": 0.93, + "learning_rate": 7.631904369332234e-05, + "loss": 6.4746, + "step": 1657 + }, + { + "epoch": 0.93, + "learning_rate": 7.629843363561419e-05, + "loss": 6.4064, + "step": 1658 + }, + { + "epoch": 0.93, + "learning_rate": 7.627782357790603e-05, + "loss": 6.18, + "step": 1659 + }, + { + "epoch": 0.93, + "learning_rate": 7.625721352019786e-05, + "loss": 6.3152, + "step": 1660 + }, + { + "epoch": 0.93, + "learning_rate": 7.62366034624897e-05, + "loss": 6.3586, + "step": 1661 + }, + { + "epoch": 0.93, + "learning_rate": 7.621599340478153e-05, + "loss": 6.2372, + "step": 1662 + }, + { + "epoch": 0.93, + "learning_rate": 7.619538334707338e-05, + "loss": 6.1655, + "step": 1663 + }, + { + "epoch": 0.93, + "learning_rate": 7.61747732893652e-05, + "loss": 6.4477, + "step": 1664 + }, + { + "epoch": 0.93, + "learning_rate": 7.615416323165705e-05, + "loss": 6.4334, + "step": 1665 + }, + { + "epoch": 0.93, + "learning_rate": 7.613355317394889e-05, + "loss": 6.3551, + "step": 1666 + }, + { + "epoch": 0.93, + "learning_rate": 7.611294311624074e-05, + "loss": 6.3373, + "step": 1667 + }, + { + "epoch": 0.93, + "learning_rate": 7.609233305853258e-05, + "loss": 6.3359, + "step": 1668 + }, + { + "epoch": 0.94, + "learning_rate": 7.60717230008244e-05, + "loss": 6.2578, + "step": 1669 + }, + { + "epoch": 0.94, + "learning_rate": 7.605111294311624e-05, + "loss": 6.6616, + "step": 1670 + }, + { + "epoch": 0.94, + "learning_rate": 7.603050288540808e-05, + "loss": 6.2783, + "step": 1671 + }, + { + "epoch": 0.94, + "learning_rate": 7.600989282769993e-05, + "loss": 6.4077, + "step": 1672 + }, + { + "epoch": 0.94, + "learning_rate": 7.598928276999176e-05, + "loss": 6.3968, + "step": 1673 + }, + { + "epoch": 0.94, + "learning_rate": 7.59686727122836e-05, + "loss": 6.5875, + "step": 1674 + }, + { + "epoch": 0.94, + "learning_rate": 7.594806265457544e-05, + "loss": 6.578, + "step": 1675 + }, + { + "epoch": 0.94, + "learning_rate": 7.592745259686727e-05, + "loss": 6.086, + "step": 1676 + }, + { + "epoch": 0.94, + "learning_rate": 7.590684253915912e-05, + "loss": 6.1493, + "step": 1677 + }, + { + "epoch": 0.94, + "learning_rate": 7.588623248145094e-05, + "loss": 6.2071, + "step": 1678 + }, + { + "epoch": 0.94, + "learning_rate": 7.586562242374279e-05, + "loss": 6.4358, + "step": 1679 + }, + { + "epoch": 0.94, + "learning_rate": 7.584501236603463e-05, + "loss": 6.4295, + "step": 1680 + }, + { + "epoch": 0.94, + "learning_rate": 7.582440230832648e-05, + "loss": 6.1906, + "step": 1681 + }, + { + "epoch": 0.94, + "learning_rate": 7.58037922506183e-05, + "loss": 6.1454, + "step": 1682 + }, + { + "epoch": 0.94, + "learning_rate": 7.578318219291013e-05, + "loss": 6.303, + "step": 1683 + }, + { + "epoch": 0.94, + "learning_rate": 7.576257213520198e-05, + "loss": 6.2885, + "step": 1684 + }, + { + "epoch": 0.94, + "learning_rate": 7.574196207749382e-05, + "loss": 6.3411, + "step": 1685 + }, + { + "epoch": 0.95, + "learning_rate": 7.572135201978566e-05, + "loss": 6.2932, + "step": 1686 + }, + { + "epoch": 0.95, + "learning_rate": 7.57007419620775e-05, + "loss": 6.2879, + "step": 1687 + }, + { + "epoch": 0.95, + "learning_rate": 7.568013190436934e-05, + "loss": 6.3244, + "step": 1688 + }, + { + "epoch": 0.95, + "learning_rate": 7.565952184666117e-05, + "loss": 6.3288, + "step": 1689 + }, + { + "epoch": 0.95, + "learning_rate": 7.563891178895301e-05, + "loss": 5.8205, + "step": 1690 + }, + { + "epoch": 0.95, + "learning_rate": 7.561830173124485e-05, + "loss": 6.1915, + "step": 1691 + }, + { + "epoch": 0.95, + "learning_rate": 7.559769167353668e-05, + "loss": 6.1144, + "step": 1692 + }, + { + "epoch": 0.95, + "learning_rate": 7.557708161582853e-05, + "loss": 6.2232, + "step": 1693 + }, + { + "epoch": 0.95, + "learning_rate": 7.555647155812037e-05, + "loss": 6.1406, + "step": 1694 + }, + { + "epoch": 0.95, + "learning_rate": 7.553586150041221e-05, + "loss": 6.0955, + "step": 1695 + }, + { + "epoch": 0.95, + "learning_rate": 7.551525144270404e-05, + "loss": 5.8895, + "step": 1696 + }, + { + "epoch": 0.95, + "learning_rate": 7.549464138499587e-05, + "loss": 5.6026, + "step": 1697 + }, + { + "epoch": 0.95, + "learning_rate": 7.547403132728772e-05, + "loss": 5.9436, + "step": 1698 + }, + { + "epoch": 0.95, + "learning_rate": 7.545342126957956e-05, + "loss": 5.3007, + "step": 1699 + }, + { + "epoch": 0.95, + "learning_rate": 7.54328112118714e-05, + "loss": 4.936, + "step": 1700 + }, + { + "epoch": 0.95, + "learning_rate": 7.541220115416323e-05, + "loss": 6.8987, + "step": 1701 + }, + { + "epoch": 0.95, + "learning_rate": 7.539159109645508e-05, + "loss": 6.6929, + "step": 1702 + }, + { + "epoch": 0.95, + "learning_rate": 7.537098103874691e-05, + "loss": 6.8099, + "step": 1703 + }, + { + "epoch": 0.96, + "learning_rate": 7.535037098103875e-05, + "loss": 6.8334, + "step": 1704 + }, + { + "epoch": 0.96, + "learning_rate": 7.532976092333058e-05, + "loss": 6.7801, + "step": 1705 + }, + { + "epoch": 0.96, + "learning_rate": 7.530915086562242e-05, + "loss": 6.567, + "step": 1706 + }, + { + "epoch": 0.96, + "learning_rate": 7.528854080791427e-05, + "loss": 6.5278, + "step": 1707 + }, + { + "epoch": 0.96, + "learning_rate": 7.526793075020611e-05, + "loss": 6.5903, + "step": 1708 + }, + { + "epoch": 0.96, + "learning_rate": 7.524732069249794e-05, + "loss": 6.6181, + "step": 1709 + }, + { + "epoch": 0.96, + "learning_rate": 7.522671063478977e-05, + "loss": 6.7529, + "step": 1710 + }, + { + "epoch": 0.96, + "learning_rate": 7.520610057708161e-05, + "loss": 6.3619, + "step": 1711 + }, + { + "epoch": 0.96, + "learning_rate": 7.518549051937346e-05, + "loss": 6.5322, + "step": 1712 + }, + { + "epoch": 0.96, + "learning_rate": 7.51648804616653e-05, + "loss": 6.6858, + "step": 1713 + }, + { + "epoch": 0.96, + "learning_rate": 7.514427040395714e-05, + "loss": 6.5605, + "step": 1714 + }, + { + "epoch": 0.96, + "learning_rate": 7.512366034624897e-05, + "loss": 6.3312, + "step": 1715 + }, + { + "epoch": 0.96, + "learning_rate": 7.510305028854082e-05, + "loss": 6.4678, + "step": 1716 + }, + { + "epoch": 0.96, + "learning_rate": 7.508244023083265e-05, + "loss": 6.6303, + "step": 1717 + }, + { + "epoch": 0.96, + "learning_rate": 7.506183017312449e-05, + "loss": 6.5718, + "step": 1718 + }, + { + "epoch": 0.96, + "learning_rate": 7.504122011541632e-05, + "loss": 6.2367, + "step": 1719 + }, + { + "epoch": 0.96, + "learning_rate": 7.502061005770816e-05, + "loss": 6.5411, + "step": 1720 + }, + { + "epoch": 0.96, + "learning_rate": 7.500000000000001e-05, + "loss": 6.2289, + "step": 1721 + }, + { + "epoch": 0.97, + "learning_rate": 7.497938994229185e-05, + "loss": 6.2666, + "step": 1722 + }, + { + "epoch": 0.97, + "learning_rate": 7.495877988458368e-05, + "loss": 6.3734, + "step": 1723 + }, + { + "epoch": 0.97, + "learning_rate": 7.493816982687551e-05, + "loss": 6.348, + "step": 1724 + }, + { + "epoch": 0.97, + "learning_rate": 7.491755976916735e-05, + "loss": 6.3644, + "step": 1725 + }, + { + "epoch": 0.97, + "learning_rate": 7.48969497114592e-05, + "loss": 6.202, + "step": 1726 + }, + { + "epoch": 0.97, + "learning_rate": 7.487633965375104e-05, + "loss": 6.3468, + "step": 1727 + }, + { + "epoch": 0.97, + "learning_rate": 7.485572959604287e-05, + "loss": 6.4894, + "step": 1728 + }, + { + "epoch": 0.97, + "learning_rate": 7.483511953833471e-05, + "loss": 6.4731, + "step": 1729 + }, + { + "epoch": 0.97, + "learning_rate": 7.481450948062654e-05, + "loss": 6.3239, + "step": 1730 + }, + { + "epoch": 0.97, + "learning_rate": 7.479389942291839e-05, + "loss": 6.0349, + "step": 1731 + }, + { + "epoch": 0.97, + "learning_rate": 7.477328936521023e-05, + "loss": 6.2273, + "step": 1732 + }, + { + "epoch": 0.97, + "learning_rate": 7.475267930750206e-05, + "loss": 6.1722, + "step": 1733 + }, + { + "epoch": 0.97, + "learning_rate": 7.47320692497939e-05, + "loss": 6.1955, + "step": 1734 + }, + { + "epoch": 0.97, + "learning_rate": 7.471145919208575e-05, + "loss": 6.4348, + "step": 1735 + }, + { + "epoch": 0.97, + "learning_rate": 7.469084913437758e-05, + "loss": 6.4883, + "step": 1736 + }, + { + "epoch": 0.97, + "learning_rate": 7.467023907666942e-05, + "loss": 5.9311, + "step": 1737 + }, + { + "epoch": 0.97, + "learning_rate": 7.464962901896125e-05, + "loss": 6.1114, + "step": 1738 + }, + { + "epoch": 0.97, + "learning_rate": 7.462901896125309e-05, + "loss": 6.0823, + "step": 1739 + }, + { + "epoch": 0.98, + "learning_rate": 7.460840890354494e-05, + "loss": 6.1456, + "step": 1740 + }, + { + "epoch": 0.98, + "learning_rate": 7.458779884583678e-05, + "loss": 6.0059, + "step": 1741 + }, + { + "epoch": 0.98, + "learning_rate": 7.456718878812861e-05, + "loss": 6.1632, + "step": 1742 + }, + { + "epoch": 0.98, + "learning_rate": 7.454657873042045e-05, + "loss": 6.14, + "step": 1743 + }, + { + "epoch": 0.98, + "learning_rate": 7.452596867271228e-05, + "loss": 5.7737, + "step": 1744 + }, + { + "epoch": 0.98, + "learning_rate": 7.450535861500413e-05, + "loss": 5.8, + "step": 1745 + }, + { + "epoch": 0.98, + "learning_rate": 7.448474855729597e-05, + "loss": 5.9718, + "step": 1746 + }, + { + "epoch": 0.98, + "learning_rate": 7.44641384995878e-05, + "loss": 5.899, + "step": 1747 + }, + { + "epoch": 0.98, + "learning_rate": 7.444352844187964e-05, + "loss": 5.3356, + "step": 1748 + }, + { + "epoch": 0.98, + "learning_rate": 7.442291838417149e-05, + "loss": 5.0416, + "step": 1749 + }, + { + "epoch": 0.98, + "learning_rate": 7.440230832646332e-05, + "loss": 5.1395, + "step": 1750 + }, + { + "epoch": 0.98, + "learning_rate": 7.438169826875515e-05, + "loss": 6.5064, + "step": 1751 + }, + { + "epoch": 0.98, + "learning_rate": 7.436108821104699e-05, + "loss": 6.4211, + "step": 1752 + }, + { + "epoch": 0.98, + "learning_rate": 7.434047815333883e-05, + "loss": 6.7651, + "step": 1753 + }, + { + "epoch": 0.98, + "learning_rate": 7.431986809563068e-05, + "loss": 6.5074, + "step": 1754 + }, + { + "epoch": 0.98, + "learning_rate": 7.429925803792252e-05, + "loss": 6.7467, + "step": 1755 + }, + { + "epoch": 0.98, + "learning_rate": 7.427864798021435e-05, + "loss": 6.2585, + "step": 1756 + }, + { + "epoch": 0.98, + "learning_rate": 7.425803792250618e-05, + "loss": 6.2969, + "step": 1757 + }, + { + "epoch": 0.99, + "learning_rate": 7.423742786479802e-05, + "loss": 6.3445, + "step": 1758 + }, + { + "epoch": 0.99, + "learning_rate": 7.421681780708987e-05, + "loss": 6.4834, + "step": 1759 + }, + { + "epoch": 0.99, + "learning_rate": 7.41962077493817e-05, + "loss": 6.3534, + "step": 1760 + }, + { + "epoch": 0.99, + "learning_rate": 7.417559769167354e-05, + "loss": 6.3592, + "step": 1761 + }, + { + "epoch": 0.99, + "learning_rate": 7.415498763396538e-05, + "loss": 6.2128, + "step": 1762 + }, + { + "epoch": 0.99, + "learning_rate": 7.413437757625723e-05, + "loss": 6.2277, + "step": 1763 + }, + { + "epoch": 0.99, + "learning_rate": 7.411376751854906e-05, + "loss": 6.4297, + "step": 1764 + }, + { + "epoch": 0.99, + "learning_rate": 7.409315746084089e-05, + "loss": 6.1905, + "step": 1765 + }, + { + "epoch": 0.99, + "learning_rate": 7.407254740313273e-05, + "loss": 6.353, + "step": 1766 + }, + { + "epoch": 0.99, + "learning_rate": 7.405193734542457e-05, + "loss": 6.0997, + "step": 1767 + }, + { + "epoch": 0.99, + "learning_rate": 7.403132728771642e-05, + "loss": 6.0389, + "step": 1768 + }, + { + "epoch": 0.99, + "learning_rate": 7.401071723000826e-05, + "loss": 6.1909, + "step": 1769 + }, + { + "epoch": 0.99, + "learning_rate": 7.399010717230009e-05, + "loss": 6.1645, + "step": 1770 + }, + { + "epoch": 0.99, + "learning_rate": 7.396949711459192e-05, + "loss": 6.3143, + "step": 1771 + }, + { + "epoch": 0.99, + "learning_rate": 7.394888705688376e-05, + "loss": 6.1018, + "step": 1772 + }, + { + "epoch": 0.99, + "learning_rate": 7.39282769991756e-05, + "loss": 6.2156, + "step": 1773 + }, + { + "epoch": 0.99, + "learning_rate": 7.390766694146744e-05, + "loss": 6.1875, + "step": 1774 + }, + { + "epoch": 0.99, + "learning_rate": 7.388705688375928e-05, + "loss": 6.091, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 7.386644682605112e-05, + "loss": 6.1959, + "step": 1776 + }, + { + "epoch": 1.0, + "learning_rate": 7.384583676834295e-05, + "loss": 5.9469, + "step": 1777 + }, + { + "epoch": 1.0, + "learning_rate": 7.38252267106348e-05, + "loss": 6.0417, + "step": 1778 + }, + { + "epoch": 1.0, + "learning_rate": 7.380461665292662e-05, + "loss": 6.1808, + "step": 1779 + }, + { + "epoch": 1.0, + "learning_rate": 7.378400659521847e-05, + "loss": 5.9332, + "step": 1780 + }, + { + "epoch": 1.0, + "learning_rate": 7.376339653751031e-05, + "loss": 5.8296, + "step": 1781 + }, + { + "epoch": 1.0, + "learning_rate": 7.374278647980215e-05, + "loss": 5.8377, + "step": 1782 + }, + { + "epoch": 1.0, + "learning_rate": 7.372217642209398e-05, + "loss": 5.4688, + "step": 1783 + }, + { + "epoch": 1.0, + "learning_rate": 7.370156636438581e-05, + "loss": 5.7211, + "step": 1784 + }, + { + "epoch": 1.0, + "learning_rate": 7.368095630667766e-05, + "loss": 6.722, + "step": 1785 + }, + { + "epoch": 1.0, + "learning_rate": 7.36603462489695e-05, + "loss": 6.8508, + "step": 1786 + }, + { + "epoch": 1.0, + "learning_rate": 7.363973619126134e-05, + "loss": 6.4532, + "step": 1787 + }, + { + "epoch": 1.0, + "learning_rate": 7.361912613355317e-05, + "loss": 6.3318, + "step": 1788 + }, + { + "epoch": 1.0, + "learning_rate": 7.359851607584502e-05, + "loss": 6.3874, + "step": 1789 + }, + { + "epoch": 1.0, + "learning_rate": 7.357790601813686e-05, + "loss": 6.3401, + "step": 1790 + }, + { + "epoch": 1.0, + "learning_rate": 7.355729596042869e-05, + "loss": 6.1114, + "step": 1791 + }, + { + "epoch": 1.0, + "learning_rate": 7.353668590272052e-05, + "loss": 6.2306, + "step": 1792 + }, + { + "epoch": 1.01, + "learning_rate": 7.351607584501236e-05, + "loss": 6.25, + "step": 1793 + }, + { + "epoch": 1.01, + "learning_rate": 7.349546578730421e-05, + "loss": 6.356, + "step": 1794 + }, + { + "epoch": 1.01, + "learning_rate": 7.347485572959605e-05, + "loss": 6.6261, + "step": 1795 + }, + { + "epoch": 1.01, + "learning_rate": 7.34542456718879e-05, + "loss": 6.3501, + "step": 1796 + }, + { + "epoch": 1.01, + "learning_rate": 7.343363561417972e-05, + "loss": 6.0522, + "step": 1797 + }, + { + "epoch": 1.01, + "learning_rate": 7.341302555647155e-05, + "loss": 6.3529, + "step": 1798 + }, + { + "epoch": 1.01, + "learning_rate": 7.33924154987634e-05, + "loss": 6.238, + "step": 1799 + }, + { + "epoch": 1.01, + "learning_rate": 7.337180544105524e-05, + "loss": 6.3849, + "step": 1800 + }, + { + "epoch": 1.01, + "learning_rate": 7.335119538334708e-05, + "loss": 6.1521, + "step": 1801 + }, + { + "epoch": 1.01, + "learning_rate": 7.333058532563891e-05, + "loss": 6.0876, + "step": 1802 + }, + { + "epoch": 1.01, + "learning_rate": 7.330997526793076e-05, + "loss": 6.1947, + "step": 1803 + }, + { + "epoch": 1.01, + "learning_rate": 7.328936521022259e-05, + "loss": 6.2976, + "step": 1804 + }, + { + "epoch": 1.01, + "learning_rate": 7.326875515251443e-05, + "loss": 6.0193, + "step": 1805 + }, + { + "epoch": 1.01, + "learning_rate": 7.324814509480626e-05, + "loss": 6.1926, + "step": 1806 + }, + { + "epoch": 1.01, + "learning_rate": 7.32275350370981e-05, + "loss": 5.8249, + "step": 1807 + }, + { + "epoch": 1.01, + "learning_rate": 7.320692497938995e-05, + "loss": 6.082, + "step": 1808 + }, + { + "epoch": 1.01, + "learning_rate": 7.318631492168179e-05, + "loss": 6.2665, + "step": 1809 + }, + { + "epoch": 1.01, + "learning_rate": 7.316570486397363e-05, + "loss": 6.1055, + "step": 1810 + }, + { + "epoch": 1.02, + "learning_rate": 7.314509480626546e-05, + "loss": 6.1897, + "step": 1811 + }, + { + "epoch": 1.02, + "learning_rate": 7.31244847485573e-05, + "loss": 6.4533, + "step": 1812 + }, + { + "epoch": 1.02, + "learning_rate": 7.310387469084914e-05, + "loss": 6.2788, + "step": 1813 + }, + { + "epoch": 1.02, + "learning_rate": 7.308326463314098e-05, + "loss": 6.0038, + "step": 1814 + }, + { + "epoch": 1.02, + "learning_rate": 7.306265457543281e-05, + "loss": 6.5261, + "step": 1815 + }, + { + "epoch": 1.02, + "learning_rate": 7.304204451772465e-05, + "loss": 6.5334, + "step": 1816 + }, + { + "epoch": 1.02, + "learning_rate": 7.30214344600165e-05, + "loss": 6.3832, + "step": 1817 + }, + { + "epoch": 1.02, + "learning_rate": 7.300082440230833e-05, + "loss": 6.3488, + "step": 1818 + }, + { + "epoch": 1.02, + "learning_rate": 7.298021434460017e-05, + "loss": 6.3527, + "step": 1819 + }, + { + "epoch": 1.02, + "learning_rate": 7.2959604286892e-05, + "loss": 6.3372, + "step": 1820 + }, + { + "epoch": 1.02, + "learning_rate": 7.293899422918384e-05, + "loss": 6.2826, + "step": 1821 + }, + { + "epoch": 1.02, + "learning_rate": 7.291838417147569e-05, + "loss": 6.0329, + "step": 1822 + }, + { + "epoch": 1.02, + "learning_rate": 7.289777411376753e-05, + "loss": 5.9251, + "step": 1823 + }, + { + "epoch": 1.02, + "learning_rate": 7.287716405605936e-05, + "loss": 6.2476, + "step": 1824 + }, + { + "epoch": 1.02, + "learning_rate": 7.285655399835119e-05, + "loss": 5.8438, + "step": 1825 + }, + { + "epoch": 1.02, + "learning_rate": 7.283594394064303e-05, + "loss": 6.2469, + "step": 1826 + }, + { + "epoch": 1.02, + "learning_rate": 7.281533388293488e-05, + "loss": 5.6287, + "step": 1827 + }, + { + "epoch": 1.02, + "learning_rate": 7.279472382522672e-05, + "loss": 5.8789, + "step": 1828 + }, + { + "epoch": 1.03, + "learning_rate": 7.277411376751855e-05, + "loss": 5.8134, + "step": 1829 + }, + { + "epoch": 1.03, + "learning_rate": 7.27535037098104e-05, + "loss": 5.5021, + "step": 1830 + }, + { + "epoch": 1.03, + "learning_rate": 7.273289365210222e-05, + "loss": 5.2052, + "step": 1831 + }, + { + "epoch": 1.03, + "learning_rate": 7.271228359439407e-05, + "loss": 5.6184, + "step": 1832 + }, + { + "epoch": 1.03, + "learning_rate": 7.269167353668591e-05, + "loss": 5.1316, + "step": 1833 + }, + { + "epoch": 1.03, + "learning_rate": 7.267106347897774e-05, + "loss": 5.4363, + "step": 1834 + }, + { + "epoch": 1.03, + "learning_rate": 7.265045342126958e-05, + "loss": 6.8016, + "step": 1835 + }, + { + "epoch": 1.03, + "learning_rate": 7.262984336356143e-05, + "loss": 6.6736, + "step": 1836 + }, + { + "epoch": 1.03, + "learning_rate": 7.260923330585327e-05, + "loss": 6.539, + "step": 1837 + }, + { + "epoch": 1.03, + "learning_rate": 7.25886232481451e-05, + "loss": 6.5137, + "step": 1838 + }, + { + "epoch": 1.03, + "learning_rate": 7.256801319043693e-05, + "loss": 6.417, + "step": 1839 + }, + { + "epoch": 1.03, + "learning_rate": 7.254740313272877e-05, + "loss": 6.5209, + "step": 1840 + }, + { + "epoch": 1.03, + "learning_rate": 7.252679307502062e-05, + "loss": 6.3556, + "step": 1841 + }, + { + "epoch": 1.03, + "learning_rate": 7.250618301731246e-05, + "loss": 6.4017, + "step": 1842 + }, + { + "epoch": 1.03, + "learning_rate": 7.248557295960429e-05, + "loss": 6.4154, + "step": 1843 + }, + { + "epoch": 1.03, + "learning_rate": 7.246496290189613e-05, + "loss": 6.655, + "step": 1844 + }, + { + "epoch": 1.03, + "learning_rate": 7.244435284418796e-05, + "loss": 6.2909, + "step": 1845 + }, + { + "epoch": 1.03, + "learning_rate": 7.24237427864798e-05, + "loss": 6.2227, + "step": 1846 + }, + { + "epoch": 1.04, + "learning_rate": 7.240313272877164e-05, + "loss": 6.3727, + "step": 1847 + }, + { + "epoch": 1.04, + "learning_rate": 7.238252267106348e-05, + "loss": 6.0336, + "step": 1848 + }, + { + "epoch": 1.04, + "learning_rate": 7.236191261335532e-05, + "loss": 6.3156, + "step": 1849 + }, + { + "epoch": 1.04, + "learning_rate": 7.234130255564717e-05, + "loss": 6.4039, + "step": 1850 + }, + { + "epoch": 1.04, + "learning_rate": 7.2320692497939e-05, + "loss": 6.2369, + "step": 1851 + }, + { + "epoch": 1.04, + "learning_rate": 7.230008244023083e-05, + "loss": 6.2632, + "step": 1852 + }, + { + "epoch": 1.04, + "learning_rate": 7.227947238252267e-05, + "loss": 6.1932, + "step": 1853 + }, + { + "epoch": 1.04, + "learning_rate": 7.225886232481451e-05, + "loss": 5.9555, + "step": 1854 + }, + { + "epoch": 1.04, + "learning_rate": 7.223825226710636e-05, + "loss": 6.291, + "step": 1855 + }, + { + "epoch": 1.04, + "learning_rate": 7.22176422093982e-05, + "loss": 6.1569, + "step": 1856 + }, + { + "epoch": 1.04, + "learning_rate": 7.219703215169003e-05, + "loss": 6.2376, + "step": 1857 + }, + { + "epoch": 1.04, + "learning_rate": 7.217642209398187e-05, + "loss": 6.0347, + "step": 1858 + }, + { + "epoch": 1.04, + "learning_rate": 7.21558120362737e-05, + "loss": 6.0034, + "step": 1859 + }, + { + "epoch": 1.04, + "learning_rate": 7.213520197856555e-05, + "loss": 6.1119, + "step": 1860 + }, + { + "epoch": 1.04, + "learning_rate": 7.211459192085738e-05, + "loss": 5.9701, + "step": 1861 + }, + { + "epoch": 1.04, + "learning_rate": 7.209398186314922e-05, + "loss": 6.3005, + "step": 1862 + }, + { + "epoch": 1.04, + "learning_rate": 7.207337180544106e-05, + "loss": 6.1879, + "step": 1863 + }, + { + "epoch": 1.04, + "learning_rate": 7.20527617477329e-05, + "loss": 6.1382, + "step": 1864 + }, + { + "epoch": 1.05, + "learning_rate": 7.203215169002474e-05, + "loss": 6.4226, + "step": 1865 + }, + { + "epoch": 1.05, + "learning_rate": 7.201154163231657e-05, + "loss": 5.8622, + "step": 1866 + }, + { + "epoch": 1.05, + "learning_rate": 7.199093157460841e-05, + "loss": 6.1435, + "step": 1867 + }, + { + "epoch": 1.05, + "learning_rate": 7.197032151690025e-05, + "loss": 6.0543, + "step": 1868 + }, + { + "epoch": 1.05, + "learning_rate": 7.19497114591921e-05, + "loss": 6.0013, + "step": 1869 + }, + { + "epoch": 1.05, + "learning_rate": 7.192910140148393e-05, + "loss": 6.0608, + "step": 1870 + }, + { + "epoch": 1.05, + "learning_rate": 7.190849134377577e-05, + "loss": 6.2417, + "step": 1871 + }, + { + "epoch": 1.05, + "learning_rate": 7.18878812860676e-05, + "loss": 5.9665, + "step": 1872 + }, + { + "epoch": 1.05, + "learning_rate": 7.186727122835944e-05, + "loss": 5.9812, + "step": 1873 + }, + { + "epoch": 1.05, + "learning_rate": 7.184666117065129e-05, + "loss": 5.9866, + "step": 1874 + }, + { + "epoch": 1.05, + "learning_rate": 7.182605111294311e-05, + "loss": 5.9281, + "step": 1875 + }, + { + "epoch": 1.05, + "learning_rate": 7.180544105523496e-05, + "loss": 5.9735, + "step": 1876 + }, + { + "epoch": 1.05, + "learning_rate": 7.17848309975268e-05, + "loss": 5.907, + "step": 1877 + }, + { + "epoch": 1.05, + "learning_rate": 7.176422093981863e-05, + "loss": 5.7226, + "step": 1878 + }, + { + "epoch": 1.05, + "learning_rate": 7.174361088211046e-05, + "loss": 5.9319, + "step": 1879 + }, + { + "epoch": 1.05, + "learning_rate": 7.17230008244023e-05, + "loss": 5.2742, + "step": 1880 + }, + { + "epoch": 1.05, + "learning_rate": 7.170239076669415e-05, + "loss": 5.2245, + "step": 1881 + }, + { + "epoch": 1.05, + "learning_rate": 7.168178070898599e-05, + "loss": 5.6733, + "step": 1882 + }, + { + "epoch": 1.06, + "learning_rate": 7.166117065127783e-05, + "loss": 4.9064, + "step": 1883 + }, + { + "epoch": 1.06, + "learning_rate": 7.164056059356966e-05, + "loss": 5.2621, + "step": 1884 + }, + { + "epoch": 1.06, + "learning_rate": 7.161995053586151e-05, + "loss": 6.3769, + "step": 1885 + }, + { + "epoch": 1.06, + "learning_rate": 7.159934047815334e-05, + "loss": 6.5522, + "step": 1886 + }, + { + "epoch": 1.06, + "learning_rate": 7.157873042044518e-05, + "loss": 6.7445, + "step": 1887 + }, + { + "epoch": 1.06, + "learning_rate": 7.155812036273702e-05, + "loss": 6.741, + "step": 1888 + }, + { + "epoch": 1.06, + "learning_rate": 7.153751030502885e-05, + "loss": 6.4579, + "step": 1889 + }, + { + "epoch": 1.06, + "learning_rate": 7.15169002473207e-05, + "loss": 6.5868, + "step": 1890 + }, + { + "epoch": 1.06, + "learning_rate": 7.149629018961254e-05, + "loss": 6.5091, + "step": 1891 + }, + { + "epoch": 1.06, + "learning_rate": 7.147568013190437e-05, + "loss": 6.3756, + "step": 1892 + }, + { + "epoch": 1.06, + "learning_rate": 7.14550700741962e-05, + "loss": 6.2766, + "step": 1893 + }, + { + "epoch": 1.06, + "learning_rate": 7.143446001648804e-05, + "loss": 6.1053, + "step": 1894 + }, + { + "epoch": 1.06, + "learning_rate": 7.141384995877989e-05, + "loss": 6.3209, + "step": 1895 + }, + { + "epoch": 1.06, + "learning_rate": 7.139323990107173e-05, + "loss": 6.3257, + "step": 1896 + }, + { + "epoch": 1.06, + "learning_rate": 7.137262984336357e-05, + "loss": 6.3316, + "step": 1897 + }, + { + "epoch": 1.06, + "learning_rate": 7.13520197856554e-05, + "loss": 6.6602, + "step": 1898 + }, + { + "epoch": 1.06, + "learning_rate": 7.133140972794723e-05, + "loss": 6.095, + "step": 1899 + }, + { + "epoch": 1.07, + "learning_rate": 7.131079967023908e-05, + "loss": 6.2617, + "step": 1900 + }, + { + "epoch": 1.07, + "learning_rate": 7.129018961253092e-05, + "loss": 6.2745, + "step": 1901 + }, + { + "epoch": 1.07, + "learning_rate": 7.126957955482275e-05, + "loss": 6.0751, + "step": 1902 + }, + { + "epoch": 1.07, + "learning_rate": 7.12489694971146e-05, + "loss": 6.0871, + "step": 1903 + }, + { + "epoch": 1.07, + "learning_rate": 7.122835943940644e-05, + "loss": 6.429, + "step": 1904 + }, + { + "epoch": 1.07, + "learning_rate": 7.120774938169828e-05, + "loss": 6.3095, + "step": 1905 + }, + { + "epoch": 1.07, + "learning_rate": 7.118713932399011e-05, + "loss": 6.2037, + "step": 1906 + }, + { + "epoch": 1.07, + "learning_rate": 7.116652926628194e-05, + "loss": 6.0297, + "step": 1907 + }, + { + "epoch": 1.07, + "learning_rate": 7.114591920857378e-05, + "loss": 6.2163, + "step": 1908 + }, + { + "epoch": 1.07, + "learning_rate": 7.112530915086563e-05, + "loss": 6.0086, + "step": 1909 + }, + { + "epoch": 1.07, + "learning_rate": 7.110469909315747e-05, + "loss": 6.082, + "step": 1910 + }, + { + "epoch": 1.07, + "learning_rate": 7.10840890354493e-05, + "loss": 6.339, + "step": 1911 + }, + { + "epoch": 1.07, + "learning_rate": 7.106347897774114e-05, + "loss": 6.3119, + "step": 1912 + }, + { + "epoch": 1.07, + "learning_rate": 7.104286892003297e-05, + "loss": 6.1296, + "step": 1913 + }, + { + "epoch": 1.07, + "learning_rate": 7.102225886232482e-05, + "loss": 6.3032, + "step": 1914 + }, + { + "epoch": 1.07, + "learning_rate": 7.100164880461666e-05, + "loss": 6.2159, + "step": 1915 + }, + { + "epoch": 1.07, + "learning_rate": 7.098103874690849e-05, + "loss": 6.0998, + "step": 1916 + }, + { + "epoch": 1.07, + "learning_rate": 7.096042868920033e-05, + "loss": 6.06, + "step": 1917 + }, + { + "epoch": 1.08, + "learning_rate": 7.093981863149218e-05, + "loss": 6.3286, + "step": 1918 + }, + { + "epoch": 1.08, + "learning_rate": 7.091920857378401e-05, + "loss": 6.1292, + "step": 1919 + }, + { + "epoch": 1.08, + "learning_rate": 7.089859851607585e-05, + "loss": 5.8884, + "step": 1920 + }, + { + "epoch": 1.08, + "learning_rate": 7.087798845836768e-05, + "loss": 5.8613, + "step": 1921 + }, + { + "epoch": 1.08, + "learning_rate": 7.085737840065952e-05, + "loss": 6.0918, + "step": 1922 + }, + { + "epoch": 1.08, + "learning_rate": 7.083676834295137e-05, + "loss": 6.0741, + "step": 1923 + }, + { + "epoch": 1.08, + "learning_rate": 7.081615828524321e-05, + "loss": 6.1614, + "step": 1924 + }, + { + "epoch": 1.08, + "learning_rate": 7.079554822753504e-05, + "loss": 6.2435, + "step": 1925 + }, + { + "epoch": 1.08, + "learning_rate": 7.077493816982687e-05, + "loss": 5.7098, + "step": 1926 + }, + { + "epoch": 1.08, + "learning_rate": 7.075432811211871e-05, + "loss": 5.7991, + "step": 1927 + }, + { + "epoch": 1.08, + "learning_rate": 7.073371805441056e-05, + "loss": 5.7895, + "step": 1928 + }, + { + "epoch": 1.08, + "learning_rate": 7.07131079967024e-05, + "loss": 5.4833, + "step": 1929 + }, + { + "epoch": 1.08, + "learning_rate": 7.069249793899423e-05, + "loss": 5.221, + "step": 1930 + }, + { + "epoch": 1.08, + "learning_rate": 7.067188788128607e-05, + "loss": 4.8789, + "step": 1931 + }, + { + "epoch": 1.08, + "learning_rate": 7.065127782357792e-05, + "loss": 5.4228, + "step": 1932 + }, + { + "epoch": 1.08, + "learning_rate": 7.063066776586975e-05, + "loss": 4.1895, + "step": 1933 + }, + { + "epoch": 1.08, + "learning_rate": 7.061005770816158e-05, + "loss": 4.2875, + "step": 1934 + }, + { + "epoch": 1.08, + "learning_rate": 7.058944765045342e-05, + "loss": 6.7407, + "step": 1935 + }, + { + "epoch": 1.09, + "learning_rate": 7.056883759274526e-05, + "loss": 6.4043, + "step": 1936 + }, + { + "epoch": 1.09, + "learning_rate": 7.05482275350371e-05, + "loss": 6.6195, + "step": 1937 + }, + { + "epoch": 1.09, + "learning_rate": 7.052761747732895e-05, + "loss": 6.6489, + "step": 1938 + }, + { + "epoch": 1.09, + "learning_rate": 7.050700741962078e-05, + "loss": 6.6773, + "step": 1939 + }, + { + "epoch": 1.09, + "learning_rate": 7.048639736191261e-05, + "loss": 6.6056, + "step": 1940 + }, + { + "epoch": 1.09, + "learning_rate": 7.046578730420445e-05, + "loss": 6.4539, + "step": 1941 + }, + { + "epoch": 1.09, + "learning_rate": 7.04451772464963e-05, + "loss": 6.2917, + "step": 1942 + }, + { + "epoch": 1.09, + "learning_rate": 7.042456718878814e-05, + "loss": 6.441, + "step": 1943 + }, + { + "epoch": 1.09, + "learning_rate": 7.040395713107997e-05, + "loss": 6.2151, + "step": 1944 + }, + { + "epoch": 1.09, + "learning_rate": 7.038334707337181e-05, + "loss": 6.6105, + "step": 1945 + }, + { + "epoch": 1.09, + "learning_rate": 7.036273701566364e-05, + "loss": 6.119, + "step": 1946 + }, + { + "epoch": 1.09, + "learning_rate": 7.034212695795549e-05, + "loss": 6.59, + "step": 1947 + }, + { + "epoch": 1.09, + "learning_rate": 7.032151690024732e-05, + "loss": 6.0529, + "step": 1948 + }, + { + "epoch": 1.09, + "learning_rate": 7.030090684253916e-05, + "loss": 6.4011, + "step": 1949 + }, + { + "epoch": 1.09, + "learning_rate": 7.0280296784831e-05, + "loss": 6.0184, + "step": 1950 + }, + { + "epoch": 1.09, + "learning_rate": 7.025968672712285e-05, + "loss": 6.382, + "step": 1951 + }, + { + "epoch": 1.09, + "learning_rate": 7.023907666941469e-05, + "loss": 6.1324, + "step": 1952 + }, + { + "epoch": 1.09, + "learning_rate": 7.021846661170652e-05, + "loss": 6.2615, + "step": 1953 + }, + { + "epoch": 1.1, + "learning_rate": 7.019785655399835e-05, + "loss": 6.2009, + "step": 1954 + }, + { + "epoch": 1.1, + "learning_rate": 7.017724649629019e-05, + "loss": 6.2041, + "step": 1955 + }, + { + "epoch": 1.1, + "learning_rate": 7.015663643858204e-05, + "loss": 6.1434, + "step": 1956 + }, + { + "epoch": 1.1, + "learning_rate": 7.013602638087387e-05, + "loss": 6.161, + "step": 1957 + }, + { + "epoch": 1.1, + "learning_rate": 7.011541632316571e-05, + "loss": 6.2077, + "step": 1958 + }, + { + "epoch": 1.1, + "learning_rate": 7.009480626545755e-05, + "loss": 6.068, + "step": 1959 + }, + { + "epoch": 1.1, + "learning_rate": 7.007419620774938e-05, + "loss": 5.8918, + "step": 1960 + }, + { + "epoch": 1.1, + "learning_rate": 7.005358615004123e-05, + "loss": 6.1657, + "step": 1961 + }, + { + "epoch": 1.1, + "learning_rate": 7.003297609233306e-05, + "loss": 6.0497, + "step": 1962 + }, + { + "epoch": 1.1, + "learning_rate": 7.00123660346249e-05, + "loss": 6.0809, + "step": 1963 + }, + { + "epoch": 1.1, + "learning_rate": 6.999175597691674e-05, + "loss": 6.1268, + "step": 1964 + }, + { + "epoch": 1.1, + "learning_rate": 6.997114591920859e-05, + "loss": 6.162, + "step": 1965 + }, + { + "epoch": 1.1, + "learning_rate": 6.995053586150042e-05, + "loss": 6.0088, + "step": 1966 + }, + { + "epoch": 1.1, + "learning_rate": 6.992992580379225e-05, + "loss": 6.0618, + "step": 1967 + }, + { + "epoch": 1.1, + "learning_rate": 6.990931574608409e-05, + "loss": 5.7682, + "step": 1968 + }, + { + "epoch": 1.1, + "learning_rate": 6.988870568837593e-05, + "loss": 6.016, + "step": 1969 + }, + { + "epoch": 1.1, + "learning_rate": 6.986809563066778e-05, + "loss": 6.1438, + "step": 1970 + }, + { + "epoch": 1.1, + "learning_rate": 6.98474855729596e-05, + "loss": 5.9619, + "step": 1971 + }, + { + "epoch": 1.11, + "learning_rate": 6.982687551525145e-05, + "loss": 6.2298, + "step": 1972 + }, + { + "epoch": 1.11, + "learning_rate": 6.980626545754329e-05, + "loss": 6.0031, + "step": 1973 + }, + { + "epoch": 1.11, + "learning_rate": 6.978565539983512e-05, + "loss": 5.9033, + "step": 1974 + }, + { + "epoch": 1.11, + "learning_rate": 6.976504534212697e-05, + "loss": 6.3249, + "step": 1975 + }, + { + "epoch": 1.11, + "learning_rate": 6.97444352844188e-05, + "loss": 5.9185, + "step": 1976 + }, + { + "epoch": 1.11, + "learning_rate": 6.972382522671064e-05, + "loss": 5.7083, + "step": 1977 + }, + { + "epoch": 1.11, + "learning_rate": 6.970321516900248e-05, + "loss": 5.6635, + "step": 1978 + }, + { + "epoch": 1.11, + "learning_rate": 6.968260511129432e-05, + "loss": 5.7517, + "step": 1979 + }, + { + "epoch": 1.11, + "learning_rate": 6.966199505358615e-05, + "loss": 5.657, + "step": 1980 + }, + { + "epoch": 1.11, + "learning_rate": 6.964138499587798e-05, + "loss": 5.9614, + "step": 1981 + }, + { + "epoch": 1.11, + "learning_rate": 6.962077493816983e-05, + "loss": 5.2092, + "step": 1982 + }, + { + "epoch": 1.11, + "learning_rate": 6.960016488046167e-05, + "loss": 5.1481, + "step": 1983 + }, + { + "epoch": 1.11, + "learning_rate": 6.957955482275351e-05, + "loss": 4.6062, + "step": 1984 + }, + { + "epoch": 1.11, + "learning_rate": 6.955894476504534e-05, + "loss": 6.3583, + "step": 1985 + }, + { + "epoch": 1.11, + "learning_rate": 6.953833470733719e-05, + "loss": 6.4402, + "step": 1986 + }, + { + "epoch": 1.11, + "learning_rate": 6.951772464962902e-05, + "loss": 6.3579, + "step": 1987 + }, + { + "epoch": 1.11, + "learning_rate": 6.949711459192086e-05, + "loss": 6.3863, + "step": 1988 + }, + { + "epoch": 1.11, + "learning_rate": 6.947650453421269e-05, + "loss": 6.2665, + "step": 1989 + }, + { + "epoch": 1.12, + "learning_rate": 6.945589447650453e-05, + "loss": 6.167, + "step": 1990 + }, + { + "epoch": 1.12, + "learning_rate": 6.943528441879638e-05, + "loss": 6.2893, + "step": 1991 + }, + { + "epoch": 1.12, + "learning_rate": 6.941467436108822e-05, + "loss": 5.9272, + "step": 1992 + }, + { + "epoch": 1.12, + "learning_rate": 6.939406430338005e-05, + "loss": 6.3382, + "step": 1993 + }, + { + "epoch": 1.12, + "learning_rate": 6.937345424567188e-05, + "loss": 6.2742, + "step": 1994 + }, + { + "epoch": 1.12, + "learning_rate": 6.935284418796372e-05, + "loss": 6.3302, + "step": 1995 + }, + { + "epoch": 1.12, + "learning_rate": 6.933223413025557e-05, + "loss": 6.3073, + "step": 1996 + }, + { + "epoch": 1.12, + "learning_rate": 6.931162407254741e-05, + "loss": 6.2554, + "step": 1997 + }, + { + "epoch": 1.12, + "learning_rate": 6.929101401483924e-05, + "loss": 6.1768, + "step": 1998 + }, + { + "epoch": 1.12, + "learning_rate": 6.927040395713108e-05, + "loss": 6.2346, + "step": 1999 + }, + { + "epoch": 1.12, + "learning_rate": 6.924979389942293e-05, + "loss": 6.0219, + "step": 2000 + }, + { + "epoch": 1.12, + "eval_loss": 15.496562004089355, + "eval_runtime": 1323.7007, + "eval_samples_per_second": 1.996, + "eval_steps_per_second": 0.25, + "eval_wer": 1.000654502181674, + "step": 2000 + }, + { + "epoch": 1.12, + "learning_rate": 6.922918384171476e-05, + "loss": 6.1631, + "step": 2001 + }, + { + "epoch": 1.12, + "learning_rate": 6.92085737840066e-05, + "loss": 6.1246, + "step": 2002 + }, + { + "epoch": 1.12, + "learning_rate": 6.918796372629843e-05, + "loss": 6.188, + "step": 2003 + }, + { + "epoch": 1.12, + "learning_rate": 6.916735366859027e-05, + "loss": 6.1743, + "step": 2004 + }, + { + "epoch": 1.12, + "learning_rate": 6.914674361088212e-05, + "loss": 5.9699, + "step": 2005 + }, + { + "epoch": 1.12, + "learning_rate": 6.912613355317396e-05, + "loss": 5.9532, + "step": 2006 + }, + { + "epoch": 1.12, + "learning_rate": 6.910552349546579e-05, + "loss": 6.0209, + "step": 2007 + }, + { + "epoch": 1.13, + "learning_rate": 6.908491343775762e-05, + "loss": 6.053, + "step": 2008 + }, + { + "epoch": 1.13, + "learning_rate": 6.906430338004946e-05, + "loss": 6.0931, + "step": 2009 + }, + { + "epoch": 1.13, + "learning_rate": 6.904369332234131e-05, + "loss": 5.7504, + "step": 2010 + }, + { + "epoch": 1.13, + "learning_rate": 6.902308326463315e-05, + "loss": 6.0419, + "step": 2011 + }, + { + "epoch": 1.13, + "learning_rate": 6.900247320692498e-05, + "loss": 6.0502, + "step": 2012 + }, + { + "epoch": 1.13, + "learning_rate": 6.898186314921682e-05, + "loss": 6.0082, + "step": 2013 + }, + { + "epoch": 1.13, + "learning_rate": 6.896125309150865e-05, + "loss": 6.2437, + "step": 2014 + }, + { + "epoch": 1.13, + "learning_rate": 6.89406430338005e-05, + "loss": 5.982, + "step": 2015 + }, + { + "epoch": 1.13, + "learning_rate": 6.892003297609234e-05, + "loss": 5.9837, + "step": 2016 + }, + { + "epoch": 1.13, + "learning_rate": 6.889942291838417e-05, + "loss": 5.8709, + "step": 2017 + }, + { + "epoch": 1.13, + "learning_rate": 6.887881286067601e-05, + "loss": 6.1067, + "step": 2018 + }, + { + "epoch": 1.13, + "learning_rate": 6.885820280296786e-05, + "loss": 5.9583, + "step": 2019 + }, + { + "epoch": 1.13, + "learning_rate": 6.88375927452597e-05, + "loss": 6.0019, + "step": 2020 + }, + { + "epoch": 1.13, + "learning_rate": 6.881698268755153e-05, + "loss": 5.8361, + "step": 2021 + }, + { + "epoch": 1.13, + "learning_rate": 6.879637262984336e-05, + "loss": 5.7651, + "step": 2022 + }, + { + "epoch": 1.13, + "learning_rate": 6.87757625721352e-05, + "loss": 5.7909, + "step": 2023 + }, + { + "epoch": 1.13, + "learning_rate": 6.875515251442705e-05, + "loss": 6.0283, + "step": 2024 + }, + { + "epoch": 1.14, + "learning_rate": 6.873454245671889e-05, + "loss": 5.8875, + "step": 2025 + }, + { + "epoch": 1.14, + "learning_rate": 6.871393239901072e-05, + "loss": 5.7122, + "step": 2026 + }, + { + "epoch": 1.14, + "learning_rate": 6.869332234130256e-05, + "loss": 5.9138, + "step": 2027 + }, + { + "epoch": 1.14, + "learning_rate": 6.867271228359439e-05, + "loss": 5.6584, + "step": 2028 + }, + { + "epoch": 1.14, + "learning_rate": 6.865210222588624e-05, + "loss": 5.7417, + "step": 2029 + }, + { + "epoch": 1.14, + "learning_rate": 6.863149216817807e-05, + "loss": 5.7285, + "step": 2030 + }, + { + "epoch": 1.14, + "learning_rate": 6.861088211046991e-05, + "loss": 5.6083, + "step": 2031 + }, + { + "epoch": 1.14, + "learning_rate": 6.859027205276175e-05, + "loss": 5.0971, + "step": 2032 + }, + { + "epoch": 1.14, + "learning_rate": 6.85696619950536e-05, + "loss": 5.2789, + "step": 2033 + }, + { + "epoch": 1.14, + "learning_rate": 6.854905193734543e-05, + "loss": 5.0661, + "step": 2034 + }, + { + "epoch": 1.14, + "learning_rate": 6.852844187963726e-05, + "loss": 6.5277, + "step": 2035 + }, + { + "epoch": 1.14, + "learning_rate": 6.85078318219291e-05, + "loss": 6.5096, + "step": 2036 + }, + { + "epoch": 1.14, + "learning_rate": 6.848722176422094e-05, + "loss": 6.4087, + "step": 2037 + }, + { + "epoch": 1.14, + "learning_rate": 6.846661170651279e-05, + "loss": 6.3021, + "step": 2038 + }, + { + "epoch": 1.14, + "learning_rate": 6.844600164880463e-05, + "loss": 6.3638, + "step": 2039 + }, + { + "epoch": 1.14, + "learning_rate": 6.842539159109646e-05, + "loss": 6.2208, + "step": 2040 + }, + { + "epoch": 1.14, + "learning_rate": 6.840478153338829e-05, + "loss": 5.873, + "step": 2041 + }, + { + "epoch": 1.14, + "learning_rate": 6.838417147568013e-05, + "loss": 6.3203, + "step": 2042 + }, + { + "epoch": 1.15, + "learning_rate": 6.836356141797198e-05, + "loss": 6.3828, + "step": 2043 + }, + { + "epoch": 1.15, + "learning_rate": 6.83429513602638e-05, + "loss": 6.1402, + "step": 2044 + }, + { + "epoch": 1.15, + "learning_rate": 6.832234130255565e-05, + "loss": 6.3322, + "step": 2045 + }, + { + "epoch": 1.15, + "learning_rate": 6.830173124484749e-05, + "loss": 6.213, + "step": 2046 + }, + { + "epoch": 1.15, + "learning_rate": 6.828112118713934e-05, + "loss": 6.1765, + "step": 2047 + }, + { + "epoch": 1.15, + "learning_rate": 6.826051112943117e-05, + "loss": 6.1591, + "step": 2048 + }, + { + "epoch": 1.15, + "learning_rate": 6.8239901071723e-05, + "loss": 6.2926, + "step": 2049 + }, + { + "epoch": 1.15, + "learning_rate": 6.821929101401484e-05, + "loss": 6.3418, + "step": 2050 + }, + { + "epoch": 1.15, + "learning_rate": 6.819868095630668e-05, + "loss": 6.0468, + "step": 2051 + }, + { + "epoch": 1.15, + "learning_rate": 6.817807089859853e-05, + "loss": 6.0098, + "step": 2052 + }, + { + "epoch": 1.15, + "learning_rate": 6.815746084089036e-05, + "loss": 6.3538, + "step": 2053 + }, + { + "epoch": 1.15, + "learning_rate": 6.81368507831822e-05, + "loss": 6.0953, + "step": 2054 + }, + { + "epoch": 1.15, + "learning_rate": 6.811624072547403e-05, + "loss": 6.2692, + "step": 2055 + }, + { + "epoch": 1.15, + "learning_rate": 6.809563066776587e-05, + "loss": 6.1488, + "step": 2056 + }, + { + "epoch": 1.15, + "learning_rate": 6.807502061005772e-05, + "loss": 5.7936, + "step": 2057 + }, + { + "epoch": 1.15, + "learning_rate": 6.805441055234955e-05, + "loss": 6.1277, + "step": 2058 + }, + { + "epoch": 1.15, + "learning_rate": 6.803380049464139e-05, + "loss": 6.2566, + "step": 2059 + }, + { + "epoch": 1.15, + "learning_rate": 6.801319043693323e-05, + "loss": 6.0533, + "step": 2060 + }, + { + "epoch": 1.16, + "learning_rate": 6.799258037922506e-05, + "loss": 6.0881, + "step": 2061 + }, + { + "epoch": 1.16, + "learning_rate": 6.79719703215169e-05, + "loss": 5.9229, + "step": 2062 + }, + { + "epoch": 1.16, + "learning_rate": 6.795136026380874e-05, + "loss": 5.9916, + "step": 2063 + }, + { + "epoch": 1.16, + "learning_rate": 6.793075020610058e-05, + "loss": 5.7237, + "step": 2064 + }, + { + "epoch": 1.16, + "learning_rate": 6.791014014839242e-05, + "loss": 6.0816, + "step": 2065 + }, + { + "epoch": 1.16, + "learning_rate": 6.788953009068427e-05, + "loss": 6.0755, + "step": 2066 + }, + { + "epoch": 1.16, + "learning_rate": 6.78689200329761e-05, + "loss": 5.8085, + "step": 2067 + }, + { + "epoch": 1.16, + "learning_rate": 6.784830997526794e-05, + "loss": 6.0001, + "step": 2068 + }, + { + "epoch": 1.16, + "learning_rate": 6.782769991755977e-05, + "loss": 5.9467, + "step": 2069 + }, + { + "epoch": 1.16, + "learning_rate": 6.780708985985161e-05, + "loss": 5.9096, + "step": 2070 + }, + { + "epoch": 1.16, + "learning_rate": 6.778647980214346e-05, + "loss": 6.0679, + "step": 2071 + }, + { + "epoch": 1.16, + "learning_rate": 6.776586974443528e-05, + "loss": 5.8684, + "step": 2072 + }, + { + "epoch": 1.16, + "learning_rate": 6.774525968672713e-05, + "loss": 5.7592, + "step": 2073 + }, + { + "epoch": 1.16, + "learning_rate": 6.772464962901897e-05, + "loss": 5.6765, + "step": 2074 + }, + { + "epoch": 1.16, + "learning_rate": 6.77040395713108e-05, + "loss": 5.9594, + "step": 2075 + }, + { + "epoch": 1.16, + "learning_rate": 6.768342951360263e-05, + "loss": 5.6854, + "step": 2076 + }, + { + "epoch": 1.16, + "learning_rate": 6.766281945589447e-05, + "loss": 5.665, + "step": 2077 + }, + { + "epoch": 1.16, + "learning_rate": 6.764220939818632e-05, + "loss": 5.699, + "step": 2078 + }, + { + "epoch": 1.17, + "learning_rate": 6.762159934047816e-05, + "loss": 5.453, + "step": 2079 + }, + { + "epoch": 1.17, + "learning_rate": 6.760098928277e-05, + "loss": 5.6179, + "step": 2080 + }, + { + "epoch": 1.17, + "learning_rate": 6.758037922506183e-05, + "loss": 5.4101, + "step": 2081 + }, + { + "epoch": 1.17, + "learning_rate": 6.755976916735366e-05, + "loss": 4.7626, + "step": 2082 + }, + { + "epoch": 1.17, + "learning_rate": 6.753915910964551e-05, + "loss": 4.6147, + "step": 2083 + }, + { + "epoch": 1.17, + "learning_rate": 6.751854905193735e-05, + "loss": 4.2746, + "step": 2084 + }, + { + "epoch": 1.17, + "learning_rate": 6.749793899422918e-05, + "loss": 6.6981, + "step": 2085 + }, + { + "epoch": 1.17, + "learning_rate": 6.747732893652102e-05, + "loss": 6.2172, + "step": 2086 + }, + { + "epoch": 1.17, + "learning_rate": 6.745671887881287e-05, + "loss": 6.583, + "step": 2087 + }, + { + "epoch": 1.17, + "learning_rate": 6.74361088211047e-05, + "loss": 6.5243, + "step": 2088 + }, + { + "epoch": 1.17, + "learning_rate": 6.741549876339654e-05, + "loss": 6.3215, + "step": 2089 + }, + { + "epoch": 1.17, + "learning_rate": 6.739488870568837e-05, + "loss": 6.3071, + "step": 2090 + }, + { + "epoch": 1.17, + "learning_rate": 6.737427864798021e-05, + "loss": 6.1755, + "step": 2091 + }, + { + "epoch": 1.17, + "learning_rate": 6.735366859027206e-05, + "loss": 6.2807, + "step": 2092 + }, + { + "epoch": 1.17, + "learning_rate": 6.73330585325639e-05, + "loss": 6.1846, + "step": 2093 + }, + { + "epoch": 1.17, + "learning_rate": 6.731244847485574e-05, + "loss": 6.3009, + "step": 2094 + }, + { + "epoch": 1.17, + "learning_rate": 6.729183841714757e-05, + "loss": 6.3237, + "step": 2095 + }, + { + "epoch": 1.17, + "learning_rate": 6.72712283594394e-05, + "loss": 6.2201, + "step": 2096 + }, + { + "epoch": 1.18, + "learning_rate": 6.725061830173125e-05, + "loss": 6.2391, + "step": 2097 + }, + { + "epoch": 1.18, + "learning_rate": 6.723000824402309e-05, + "loss": 6.0313, + "step": 2098 + }, + { + "epoch": 1.18, + "learning_rate": 6.720939818631492e-05, + "loss": 6.2093, + "step": 2099 + }, + { + "epoch": 1.18, + "learning_rate": 6.718878812860676e-05, + "loss": 6.0364, + "step": 2100 + }, + { + "epoch": 1.18, + "learning_rate": 6.716817807089861e-05, + "loss": 6.3371, + "step": 2101 + }, + { + "epoch": 1.18, + "learning_rate": 6.714756801319044e-05, + "loss": 5.9394, + "step": 2102 + }, + { + "epoch": 1.18, + "learning_rate": 6.712695795548228e-05, + "loss": 5.8904, + "step": 2103 + }, + { + "epoch": 1.18, + "learning_rate": 6.710634789777411e-05, + "loss": 6.0285, + "step": 2104 + }, + { + "epoch": 1.18, + "learning_rate": 6.708573784006595e-05, + "loss": 6.0394, + "step": 2105 + }, + { + "epoch": 1.18, + "learning_rate": 6.70651277823578e-05, + "loss": 6.0823, + "step": 2106 + }, + { + "epoch": 1.18, + "learning_rate": 6.704451772464964e-05, + "loss": 6.2314, + "step": 2107 + }, + { + "epoch": 1.18, + "learning_rate": 6.702390766694147e-05, + "loss": 5.9968, + "step": 2108 + }, + { + "epoch": 1.18, + "learning_rate": 6.70032976092333e-05, + "loss": 5.9124, + "step": 2109 + }, + { + "epoch": 1.18, + "learning_rate": 6.698268755152514e-05, + "loss": 5.8474, + "step": 2110 + }, + { + "epoch": 1.18, + "learning_rate": 6.696207749381699e-05, + "loss": 6.0957, + "step": 2111 + }, + { + "epoch": 1.18, + "learning_rate": 6.694146743610883e-05, + "loss": 6.0672, + "step": 2112 + }, + { + "epoch": 1.18, + "learning_rate": 6.692085737840066e-05, + "loss": 6.0414, + "step": 2113 + }, + { + "epoch": 1.18, + "learning_rate": 6.69002473206925e-05, + "loss": 6.1328, + "step": 2114 + }, + { + "epoch": 1.19, + "learning_rate": 6.687963726298435e-05, + "loss": 5.8443, + "step": 2115 + }, + { + "epoch": 1.19, + "learning_rate": 6.685902720527618e-05, + "loss": 5.8522, + "step": 2116 + }, + { + "epoch": 1.19, + "learning_rate": 6.6838417147568e-05, + "loss": 5.7923, + "step": 2117 + }, + { + "epoch": 1.19, + "learning_rate": 6.681780708985985e-05, + "loss": 6.144, + "step": 2118 + }, + { + "epoch": 1.19, + "learning_rate": 6.67971970321517e-05, + "loss": 5.9532, + "step": 2119 + }, + { + "epoch": 1.19, + "learning_rate": 6.677658697444354e-05, + "loss": 5.7745, + "step": 2120 + }, + { + "epoch": 1.19, + "learning_rate": 6.675597691673538e-05, + "loss": 6.0354, + "step": 2121 + }, + { + "epoch": 1.19, + "learning_rate": 6.673536685902721e-05, + "loss": 6.0307, + "step": 2122 + }, + { + "epoch": 1.19, + "learning_rate": 6.671475680131904e-05, + "loss": 5.6876, + "step": 2123 + }, + { + "epoch": 1.19, + "learning_rate": 6.669414674361088e-05, + "loss": 5.6976, + "step": 2124 + }, + { + "epoch": 1.19, + "learning_rate": 6.667353668590273e-05, + "loss": 5.7321, + "step": 2125 + }, + { + "epoch": 1.19, + "learning_rate": 6.665292662819457e-05, + "loss": 5.6091, + "step": 2126 + }, + { + "epoch": 1.19, + "learning_rate": 6.66323165704864e-05, + "loss": 5.8132, + "step": 2127 + }, + { + "epoch": 1.19, + "learning_rate": 6.661170651277824e-05, + "loss": 5.8644, + "step": 2128 + }, + { + "epoch": 1.19, + "learning_rate": 6.659109645507007e-05, + "loss": 5.3233, + "step": 2129 + }, + { + "epoch": 1.19, + "learning_rate": 6.657048639736192e-05, + "loss": 5.6143, + "step": 2130 + }, + { + "epoch": 1.19, + "learning_rate": 6.654987633965375e-05, + "loss": 5.1681, + "step": 2131 + }, + { + "epoch": 1.2, + "learning_rate": 6.652926628194559e-05, + "loss": 5.383, + "step": 2132 + }, + { + "epoch": 1.2, + "learning_rate": 6.650865622423743e-05, + "loss": 4.8962, + "step": 2133 + }, + { + "epoch": 1.2, + "learning_rate": 6.648804616652928e-05, + "loss": 4.9056, + "step": 2134 + }, + { + "epoch": 1.2, + "learning_rate": 6.64674361088211e-05, + "loss": 6.7507, + "step": 2135 + }, + { + "epoch": 1.2, + "learning_rate": 6.644682605111294e-05, + "loss": 6.6432, + "step": 2136 + }, + { + "epoch": 1.2, + "learning_rate": 6.642621599340478e-05, + "loss": 6.5765, + "step": 2137 + }, + { + "epoch": 1.2, + "learning_rate": 6.640560593569662e-05, + "loss": 6.3653, + "step": 2138 + }, + { + "epoch": 1.2, + "learning_rate": 6.638499587798847e-05, + "loss": 6.356, + "step": 2139 + }, + { + "epoch": 1.2, + "learning_rate": 6.63643858202803e-05, + "loss": 6.5398, + "step": 2140 + }, + { + "epoch": 1.2, + "learning_rate": 6.634377576257214e-05, + "loss": 6.3054, + "step": 2141 + }, + { + "epoch": 1.2, + "learning_rate": 6.632316570486398e-05, + "loss": 6.1053, + "step": 2142 + }, + { + "epoch": 1.2, + "learning_rate": 6.630255564715581e-05, + "loss": 6.2217, + "step": 2143 + }, + { + "epoch": 1.2, + "learning_rate": 6.628194558944766e-05, + "loss": 6.1067, + "step": 2144 + }, + { + "epoch": 1.2, + "learning_rate": 6.626133553173949e-05, + "loss": 6.5636, + "step": 2145 + }, + { + "epoch": 1.2, + "learning_rate": 6.624072547403133e-05, + "loss": 6.2459, + "step": 2146 + }, + { + "epoch": 1.2, + "learning_rate": 6.622011541632317e-05, + "loss": 6.1126, + "step": 2147 + }, + { + "epoch": 1.2, + "learning_rate": 6.619950535861502e-05, + "loss": 6.3532, + "step": 2148 + }, + { + "epoch": 1.2, + "learning_rate": 6.617889530090685e-05, + "loss": 6.1415, + "step": 2149 + }, + { + "epoch": 1.21, + "learning_rate": 6.615828524319868e-05, + "loss": 6.3646, + "step": 2150 + }, + { + "epoch": 1.21, + "learning_rate": 6.613767518549052e-05, + "loss": 6.1164, + "step": 2151 + }, + { + "epoch": 1.21, + "learning_rate": 6.611706512778236e-05, + "loss": 6.0621, + "step": 2152 + }, + { + "epoch": 1.21, + "learning_rate": 6.60964550700742e-05, + "loss": 6.1596, + "step": 2153 + }, + { + "epoch": 1.21, + "learning_rate": 6.607584501236604e-05, + "loss": 5.9848, + "step": 2154 + }, + { + "epoch": 1.21, + "learning_rate": 6.605523495465788e-05, + "loss": 6.0085, + "step": 2155 + }, + { + "epoch": 1.21, + "learning_rate": 6.603462489694971e-05, + "loss": 5.943, + "step": 2156 + }, + { + "epoch": 1.21, + "learning_rate": 6.601401483924155e-05, + "loss": 6.1705, + "step": 2157 + }, + { + "epoch": 1.21, + "learning_rate": 6.59934047815334e-05, + "loss": 5.9881, + "step": 2158 + }, + { + "epoch": 1.21, + "learning_rate": 6.597279472382523e-05, + "loss": 6.1217, + "step": 2159 + }, + { + "epoch": 1.21, + "learning_rate": 6.595218466611707e-05, + "loss": 5.9979, + "step": 2160 + }, + { + "epoch": 1.21, + "learning_rate": 6.593157460840891e-05, + "loss": 5.9699, + "step": 2161 + }, + { + "epoch": 1.21, + "learning_rate": 6.591096455070076e-05, + "loss": 6.0975, + "step": 2162 + }, + { + "epoch": 1.21, + "learning_rate": 6.589035449299259e-05, + "loss": 5.9272, + "step": 2163 + }, + { + "epoch": 1.21, + "learning_rate": 6.586974443528442e-05, + "loss": 5.9836, + "step": 2164 + }, + { + "epoch": 1.21, + "learning_rate": 6.584913437757626e-05, + "loss": 5.896, + "step": 2165 + }, + { + "epoch": 1.21, + "learning_rate": 6.58285243198681e-05, + "loss": 5.9604, + "step": 2166 + }, + { + "epoch": 1.21, + "learning_rate": 6.580791426215995e-05, + "loss": 5.9916, + "step": 2167 + }, + { + "epoch": 1.22, + "learning_rate": 6.578730420445178e-05, + "loss": 5.8871, + "step": 2168 + }, + { + "epoch": 1.22, + "learning_rate": 6.576669414674362e-05, + "loss": 5.8517, + "step": 2169 + }, + { + "epoch": 1.22, + "learning_rate": 6.574608408903545e-05, + "loss": 6.022, + "step": 2170 + }, + { + "epoch": 1.22, + "learning_rate": 6.572547403132729e-05, + "loss": 5.8726, + "step": 2171 + }, + { + "epoch": 1.22, + "learning_rate": 6.570486397361912e-05, + "loss": 6.0097, + "step": 2172 + }, + { + "epoch": 1.22, + "learning_rate": 6.568425391591096e-05, + "loss": 5.8198, + "step": 2173 + }, + { + "epoch": 1.22, + "learning_rate": 6.566364385820281e-05, + "loss": 5.8286, + "step": 2174 + }, + { + "epoch": 1.22, + "learning_rate": 6.564303380049465e-05, + "loss": 5.7201, + "step": 2175 + }, + { + "epoch": 1.22, + "learning_rate": 6.562242374278648e-05, + "loss": 5.5982, + "step": 2176 + }, + { + "epoch": 1.22, + "learning_rate": 6.560181368507831e-05, + "loss": 5.4879, + "step": 2177 + }, + { + "epoch": 1.22, + "learning_rate": 6.558120362737015e-05, + "loss": 5.4083, + "step": 2178 + }, + { + "epoch": 1.22, + "learning_rate": 6.5560593569662e-05, + "loss": 5.066, + "step": 2179 + }, + { + "epoch": 1.22, + "learning_rate": 6.553998351195384e-05, + "loss": 5.6788, + "step": 2180 + }, + { + "epoch": 1.22, + "learning_rate": 6.551937345424568e-05, + "loss": 5.1659, + "step": 2181 + }, + { + "epoch": 1.22, + "learning_rate": 6.549876339653751e-05, + "loss": 4.432, + "step": 2182 + }, + { + "epoch": 1.22, + "learning_rate": 6.547815333882934e-05, + "loss": 4.7796, + "step": 2183 + }, + { + "epoch": 1.22, + "learning_rate": 6.545754328112119e-05, + "loss": 4.5003, + "step": 2184 + }, + { + "epoch": 1.22, + "learning_rate": 6.543693322341303e-05, + "loss": 6.5239, + "step": 2185 + }, + { + "epoch": 1.23, + "learning_rate": 6.541632316570486e-05, + "loss": 6.4874, + "step": 2186 + }, + { + "epoch": 1.23, + "learning_rate": 6.53957131079967e-05, + "loss": 6.5089, + "step": 2187 + }, + { + "epoch": 1.23, + "learning_rate": 6.537510305028855e-05, + "loss": 6.6392, + "step": 2188 + }, + { + "epoch": 1.23, + "learning_rate": 6.535449299258039e-05, + "loss": 6.5906, + "step": 2189 + }, + { + "epoch": 1.23, + "learning_rate": 6.533388293487222e-05, + "loss": 6.2606, + "step": 2190 + }, + { + "epoch": 1.23, + "learning_rate": 6.531327287716405e-05, + "loss": 6.2502, + "step": 2191 + }, + { + "epoch": 1.23, + "learning_rate": 6.52926628194559e-05, + "loss": 6.459, + "step": 2192 + }, + { + "epoch": 1.23, + "learning_rate": 6.527205276174774e-05, + "loss": 6.287, + "step": 2193 + }, + { + "epoch": 1.23, + "learning_rate": 6.525144270403958e-05, + "loss": 6.0641, + "step": 2194 + }, + { + "epoch": 1.23, + "learning_rate": 6.523083264633141e-05, + "loss": 6.341, + "step": 2195 + }, + { + "epoch": 1.23, + "learning_rate": 6.521022258862325e-05, + "loss": 6.0879, + "step": 2196 + }, + { + "epoch": 1.23, + "learning_rate": 6.518961253091508e-05, + "loss": 6.3201, + "step": 2197 + }, + { + "epoch": 1.23, + "learning_rate": 6.516900247320693e-05, + "loss": 6.2408, + "step": 2198 + }, + { + "epoch": 1.23, + "learning_rate": 6.514839241549877e-05, + "loss": 6.1589, + "step": 2199 + }, + { + "epoch": 1.23, + "learning_rate": 6.51277823577906e-05, + "loss": 6.1206, + "step": 2200 + }, + { + "epoch": 1.23, + "learning_rate": 6.510717230008244e-05, + "loss": 6.1978, + "step": 2201 + }, + { + "epoch": 1.23, + "learning_rate": 6.508656224237429e-05, + "loss": 6.1662, + "step": 2202 + }, + { + "epoch": 1.23, + "learning_rate": 6.506595218466612e-05, + "loss": 6.119, + "step": 2203 + }, + { + "epoch": 1.24, + "learning_rate": 6.504534212695795e-05, + "loss": 6.2899, + "step": 2204 + }, + { + "epoch": 1.24, + "learning_rate": 6.502473206924979e-05, + "loss": 6.1891, + "step": 2205 + }, + { + "epoch": 1.24, + "learning_rate": 6.500412201154163e-05, + "loss": 6.1601, + "step": 2206 + }, + { + "epoch": 1.24, + "learning_rate": 6.498351195383348e-05, + "loss": 6.1654, + "step": 2207 + }, + { + "epoch": 1.24, + "learning_rate": 6.496290189612532e-05, + "loss": 5.8683, + "step": 2208 + }, + { + "epoch": 1.24, + "learning_rate": 6.494229183841715e-05, + "loss": 5.7457, + "step": 2209 + }, + { + "epoch": 1.24, + "learning_rate": 6.4921681780709e-05, + "loss": 6.0387, + "step": 2210 + }, + { + "epoch": 1.24, + "learning_rate": 6.490107172300082e-05, + "loss": 5.8191, + "step": 2211 + }, + { + "epoch": 1.24, + "learning_rate": 6.488046166529267e-05, + "loss": 5.9898, + "step": 2212 + }, + { + "epoch": 1.24, + "learning_rate": 6.485985160758451e-05, + "loss": 5.8303, + "step": 2213 + }, + { + "epoch": 1.24, + "learning_rate": 6.483924154987634e-05, + "loss": 6.0946, + "step": 2214 + }, + { + "epoch": 1.24, + "learning_rate": 6.481863149216818e-05, + "loss": 5.8074, + "step": 2215 + }, + { + "epoch": 1.24, + "learning_rate": 6.479802143446003e-05, + "loss": 5.9847, + "step": 2216 + }, + { + "epoch": 1.24, + "learning_rate": 6.477741137675186e-05, + "loss": 6.0669, + "step": 2217 + }, + { + "epoch": 1.24, + "learning_rate": 6.475680131904369e-05, + "loss": 6.1585, + "step": 2218 + }, + { + "epoch": 1.24, + "learning_rate": 6.473619126133553e-05, + "loss": 6.0244, + "step": 2219 + }, + { + "epoch": 1.24, + "learning_rate": 6.471558120362737e-05, + "loss": 6.0474, + "step": 2220 + }, + { + "epoch": 1.24, + "learning_rate": 6.469497114591922e-05, + "loss": 6.0693, + "step": 2221 + }, + { + "epoch": 1.25, + "learning_rate": 6.467436108821106e-05, + "loss": 5.6556, + "step": 2222 + }, + { + "epoch": 1.25, + "learning_rate": 6.465375103050289e-05, + "loss": 5.7469, + "step": 2223 + }, + { + "epoch": 1.25, + "learning_rate": 6.463314097279472e-05, + "loss": 5.7649, + "step": 2224 + }, + { + "epoch": 1.25, + "learning_rate": 6.461253091508656e-05, + "loss": 5.8245, + "step": 2225 + }, + { + "epoch": 1.25, + "learning_rate": 6.45919208573784e-05, + "loss": 5.5172, + "step": 2226 + }, + { + "epoch": 1.25, + "learning_rate": 6.457131079967024e-05, + "loss": 5.7024, + "step": 2227 + }, + { + "epoch": 1.25, + "learning_rate": 6.455070074196208e-05, + "loss": 5.5549, + "step": 2228 + }, + { + "epoch": 1.25, + "learning_rate": 6.453009068425392e-05, + "loss": 5.5432, + "step": 2229 + }, + { + "epoch": 1.25, + "learning_rate": 6.450948062654575e-05, + "loss": 5.3941, + "step": 2230 + }, + { + "epoch": 1.25, + "learning_rate": 6.44888705688376e-05, + "loss": 5.11, + "step": 2231 + }, + { + "epoch": 1.25, + "learning_rate": 6.446826051112943e-05, + "loss": 5.0231, + "step": 2232 + }, + { + "epoch": 1.25, + "learning_rate": 6.444765045342127e-05, + "loss": 4.5619, + "step": 2233 + }, + { + "epoch": 1.25, + "learning_rate": 6.442704039571311e-05, + "loss": 4.7537, + "step": 2234 + }, + { + "epoch": 1.25, + "learning_rate": 6.440643033800496e-05, + "loss": 6.4782, + "step": 2235 + }, + { + "epoch": 1.25, + "learning_rate": 6.438582028029679e-05, + "loss": 6.4069, + "step": 2236 + }, + { + "epoch": 1.25, + "learning_rate": 6.436521022258863e-05, + "loss": 6.267, + "step": 2237 + }, + { + "epoch": 1.25, + "learning_rate": 6.434460016488046e-05, + "loss": 6.4899, + "step": 2238 + }, + { + "epoch": 1.26, + "learning_rate": 6.43239901071723e-05, + "loss": 6.1803, + "step": 2239 + }, + { + "epoch": 1.26, + "learning_rate": 6.430338004946415e-05, + "loss": 6.2265, + "step": 2240 + }, + { + "epoch": 1.26, + "learning_rate": 6.428276999175598e-05, + "loss": 6.2026, + "step": 2241 + }, + { + "epoch": 1.26, + "learning_rate": 6.426215993404782e-05, + "loss": 6.2286, + "step": 2242 + }, + { + "epoch": 1.26, + "learning_rate": 6.424154987633966e-05, + "loss": 6.0059, + "step": 2243 + }, + { + "epoch": 1.26, + "learning_rate": 6.422093981863149e-05, + "loss": 5.9502, + "step": 2244 + }, + { + "epoch": 1.26, + "learning_rate": 6.420032976092334e-05, + "loss": 6.2846, + "step": 2245 + }, + { + "epoch": 1.26, + "learning_rate": 6.417971970321517e-05, + "loss": 6.0352, + "step": 2246 + }, + { + "epoch": 1.26, + "learning_rate": 6.415910964550701e-05, + "loss": 6.0844, + "step": 2247 + }, + { + "epoch": 1.26, + "learning_rate": 6.413849958779885e-05, + "loss": 5.9324, + "step": 2248 + }, + { + "epoch": 1.26, + "learning_rate": 6.41178895300907e-05, + "loss": 5.8395, + "step": 2249 + }, + { + "epoch": 1.26, + "learning_rate": 6.409727947238253e-05, + "loss": 5.7373, + "step": 2250 + }, + { + "epoch": 1.26, + "learning_rate": 6.407666941467436e-05, + "loss": 6.1256, + "step": 2251 + }, + { + "epoch": 1.26, + "learning_rate": 6.40560593569662e-05, + "loss": 6.0227, + "step": 2252 + }, + { + "epoch": 1.26, + "learning_rate": 6.403544929925804e-05, + "loss": 6.0676, + "step": 2253 + }, + { + "epoch": 1.26, + "learning_rate": 6.401483924154989e-05, + "loss": 5.9807, + "step": 2254 + }, + { + "epoch": 1.26, + "learning_rate": 6.399422918384172e-05, + "loss": 6.009, + "step": 2255 + }, + { + "epoch": 1.26, + "learning_rate": 6.397361912613356e-05, + "loss": 6.1252, + "step": 2256 + }, + { + "epoch": 1.27, + "learning_rate": 6.39530090684254e-05, + "loss": 5.7307, + "step": 2257 + }, + { + "epoch": 1.27, + "learning_rate": 6.393239901071723e-05, + "loss": 6.0216, + "step": 2258 + }, + { + "epoch": 1.27, + "learning_rate": 6.391178895300906e-05, + "loss": 5.9037, + "step": 2259 + }, + { + "epoch": 1.27, + "learning_rate": 6.38911788953009e-05, + "loss": 6.1228, + "step": 2260 + }, + { + "epoch": 1.27, + "learning_rate": 6.387056883759275e-05, + "loss": 6.0367, + "step": 2261 + }, + { + "epoch": 1.27, + "learning_rate": 6.384995877988459e-05, + "loss": 5.8727, + "step": 2262 + }, + { + "epoch": 1.27, + "learning_rate": 6.382934872217644e-05, + "loss": 6.0839, + "step": 2263 + }, + { + "epoch": 1.27, + "learning_rate": 6.380873866446827e-05, + "loss": 6.0069, + "step": 2264 + }, + { + "epoch": 1.27, + "learning_rate": 6.37881286067601e-05, + "loss": 6.0504, + "step": 2265 + }, + { + "epoch": 1.27, + "learning_rate": 6.376751854905194e-05, + "loss": 5.7871, + "step": 2266 + }, + { + "epoch": 1.27, + "learning_rate": 6.374690849134378e-05, + "loss": 5.8572, + "step": 2267 + }, + { + "epoch": 1.27, + "learning_rate": 6.372629843363561e-05, + "loss": 5.8462, + "step": 2268 + }, + { + "epoch": 1.27, + "learning_rate": 6.370568837592745e-05, + "loss": 5.9735, + "step": 2269 + }, + { + "epoch": 1.27, + "learning_rate": 6.36850783182193e-05, + "loss": 5.6834, + "step": 2270 + }, + { + "epoch": 1.27, + "learning_rate": 6.366446826051113e-05, + "loss": 6.12, + "step": 2271 + }, + { + "epoch": 1.27, + "learning_rate": 6.364385820280297e-05, + "loss": 5.8128, + "step": 2272 + }, + { + "epoch": 1.27, + "learning_rate": 6.36232481450948e-05, + "loss": 5.8983, + "step": 2273 + }, + { + "epoch": 1.27, + "learning_rate": 6.360263808738664e-05, + "loss": 5.6224, + "step": 2274 + }, + { + "epoch": 1.28, + "learning_rate": 6.358202802967849e-05, + "loss": 5.9593, + "step": 2275 + }, + { + "epoch": 1.28, + "learning_rate": 6.356141797197033e-05, + "loss": 5.32, + "step": 2276 + }, + { + "epoch": 1.28, + "learning_rate": 6.354080791426217e-05, + "loss": 5.1954, + "step": 2277 + }, + { + "epoch": 1.28, + "learning_rate": 6.352019785655399e-05, + "loss": 5.2401, + "step": 2278 + }, + { + "epoch": 1.28, + "learning_rate": 6.349958779884583e-05, + "loss": 5.5798, + "step": 2279 + }, + { + "epoch": 1.28, + "learning_rate": 6.347897774113768e-05, + "loss": 4.8629, + "step": 2280 + }, + { + "epoch": 1.28, + "learning_rate": 6.345836768342952e-05, + "loss": 4.9714, + "step": 2281 + }, + { + "epoch": 1.28, + "learning_rate": 6.343775762572135e-05, + "loss": 4.8109, + "step": 2282 + }, + { + "epoch": 1.28, + "learning_rate": 6.34171475680132e-05, + "loss": 4.9339, + "step": 2283 + }, + { + "epoch": 1.28, + "learning_rate": 6.339653751030504e-05, + "loss": 4.9987, + "step": 2284 + }, + { + "epoch": 1.28, + "learning_rate": 6.337592745259687e-05, + "loss": 6.771, + "step": 2285 + }, + { + "epoch": 1.28, + "learning_rate": 6.335531739488871e-05, + "loss": 6.2792, + "step": 2286 + }, + { + "epoch": 1.28, + "learning_rate": 6.333470733718054e-05, + "loss": 6.2168, + "step": 2287 + }, + { + "epoch": 1.28, + "learning_rate": 6.331409727947238e-05, + "loss": 6.2771, + "step": 2288 + }, + { + "epoch": 1.28, + "learning_rate": 6.329348722176423e-05, + "loss": 6.2932, + "step": 2289 + }, + { + "epoch": 1.28, + "learning_rate": 6.327287716405607e-05, + "loss": 6.0278, + "step": 2290 + }, + { + "epoch": 1.28, + "learning_rate": 6.32522671063479e-05, + "loss": 5.9444, + "step": 2291 + }, + { + "epoch": 1.28, + "learning_rate": 6.323165704863973e-05, + "loss": 6.3625, + "step": 2292 + }, + { + "epoch": 1.29, + "learning_rate": 6.321104699093157e-05, + "loss": 6.0556, + "step": 2293 + }, + { + "epoch": 1.29, + "learning_rate": 6.319043693322342e-05, + "loss": 6.0384, + "step": 2294 + }, + { + "epoch": 1.29, + "learning_rate": 6.316982687551526e-05, + "loss": 6.1691, + "step": 2295 + }, + { + "epoch": 1.29, + "learning_rate": 6.314921681780709e-05, + "loss": 5.9597, + "step": 2296 + }, + { + "epoch": 1.29, + "learning_rate": 6.312860676009893e-05, + "loss": 5.9306, + "step": 2297 + }, + { + "epoch": 1.29, + "learning_rate": 6.310799670239076e-05, + "loss": 5.9257, + "step": 2298 + }, + { + "epoch": 1.29, + "learning_rate": 6.308738664468261e-05, + "loss": 6.0685, + "step": 2299 + }, + { + "epoch": 1.29, + "learning_rate": 6.306677658697445e-05, + "loss": 6.0871, + "step": 2300 + }, + { + "epoch": 1.29, + "learning_rate": 6.304616652926628e-05, + "loss": 6.1666, + "step": 2301 + }, + { + "epoch": 1.29, + "learning_rate": 6.302555647155812e-05, + "loss": 5.9999, + "step": 2302 + }, + { + "epoch": 1.29, + "learning_rate": 6.300494641384997e-05, + "loss": 5.8489, + "step": 2303 + }, + { + "epoch": 1.29, + "learning_rate": 6.298433635614181e-05, + "loss": 6.0053, + "step": 2304 + }, + { + "epoch": 1.29, + "learning_rate": 6.296372629843364e-05, + "loss": 6.0093, + "step": 2305 + }, + { + "epoch": 1.29, + "learning_rate": 6.294311624072547e-05, + "loss": 6.0211, + "step": 2306 + }, + { + "epoch": 1.29, + "learning_rate": 6.292250618301731e-05, + "loss": 6.057, + "step": 2307 + }, + { + "epoch": 1.29, + "learning_rate": 6.290189612530916e-05, + "loss": 5.8718, + "step": 2308 + }, + { + "epoch": 1.29, + "learning_rate": 6.2881286067601e-05, + "loss": 5.8148, + "step": 2309 + }, + { + "epoch": 1.29, + "learning_rate": 6.286067600989283e-05, + "loss": 5.8424, + "step": 2310 + }, + { + "epoch": 1.3, + "learning_rate": 6.284006595218467e-05, + "loss": 5.9733, + "step": 2311 + }, + { + "epoch": 1.3, + "learning_rate": 6.28194558944765e-05, + "loss": 5.9925, + "step": 2312 + }, + { + "epoch": 1.3, + "learning_rate": 6.279884583676835e-05, + "loss": 6.0461, + "step": 2313 + }, + { + "epoch": 1.3, + "learning_rate": 6.277823577906018e-05, + "loss": 5.8378, + "step": 2314 + }, + { + "epoch": 1.3, + "learning_rate": 6.275762572135202e-05, + "loss": 5.9278, + "step": 2315 + }, + { + "epoch": 1.3, + "learning_rate": 6.273701566364386e-05, + "loss": 5.8177, + "step": 2316 + }, + { + "epoch": 1.3, + "learning_rate": 6.271640560593571e-05, + "loss": 5.7545, + "step": 2317 + }, + { + "epoch": 1.3, + "learning_rate": 6.269579554822754e-05, + "loss": 6.1442, + "step": 2318 + }, + { + "epoch": 1.3, + "learning_rate": 6.267518549051937e-05, + "loss": 5.6581, + "step": 2319 + }, + { + "epoch": 1.3, + "learning_rate": 6.265457543281121e-05, + "loss": 5.9769, + "step": 2320 + }, + { + "epoch": 1.3, + "learning_rate": 6.263396537510305e-05, + "loss": 6.0064, + "step": 2321 + }, + { + "epoch": 1.3, + "learning_rate": 6.26133553173949e-05, + "loss": 5.6422, + "step": 2322 + }, + { + "epoch": 1.3, + "learning_rate": 6.259274525968673e-05, + "loss": 5.7914, + "step": 2323 + }, + { + "epoch": 1.3, + "learning_rate": 6.257213520197857e-05, + "loss": 5.5305, + "step": 2324 + }, + { + "epoch": 1.3, + "learning_rate": 6.255152514427041e-05, + "loss": 5.5658, + "step": 2325 + }, + { + "epoch": 1.3, + "learning_rate": 6.253091508656224e-05, + "loss": 5.3647, + "step": 2326 + }, + { + "epoch": 1.3, + "learning_rate": 6.251030502885409e-05, + "loss": 5.2408, + "step": 2327 + }, + { + "epoch": 1.3, + "learning_rate": 6.248969497114592e-05, + "loss": 5.1506, + "step": 2328 + }, + { + "epoch": 1.31, + "learning_rate": 6.246908491343776e-05, + "loss": 4.9608, + "step": 2329 + }, + { + "epoch": 1.31, + "learning_rate": 6.24484748557296e-05, + "loss": 4.8251, + "step": 2330 + }, + { + "epoch": 1.31, + "learning_rate": 6.242786479802145e-05, + "loss": 4.4761, + "step": 2331 + }, + { + "epoch": 1.31, + "learning_rate": 6.240725474031328e-05, + "loss": 4.3094, + "step": 2332 + }, + { + "epoch": 1.31, + "learning_rate": 6.23866446826051e-05, + "loss": 4.5418, + "step": 2333 + }, + { + "epoch": 1.31, + "learning_rate": 6.236603462489695e-05, + "loss": 4.4399, + "step": 2334 + }, + { + "epoch": 1.31, + "learning_rate": 6.234542456718879e-05, + "loss": 6.6193, + "step": 2335 + }, + { + "epoch": 1.31, + "learning_rate": 6.232481450948064e-05, + "loss": 6.4067, + "step": 2336 + }, + { + "epoch": 1.31, + "learning_rate": 6.230420445177247e-05, + "loss": 6.4091, + "step": 2337 + }, + { + "epoch": 1.31, + "learning_rate": 6.228359439406431e-05, + "loss": 6.4824, + "step": 2338 + }, + { + "epoch": 1.31, + "learning_rate": 6.226298433635614e-05, + "loss": 6.4644, + "step": 2339 + }, + { + "epoch": 1.31, + "learning_rate": 6.224237427864798e-05, + "loss": 6.3168, + "step": 2340 + }, + { + "epoch": 1.31, + "learning_rate": 6.222176422093983e-05, + "loss": 6.1637, + "step": 2341 + }, + { + "epoch": 1.31, + "learning_rate": 6.220115416323166e-05, + "loss": 5.93, + "step": 2342 + }, + { + "epoch": 1.31, + "learning_rate": 6.21805441055235e-05, + "loss": 5.9956, + "step": 2343 + }, + { + "epoch": 1.31, + "learning_rate": 6.215993404781534e-05, + "loss": 6.0299, + "step": 2344 + }, + { + "epoch": 1.31, + "learning_rate": 6.213932399010717e-05, + "loss": 6.2086, + "step": 2345 + }, + { + "epoch": 1.32, + "learning_rate": 6.2118713932399e-05, + "loss": 6.2814, + "step": 2346 + }, + { + "epoch": 1.32, + "learning_rate": 6.209810387469085e-05, + "loss": 6.1956, + "step": 2347 + }, + { + "epoch": 1.32, + "learning_rate": 6.207749381698269e-05, + "loss": 6.1036, + "step": 2348 + }, + { + "epoch": 1.32, + "learning_rate": 6.205688375927453e-05, + "loss": 5.8926, + "step": 2349 + }, + { + "epoch": 1.32, + "learning_rate": 6.203627370156638e-05, + "loss": 6.1973, + "step": 2350 + }, + { + "epoch": 1.32, + "learning_rate": 6.20156636438582e-05, + "loss": 5.9136, + "step": 2351 + }, + { + "epoch": 1.32, + "learning_rate": 6.199505358615005e-05, + "loss": 5.8949, + "step": 2352 + }, + { + "epoch": 1.32, + "learning_rate": 6.197444352844188e-05, + "loss": 6.0556, + "step": 2353 + }, + { + "epoch": 1.32, + "learning_rate": 6.195383347073372e-05, + "loss": 5.8397, + "step": 2354 + }, + { + "epoch": 1.32, + "learning_rate": 6.193322341302555e-05, + "loss": 5.8506, + "step": 2355 + }, + { + "epoch": 1.32, + "learning_rate": 6.19126133553174e-05, + "loss": 5.8701, + "step": 2356 + }, + { + "epoch": 1.32, + "learning_rate": 6.189200329760924e-05, + "loss": 6.087, + "step": 2357 + }, + { + "epoch": 1.32, + "learning_rate": 6.187139323990108e-05, + "loss": 5.6798, + "step": 2358 + }, + { + "epoch": 1.32, + "learning_rate": 6.185078318219291e-05, + "loss": 6.1065, + "step": 2359 + }, + { + "epoch": 1.32, + "learning_rate": 6.183017312448474e-05, + "loss": 6.1676, + "step": 2360 + }, + { + "epoch": 1.32, + "learning_rate": 6.180956306677659e-05, + "loss": 5.9871, + "step": 2361 + }, + { + "epoch": 1.32, + "learning_rate": 6.178895300906843e-05, + "loss": 5.875, + "step": 2362 + }, + { + "epoch": 1.32, + "learning_rate": 6.176834295136027e-05, + "loss": 5.9861, + "step": 2363 + }, + { + "epoch": 1.33, + "learning_rate": 6.174773289365212e-05, + "loss": 5.9745, + "step": 2364 + }, + { + "epoch": 1.33, + "learning_rate": 6.172712283594395e-05, + "loss": 5.9864, + "step": 2365 + }, + { + "epoch": 1.33, + "learning_rate": 6.170651277823577e-05, + "loss": 5.6138, + "step": 2366 + }, + { + "epoch": 1.33, + "learning_rate": 6.168590272052762e-05, + "loss": 5.8488, + "step": 2367 + }, + { + "epoch": 1.33, + "learning_rate": 6.166529266281946e-05, + "loss": 5.7086, + "step": 2368 + }, + { + "epoch": 1.33, + "learning_rate": 6.164468260511129e-05, + "loss": 5.9017, + "step": 2369 + }, + { + "epoch": 1.33, + "learning_rate": 6.162407254740313e-05, + "loss": 5.8135, + "step": 2370 + }, + { + "epoch": 1.33, + "learning_rate": 6.160346248969498e-05, + "loss": 5.6579, + "step": 2371 + }, + { + "epoch": 1.33, + "learning_rate": 6.158285243198682e-05, + "loss": 5.6759, + "step": 2372 + }, + { + "epoch": 1.33, + "learning_rate": 6.156224237427865e-05, + "loss": 5.7245, + "step": 2373 + }, + { + "epoch": 1.33, + "learning_rate": 6.154163231657048e-05, + "loss": 5.5857, + "step": 2374 + }, + { + "epoch": 1.33, + "learning_rate": 6.152102225886232e-05, + "loss": 5.9084, + "step": 2375 + }, + { + "epoch": 1.33, + "learning_rate": 6.150041220115417e-05, + "loss": 5.7678, + "step": 2376 + }, + { + "epoch": 1.33, + "learning_rate": 6.147980214344601e-05, + "loss": 5.27, + "step": 2377 + }, + { + "epoch": 1.33, + "learning_rate": 6.145919208573784e-05, + "loss": 5.2836, + "step": 2378 + }, + { + "epoch": 1.33, + "learning_rate": 6.143858202802968e-05, + "loss": 5.2527, + "step": 2379 + }, + { + "epoch": 1.33, + "learning_rate": 6.141797197032151e-05, + "loss": 5.0726, + "step": 2380 + }, + { + "epoch": 1.33, + "learning_rate": 6.139736191261336e-05, + "loss": 4.596, + "step": 2381 + }, + { + "epoch": 1.34, + "learning_rate": 6.13767518549052e-05, + "loss": 4.4636, + "step": 2382 + }, + { + "epoch": 1.34, + "learning_rate": 6.135614179719703e-05, + "loss": 4.6327, + "step": 2383 + }, + { + "epoch": 1.34, + "learning_rate": 6.133553173948887e-05, + "loss": 4.6107, + "step": 2384 + }, + { + "epoch": 1.34, + "learning_rate": 6.131492168178072e-05, + "loss": 6.5827, + "step": 2385 + }, + { + "epoch": 1.34, + "learning_rate": 6.129431162407255e-05, + "loss": 6.6261, + "step": 2386 + }, + { + "epoch": 1.34, + "learning_rate": 6.127370156636439e-05, + "loss": 6.6341, + "step": 2387 + }, + { + "epoch": 1.34, + "learning_rate": 6.125309150865622e-05, + "loss": 6.4606, + "step": 2388 + }, + { + "epoch": 1.34, + "learning_rate": 6.123248145094806e-05, + "loss": 6.5439, + "step": 2389 + }, + { + "epoch": 1.34, + "learning_rate": 6.121187139323991e-05, + "loss": 6.5022, + "step": 2390 + }, + { + "epoch": 1.34, + "learning_rate": 6.119126133553175e-05, + "loss": 6.1396, + "step": 2391 + }, + { + "epoch": 1.34, + "learning_rate": 6.117065127782358e-05, + "loss": 6.1479, + "step": 2392 + }, + { + "epoch": 1.34, + "learning_rate": 6.115004122011541e-05, + "loss": 6.0012, + "step": 2393 + }, + { + "epoch": 1.34, + "learning_rate": 6.112943116240725e-05, + "loss": 6.0761, + "step": 2394 + }, + { + "epoch": 1.34, + "learning_rate": 6.11088211046991e-05, + "loss": 5.92, + "step": 2395 + }, + { + "epoch": 1.34, + "learning_rate": 6.108821104699094e-05, + "loss": 6.0262, + "step": 2396 + }, + { + "epoch": 1.34, + "learning_rate": 6.106760098928277e-05, + "loss": 6.0072, + "step": 2397 + }, + { + "epoch": 1.34, + "learning_rate": 6.104699093157461e-05, + "loss": 6.1731, + "step": 2398 + }, + { + "epoch": 1.34, + "learning_rate": 6.102638087386645e-05, + "loss": 6.075, + "step": 2399 + }, + { + "epoch": 1.35, + "learning_rate": 6.1005770816158294e-05, + "loss": 5.9694, + "step": 2400 + }, + { + "epoch": 1.35, + "learning_rate": 6.0985160758450124e-05, + "loss": 6.2467, + "step": 2401 + }, + { + "epoch": 1.35, + "learning_rate": 6.096455070074196e-05, + "loss": 5.9527, + "step": 2402 + }, + { + "epoch": 1.35, + "learning_rate": 6.0943940643033804e-05, + "loss": 6.2593, + "step": 2403 + }, + { + "epoch": 1.35, + "learning_rate": 6.092333058532565e-05, + "loss": 5.8669, + "step": 2404 + }, + { + "epoch": 1.35, + "learning_rate": 6.0902720527617484e-05, + "loss": 6.0725, + "step": 2405 + }, + { + "epoch": 1.35, + "learning_rate": 6.0882110469909314e-05, + "loss": 6.1165, + "step": 2406 + }, + { + "epoch": 1.35, + "learning_rate": 6.086150041220116e-05, + "loss": 5.7195, + "step": 2407 + }, + { + "epoch": 1.35, + "learning_rate": 6.0840890354492994e-05, + "loss": 5.8946, + "step": 2408 + }, + { + "epoch": 1.35, + "learning_rate": 6.082028029678484e-05, + "loss": 5.839, + "step": 2409 + }, + { + "epoch": 1.35, + "learning_rate": 6.079967023907667e-05, + "loss": 5.9338, + "step": 2410 + }, + { + "epoch": 1.35, + "learning_rate": 6.077906018136851e-05, + "loss": 5.8549, + "step": 2411 + }, + { + "epoch": 1.35, + "learning_rate": 6.075845012366035e-05, + "loss": 5.6487, + "step": 2412 + }, + { + "epoch": 1.35, + "learning_rate": 6.073784006595219e-05, + "loss": 5.8746, + "step": 2413 + }, + { + "epoch": 1.35, + "learning_rate": 6.0717230008244034e-05, + "loss": 5.7437, + "step": 2414 + }, + { + "epoch": 1.35, + "learning_rate": 6.069661995053586e-05, + "loss": 5.869, + "step": 2415 + }, + { + "epoch": 1.35, + "learning_rate": 6.06760098928277e-05, + "loss": 6.062, + "step": 2416 + }, + { + "epoch": 1.35, + "learning_rate": 6.065539983511954e-05, + "loss": 5.8543, + "step": 2417 + }, + { + "epoch": 1.36, + "learning_rate": 6.063478977741138e-05, + "loss": 5.904, + "step": 2418 + }, + { + "epoch": 1.36, + "learning_rate": 6.061417971970322e-05, + "loss": 6.0218, + "step": 2419 + }, + { + "epoch": 1.36, + "learning_rate": 6.059356966199505e-05, + "loss": 5.8784, + "step": 2420 + }, + { + "epoch": 1.36, + "learning_rate": 6.0572959604286897e-05, + "loss": 6.022, + "step": 2421 + }, + { + "epoch": 1.36, + "learning_rate": 6.055234954657873e-05, + "loss": 5.7566, + "step": 2422 + }, + { + "epoch": 1.36, + "learning_rate": 6.0531739488870576e-05, + "loss": 5.7456, + "step": 2423 + }, + { + "epoch": 1.36, + "learning_rate": 6.0511129431162406e-05, + "loss": 5.6459, + "step": 2424 + }, + { + "epoch": 1.36, + "learning_rate": 6.049051937345425e-05, + "loss": 5.7706, + "step": 2425 + }, + { + "epoch": 1.36, + "learning_rate": 6.0469909315746086e-05, + "loss": 5.7006, + "step": 2426 + }, + { + "epoch": 1.36, + "learning_rate": 6.044929925803793e-05, + "loss": 5.5467, + "step": 2427 + }, + { + "epoch": 1.36, + "learning_rate": 6.0428689200329766e-05, + "loss": 5.2735, + "step": 2428 + }, + { + "epoch": 1.36, + "learning_rate": 6.0408079142621596e-05, + "loss": 5.5859, + "step": 2429 + }, + { + "epoch": 1.36, + "learning_rate": 6.038746908491344e-05, + "loss": 5.2278, + "step": 2430 + }, + { + "epoch": 1.36, + "learning_rate": 6.036685902720528e-05, + "loss": 5.1114, + "step": 2431 + }, + { + "epoch": 1.36, + "learning_rate": 6.034624896949712e-05, + "loss": 4.7538, + "step": 2432 + }, + { + "epoch": 1.36, + "learning_rate": 6.032563891178895e-05, + "loss": 4.1802, + "step": 2433 + }, + { + "epoch": 1.36, + "learning_rate": 6.030502885408079e-05, + "loss": 4.4042, + "step": 2434 + }, + { + "epoch": 1.36, + "learning_rate": 6.028441879637263e-05, + "loss": 6.5203, + "step": 2435 + }, + { + "epoch": 1.37, + "learning_rate": 6.026380873866447e-05, + "loss": 6.4601, + "step": 2436 + }, + { + "epoch": 1.37, + "learning_rate": 6.0243198680956316e-05, + "loss": 6.4345, + "step": 2437 + }, + { + "epoch": 1.37, + "learning_rate": 6.0222588623248146e-05, + "loss": 6.2445, + "step": 2438 + }, + { + "epoch": 1.37, + "learning_rate": 6.020197856553998e-05, + "loss": 6.1898, + "step": 2439 + }, + { + "epoch": 1.37, + "learning_rate": 6.0181368507831826e-05, + "loss": 6.0689, + "step": 2440 + }, + { + "epoch": 1.37, + "learning_rate": 6.016075845012367e-05, + "loss": 6.1299, + "step": 2441 + }, + { + "epoch": 1.37, + "learning_rate": 6.01401483924155e-05, + "loss": 6.1717, + "step": 2442 + }, + { + "epoch": 1.37, + "learning_rate": 6.0119538334707336e-05, + "loss": 6.2791, + "step": 2443 + }, + { + "epoch": 1.37, + "learning_rate": 6.009892827699918e-05, + "loss": 6.2394, + "step": 2444 + }, + { + "epoch": 1.37, + "learning_rate": 6.0078318219291016e-05, + "loss": 6.1911, + "step": 2445 + }, + { + "epoch": 1.37, + "learning_rate": 6.005770816158286e-05, + "loss": 6.2473, + "step": 2446 + }, + { + "epoch": 1.37, + "learning_rate": 6.003709810387469e-05, + "loss": 6.1123, + "step": 2447 + }, + { + "epoch": 1.37, + "learning_rate": 6.001648804616653e-05, + "loss": 5.8622, + "step": 2448 + }, + { + "epoch": 1.37, + "learning_rate": 5.999587798845837e-05, + "loss": 5.7588, + "step": 2449 + }, + { + "epoch": 1.37, + "learning_rate": 5.997526793075021e-05, + "loss": 5.8624, + "step": 2450 + }, + { + "epoch": 1.37, + "learning_rate": 5.9954657873042056e-05, + "loss": 5.8168, + "step": 2451 + }, + { + "epoch": 1.37, + "learning_rate": 5.9934047815333885e-05, + "loss": 5.9534, + "step": 2452 + }, + { + "epoch": 1.38, + "learning_rate": 5.991343775762572e-05, + "loss": 6.191, + "step": 2453 + }, + { + "epoch": 1.38, + "learning_rate": 5.9892827699917565e-05, + "loss": 5.9854, + "step": 2454 + }, + { + "epoch": 1.38, + "learning_rate": 5.98722176422094e-05, + "loss": 5.9814, + "step": 2455 + }, + { + "epoch": 1.38, + "learning_rate": 5.985160758450123e-05, + "loss": 5.878, + "step": 2456 + }, + { + "epoch": 1.38, + "learning_rate": 5.9830997526793075e-05, + "loss": 5.8879, + "step": 2457 + }, + { + "epoch": 1.38, + "learning_rate": 5.981038746908492e-05, + "loss": 5.9156, + "step": 2458 + }, + { + "epoch": 1.38, + "learning_rate": 5.9789777411376755e-05, + "loss": 6.0119, + "step": 2459 + }, + { + "epoch": 1.38, + "learning_rate": 5.97691673536686e-05, + "loss": 5.7436, + "step": 2460 + }, + { + "epoch": 1.38, + "learning_rate": 5.974855729596043e-05, + "loss": 6.0177, + "step": 2461 + }, + { + "epoch": 1.38, + "learning_rate": 5.972794723825227e-05, + "loss": 5.8751, + "step": 2462 + }, + { + "epoch": 1.38, + "learning_rate": 5.970733718054411e-05, + "loss": 5.9553, + "step": 2463 + }, + { + "epoch": 1.38, + "learning_rate": 5.968672712283595e-05, + "loss": 5.8383, + "step": 2464 + }, + { + "epoch": 1.38, + "learning_rate": 5.966611706512778e-05, + "loss": 5.9689, + "step": 2465 + }, + { + "epoch": 1.38, + "learning_rate": 5.964550700741962e-05, + "loss": 5.6316, + "step": 2466 + }, + { + "epoch": 1.38, + "learning_rate": 5.962489694971146e-05, + "loss": 5.8278, + "step": 2467 + }, + { + "epoch": 1.38, + "learning_rate": 5.9604286892003305e-05, + "loss": 5.668, + "step": 2468 + }, + { + "epoch": 1.38, + "learning_rate": 5.958367683429514e-05, + "loss": 5.8898, + "step": 2469 + }, + { + "epoch": 1.38, + "learning_rate": 5.956306677658697e-05, + "loss": 5.9813, + "step": 2470 + }, + { + "epoch": 1.39, + "learning_rate": 5.9542456718878815e-05, + "loss": 5.596, + "step": 2471 + }, + { + "epoch": 1.39, + "learning_rate": 5.952184666117066e-05, + "loss": 5.6197, + "step": 2472 + }, + { + "epoch": 1.39, + "learning_rate": 5.9501236603462495e-05, + "loss": 5.7545, + "step": 2473 + }, + { + "epoch": 1.39, + "learning_rate": 5.9480626545754325e-05, + "loss": 5.4748, + "step": 2474 + }, + { + "epoch": 1.39, + "learning_rate": 5.946001648804617e-05, + "loss": 5.6478, + "step": 2475 + }, + { + "epoch": 1.39, + "learning_rate": 5.9439406430338005e-05, + "loss": 5.909, + "step": 2476 + }, + { + "epoch": 1.39, + "learning_rate": 5.941879637262985e-05, + "loss": 5.5243, + "step": 2477 + }, + { + "epoch": 1.39, + "learning_rate": 5.939818631492169e-05, + "loss": 5.891, + "step": 2478 + }, + { + "epoch": 1.39, + "learning_rate": 5.937757625721352e-05, + "loss": 5.0999, + "step": 2479 + }, + { + "epoch": 1.39, + "learning_rate": 5.935696619950536e-05, + "loss": 5.254, + "step": 2480 + }, + { + "epoch": 1.39, + "learning_rate": 5.93363561417972e-05, + "loss": 4.7389, + "step": 2481 + }, + { + "epoch": 1.39, + "learning_rate": 5.931574608408904e-05, + "loss": 4.785, + "step": 2482 + }, + { + "epoch": 1.39, + "learning_rate": 5.929513602638088e-05, + "loss": 4.5273, + "step": 2483 + }, + { + "epoch": 1.39, + "learning_rate": 5.927452596867271e-05, + "loss": 4.6437, + "step": 2484 + }, + { + "epoch": 1.39, + "learning_rate": 5.9253915910964554e-05, + "loss": 6.5951, + "step": 2485 + }, + { + "epoch": 1.39, + "learning_rate": 5.923330585325639e-05, + "loss": 6.4603, + "step": 2486 + }, + { + "epoch": 1.39, + "learning_rate": 5.9212695795548234e-05, + "loss": 6.3344, + "step": 2487 + }, + { + "epoch": 1.39, + "learning_rate": 5.9192085737840064e-05, + "loss": 6.2191, + "step": 2488 + }, + { + "epoch": 1.4, + "learning_rate": 5.917147568013191e-05, + "loss": 6.1617, + "step": 2489 + }, + { + "epoch": 1.4, + "learning_rate": 5.9150865622423744e-05, + "loss": 6.133, + "step": 2490 + }, + { + "epoch": 1.4, + "learning_rate": 5.913025556471559e-05, + "loss": 5.931, + "step": 2491 + }, + { + "epoch": 1.4, + "learning_rate": 5.9109645507007424e-05, + "loss": 6.122, + "step": 2492 + }, + { + "epoch": 1.4, + "learning_rate": 5.9089035449299254e-05, + "loss": 5.9028, + "step": 2493 + }, + { + "epoch": 1.4, + "learning_rate": 5.90684253915911e-05, + "loss": 5.8767, + "step": 2494 + }, + { + "epoch": 1.4, + "learning_rate": 5.904781533388294e-05, + "loss": 5.8472, + "step": 2495 + }, + { + "epoch": 1.4, + "learning_rate": 5.902720527617478e-05, + "loss": 5.7917, + "step": 2496 + }, + { + "epoch": 1.4, + "learning_rate": 5.900659521846661e-05, + "loss": 5.7144, + "step": 2497 + }, + { + "epoch": 1.4, + "learning_rate": 5.898598516075845e-05, + "loss": 6.0156, + "step": 2498 + }, + { + "epoch": 1.4, + "learning_rate": 5.8965375103050294e-05, + "loss": 5.8232, + "step": 2499 + }, + { + "epoch": 1.4, + "learning_rate": 5.894476504534213e-05, + "loss": 5.9142, + "step": 2500 + }, + { + "epoch": 1.4, + "eval_loss": 18.59187889099121, + "eval_runtime": 1322.6566, + "eval_samples_per_second": 1.997, + "eval_steps_per_second": 0.25, + "eval_wer": 1.0, + "step": 2500 + }, + { + "epoch": 1.4, + "learning_rate": 5.8924154987633974e-05, + "loss": 5.7901, + "step": 2501 + }, + { + "epoch": 1.4, + "learning_rate": 5.8903544929925804e-05, + "loss": 5.7028, + "step": 2502 + }, + { + "epoch": 1.4, + "learning_rate": 5.888293487221764e-05, + "loss": 5.9474, + "step": 2503 + }, + { + "epoch": 1.4, + "learning_rate": 5.8862324814509484e-05, + "loss": 5.8746, + "step": 2504 + }, + { + "epoch": 1.4, + "learning_rate": 5.884171475680133e-05, + "loss": 5.6878, + "step": 2505 + }, + { + "epoch": 1.4, + "learning_rate": 5.8821104699093164e-05, + "loss": 5.8153, + "step": 2506 + }, + { + "epoch": 1.41, + "learning_rate": 5.8800494641384993e-05, + "loss": 5.697, + "step": 2507 + }, + { + "epoch": 1.41, + "learning_rate": 5.877988458367684e-05, + "loss": 5.9145, + "step": 2508 + }, + { + "epoch": 1.41, + "learning_rate": 5.875927452596868e-05, + "loss": 5.721, + "step": 2509 + }, + { + "epoch": 1.41, + "learning_rate": 5.873866446826052e-05, + "loss": 5.8829, + "step": 2510 + }, + { + "epoch": 1.41, + "learning_rate": 5.871805441055235e-05, + "loss": 5.9887, + "step": 2511 + }, + { + "epoch": 1.41, + "learning_rate": 5.869744435284419e-05, + "loss": 5.7313, + "step": 2512 + }, + { + "epoch": 1.41, + "learning_rate": 5.867683429513603e-05, + "loss": 5.5539, + "step": 2513 + }, + { + "epoch": 1.41, + "learning_rate": 5.865622423742787e-05, + "loss": 5.5933, + "step": 2514 + }, + { + "epoch": 1.41, + "learning_rate": 5.8635614179719713e-05, + "loss": 5.71, + "step": 2515 + }, + { + "epoch": 1.41, + "learning_rate": 5.861500412201154e-05, + "loss": 5.5441, + "step": 2516 + }, + { + "epoch": 1.41, + "learning_rate": 5.859439406430338e-05, + "loss": 5.5995, + "step": 2517 + }, + { + "epoch": 1.41, + "learning_rate": 5.857378400659522e-05, + "loss": 5.7763, + "step": 2518 + }, + { + "epoch": 1.41, + "learning_rate": 5.8553173948887067e-05, + "loss": 5.894, + "step": 2519 + }, + { + "epoch": 1.41, + "learning_rate": 5.8532563891178896e-05, + "loss": 5.6307, + "step": 2520 + }, + { + "epoch": 1.41, + "learning_rate": 5.851195383347073e-05, + "loss": 5.759, + "step": 2521 + }, + { + "epoch": 1.41, + "learning_rate": 5.8491343775762576e-05, + "loss": 5.6364, + "step": 2522 + }, + { + "epoch": 1.41, + "learning_rate": 5.847073371805441e-05, + "loss": 5.417, + "step": 2523 + }, + { + "epoch": 1.41, + "learning_rate": 5.8450123660346256e-05, + "loss": 5.3691, + "step": 2524 + }, + { + "epoch": 1.42, + "learning_rate": 5.8429513602638086e-05, + "loss": 5.408, + "step": 2525 + }, + { + "epoch": 1.42, + "learning_rate": 5.840890354492993e-05, + "loss": 5.6791, + "step": 2526 + }, + { + "epoch": 1.42, + "learning_rate": 5.8388293487221766e-05, + "loss": 5.6272, + "step": 2527 + }, + { + "epoch": 1.42, + "learning_rate": 5.836768342951361e-05, + "loss": 5.2763, + "step": 2528 + }, + { + "epoch": 1.42, + "learning_rate": 5.834707337180544e-05, + "loss": 5.0643, + "step": 2529 + }, + { + "epoch": 1.42, + "learning_rate": 5.8326463314097276e-05, + "loss": 5.203, + "step": 2530 + }, + { + "epoch": 1.42, + "learning_rate": 5.830585325638912e-05, + "loss": 5.1267, + "step": 2531 + }, + { + "epoch": 1.42, + "learning_rate": 5.828524319868096e-05, + "loss": 4.807, + "step": 2532 + }, + { + "epoch": 1.42, + "learning_rate": 5.82646331409728e-05, + "loss": 4.9342, + "step": 2533 + }, + { + "epoch": 1.42, + "learning_rate": 5.824402308326463e-05, + "loss": 4.8368, + "step": 2534 + }, + { + "epoch": 1.42, + "learning_rate": 5.822341302555647e-05, + "loss": 6.4036, + "step": 2535 + }, + { + "epoch": 1.42, + "learning_rate": 5.8202802967848316e-05, + "loss": 6.4512, + "step": 2536 + }, + { + "epoch": 1.42, + "learning_rate": 5.818219291014015e-05, + "loss": 6.4055, + "step": 2537 + }, + { + "epoch": 1.42, + "learning_rate": 5.8161582852431996e-05, + "loss": 6.328, + "step": 2538 + }, + { + "epoch": 1.42, + "learning_rate": 5.8140972794723826e-05, + "loss": 5.9897, + "step": 2539 + }, + { + "epoch": 1.42, + "learning_rate": 5.812036273701566e-05, + "loss": 6.1129, + "step": 2540 + }, + { + "epoch": 1.42, + "learning_rate": 5.8099752679307506e-05, + "loss": 6.0076, + "step": 2541 + }, + { + "epoch": 1.42, + "learning_rate": 5.807914262159935e-05, + "loss": 6.029, + "step": 2542 + }, + { + "epoch": 1.43, + "learning_rate": 5.805853256389118e-05, + "loss": 6.0714, + "step": 2543 + }, + { + "epoch": 1.43, + "learning_rate": 5.8037922506183016e-05, + "loss": 5.9973, + "step": 2544 + }, + { + "epoch": 1.43, + "learning_rate": 5.801731244847486e-05, + "loss": 5.8884, + "step": 2545 + }, + { + "epoch": 1.43, + "learning_rate": 5.79967023907667e-05, + "loss": 5.9896, + "step": 2546 + }, + { + "epoch": 1.43, + "learning_rate": 5.797609233305854e-05, + "loss": 5.8941, + "step": 2547 + }, + { + "epoch": 1.43, + "learning_rate": 5.795548227535037e-05, + "loss": 5.7984, + "step": 2548 + }, + { + "epoch": 1.43, + "learning_rate": 5.793487221764221e-05, + "loss": 5.7139, + "step": 2549 + }, + { + "epoch": 1.43, + "learning_rate": 5.791426215993405e-05, + "loss": 5.741, + "step": 2550 + }, + { + "epoch": 1.43, + "learning_rate": 5.789365210222589e-05, + "loss": 5.8849, + "step": 2551 + }, + { + "epoch": 1.43, + "learning_rate": 5.787304204451772e-05, + "loss": 5.9284, + "step": 2552 + }, + { + "epoch": 1.43, + "learning_rate": 5.7852431986809565e-05, + "loss": 5.8631, + "step": 2553 + }, + { + "epoch": 1.43, + "learning_rate": 5.78318219291014e-05, + "loss": 5.8491, + "step": 2554 + }, + { + "epoch": 1.43, + "learning_rate": 5.7811211871393245e-05, + "loss": 5.9435, + "step": 2555 + }, + { + "epoch": 1.43, + "learning_rate": 5.779060181368509e-05, + "loss": 5.7345, + "step": 2556 + }, + { + "epoch": 1.43, + "learning_rate": 5.776999175597692e-05, + "loss": 5.8416, + "step": 2557 + }, + { + "epoch": 1.43, + "learning_rate": 5.7749381698268755e-05, + "loss": 5.7709, + "step": 2558 + }, + { + "epoch": 1.43, + "learning_rate": 5.77287716405606e-05, + "loss": 5.7495, + "step": 2559 + }, + { + "epoch": 1.43, + "learning_rate": 5.7708161582852435e-05, + "loss": 5.9021, + "step": 2560 + }, + { + "epoch": 1.44, + "learning_rate": 5.7687551525144265e-05, + "loss": 5.6159, + "step": 2561 + }, + { + "epoch": 1.44, + "learning_rate": 5.766694146743611e-05, + "loss": 5.788, + "step": 2562 + }, + { + "epoch": 1.44, + "learning_rate": 5.764633140972795e-05, + "loss": 5.7431, + "step": 2563 + }, + { + "epoch": 1.44, + "learning_rate": 5.762572135201979e-05, + "loss": 5.8135, + "step": 2564 + }, + { + "epoch": 1.44, + "learning_rate": 5.760511129431163e-05, + "loss": 5.7594, + "step": 2565 + }, + { + "epoch": 1.44, + "learning_rate": 5.758450123660346e-05, + "loss": 6.0838, + "step": 2566 + }, + { + "epoch": 1.44, + "learning_rate": 5.7563891178895305e-05, + "loss": 5.7919, + "step": 2567 + }, + { + "epoch": 1.44, + "learning_rate": 5.754328112118714e-05, + "loss": 5.9469, + "step": 2568 + }, + { + "epoch": 1.44, + "learning_rate": 5.7522671063478985e-05, + "loss": 5.9776, + "step": 2569 + }, + { + "epoch": 1.44, + "learning_rate": 5.750206100577082e-05, + "loss": 5.6395, + "step": 2570 + }, + { + "epoch": 1.44, + "learning_rate": 5.748145094806265e-05, + "loss": 5.8936, + "step": 2571 + }, + { + "epoch": 1.44, + "learning_rate": 5.7460840890354495e-05, + "loss": 5.5559, + "step": 2572 + }, + { + "epoch": 1.44, + "learning_rate": 5.744023083264634e-05, + "loss": 5.8402, + "step": 2573 + }, + { + "epoch": 1.44, + "learning_rate": 5.7419620774938175e-05, + "loss": 5.7583, + "step": 2574 + }, + { + "epoch": 1.44, + "learning_rate": 5.7399010717230004e-05, + "loss": 5.4738, + "step": 2575 + }, + { + "epoch": 1.44, + "learning_rate": 5.737840065952185e-05, + "loss": 5.3861, + "step": 2576 + }, + { + "epoch": 1.44, + "learning_rate": 5.735779060181369e-05, + "loss": 5.6683, + "step": 2577 + }, + { + "epoch": 1.45, + "learning_rate": 5.733718054410553e-05, + "loss": 5.2936, + "step": 2578 + }, + { + "epoch": 1.45, + "learning_rate": 5.731657048639737e-05, + "loss": 4.868, + "step": 2579 + }, + { + "epoch": 1.45, + "learning_rate": 5.72959604286892e-05, + "loss": 5.2662, + "step": 2580 + }, + { + "epoch": 1.45, + "learning_rate": 5.727535037098104e-05, + "loss": 4.9541, + "step": 2581 + }, + { + "epoch": 1.45, + "learning_rate": 5.725474031327288e-05, + "loss": 4.8811, + "step": 2582 + }, + { + "epoch": 1.45, + "learning_rate": 5.7234130255564724e-05, + "loss": 4.5947, + "step": 2583 + }, + { + "epoch": 1.45, + "learning_rate": 5.7213520197856554e-05, + "loss": 3.9571, + "step": 2584 + }, + { + "epoch": 1.45, + "learning_rate": 5.719291014014839e-05, + "loss": 6.2165, + "step": 2585 + }, + { + "epoch": 1.45, + "learning_rate": 5.7172300082440234e-05, + "loss": 6.5307, + "step": 2586 + }, + { + "epoch": 1.45, + "learning_rate": 5.715169002473207e-05, + "loss": 6.2618, + "step": 2587 + }, + { + "epoch": 1.45, + "learning_rate": 5.7131079967023914e-05, + "loss": 6.2119, + "step": 2588 + }, + { + "epoch": 1.45, + "learning_rate": 5.7110469909315744e-05, + "loss": 5.9386, + "step": 2589 + }, + { + "epoch": 1.45, + "learning_rate": 5.708985985160759e-05, + "loss": 6.1379, + "step": 2590 + }, + { + "epoch": 1.45, + "learning_rate": 5.7069249793899424e-05, + "loss": 5.861, + "step": 2591 + }, + { + "epoch": 1.45, + "learning_rate": 5.704863973619127e-05, + "loss": 6.1214, + "step": 2592 + }, + { + "epoch": 1.45, + "learning_rate": 5.70280296784831e-05, + "loss": 6.1466, + "step": 2593 + }, + { + "epoch": 1.45, + "learning_rate": 5.700741962077494e-05, + "loss": 6.215, + "step": 2594 + }, + { + "epoch": 1.45, + "learning_rate": 5.698680956306678e-05, + "loss": 6.0027, + "step": 2595 + }, + { + "epoch": 1.46, + "learning_rate": 5.696619950535862e-05, + "loss": 5.8485, + "step": 2596 + }, + { + "epoch": 1.46, + "learning_rate": 5.694558944765046e-05, + "loss": 6.0161, + "step": 2597 + }, + { + "epoch": 1.46, + "learning_rate": 5.692497938994229e-05, + "loss": 5.7492, + "step": 2598 + }, + { + "epoch": 1.46, + "learning_rate": 5.690436933223413e-05, + "loss": 5.7642, + "step": 2599 + }, + { + "epoch": 1.46, + "learning_rate": 5.6883759274525974e-05, + "loss": 5.9603, + "step": 2600 + }, + { + "epoch": 1.46, + "learning_rate": 5.686314921681781e-05, + "loss": 5.9208, + "step": 2601 + }, + { + "epoch": 1.46, + "learning_rate": 5.6842539159109654e-05, + "loss": 5.9048, + "step": 2602 + }, + { + "epoch": 1.46, + "learning_rate": 5.6821929101401484e-05, + "loss": 5.7918, + "step": 2603 + }, + { + "epoch": 1.46, + "learning_rate": 5.680131904369333e-05, + "loss": 5.9366, + "step": 2604 + }, + { + "epoch": 1.46, + "learning_rate": 5.6780708985985164e-05, + "loss": 5.7445, + "step": 2605 + }, + { + "epoch": 1.46, + "learning_rate": 5.676009892827701e-05, + "loss": 5.7203, + "step": 2606 + }, + { + "epoch": 1.46, + "learning_rate": 5.673948887056884e-05, + "loss": 5.4464, + "step": 2607 + }, + { + "epoch": 1.46, + "learning_rate": 5.671887881286067e-05, + "loss": 5.8416, + "step": 2608 + }, + { + "epoch": 1.46, + "learning_rate": 5.669826875515252e-05, + "loss": 5.453, + "step": 2609 + }, + { + "epoch": 1.46, + "learning_rate": 5.667765869744436e-05, + "loss": 5.7977, + "step": 2610 + }, + { + "epoch": 1.46, + "learning_rate": 5.66570486397362e-05, + "loss": 5.7749, + "step": 2611 + }, + { + "epoch": 1.46, + "learning_rate": 5.6636438582028027e-05, + "loss": 5.8265, + "step": 2612 + }, + { + "epoch": 1.46, + "learning_rate": 5.661582852431987e-05, + "loss": 5.7769, + "step": 2613 + }, + { + "epoch": 1.47, + "learning_rate": 5.659521846661171e-05, + "loss": 5.7212, + "step": 2614 + }, + { + "epoch": 1.47, + "learning_rate": 5.657460840890355e-05, + "loss": 5.6724, + "step": 2615 + }, + { + "epoch": 1.47, + "learning_rate": 5.655399835119538e-05, + "loss": 5.6212, + "step": 2616 + }, + { + "epoch": 1.47, + "learning_rate": 5.653338829348722e-05, + "loss": 5.9557, + "step": 2617 + }, + { + "epoch": 1.47, + "learning_rate": 5.651277823577906e-05, + "loss": 5.4544, + "step": 2618 + }, + { + "epoch": 1.47, + "learning_rate": 5.64921681780709e-05, + "loss": 5.7835, + "step": 2619 + }, + { + "epoch": 1.47, + "learning_rate": 5.6471558120362747e-05, + "loss": 5.6501, + "step": 2620 + }, + { + "epoch": 1.47, + "learning_rate": 5.6450948062654576e-05, + "loss": 5.6136, + "step": 2621 + }, + { + "epoch": 1.47, + "learning_rate": 5.643033800494641e-05, + "loss": 5.592, + "step": 2622 + }, + { + "epoch": 1.47, + "learning_rate": 5.6409727947238256e-05, + "loss": 5.6509, + "step": 2623 + }, + { + "epoch": 1.47, + "learning_rate": 5.63891178895301e-05, + "loss": 5.6131, + "step": 2624 + }, + { + "epoch": 1.47, + "learning_rate": 5.6368507831821936e-05, + "loss": 5.5366, + "step": 2625 + }, + { + "epoch": 1.47, + "learning_rate": 5.6347897774113766e-05, + "loss": 5.3244, + "step": 2626 + }, + { + "epoch": 1.47, + "learning_rate": 5.632728771640561e-05, + "loss": 5.4092, + "step": 2627 + }, + { + "epoch": 1.47, + "learning_rate": 5.6306677658697446e-05, + "loss": 5.6433, + "step": 2628 + }, + { + "epoch": 1.47, + "learning_rate": 5.628606760098929e-05, + "loss": 4.909, + "step": 2629 + }, + { + "epoch": 1.47, + "learning_rate": 5.626545754328112e-05, + "loss": 5.1848, + "step": 2630 + }, + { + "epoch": 1.47, + "learning_rate": 5.624484748557296e-05, + "loss": 5.0638, + "step": 2631 + }, + { + "epoch": 1.48, + "learning_rate": 5.62242374278648e-05, + "loss": 5.1416, + "step": 2632 + }, + { + "epoch": 1.48, + "learning_rate": 5.620362737015664e-05, + "loss": 4.3338, + "step": 2633 + }, + { + "epoch": 1.48, + "learning_rate": 5.618301731244848e-05, + "loss": 4.0624, + "step": 2634 + }, + { + "epoch": 1.48, + "learning_rate": 5.616240725474031e-05, + "loss": 6.3927, + "step": 2635 + }, + { + "epoch": 1.48, + "learning_rate": 5.614179719703215e-05, + "loss": 6.468, + "step": 2636 + }, + { + "epoch": 1.48, + "learning_rate": 5.6121187139323996e-05, + "loss": 6.4229, + "step": 2637 + }, + { + "epoch": 1.48, + "learning_rate": 5.610057708161583e-05, + "loss": 5.96, + "step": 2638 + }, + { + "epoch": 1.48, + "learning_rate": 5.607996702390766e-05, + "loss": 6.009, + "step": 2639 + }, + { + "epoch": 1.48, + "learning_rate": 5.6059356966199506e-05, + "loss": 6.0452, + "step": 2640 + }, + { + "epoch": 1.48, + "learning_rate": 5.603874690849135e-05, + "loss": 5.8857, + "step": 2641 + }, + { + "epoch": 1.48, + "learning_rate": 5.6018136850783186e-05, + "loss": 5.8823, + "step": 2642 + }, + { + "epoch": 1.48, + "learning_rate": 5.599752679307503e-05, + "loss": 6.3062, + "step": 2643 + }, + { + "epoch": 1.48, + "learning_rate": 5.597691673536686e-05, + "loss": 5.9064, + "step": 2644 + }, + { + "epoch": 1.48, + "learning_rate": 5.5956306677658695e-05, + "loss": 6.1042, + "step": 2645 + }, + { + "epoch": 1.48, + "learning_rate": 5.593569661995054e-05, + "loss": 5.9905, + "step": 2646 + }, + { + "epoch": 1.48, + "learning_rate": 5.591508656224238e-05, + "loss": 6.0662, + "step": 2647 + }, + { + "epoch": 1.48, + "learning_rate": 5.589447650453421e-05, + "loss": 5.8334, + "step": 2648 + }, + { + "epoch": 1.48, + "learning_rate": 5.587386644682605e-05, + "loss": 6.0055, + "step": 2649 + }, + { + "epoch": 1.49, + "learning_rate": 5.585325638911789e-05, + "loss": 5.6764, + "step": 2650 + }, + { + "epoch": 1.49, + "learning_rate": 5.5832646331409735e-05, + "loss": 5.7396, + "step": 2651 + }, + { + "epoch": 1.49, + "learning_rate": 5.581203627370157e-05, + "loss": 5.7029, + "step": 2652 + }, + { + "epoch": 1.49, + "learning_rate": 5.57914262159934e-05, + "loss": 5.8036, + "step": 2653 + }, + { + "epoch": 1.49, + "learning_rate": 5.5770816158285245e-05, + "loss": 5.9269, + "step": 2654 + }, + { + "epoch": 1.49, + "learning_rate": 5.575020610057708e-05, + "loss": 5.5839, + "step": 2655 + }, + { + "epoch": 1.49, + "learning_rate": 5.5729596042868925e-05, + "loss": 5.6285, + "step": 2656 + }, + { + "epoch": 1.49, + "learning_rate": 5.570898598516077e-05, + "loss": 5.8291, + "step": 2657 + }, + { + "epoch": 1.49, + "learning_rate": 5.56883759274526e-05, + "loss": 5.6403, + "step": 2658 + }, + { + "epoch": 1.49, + "learning_rate": 5.5667765869744435e-05, + "loss": 5.9204, + "step": 2659 + }, + { + "epoch": 1.49, + "learning_rate": 5.564715581203628e-05, + "loss": 5.9101, + "step": 2660 + }, + { + "epoch": 1.49, + "learning_rate": 5.562654575432812e-05, + "loss": 5.8895, + "step": 2661 + }, + { + "epoch": 1.49, + "learning_rate": 5.560593569661995e-05, + "loss": 5.679, + "step": 2662 + }, + { + "epoch": 1.49, + "learning_rate": 5.558532563891179e-05, + "loss": 5.8258, + "step": 2663 + }, + { + "epoch": 1.49, + "learning_rate": 5.556471558120363e-05, + "loss": 5.7111, + "step": 2664 + }, + { + "epoch": 1.49, + "learning_rate": 5.554410552349547e-05, + "loss": 5.9546, + "step": 2665 + }, + { + "epoch": 1.49, + "learning_rate": 5.552349546578731e-05, + "loss": 5.7587, + "step": 2666 + }, + { + "epoch": 1.49, + "learning_rate": 5.550288540807914e-05, + "loss": 5.7617, + "step": 2667 + }, + { + "epoch": 1.5, + "learning_rate": 5.5482275350370985e-05, + "loss": 5.6652, + "step": 2668 + }, + { + "epoch": 1.5, + "learning_rate": 5.546166529266282e-05, + "loss": 5.9374, + "step": 2669 + }, + { + "epoch": 1.5, + "learning_rate": 5.5441055234954665e-05, + "loss": 5.692, + "step": 2670 + }, + { + "epoch": 1.5, + "learning_rate": 5.5420445177246495e-05, + "loss": 5.7827, + "step": 2671 + }, + { + "epoch": 1.5, + "learning_rate": 5.539983511953834e-05, + "loss": 5.7402, + "step": 2672 + }, + { + "epoch": 1.5, + "learning_rate": 5.5379225061830175e-05, + "loss": 5.6595, + "step": 2673 + }, + { + "epoch": 1.5, + "learning_rate": 5.535861500412202e-05, + "loss": 5.495, + "step": 2674 + }, + { + "epoch": 1.5, + "learning_rate": 5.5338004946413855e-05, + "loss": 5.5096, + "step": 2675 + }, + { + "epoch": 1.5, + "learning_rate": 5.5317394888705684e-05, + "loss": 5.2512, + "step": 2676 + }, + { + "epoch": 1.5, + "learning_rate": 5.529678483099753e-05, + "loss": 5.3943, + "step": 2677 + }, + { + "epoch": 1.5, + "learning_rate": 5.527617477328937e-05, + "loss": 5.4625, + "step": 2678 + }, + { + "epoch": 1.5, + "learning_rate": 5.525556471558121e-05, + "loss": 4.8919, + "step": 2679 + }, + { + "epoch": 1.5, + "learning_rate": 5.523495465787304e-05, + "loss": 4.6155, + "step": 2680 + }, + { + "epoch": 1.5, + "learning_rate": 5.521434460016488e-05, + "loss": 4.9008, + "step": 2681 + }, + { + "epoch": 1.5, + "learning_rate": 5.519373454245672e-05, + "loss": 4.2268, + "step": 2682 + }, + { + "epoch": 1.5, + "learning_rate": 5.517312448474856e-05, + "loss": 4.2809, + "step": 2683 + }, + { + "epoch": 1.5, + "learning_rate": 5.5152514427040404e-05, + "loss": 3.8784, + "step": 2684 + }, + { + "epoch": 1.51, + "learning_rate": 5.5131904369332234e-05, + "loss": 6.6671, + "step": 2685 + }, + { + "epoch": 1.51, + "learning_rate": 5.511129431162407e-05, + "loss": 6.4423, + "step": 2686 + }, + { + "epoch": 1.51, + "learning_rate": 5.5090684253915914e-05, + "loss": 6.4163, + "step": 2687 + }, + { + "epoch": 1.51, + "learning_rate": 5.507007419620776e-05, + "loss": 6.4371, + "step": 2688 + }, + { + "epoch": 1.51, + "learning_rate": 5.5049464138499594e-05, + "loss": 6.3983, + "step": 2689 + }, + { + "epoch": 1.51, + "learning_rate": 5.5028854080791424e-05, + "loss": 6.0475, + "step": 2690 + }, + { + "epoch": 1.51, + "learning_rate": 5.500824402308327e-05, + "loss": 5.9444, + "step": 2691 + }, + { + "epoch": 1.51, + "learning_rate": 5.4987633965375104e-05, + "loss": 5.8132, + "step": 2692 + }, + { + "epoch": 1.51, + "learning_rate": 5.496702390766695e-05, + "loss": 5.9174, + "step": 2693 + }, + { + "epoch": 1.51, + "learning_rate": 5.494641384995878e-05, + "loss": 5.7761, + "step": 2694 + }, + { + "epoch": 1.51, + "learning_rate": 5.492580379225062e-05, + "loss": 6.2468, + "step": 2695 + }, + { + "epoch": 1.51, + "learning_rate": 5.490519373454246e-05, + "loss": 6.0611, + "step": 2696 + }, + { + "epoch": 1.51, + "learning_rate": 5.48845836768343e-05, + "loss": 5.9891, + "step": 2697 + }, + { + "epoch": 1.51, + "learning_rate": 5.4863973619126144e-05, + "loss": 5.7021, + "step": 2698 + }, + { + "epoch": 1.51, + "learning_rate": 5.4843363561417974e-05, + "loss": 5.8497, + "step": 2699 + }, + { + "epoch": 1.51, + "learning_rate": 5.482275350370981e-05, + "loss": 5.8544, + "step": 2700 + }, + { + "epoch": 1.51, + "learning_rate": 5.4802143446001654e-05, + "loss": 5.6569, + "step": 2701 + }, + { + "epoch": 1.51, + "learning_rate": 5.478153338829349e-05, + "loss": 5.5629, + "step": 2702 + }, + { + "epoch": 1.52, + "learning_rate": 5.476092333058532e-05, + "loss": 5.7124, + "step": 2703 + }, + { + "epoch": 1.52, + "learning_rate": 5.4740313272877163e-05, + "loss": 6.0113, + "step": 2704 + }, + { + "epoch": 1.52, + "learning_rate": 5.471970321516901e-05, + "loss": 5.4811, + "step": 2705 + }, + { + "epoch": 1.52, + "learning_rate": 5.4699093157460843e-05, + "loss": 5.8539, + "step": 2706 + }, + { + "epoch": 1.52, + "learning_rate": 5.467848309975269e-05, + "loss": 5.6219, + "step": 2707 + }, + { + "epoch": 1.52, + "learning_rate": 5.465787304204452e-05, + "loss": 5.6332, + "step": 2708 + }, + { + "epoch": 1.52, + "learning_rate": 5.463726298433636e-05, + "loss": 5.7932, + "step": 2709 + }, + { + "epoch": 1.52, + "learning_rate": 5.46166529266282e-05, + "loss": 5.7133, + "step": 2710 + }, + { + "epoch": 1.52, + "learning_rate": 5.459604286892004e-05, + "loss": 5.581, + "step": 2711 + }, + { + "epoch": 1.52, + "learning_rate": 5.457543281121187e-05, + "loss": 5.7376, + "step": 2712 + }, + { + "epoch": 1.52, + "learning_rate": 5.4554822753503706e-05, + "loss": 5.6705, + "step": 2713 + }, + { + "epoch": 1.52, + "learning_rate": 5.453421269579555e-05, + "loss": 6.0672, + "step": 2714 + }, + { + "epoch": 1.52, + "learning_rate": 5.451360263808739e-05, + "loss": 5.727, + "step": 2715 + }, + { + "epoch": 1.52, + "learning_rate": 5.449299258037923e-05, + "loss": 5.6685, + "step": 2716 + }, + { + "epoch": 1.52, + "learning_rate": 5.447238252267106e-05, + "loss": 5.7941, + "step": 2717 + }, + { + "epoch": 1.52, + "learning_rate": 5.44517724649629e-05, + "loss": 5.4307, + "step": 2718 + }, + { + "epoch": 1.52, + "learning_rate": 5.4431162407254746e-05, + "loss": 5.4934, + "step": 2719 + }, + { + "epoch": 1.52, + "learning_rate": 5.441055234954658e-05, + "loss": 5.4979, + "step": 2720 + }, + { + "epoch": 1.53, + "learning_rate": 5.4389942291838426e-05, + "loss": 5.6014, + "step": 2721 + }, + { + "epoch": 1.53, + "learning_rate": 5.4369332234130256e-05, + "loss": 5.6167, + "step": 2722 + }, + { + "epoch": 1.53, + "learning_rate": 5.434872217642209e-05, + "loss": 5.7636, + "step": 2723 + }, + { + "epoch": 1.53, + "learning_rate": 5.4328112118713936e-05, + "loss": 5.4219, + "step": 2724 + }, + { + "epoch": 1.53, + "learning_rate": 5.430750206100578e-05, + "loss": 5.6091, + "step": 2725 + }, + { + "epoch": 1.53, + "learning_rate": 5.428689200329761e-05, + "loss": 5.5347, + "step": 2726 + }, + { + "epoch": 1.53, + "learning_rate": 5.4266281945589446e-05, + "loss": 5.509, + "step": 2727 + }, + { + "epoch": 1.53, + "learning_rate": 5.424567188788129e-05, + "loss": 5.0722, + "step": 2728 + }, + { + "epoch": 1.53, + "learning_rate": 5.422506183017313e-05, + "loss": 5.1858, + "step": 2729 + }, + { + "epoch": 1.53, + "learning_rate": 5.420445177246497e-05, + "loss": 5.5228, + "step": 2730 + }, + { + "epoch": 1.53, + "learning_rate": 5.41838417147568e-05, + "loss": 4.7512, + "step": 2731 + }, + { + "epoch": 1.53, + "learning_rate": 5.416323165704864e-05, + "loss": 4.3598, + "step": 2732 + }, + { + "epoch": 1.53, + "learning_rate": 5.414262159934048e-05, + "loss": 5.0303, + "step": 2733 + }, + { + "epoch": 1.53, + "learning_rate": 5.412201154163232e-05, + "loss": 3.8957, + "step": 2734 + }, + { + "epoch": 1.53, + "learning_rate": 5.410140148392415e-05, + "loss": 6.7927, + "step": 2735 + }, + { + "epoch": 1.53, + "learning_rate": 5.4080791426215996e-05, + "loss": 6.3645, + "step": 2736 + }, + { + "epoch": 1.53, + "learning_rate": 5.406018136850783e-05, + "loss": 6.502, + "step": 2737 + }, + { + "epoch": 1.53, + "learning_rate": 5.4039571310799676e-05, + "loss": 6.3437, + "step": 2738 + }, + { + "epoch": 1.54, + "learning_rate": 5.401896125309151e-05, + "loss": 6.0587, + "step": 2739 + }, + { + "epoch": 1.54, + "learning_rate": 5.399835119538334e-05, + "loss": 6.2409, + "step": 2740 + }, + { + "epoch": 1.54, + "learning_rate": 5.3977741137675186e-05, + "loss": 6.0423, + "step": 2741 + }, + { + "epoch": 1.54, + "learning_rate": 5.395713107996703e-05, + "loss": 5.9563, + "step": 2742 + }, + { + "epoch": 1.54, + "learning_rate": 5.3936521022258866e-05, + "loss": 5.8911, + "step": 2743 + }, + { + "epoch": 1.54, + "learning_rate": 5.391591096455071e-05, + "loss": 6.0492, + "step": 2744 + }, + { + "epoch": 1.54, + "learning_rate": 5.389530090684254e-05, + "loss": 6.0198, + "step": 2745 + }, + { + "epoch": 1.54, + "learning_rate": 5.387469084913438e-05, + "loss": 6.1499, + "step": 2746 + }, + { + "epoch": 1.54, + "learning_rate": 5.385408079142622e-05, + "loss": 6.0223, + "step": 2747 + }, + { + "epoch": 1.54, + "learning_rate": 5.383347073371806e-05, + "loss": 6.0197, + "step": 2748 + }, + { + "epoch": 1.54, + "learning_rate": 5.381286067600989e-05, + "loss": 5.9114, + "step": 2749 + }, + { + "epoch": 1.54, + "learning_rate": 5.379225061830173e-05, + "loss": 6.0084, + "step": 2750 + }, + { + "epoch": 1.54, + "learning_rate": 5.377164056059357e-05, + "loss": 5.764, + "step": 2751 + }, + { + "epoch": 1.54, + "learning_rate": 5.3751030502885415e-05, + "loss": 5.6981, + "step": 2752 + }, + { + "epoch": 1.54, + "learning_rate": 5.373042044517725e-05, + "loss": 5.5797, + "step": 2753 + }, + { + "epoch": 1.54, + "learning_rate": 5.370981038746908e-05, + "loss": 5.8573, + "step": 2754 + }, + { + "epoch": 1.54, + "learning_rate": 5.3689200329760925e-05, + "loss": 6.0034, + "step": 2755 + }, + { + "epoch": 1.54, + "learning_rate": 5.366859027205277e-05, + "loss": 5.8674, + "step": 2756 + }, + { + "epoch": 1.55, + "learning_rate": 5.3647980214344605e-05, + "loss": 6.0442, + "step": 2757 + }, + { + "epoch": 1.55, + "learning_rate": 5.3627370156636435e-05, + "loss": 5.8236, + "step": 2758 + }, + { + "epoch": 1.55, + "learning_rate": 5.360676009892828e-05, + "loss": 5.836, + "step": 2759 + }, + { + "epoch": 1.55, + "learning_rate": 5.3586150041220115e-05, + "loss": 5.7103, + "step": 2760 + }, + { + "epoch": 1.55, + "learning_rate": 5.356553998351196e-05, + "loss": 5.834, + "step": 2761 + }, + { + "epoch": 1.55, + "learning_rate": 5.35449299258038e-05, + "loss": 5.8321, + "step": 2762 + }, + { + "epoch": 1.55, + "learning_rate": 5.352431986809563e-05, + "loss": 5.715, + "step": 2763 + }, + { + "epoch": 1.55, + "learning_rate": 5.350370981038747e-05, + "loss": 5.7562, + "step": 2764 + }, + { + "epoch": 1.55, + "learning_rate": 5.348309975267931e-05, + "loss": 5.4303, + "step": 2765 + }, + { + "epoch": 1.55, + "learning_rate": 5.3462489694971155e-05, + "loss": 5.542, + "step": 2766 + }, + { + "epoch": 1.55, + "learning_rate": 5.3441879637262985e-05, + "loss": 5.7416, + "step": 2767 + }, + { + "epoch": 1.55, + "learning_rate": 5.342126957955482e-05, + "loss": 5.7627, + "step": 2768 + }, + { + "epoch": 1.55, + "learning_rate": 5.3400659521846665e-05, + "loss": 5.2022, + "step": 2769 + }, + { + "epoch": 1.55, + "learning_rate": 5.33800494641385e-05, + "loss": 5.6851, + "step": 2770 + }, + { + "epoch": 1.55, + "learning_rate": 5.3359439406430345e-05, + "loss": 5.558, + "step": 2771 + }, + { + "epoch": 1.55, + "learning_rate": 5.3338829348722174e-05, + "loss": 5.5541, + "step": 2772 + }, + { + "epoch": 1.55, + "learning_rate": 5.331821929101402e-05, + "loss": 5.4567, + "step": 2773 + }, + { + "epoch": 1.55, + "learning_rate": 5.3297609233305854e-05, + "loss": 5.4195, + "step": 2774 + }, + { + "epoch": 1.56, + "learning_rate": 5.32769991755977e-05, + "loss": 5.2493, + "step": 2775 + }, + { + "epoch": 1.56, + "learning_rate": 5.325638911788954e-05, + "loss": 5.3501, + "step": 2776 + }, + { + "epoch": 1.56, + "learning_rate": 5.323577906018137e-05, + "loss": 5.0934, + "step": 2777 + }, + { + "epoch": 1.56, + "learning_rate": 5.321516900247321e-05, + "loss": 5.4446, + "step": 2778 + }, + { + "epoch": 1.56, + "learning_rate": 5.319455894476505e-05, + "loss": 4.9355, + "step": 2779 + }, + { + "epoch": 1.56, + "learning_rate": 5.317394888705689e-05, + "loss": 4.7122, + "step": 2780 + }, + { + "epoch": 1.56, + "learning_rate": 5.315333882934872e-05, + "loss": 4.3588, + "step": 2781 + }, + { + "epoch": 1.56, + "learning_rate": 5.313272877164056e-05, + "loss": 4.4043, + "step": 2782 + }, + { + "epoch": 1.56, + "learning_rate": 5.3112118713932404e-05, + "loss": 4.6048, + "step": 2783 + }, + { + "epoch": 1.56, + "learning_rate": 5.309150865622424e-05, + "loss": 3.5184, + "step": 2784 + }, + { + "epoch": 1.56, + "learning_rate": 5.3070898598516084e-05, + "loss": 6.2321, + "step": 2785 + }, + { + "epoch": 1.56, + "learning_rate": 5.3050288540807914e-05, + "loss": 6.6032, + "step": 2786 + }, + { + "epoch": 1.56, + "learning_rate": 5.302967848309975e-05, + "loss": 6.4747, + "step": 2787 + }, + { + "epoch": 1.56, + "learning_rate": 5.3009068425391594e-05, + "loss": 6.0285, + "step": 2788 + }, + { + "epoch": 1.56, + "learning_rate": 5.298845836768344e-05, + "loss": 6.0427, + "step": 2789 + }, + { + "epoch": 1.56, + "learning_rate": 5.296784830997527e-05, + "loss": 5.832, + "step": 2790 + }, + { + "epoch": 1.56, + "learning_rate": 5.2947238252267104e-05, + "loss": 6.0142, + "step": 2791 + }, + { + "epoch": 1.57, + "learning_rate": 5.292662819455895e-05, + "loss": 5.7314, + "step": 2792 + }, + { + "epoch": 1.57, + "learning_rate": 5.290601813685079e-05, + "loss": 5.7732, + "step": 2793 + }, + { + "epoch": 1.57, + "learning_rate": 5.288540807914263e-05, + "loss": 5.9518, + "step": 2794 + }, + { + "epoch": 1.57, + "learning_rate": 5.286479802143446e-05, + "loss": 5.9541, + "step": 2795 + }, + { + "epoch": 1.57, + "learning_rate": 5.28441879637263e-05, + "loss": 5.9807, + "step": 2796 + }, + { + "epoch": 1.57, + "learning_rate": 5.282357790601814e-05, + "loss": 6.0053, + "step": 2797 + }, + { + "epoch": 1.57, + "learning_rate": 5.280296784830998e-05, + "loss": 6.1149, + "step": 2798 + }, + { + "epoch": 1.57, + "learning_rate": 5.278235779060181e-05, + "loss": 5.8564, + "step": 2799 + }, + { + "epoch": 1.57, + "learning_rate": 5.2761747732893654e-05, + "loss": 5.6504, + "step": 2800 + }, + { + "epoch": 1.57, + "learning_rate": 5.274113767518549e-05, + "loss": 5.7547, + "step": 2801 + }, + { + "epoch": 1.57, + "learning_rate": 5.2720527617477334e-05, + "loss": 5.7427, + "step": 2802 + }, + { + "epoch": 1.57, + "learning_rate": 5.269991755976918e-05, + "loss": 5.8852, + "step": 2803 + }, + { + "epoch": 1.57, + "learning_rate": 5.267930750206101e-05, + "loss": 5.5738, + "step": 2804 + }, + { + "epoch": 1.57, + "learning_rate": 5.2658697444352843e-05, + "loss": 5.6673, + "step": 2805 + }, + { + "epoch": 1.57, + "learning_rate": 5.263808738664469e-05, + "loss": 5.9246, + "step": 2806 + }, + { + "epoch": 1.57, + "learning_rate": 5.261747732893652e-05, + "loss": 5.7039, + "step": 2807 + }, + { + "epoch": 1.57, + "learning_rate": 5.259686727122837e-05, + "loss": 5.9319, + "step": 2808 + }, + { + "epoch": 1.57, + "learning_rate": 5.2576257213520197e-05, + "loss": 5.5924, + "step": 2809 + }, + { + "epoch": 1.58, + "learning_rate": 5.255564715581204e-05, + "loss": 5.3909, + "step": 2810 + }, + { + "epoch": 1.58, + "learning_rate": 5.2535037098103877e-05, + "loss": 5.713, + "step": 2811 + }, + { + "epoch": 1.58, + "learning_rate": 5.251442704039572e-05, + "loss": 5.6611, + "step": 2812 + }, + { + "epoch": 1.58, + "learning_rate": 5.249381698268755e-05, + "loss": 5.9764, + "step": 2813 + }, + { + "epoch": 1.58, + "learning_rate": 5.247320692497939e-05, + "loss": 5.757, + "step": 2814 + }, + { + "epoch": 1.58, + "learning_rate": 5.245259686727123e-05, + "loss": 5.6443, + "step": 2815 + }, + { + "epoch": 1.58, + "learning_rate": 5.243198680956307e-05, + "loss": 5.4124, + "step": 2816 + }, + { + "epoch": 1.58, + "learning_rate": 5.241137675185491e-05, + "loss": 5.4078, + "step": 2817 + }, + { + "epoch": 1.58, + "learning_rate": 5.239076669414674e-05, + "loss": 5.3505, + "step": 2818 + }, + { + "epoch": 1.58, + "learning_rate": 5.237015663643858e-05, + "loss": 5.7052, + "step": 2819 + }, + { + "epoch": 1.58, + "learning_rate": 5.2349546578730426e-05, + "loss": 5.4948, + "step": 2820 + }, + { + "epoch": 1.58, + "learning_rate": 5.232893652102226e-05, + "loss": 5.4401, + "step": 2821 + }, + { + "epoch": 1.58, + "learning_rate": 5.230832646331409e-05, + "loss": 5.3929, + "step": 2822 + }, + { + "epoch": 1.58, + "learning_rate": 5.2287716405605936e-05, + "loss": 5.4454, + "step": 2823 + }, + { + "epoch": 1.58, + "learning_rate": 5.226710634789778e-05, + "loss": 5.4855, + "step": 2824 + }, + { + "epoch": 1.58, + "learning_rate": 5.2246496290189616e-05, + "loss": 5.4984, + "step": 2825 + }, + { + "epoch": 1.58, + "learning_rate": 5.222588623248146e-05, + "loss": 5.3734, + "step": 2826 + }, + { + "epoch": 1.58, + "learning_rate": 5.220527617477329e-05, + "loss": 5.2363, + "step": 2827 + }, + { + "epoch": 1.59, + "learning_rate": 5.2184666117065126e-05, + "loss": 5.4789, + "step": 2828 + }, + { + "epoch": 1.59, + "learning_rate": 5.216405605935697e-05, + "loss": 4.969, + "step": 2829 + }, + { + "epoch": 1.59, + "learning_rate": 5.214344600164881e-05, + "loss": 4.8523, + "step": 2830 + }, + { + "epoch": 1.59, + "learning_rate": 5.212283594394065e-05, + "loss": 4.2979, + "step": 2831 + }, + { + "epoch": 1.59, + "learning_rate": 5.210222588623248e-05, + "loss": 4.2262, + "step": 2832 + }, + { + "epoch": 1.59, + "learning_rate": 5.208161582852432e-05, + "loss": 4.138, + "step": 2833 + }, + { + "epoch": 1.59, + "learning_rate": 5.206100577081616e-05, + "loss": 4.2949, + "step": 2834 + }, + { + "epoch": 1.59, + "learning_rate": 5.2040395713108e-05, + "loss": 6.6696, + "step": 2835 + }, + { + "epoch": 1.59, + "learning_rate": 5.201978565539983e-05, + "loss": 6.254, + "step": 2836 + }, + { + "epoch": 1.59, + "learning_rate": 5.1999175597691676e-05, + "loss": 6.2813, + "step": 2837 + }, + { + "epoch": 1.59, + "learning_rate": 5.197856553998351e-05, + "loss": 6.3864, + "step": 2838 + }, + { + "epoch": 1.59, + "learning_rate": 5.1957955482275356e-05, + "loss": 6.3677, + "step": 2839 + }, + { + "epoch": 1.59, + "learning_rate": 5.19373454245672e-05, + "loss": 6.0178, + "step": 2840 + }, + { + "epoch": 1.59, + "learning_rate": 5.191673536685903e-05, + "loss": 6.0931, + "step": 2841 + }, + { + "epoch": 1.59, + "learning_rate": 5.1896125309150865e-05, + "loss": 5.9587, + "step": 2842 + }, + { + "epoch": 1.59, + "learning_rate": 5.187551525144271e-05, + "loss": 6.1064, + "step": 2843 + }, + { + "epoch": 1.59, + "learning_rate": 5.1854905193734545e-05, + "loss": 6.1668, + "step": 2844 + }, + { + "epoch": 1.59, + "learning_rate": 5.1834295136026375e-05, + "loss": 6.0073, + "step": 2845 + }, + { + "epoch": 1.6, + "learning_rate": 5.181368507831822e-05, + "loss": 6.0163, + "step": 2846 + }, + { + "epoch": 1.6, + "learning_rate": 5.179307502061006e-05, + "loss": 6.0154, + "step": 2847 + }, + { + "epoch": 1.6, + "learning_rate": 5.17724649629019e-05, + "loss": 5.9107, + "step": 2848 + }, + { + "epoch": 1.6, + "learning_rate": 5.175185490519374e-05, + "loss": 5.7459, + "step": 2849 + }, + { + "epoch": 1.6, + "learning_rate": 5.173124484748557e-05, + "loss": 5.8149, + "step": 2850 + }, + { + "epoch": 1.6, + "learning_rate": 5.1710634789777415e-05, + "loss": 5.7108, + "step": 2851 + }, + { + "epoch": 1.6, + "learning_rate": 5.169002473206925e-05, + "loss": 5.7573, + "step": 2852 + }, + { + "epoch": 1.6, + "learning_rate": 5.1669414674361095e-05, + "loss": 5.9029, + "step": 2853 + }, + { + "epoch": 1.6, + "learning_rate": 5.1648804616652925e-05, + "loss": 5.6344, + "step": 2854 + }, + { + "epoch": 1.6, + "learning_rate": 5.162819455894476e-05, + "loss": 5.6151, + "step": 2855 + }, + { + "epoch": 1.6, + "learning_rate": 5.1607584501236605e-05, + "loss": 5.7319, + "step": 2856 + }, + { + "epoch": 1.6, + "learning_rate": 5.158697444352845e-05, + "loss": 5.5918, + "step": 2857 + }, + { + "epoch": 1.6, + "learning_rate": 5.1566364385820285e-05, + "loss": 5.6666, + "step": 2858 + }, + { + "epoch": 1.6, + "learning_rate": 5.1545754328112115e-05, + "loss": 5.4391, + "step": 2859 + }, + { + "epoch": 1.6, + "learning_rate": 5.152514427040396e-05, + "loss": 5.4475, + "step": 2860 + }, + { + "epoch": 1.6, + "learning_rate": 5.15045342126958e-05, + "loss": 5.8912, + "step": 2861 + }, + { + "epoch": 1.6, + "learning_rate": 5.148392415498764e-05, + "loss": 5.6641, + "step": 2862 + }, + { + "epoch": 1.6, + "learning_rate": 5.146331409727948e-05, + "loss": 5.8244, + "step": 2863 + }, + { + "epoch": 1.61, + "learning_rate": 5.144270403957131e-05, + "loss": 5.8281, + "step": 2864 + }, + { + "epoch": 1.61, + "learning_rate": 5.142209398186315e-05, + "loss": 5.2527, + "step": 2865 + }, + { + "epoch": 1.61, + "learning_rate": 5.140148392415499e-05, + "loss": 5.5685, + "step": 2866 + }, + { + "epoch": 1.61, + "learning_rate": 5.1380873866446835e-05, + "loss": 5.3888, + "step": 2867 + }, + { + "epoch": 1.61, + "learning_rate": 5.1360263808738665e-05, + "loss": 5.4446, + "step": 2868 + }, + { + "epoch": 1.61, + "learning_rate": 5.13396537510305e-05, + "loss": 5.5593, + "step": 2869 + }, + { + "epoch": 1.61, + "learning_rate": 5.1319043693322345e-05, + "loss": 5.226, + "step": 2870 + }, + { + "epoch": 1.61, + "learning_rate": 5.129843363561419e-05, + "loss": 5.473, + "step": 2871 + }, + { + "epoch": 1.61, + "learning_rate": 5.1277823577906025e-05, + "loss": 5.33, + "step": 2872 + }, + { + "epoch": 1.61, + "learning_rate": 5.1257213520197854e-05, + "loss": 5.5023, + "step": 2873 + }, + { + "epoch": 1.61, + "learning_rate": 5.12366034624897e-05, + "loss": 5.4893, + "step": 2874 + }, + { + "epoch": 1.61, + "learning_rate": 5.1215993404781534e-05, + "loss": 5.4934, + "step": 2875 + }, + { + "epoch": 1.61, + "learning_rate": 5.119538334707338e-05, + "loss": 5.2252, + "step": 2876 + }, + { + "epoch": 1.61, + "learning_rate": 5.117477328936521e-05, + "loss": 4.9892, + "step": 2877 + }, + { + "epoch": 1.61, + "learning_rate": 5.115416323165705e-05, + "loss": 5.4179, + "step": 2878 + }, + { + "epoch": 1.61, + "learning_rate": 5.113355317394889e-05, + "loss": 5.015, + "step": 2879 + }, + { + "epoch": 1.61, + "learning_rate": 5.111294311624073e-05, + "loss": 5.0428, + "step": 2880 + }, + { + "epoch": 1.61, + "learning_rate": 5.1092333058532574e-05, + "loss": 5.0233, + "step": 2881 + }, + { + "epoch": 1.62, + "learning_rate": 5.10717230008244e-05, + "loss": 4.7935, + "step": 2882 + }, + { + "epoch": 1.62, + "learning_rate": 5.105111294311624e-05, + "loss": 4.1623, + "step": 2883 + }, + { + "epoch": 1.62, + "learning_rate": 5.1030502885408084e-05, + "loss": 4.1555, + "step": 2884 + }, + { + "epoch": 1.62, + "learning_rate": 5.100989282769992e-05, + "loss": 6.483, + "step": 2885 + }, + { + "epoch": 1.62, + "learning_rate": 5.098928276999175e-05, + "loss": 6.3773, + "step": 2886 + }, + { + "epoch": 1.62, + "learning_rate": 5.0968672712283594e-05, + "loss": 6.3269, + "step": 2887 + }, + { + "epoch": 1.62, + "learning_rate": 5.094806265457544e-05, + "loss": 6.1417, + "step": 2888 + }, + { + "epoch": 1.62, + "learning_rate": 5.0927452596867274e-05, + "loss": 6.2228, + "step": 2889 + }, + { + "epoch": 1.62, + "learning_rate": 5.090684253915912e-05, + "loss": 5.6857, + "step": 2890 + }, + { + "epoch": 1.62, + "learning_rate": 5.088623248145095e-05, + "loss": 5.8776, + "step": 2891 + }, + { + "epoch": 1.62, + "learning_rate": 5.0865622423742784e-05, + "loss": 5.8407, + "step": 2892 + }, + { + "epoch": 1.62, + "learning_rate": 5.084501236603463e-05, + "loss": 5.8435, + "step": 2893 + }, + { + "epoch": 1.62, + "learning_rate": 5.082440230832647e-05, + "loss": 5.824, + "step": 2894 + }, + { + "epoch": 1.62, + "learning_rate": 5.080379225061831e-05, + "loss": 5.789, + "step": 2895 + }, + { + "epoch": 1.62, + "learning_rate": 5.078318219291014e-05, + "loss": 5.8227, + "step": 2896 + }, + { + "epoch": 1.62, + "learning_rate": 5.076257213520198e-05, + "loss": 5.9495, + "step": 2897 + }, + { + "epoch": 1.62, + "learning_rate": 5.0741962077493824e-05, + "loss": 5.7963, + "step": 2898 + }, + { + "epoch": 1.62, + "learning_rate": 5.072135201978566e-05, + "loss": 5.5862, + "step": 2899 + }, + { + "epoch": 1.63, + "learning_rate": 5.070074196207749e-05, + "loss": 5.7162, + "step": 2900 + }, + { + "epoch": 1.63, + "learning_rate": 5.0680131904369333e-05, + "loss": 5.6098, + "step": 2901 + }, + { + "epoch": 1.63, + "learning_rate": 5.065952184666117e-05, + "loss": 5.694, + "step": 2902 + }, + { + "epoch": 1.63, + "learning_rate": 5.0638911788953013e-05, + "loss": 5.7095, + "step": 2903 + }, + { + "epoch": 1.63, + "learning_rate": 5.061830173124486e-05, + "loss": 5.596, + "step": 2904 + }, + { + "epoch": 1.63, + "learning_rate": 5.059769167353669e-05, + "loss": 5.7468, + "step": 2905 + }, + { + "epoch": 1.63, + "learning_rate": 5.057708161582852e-05, + "loss": 5.7125, + "step": 2906 + }, + { + "epoch": 1.63, + "learning_rate": 5.055647155812037e-05, + "loss": 5.8804, + "step": 2907 + }, + { + "epoch": 1.63, + "learning_rate": 5.053586150041221e-05, + "loss": 5.6151, + "step": 2908 + }, + { + "epoch": 1.63, + "learning_rate": 5.051525144270404e-05, + "loss": 5.4165, + "step": 2909 + }, + { + "epoch": 1.63, + "learning_rate": 5.0494641384995876e-05, + "loss": 5.4888, + "step": 2910 + }, + { + "epoch": 1.63, + "learning_rate": 5.047403132728772e-05, + "loss": 5.6692, + "step": 2911 + }, + { + "epoch": 1.63, + "learning_rate": 5.0453421269579556e-05, + "loss": 5.6111, + "step": 2912 + }, + { + "epoch": 1.63, + "learning_rate": 5.04328112118714e-05, + "loss": 5.8472, + "step": 2913 + }, + { + "epoch": 1.63, + "learning_rate": 5.041220115416323e-05, + "loss": 5.6902, + "step": 2914 + }, + { + "epoch": 1.63, + "learning_rate": 5.039159109645507e-05, + "loss": 5.5513, + "step": 2915 + }, + { + "epoch": 1.63, + "learning_rate": 5.037098103874691e-05, + "loss": 5.3788, + "step": 2916 + }, + { + "epoch": 1.64, + "learning_rate": 5.035037098103875e-05, + "loss": 5.4353, + "step": 2917 + }, + { + "epoch": 1.64, + "learning_rate": 5.032976092333058e-05, + "loss": 5.5274, + "step": 2918 + }, + { + "epoch": 1.64, + "learning_rate": 5.0309150865622426e-05, + "loss": 5.5732, + "step": 2919 + }, + { + "epoch": 1.64, + "learning_rate": 5.028854080791426e-05, + "loss": 5.472, + "step": 2920 + }, + { + "epoch": 1.64, + "learning_rate": 5.0267930750206106e-05, + "loss": 5.4856, + "step": 2921 + }, + { + "epoch": 1.64, + "learning_rate": 5.024732069249794e-05, + "loss": 5.5026, + "step": 2922 + }, + { + "epoch": 1.64, + "learning_rate": 5.022671063478977e-05, + "loss": 5.4506, + "step": 2923 + }, + { + "epoch": 1.64, + "learning_rate": 5.0206100577081616e-05, + "loss": 5.1028, + "step": 2924 + }, + { + "epoch": 1.64, + "learning_rate": 5.018549051937346e-05, + "loss": 5.3179, + "step": 2925 + }, + { + "epoch": 1.64, + "learning_rate": 5.0164880461665296e-05, + "loss": 5.4887, + "step": 2926 + }, + { + "epoch": 1.64, + "learning_rate": 5.014427040395714e-05, + "loss": 4.9979, + "step": 2927 + }, + { + "epoch": 1.64, + "learning_rate": 5.012366034624897e-05, + "loss": 5.532, + "step": 2928 + }, + { + "epoch": 1.64, + "learning_rate": 5.010305028854081e-05, + "loss": 4.8991, + "step": 2929 + }, + { + "epoch": 1.64, + "learning_rate": 5.008244023083265e-05, + "loss": 4.9945, + "step": 2930 + }, + { + "epoch": 1.64, + "learning_rate": 5.006183017312449e-05, + "loss": 4.5562, + "step": 2931 + }, + { + "epoch": 1.64, + "learning_rate": 5.004122011541632e-05, + "loss": 4.7683, + "step": 2932 + }, + { + "epoch": 1.64, + "learning_rate": 5.002061005770816e-05, + "loss": 4.4515, + "step": 2933 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 3.6295, + "step": 2934 + }, + { + "epoch": 1.65, + "learning_rate": 4.9979389942291846e-05, + "loss": 6.0934, + "step": 2935 + }, + { + "epoch": 1.65, + "learning_rate": 4.9958779884583676e-05, + "loss": 6.2427, + "step": 2936 + }, + { + "epoch": 1.65, + "learning_rate": 4.993816982687552e-05, + "loss": 6.2501, + "step": 2937 + }, + { + "epoch": 1.65, + "learning_rate": 4.9917559769167356e-05, + "loss": 5.9979, + "step": 2938 + }, + { + "epoch": 1.65, + "learning_rate": 4.989694971145919e-05, + "loss": 5.8803, + "step": 2939 + }, + { + "epoch": 1.65, + "learning_rate": 4.987633965375103e-05, + "loss": 5.6942, + "step": 2940 + }, + { + "epoch": 1.65, + "learning_rate": 4.985572959604287e-05, + "loss": 5.8034, + "step": 2941 + }, + { + "epoch": 1.65, + "learning_rate": 4.9835119538334716e-05, + "loss": 5.641, + "step": 2942 + }, + { + "epoch": 1.65, + "learning_rate": 4.9814509480626545e-05, + "loss": 6.0814, + "step": 2943 + }, + { + "epoch": 1.65, + "learning_rate": 4.979389942291839e-05, + "loss": 5.8509, + "step": 2944 + }, + { + "epoch": 1.65, + "learning_rate": 4.9773289365210225e-05, + "loss": 5.931, + "step": 2945 + }, + { + "epoch": 1.65, + "learning_rate": 4.975267930750206e-05, + "loss": 5.9978, + "step": 2946 + }, + { + "epoch": 1.65, + "learning_rate": 4.97320692497939e-05, + "loss": 6.0963, + "step": 2947 + }, + { + "epoch": 1.65, + "learning_rate": 4.971145919208574e-05, + "loss": 5.8914, + "step": 2948 + }, + { + "epoch": 1.65, + "learning_rate": 4.969084913437758e-05, + "loss": 5.5398, + "step": 2949 + }, + { + "epoch": 1.65, + "learning_rate": 4.9670239076669415e-05, + "loss": 5.7136, + "step": 2950 + }, + { + "epoch": 1.65, + "learning_rate": 4.964962901896126e-05, + "loss": 5.7492, + "step": 2951 + }, + { + "epoch": 1.65, + "learning_rate": 4.9629018961253095e-05, + "loss": 5.4765, + "step": 2952 + }, + { + "epoch": 1.66, + "learning_rate": 4.960840890354493e-05, + "loss": 5.5388, + "step": 2953 + }, + { + "epoch": 1.66, + "learning_rate": 4.958779884583677e-05, + "loss": 5.7643, + "step": 2954 + }, + { + "epoch": 1.66, + "learning_rate": 4.956718878812861e-05, + "loss": 5.7582, + "step": 2955 + }, + { + "epoch": 1.66, + "learning_rate": 4.954657873042045e-05, + "loss": 5.9383, + "step": 2956 + }, + { + "epoch": 1.66, + "learning_rate": 4.9525968672712285e-05, + "loss": 5.3801, + "step": 2957 + }, + { + "epoch": 1.66, + "learning_rate": 4.950535861500413e-05, + "loss": 5.7217, + "step": 2958 + }, + { + "epoch": 1.66, + "learning_rate": 4.9484748557295965e-05, + "loss": 5.5787, + "step": 2959 + }, + { + "epoch": 1.66, + "learning_rate": 4.94641384995878e-05, + "loss": 5.7074, + "step": 2960 + }, + { + "epoch": 1.66, + "learning_rate": 4.944352844187964e-05, + "loss": 5.6082, + "step": 2961 + }, + { + "epoch": 1.66, + "learning_rate": 4.942291838417148e-05, + "loss": 5.4113, + "step": 2962 + }, + { + "epoch": 1.66, + "learning_rate": 4.940230832646331e-05, + "loss": 5.6214, + "step": 2963 + }, + { + "epoch": 1.66, + "learning_rate": 4.9381698268755155e-05, + "loss": 5.695, + "step": 2964 + }, + { + "epoch": 1.66, + "learning_rate": 4.936108821104699e-05, + "loss": 5.6469, + "step": 2965 + }, + { + "epoch": 1.66, + "learning_rate": 4.9340478153338835e-05, + "loss": 5.3937, + "step": 2966 + }, + { + "epoch": 1.66, + "learning_rate": 4.931986809563067e-05, + "loss": 5.3793, + "step": 2967 + }, + { + "epoch": 1.66, + "learning_rate": 4.929925803792251e-05, + "loss": 5.4278, + "step": 2968 + }, + { + "epoch": 1.66, + "learning_rate": 4.927864798021435e-05, + "loss": 5.3329, + "step": 2969 + }, + { + "epoch": 1.66, + "learning_rate": 4.925803792250618e-05, + "loss": 5.4046, + "step": 2970 + }, + { + "epoch": 1.67, + "learning_rate": 4.9237427864798024e-05, + "loss": 5.3621, + "step": 2971 + }, + { + "epoch": 1.67, + "learning_rate": 4.921681780708986e-05, + "loss": 5.5643, + "step": 2972 + }, + { + "epoch": 1.67, + "learning_rate": 4.91962077493817e-05, + "loss": 5.7094, + "step": 2973 + }, + { + "epoch": 1.67, + "learning_rate": 4.917559769167354e-05, + "loss": 5.7099, + "step": 2974 + }, + { + "epoch": 1.67, + "learning_rate": 4.915498763396538e-05, + "loss": 5.5654, + "step": 2975 + }, + { + "epoch": 1.67, + "learning_rate": 4.913437757625722e-05, + "loss": 5.2909, + "step": 2976 + }, + { + "epoch": 1.67, + "learning_rate": 4.911376751854905e-05, + "loss": 5.4886, + "step": 2977 + }, + { + "epoch": 1.67, + "learning_rate": 4.9093157460840894e-05, + "loss": 5.1702, + "step": 2978 + }, + { + "epoch": 1.67, + "learning_rate": 4.907254740313273e-05, + "loss": 5.166, + "step": 2979 + }, + { + "epoch": 1.67, + "learning_rate": 4.905193734542457e-05, + "loss": 4.7005, + "step": 2980 + }, + { + "epoch": 1.67, + "learning_rate": 4.9031327287716404e-05, + "loss": 4.609, + "step": 2981 + }, + { + "epoch": 1.67, + "learning_rate": 4.901071723000825e-05, + "loss": 4.7536, + "step": 2982 + }, + { + "epoch": 1.67, + "learning_rate": 4.8990107172300084e-05, + "loss": 4.2948, + "step": 2983 + }, + { + "epoch": 1.67, + "learning_rate": 4.896949711459192e-05, + "loss": 4.3511, + "step": 2984 + }, + { + "epoch": 1.67, + "learning_rate": 4.8948887056883764e-05, + "loss": 6.6435, + "step": 2985 + }, + { + "epoch": 1.67, + "learning_rate": 4.89282769991756e-05, + "loss": 6.4827, + "step": 2986 + }, + { + "epoch": 1.67, + "learning_rate": 4.890766694146744e-05, + "loss": 6.445, + "step": 2987 + }, + { + "epoch": 1.67, + "learning_rate": 4.8887056883759274e-05, + "loss": 6.3789, + "step": 2988 + }, + { + "epoch": 1.68, + "learning_rate": 4.886644682605112e-05, + "loss": 6.3487, + "step": 2989 + }, + { + "epoch": 1.68, + "learning_rate": 4.8845836768342954e-05, + "loss": 6.1719, + "step": 2990 + }, + { + "epoch": 1.68, + "learning_rate": 4.882522671063479e-05, + "loss": 6.3097, + "step": 2991 + }, + { + "epoch": 1.68, + "learning_rate": 4.8804616652926634e-05, + "loss": 5.851, + "step": 2992 + }, + { + "epoch": 1.68, + "learning_rate": 4.878400659521847e-05, + "loss": 5.9957, + "step": 2993 + }, + { + "epoch": 1.68, + "learning_rate": 4.876339653751031e-05, + "loss": 5.6974, + "step": 2994 + }, + { + "epoch": 1.68, + "learning_rate": 4.8742786479802144e-05, + "loss": 5.7307, + "step": 2995 + }, + { + "epoch": 1.68, + "learning_rate": 4.872217642209399e-05, + "loss": 5.8638, + "step": 2996 + }, + { + "epoch": 1.68, + "learning_rate": 4.870156636438582e-05, + "loss": 5.9128, + "step": 2997 + }, + { + "epoch": 1.68, + "learning_rate": 4.868095630667766e-05, + "loss": 5.9028, + "step": 2998 + }, + { + "epoch": 1.68, + "learning_rate": 4.8660346248969504e-05, + "loss": 5.7087, + "step": 2999 + }, + { + "epoch": 1.68, + "learning_rate": 4.863973619126134e-05, + "loss": 5.6761, + "step": 3000 + }, + { + "epoch": 1.68, + "eval_loss": 16.960098266601562, + "eval_runtime": 1334.4635, + "eval_samples_per_second": 1.98, + "eval_steps_per_second": 0.248, + "eval_wer": 1.0, + "step": 3000 + }, + { + "epoch": 1.68, + "learning_rate": 4.861912613355318e-05, + "loss": 5.5905, + "step": 3001 + }, + { + "epoch": 1.68, + "learning_rate": 4.8598516075845013e-05, + "loss": 5.5754, + "step": 3002 + }, + { + "epoch": 1.68, + "learning_rate": 4.857790601813686e-05, + "loss": 5.5713, + "step": 3003 + }, + { + "epoch": 1.68, + "learning_rate": 4.8557295960428687e-05, + "loss": 5.9027, + "step": 3004 + }, + { + "epoch": 1.68, + "learning_rate": 4.853668590272053e-05, + "loss": 5.711, + "step": 3005 + }, + { + "epoch": 1.68, + "learning_rate": 4.851607584501237e-05, + "loss": 5.7977, + "step": 3006 + }, + { + "epoch": 1.69, + "learning_rate": 4.84954657873042e-05, + "loss": 5.8576, + "step": 3007 + }, + { + "epoch": 1.69, + "learning_rate": 4.8474855729596047e-05, + "loss": 5.4058, + "step": 3008 + }, + { + "epoch": 1.69, + "learning_rate": 4.845424567188788e-05, + "loss": 5.5432, + "step": 3009 + }, + { + "epoch": 1.69, + "learning_rate": 4.8433635614179727e-05, + "loss": 5.6276, + "step": 3010 + }, + { + "epoch": 1.69, + "learning_rate": 4.8413025556471556e-05, + "loss": 5.7842, + "step": 3011 + }, + { + "epoch": 1.69, + "learning_rate": 4.83924154987634e-05, + "loss": 5.6691, + "step": 3012 + }, + { + "epoch": 1.69, + "learning_rate": 4.8371805441055236e-05, + "loss": 5.6965, + "step": 3013 + }, + { + "epoch": 1.69, + "learning_rate": 4.835119538334707e-05, + "loss": 5.5238, + "step": 3014 + }, + { + "epoch": 1.69, + "learning_rate": 4.8330585325638916e-05, + "loss": 5.4294, + "step": 3015 + }, + { + "epoch": 1.69, + "learning_rate": 4.830997526793075e-05, + "loss": 5.5859, + "step": 3016 + }, + { + "epoch": 1.69, + "learning_rate": 4.828936521022259e-05, + "loss": 5.8723, + "step": 3017 + }, + { + "epoch": 1.69, + "learning_rate": 4.8268755152514426e-05, + "loss": 5.4862, + "step": 3018 + }, + { + "epoch": 1.69, + "learning_rate": 4.824814509480627e-05, + "loss": 5.6382, + "step": 3019 + }, + { + "epoch": 1.69, + "learning_rate": 4.8227535037098106e-05, + "loss": 5.3826, + "step": 3020 + }, + { + "epoch": 1.69, + "learning_rate": 4.820692497938994e-05, + "loss": 5.5957, + "step": 3021 + }, + { + "epoch": 1.69, + "learning_rate": 4.8186314921681786e-05, + "loss": 5.3667, + "step": 3022 + }, + { + "epoch": 1.69, + "learning_rate": 4.816570486397362e-05, + "loss": 5.3344, + "step": 3023 + }, + { + "epoch": 1.7, + "learning_rate": 4.814509480626546e-05, + "loss": 5.4663, + "step": 3024 + }, + { + "epoch": 1.7, + "learning_rate": 4.8124484748557296e-05, + "loss": 5.6224, + "step": 3025 + }, + { + "epoch": 1.7, + "learning_rate": 4.810387469084914e-05, + "loss": 5.6679, + "step": 3026 + }, + { + "epoch": 1.7, + "learning_rate": 4.8083264633140976e-05, + "loss": 5.3889, + "step": 3027 + }, + { + "epoch": 1.7, + "learning_rate": 4.806265457543281e-05, + "loss": 5.1358, + "step": 3028 + }, + { + "epoch": 1.7, + "learning_rate": 4.804204451772465e-05, + "loss": 4.8834, + "step": 3029 + }, + { + "epoch": 1.7, + "learning_rate": 4.802143446001649e-05, + "loss": 5.2398, + "step": 3030 + }, + { + "epoch": 1.7, + "learning_rate": 4.800082440230833e-05, + "loss": 4.9875, + "step": 3031 + }, + { + "epoch": 1.7, + "learning_rate": 4.7980214344600166e-05, + "loss": 4.5499, + "step": 3032 + }, + { + "epoch": 1.7, + "learning_rate": 4.795960428689201e-05, + "loss": 4.5488, + "step": 3033 + }, + { + "epoch": 1.7, + "learning_rate": 4.7938994229183846e-05, + "loss": 3.7851, + "step": 3034 + }, + { + "epoch": 1.7, + "learning_rate": 4.791838417147568e-05, + "loss": 6.247, + "step": 3035 + }, + { + "epoch": 1.7, + "learning_rate": 4.789777411376752e-05, + "loss": 6.1313, + "step": 3036 + }, + { + "epoch": 1.7, + "learning_rate": 4.787716405605936e-05, + "loss": 6.2583, + "step": 3037 + }, + { + "epoch": 1.7, + "learning_rate": 4.78565539983512e-05, + "loss": 5.9628, + "step": 3038 + }, + { + "epoch": 1.7, + "learning_rate": 4.7835943940643035e-05, + "loss": 6.0476, + "step": 3039 + }, + { + "epoch": 1.7, + "learning_rate": 4.781533388293488e-05, + "loss": 5.7625, + "step": 3040 + }, + { + "epoch": 1.7, + "learning_rate": 4.779472382522671e-05, + "loss": 6.0996, + "step": 3041 + }, + { + "epoch": 1.71, + "learning_rate": 4.777411376751855e-05, + "loss": 5.4776, + "step": 3042 + }, + { + "epoch": 1.71, + "learning_rate": 4.775350370981039e-05, + "loss": 5.6714, + "step": 3043 + }, + { + "epoch": 1.71, + "learning_rate": 4.7732893652102225e-05, + "loss": 5.6129, + "step": 3044 + }, + { + "epoch": 1.71, + "learning_rate": 4.771228359439407e-05, + "loss": 5.7005, + "step": 3045 + }, + { + "epoch": 1.71, + "learning_rate": 4.7691673536685905e-05, + "loss": 5.845, + "step": 3046 + }, + { + "epoch": 1.71, + "learning_rate": 4.767106347897775e-05, + "loss": 5.7577, + "step": 3047 + }, + { + "epoch": 1.71, + "learning_rate": 4.765045342126958e-05, + "loss": 5.7636, + "step": 3048 + }, + { + "epoch": 1.71, + "learning_rate": 4.762984336356142e-05, + "loss": 5.4162, + "step": 3049 + }, + { + "epoch": 1.71, + "learning_rate": 4.760923330585326e-05, + "loss": 5.4843, + "step": 3050 + }, + { + "epoch": 1.71, + "learning_rate": 4.7588623248145095e-05, + "loss": 5.5686, + "step": 3051 + }, + { + "epoch": 1.71, + "learning_rate": 4.756801319043693e-05, + "loss": 5.6714, + "step": 3052 + }, + { + "epoch": 1.71, + "learning_rate": 4.7547403132728775e-05, + "loss": 5.795, + "step": 3053 + }, + { + "epoch": 1.71, + "learning_rate": 4.752679307502061e-05, + "loss": 5.5836, + "step": 3054 + }, + { + "epoch": 1.71, + "learning_rate": 4.750618301731245e-05, + "loss": 5.866, + "step": 3055 + }, + { + "epoch": 1.71, + "learning_rate": 4.748557295960429e-05, + "loss": 5.4484, + "step": 3056 + }, + { + "epoch": 1.71, + "learning_rate": 4.746496290189613e-05, + "loss": 5.8637, + "step": 3057 + }, + { + "epoch": 1.71, + "learning_rate": 4.7444352844187965e-05, + "loss": 5.6725, + "step": 3058 + }, + { + "epoch": 1.71, + "learning_rate": 4.74237427864798e-05, + "loss": 5.719, + "step": 3059 + }, + { + "epoch": 1.72, + "learning_rate": 4.7403132728771645e-05, + "loss": 5.6675, + "step": 3060 + }, + { + "epoch": 1.72, + "learning_rate": 4.738252267106348e-05, + "loss": 5.4754, + "step": 3061 + }, + { + "epoch": 1.72, + "learning_rate": 4.736191261335532e-05, + "loss": 5.7349, + "step": 3062 + }, + { + "epoch": 1.72, + "learning_rate": 4.734130255564716e-05, + "loss": 5.4839, + "step": 3063 + }, + { + "epoch": 1.72, + "learning_rate": 4.7320692497939e-05, + "loss": 5.559, + "step": 3064 + }, + { + "epoch": 1.72, + "learning_rate": 4.7300082440230835e-05, + "loss": 5.3717, + "step": 3065 + }, + { + "epoch": 1.72, + "learning_rate": 4.727947238252267e-05, + "loss": 5.4572, + "step": 3066 + }, + { + "epoch": 1.72, + "learning_rate": 4.7258862324814515e-05, + "loss": 5.326, + "step": 3067 + }, + { + "epoch": 1.72, + "learning_rate": 4.7238252267106344e-05, + "loss": 5.7818, + "step": 3068 + }, + { + "epoch": 1.72, + "learning_rate": 4.721764220939819e-05, + "loss": 5.5257, + "step": 3069 + }, + { + "epoch": 1.72, + "learning_rate": 4.719703215169003e-05, + "loss": 5.6525, + "step": 3070 + }, + { + "epoch": 1.72, + "learning_rate": 4.717642209398187e-05, + "loss": 5.6279, + "step": 3071 + }, + { + "epoch": 1.72, + "learning_rate": 4.7155812036273704e-05, + "loss": 5.3997, + "step": 3072 + }, + { + "epoch": 1.72, + "learning_rate": 4.713520197856554e-05, + "loss": 5.551, + "step": 3073 + }, + { + "epoch": 1.72, + "learning_rate": 4.7114591920857384e-05, + "loss": 5.4583, + "step": 3074 + }, + { + "epoch": 1.72, + "learning_rate": 4.7093981863149214e-05, + "loss": 5.3976, + "step": 3075 + }, + { + "epoch": 1.72, + "learning_rate": 4.707337180544106e-05, + "loss": 5.2285, + "step": 3076 + }, + { + "epoch": 1.72, + "learning_rate": 4.70527617477329e-05, + "loss": 5.3089, + "step": 3077 + }, + { + "epoch": 1.73, + "learning_rate": 4.703215169002473e-05, + "loss": 4.9581, + "step": 3078 + }, + { + "epoch": 1.73, + "learning_rate": 4.7011541632316574e-05, + "loss": 4.9583, + "step": 3079 + }, + { + "epoch": 1.73, + "learning_rate": 4.699093157460841e-05, + "loss": 4.75, + "step": 3080 + }, + { + "epoch": 1.73, + "learning_rate": 4.6970321516900254e-05, + "loss": 4.5465, + "step": 3081 + }, + { + "epoch": 1.73, + "learning_rate": 4.6949711459192084e-05, + "loss": 4.6335, + "step": 3082 + }, + { + "epoch": 1.73, + "learning_rate": 4.692910140148393e-05, + "loss": 4.6033, + "step": 3083 + }, + { + "epoch": 1.73, + "learning_rate": 4.6908491343775764e-05, + "loss": 3.4964, + "step": 3084 + }, + { + "epoch": 1.73, + "learning_rate": 4.68878812860676e-05, + "loss": 6.3402, + "step": 3085 + }, + { + "epoch": 1.73, + "learning_rate": 4.6867271228359444e-05, + "loss": 6.3707, + "step": 3086 + }, + { + "epoch": 1.73, + "learning_rate": 4.684666117065128e-05, + "loss": 6.14, + "step": 3087 + }, + { + "epoch": 1.73, + "learning_rate": 4.682605111294312e-05, + "loss": 6.2036, + "step": 3088 + }, + { + "epoch": 1.73, + "learning_rate": 4.6805441055234954e-05, + "loss": 6.0157, + "step": 3089 + }, + { + "epoch": 1.73, + "learning_rate": 4.67848309975268e-05, + "loss": 5.775, + "step": 3090 + }, + { + "epoch": 1.73, + "learning_rate": 4.6764220939818634e-05, + "loss": 5.8102, + "step": 3091 + }, + { + "epoch": 1.73, + "learning_rate": 4.674361088211047e-05, + "loss": 5.578, + "step": 3092 + }, + { + "epoch": 1.73, + "learning_rate": 4.6723000824402314e-05, + "loss": 5.547, + "step": 3093 + }, + { + "epoch": 1.73, + "learning_rate": 4.670239076669415e-05, + "loss": 6.0101, + "step": 3094 + }, + { + "epoch": 1.73, + "learning_rate": 4.668178070898599e-05, + "loss": 6.1514, + "step": 3095 + }, + { + "epoch": 1.74, + "learning_rate": 4.6661170651277824e-05, + "loss": 5.9794, + "step": 3096 + }, + { + "epoch": 1.74, + "learning_rate": 4.664056059356967e-05, + "loss": 5.8762, + "step": 3097 + }, + { + "epoch": 1.74, + "learning_rate": 4.6619950535861504e-05, + "loss": 5.8051, + "step": 3098 + }, + { + "epoch": 1.74, + "learning_rate": 4.659934047815334e-05, + "loss": 5.685, + "step": 3099 + }, + { + "epoch": 1.74, + "learning_rate": 4.657873042044518e-05, + "loss": 5.6357, + "step": 3100 + }, + { + "epoch": 1.74, + "learning_rate": 4.655812036273702e-05, + "loss": 5.4024, + "step": 3101 + }, + { + "epoch": 1.74, + "learning_rate": 4.653751030502886e-05, + "loss": 5.7835, + "step": 3102 + }, + { + "epoch": 1.74, + "learning_rate": 4.651690024732069e-05, + "loss": 5.7811, + "step": 3103 + }, + { + "epoch": 1.74, + "learning_rate": 4.649629018961254e-05, + "loss": 5.7703, + "step": 3104 + }, + { + "epoch": 1.74, + "learning_rate": 4.647568013190437e-05, + "loss": 5.6608, + "step": 3105 + }, + { + "epoch": 1.74, + "learning_rate": 4.645507007419621e-05, + "loss": 5.985, + "step": 3106 + }, + { + "epoch": 1.74, + "learning_rate": 4.6434460016488046e-05, + "loss": 5.4905, + "step": 3107 + }, + { + "epoch": 1.74, + "learning_rate": 4.641384995877989e-05, + "loss": 5.5153, + "step": 3108 + }, + { + "epoch": 1.74, + "learning_rate": 4.6393239901071726e-05, + "loss": 5.626, + "step": 3109 + }, + { + "epoch": 1.74, + "learning_rate": 4.637262984336356e-05, + "loss": 5.7921, + "step": 3110 + }, + { + "epoch": 1.74, + "learning_rate": 4.6352019785655406e-05, + "loss": 5.5655, + "step": 3111 + }, + { + "epoch": 1.74, + "learning_rate": 4.6331409727947236e-05, + "loss": 5.7692, + "step": 3112 + }, + { + "epoch": 1.74, + "learning_rate": 4.631079967023908e-05, + "loss": 5.3995, + "step": 3113 + }, + { + "epoch": 1.75, + "learning_rate": 4.6290189612530916e-05, + "loss": 5.6182, + "step": 3114 + }, + { + "epoch": 1.75, + "learning_rate": 4.626957955482275e-05, + "loss": 5.5781, + "step": 3115 + }, + { + "epoch": 1.75, + "learning_rate": 4.624896949711459e-05, + "loss": 5.4869, + "step": 3116 + }, + { + "epoch": 1.75, + "learning_rate": 4.622835943940643e-05, + "loss": 5.562, + "step": 3117 + }, + { + "epoch": 1.75, + "learning_rate": 4.6207749381698276e-05, + "loss": 5.4102, + "step": 3118 + }, + { + "epoch": 1.75, + "learning_rate": 4.6187139323990106e-05, + "loss": 5.3611, + "step": 3119 + }, + { + "epoch": 1.75, + "learning_rate": 4.616652926628195e-05, + "loss": 5.6127, + "step": 3120 + }, + { + "epoch": 1.75, + "learning_rate": 4.6145919208573786e-05, + "loss": 5.5359, + "step": 3121 + }, + { + "epoch": 1.75, + "learning_rate": 4.612530915086562e-05, + "loss": 5.2957, + "step": 3122 + }, + { + "epoch": 1.75, + "learning_rate": 4.610469909315746e-05, + "loss": 5.2826, + "step": 3123 + }, + { + "epoch": 1.75, + "learning_rate": 4.60840890354493e-05, + "loss": 5.1736, + "step": 3124 + }, + { + "epoch": 1.75, + "learning_rate": 4.606347897774114e-05, + "loss": 5.1955, + "step": 3125 + }, + { + "epoch": 1.75, + "learning_rate": 4.6042868920032976e-05, + "loss": 5.2258, + "step": 3126 + }, + { + "epoch": 1.75, + "learning_rate": 4.602225886232482e-05, + "loss": 5.1112, + "step": 3127 + }, + { + "epoch": 1.75, + "learning_rate": 4.6001648804616656e-05, + "loss": 4.5242, + "step": 3128 + }, + { + "epoch": 1.75, + "learning_rate": 4.598103874690849e-05, + "loss": 5.0299, + "step": 3129 + }, + { + "epoch": 1.75, + "learning_rate": 4.596042868920033e-05, + "loss": 4.7415, + "step": 3130 + }, + { + "epoch": 1.76, + "learning_rate": 4.593981863149217e-05, + "loss": 4.2763, + "step": 3131 + }, + { + "epoch": 1.76, + "learning_rate": 4.591920857378401e-05, + "loss": 4.6881, + "step": 3132 + }, + { + "epoch": 1.76, + "learning_rate": 4.5898598516075846e-05, + "loss": 4.4063, + "step": 3133 + }, + { + "epoch": 1.76, + "learning_rate": 4.587798845836769e-05, + "loss": 3.8932, + "step": 3134 + }, + { + "epoch": 1.76, + "learning_rate": 4.5857378400659526e-05, + "loss": 6.5079, + "step": 3135 + }, + { + "epoch": 1.76, + "learning_rate": 4.583676834295136e-05, + "loss": 6.6432, + "step": 3136 + }, + { + "epoch": 1.76, + "learning_rate": 4.58161582852432e-05, + "loss": 6.4147, + "step": 3137 + }, + { + "epoch": 1.76, + "learning_rate": 4.579554822753504e-05, + "loss": 6.3245, + "step": 3138 + }, + { + "epoch": 1.76, + "learning_rate": 4.577493816982687e-05, + "loss": 6.1681, + "step": 3139 + }, + { + "epoch": 1.76, + "learning_rate": 4.5754328112118715e-05, + "loss": 6.1698, + "step": 3140 + }, + { + "epoch": 1.76, + "learning_rate": 4.573371805441056e-05, + "loss": 6.1969, + "step": 3141 + }, + { + "epoch": 1.76, + "learning_rate": 4.5713107996702395e-05, + "loss": 5.8063, + "step": 3142 + }, + { + "epoch": 1.76, + "learning_rate": 4.569249793899423e-05, + "loss": 5.907, + "step": 3143 + }, + { + "epoch": 1.76, + "learning_rate": 4.567188788128607e-05, + "loss": 5.8898, + "step": 3144 + }, + { + "epoch": 1.76, + "learning_rate": 4.565127782357791e-05, + "loss": 5.5589, + "step": 3145 + }, + { + "epoch": 1.76, + "learning_rate": 4.563066776586974e-05, + "loss": 5.8336, + "step": 3146 + }, + { + "epoch": 1.76, + "learning_rate": 4.5610057708161585e-05, + "loss": 6.1281, + "step": 3147 + }, + { + "epoch": 1.76, + "learning_rate": 4.558944765045342e-05, + "loss": 5.8023, + "step": 3148 + }, + { + "epoch": 1.77, + "learning_rate": 4.556883759274526e-05, + "loss": 5.8118, + "step": 3149 + }, + { + "epoch": 1.77, + "learning_rate": 4.55482275350371e-05, + "loss": 5.7816, + "step": 3150 + }, + { + "epoch": 1.77, + "learning_rate": 4.552761747732894e-05, + "loss": 5.5165, + "step": 3151 + }, + { + "epoch": 1.77, + "learning_rate": 4.550700741962078e-05, + "loss": 5.6958, + "step": 3152 + }, + { + "epoch": 1.77, + "learning_rate": 4.548639736191261e-05, + "loss": 5.5323, + "step": 3153 + }, + { + "epoch": 1.77, + "learning_rate": 4.5465787304204455e-05, + "loss": 5.8422, + "step": 3154 + }, + { + "epoch": 1.77, + "learning_rate": 4.544517724649629e-05, + "loss": 5.7006, + "step": 3155 + }, + { + "epoch": 1.77, + "learning_rate": 4.542456718878813e-05, + "loss": 5.4958, + "step": 3156 + }, + { + "epoch": 1.77, + "learning_rate": 4.540395713107997e-05, + "loss": 5.6166, + "step": 3157 + }, + { + "epoch": 1.77, + "learning_rate": 4.538334707337181e-05, + "loss": 5.4958, + "step": 3158 + }, + { + "epoch": 1.77, + "learning_rate": 4.5362737015663645e-05, + "loss": 5.3287, + "step": 3159 + }, + { + "epoch": 1.77, + "learning_rate": 4.534212695795548e-05, + "loss": 5.3935, + "step": 3160 + }, + { + "epoch": 1.77, + "learning_rate": 4.5321516900247325e-05, + "loss": 5.1473, + "step": 3161 + }, + { + "epoch": 1.77, + "learning_rate": 4.530090684253916e-05, + "loss": 5.4204, + "step": 3162 + }, + { + "epoch": 1.77, + "learning_rate": 4.5280296784831e-05, + "loss": 5.3813, + "step": 3163 + }, + { + "epoch": 1.77, + "learning_rate": 4.525968672712284e-05, + "loss": 5.7133, + "step": 3164 + }, + { + "epoch": 1.77, + "learning_rate": 4.523907666941468e-05, + "loss": 5.677, + "step": 3165 + }, + { + "epoch": 1.77, + "learning_rate": 4.5218466611706515e-05, + "loss": 5.5851, + "step": 3166 + }, + { + "epoch": 1.78, + "learning_rate": 4.519785655399835e-05, + "loss": 5.5578, + "step": 3167 + }, + { + "epoch": 1.78, + "learning_rate": 4.5177246496290194e-05, + "loss": 5.5437, + "step": 3168 + }, + { + "epoch": 1.78, + "learning_rate": 4.515663643858203e-05, + "loss": 5.2055, + "step": 3169 + }, + { + "epoch": 1.78, + "learning_rate": 4.513602638087387e-05, + "loss": 5.1145, + "step": 3170 + }, + { + "epoch": 1.78, + "learning_rate": 4.5115416323165704e-05, + "loss": 5.6634, + "step": 3171 + }, + { + "epoch": 1.78, + "learning_rate": 4.509480626545755e-05, + "loss": 5.192, + "step": 3172 + }, + { + "epoch": 1.78, + "learning_rate": 4.5074196207749384e-05, + "loss": 5.1495, + "step": 3173 + }, + { + "epoch": 1.78, + "learning_rate": 4.505358615004122e-05, + "loss": 5.2917, + "step": 3174 + }, + { + "epoch": 1.78, + "learning_rate": 4.5032976092333064e-05, + "loss": 5.1774, + "step": 3175 + }, + { + "epoch": 1.78, + "learning_rate": 4.50123660346249e-05, + "loss": 5.016, + "step": 3176 + }, + { + "epoch": 1.78, + "learning_rate": 4.499175597691674e-05, + "loss": 5.0582, + "step": 3177 + }, + { + "epoch": 1.78, + "learning_rate": 4.4971145919208574e-05, + "loss": 4.6815, + "step": 3178 + }, + { + "epoch": 1.78, + "learning_rate": 4.495053586150042e-05, + "loss": 4.575, + "step": 3179 + }, + { + "epoch": 1.78, + "learning_rate": 4.4929925803792254e-05, + "loss": 4.3785, + "step": 3180 + }, + { + "epoch": 1.78, + "learning_rate": 4.490931574608409e-05, + "loss": 4.6115, + "step": 3181 + }, + { + "epoch": 1.78, + "learning_rate": 4.4888705688375934e-05, + "loss": 4.397, + "step": 3182 + }, + { + "epoch": 1.78, + "learning_rate": 4.4868095630667764e-05, + "loss": 4.5611, + "step": 3183 + }, + { + "epoch": 1.78, + "learning_rate": 4.484748557295961e-05, + "loss": 3.9241, + "step": 3184 + }, + { + "epoch": 1.79, + "learning_rate": 4.4826875515251444e-05, + "loss": 6.2305, + "step": 3185 + }, + { + "epoch": 1.79, + "learning_rate": 4.480626545754329e-05, + "loss": 6.4099, + "step": 3186 + }, + { + "epoch": 1.79, + "learning_rate": 4.478565539983512e-05, + "loss": 6.3702, + "step": 3187 + }, + { + "epoch": 1.79, + "learning_rate": 4.476504534212696e-05, + "loss": 6.2495, + "step": 3188 + }, + { + "epoch": 1.79, + "learning_rate": 4.4744435284418804e-05, + "loss": 6.1973, + "step": 3189 + }, + { + "epoch": 1.79, + "learning_rate": 4.4723825226710634e-05, + "loss": 6.2641, + "step": 3190 + }, + { + "epoch": 1.79, + "learning_rate": 4.470321516900248e-05, + "loss": 5.9749, + "step": 3191 + }, + { + "epoch": 1.79, + "learning_rate": 4.4682605111294314e-05, + "loss": 5.7804, + "step": 3192 + }, + { + "epoch": 1.79, + "learning_rate": 4.466199505358615e-05, + "loss": 5.5762, + "step": 3193 + }, + { + "epoch": 1.79, + "learning_rate": 4.464138499587799e-05, + "loss": 5.759, + "step": 3194 + }, + { + "epoch": 1.79, + "learning_rate": 4.462077493816983e-05, + "loss": 6.156, + "step": 3195 + }, + { + "epoch": 1.79, + "learning_rate": 4.460016488046167e-05, + "loss": 5.6865, + "step": 3196 + }, + { + "epoch": 1.79, + "learning_rate": 4.4579554822753503e-05, + "loss": 5.827, + "step": 3197 + }, + { + "epoch": 1.79, + "learning_rate": 4.455894476504535e-05, + "loss": 5.8325, + "step": 3198 + }, + { + "epoch": 1.79, + "learning_rate": 4.4538334707337183e-05, + "loss": 5.7281, + "step": 3199 + }, + { + "epoch": 1.79, + "learning_rate": 4.451772464962902e-05, + "loss": 5.8727, + "step": 3200 + }, + { + "epoch": 1.79, + "learning_rate": 4.449711459192086e-05, + "loss": 5.7423, + "step": 3201 + }, + { + "epoch": 1.79, + "learning_rate": 4.44765045342127e-05, + "loss": 5.5457, + "step": 3202 + }, + { + "epoch": 1.8, + "learning_rate": 4.4455894476504537e-05, + "loss": 5.6537, + "step": 3203 + }, + { + "epoch": 1.8, + "learning_rate": 4.443528441879637e-05, + "loss": 5.7254, + "step": 3204 + }, + { + "epoch": 1.8, + "learning_rate": 4.4414674361088217e-05, + "loss": 5.6592, + "step": 3205 + }, + { + "epoch": 1.8, + "learning_rate": 4.439406430338005e-05, + "loss": 5.5766, + "step": 3206 + }, + { + "epoch": 1.8, + "learning_rate": 4.437345424567189e-05, + "loss": 5.53, + "step": 3207 + }, + { + "epoch": 1.8, + "learning_rate": 4.4352844187963726e-05, + "loss": 5.4371, + "step": 3208 + }, + { + "epoch": 1.8, + "learning_rate": 4.433223413025557e-05, + "loss": 5.5417, + "step": 3209 + }, + { + "epoch": 1.8, + "learning_rate": 4.4311624072547406e-05, + "loss": 5.8157, + "step": 3210 + }, + { + "epoch": 1.8, + "learning_rate": 4.429101401483924e-05, + "loss": 5.4279, + "step": 3211 + }, + { + "epoch": 1.8, + "learning_rate": 4.4270403957131086e-05, + "loss": 5.3381, + "step": 3212 + }, + { + "epoch": 1.8, + "learning_rate": 4.424979389942292e-05, + "loss": 5.5063, + "step": 3213 + }, + { + "epoch": 1.8, + "learning_rate": 4.422918384171476e-05, + "loss": 5.5456, + "step": 3214 + }, + { + "epoch": 1.8, + "learning_rate": 4.4208573784006596e-05, + "loss": 5.5231, + "step": 3215 + }, + { + "epoch": 1.8, + "learning_rate": 4.418796372629844e-05, + "loss": 5.3868, + "step": 3216 + }, + { + "epoch": 1.8, + "learning_rate": 4.416735366859027e-05, + "loss": 5.3985, + "step": 3217 + }, + { + "epoch": 1.8, + "learning_rate": 4.414674361088211e-05, + "loss": 5.3855, + "step": 3218 + }, + { + "epoch": 1.8, + "learning_rate": 4.412613355317395e-05, + "loss": 5.4029, + "step": 3219 + }, + { + "epoch": 1.8, + "learning_rate": 4.4105523495465786e-05, + "loss": 5.5219, + "step": 3220 + }, + { + "epoch": 1.81, + "learning_rate": 4.408491343775763e-05, + "loss": 5.5083, + "step": 3221 + }, + { + "epoch": 1.81, + "learning_rate": 4.4064303380049466e-05, + "loss": 5.5048, + "step": 3222 + }, + { + "epoch": 1.81, + "learning_rate": 4.404369332234131e-05, + "loss": 5.0737, + "step": 3223 + }, + { + "epoch": 1.81, + "learning_rate": 4.402308326463314e-05, + "loss": 5.1574, + "step": 3224 + }, + { + "epoch": 1.81, + "learning_rate": 4.400247320692498e-05, + "loss": 5.5569, + "step": 3225 + }, + { + "epoch": 1.81, + "learning_rate": 4.398186314921682e-05, + "loss": 5.6752, + "step": 3226 + }, + { + "epoch": 1.81, + "learning_rate": 4.3961253091508656e-05, + "loss": 5.4583, + "step": 3227 + }, + { + "epoch": 1.81, + "learning_rate": 4.39406430338005e-05, + "loss": 5.4408, + "step": 3228 + }, + { + "epoch": 1.81, + "learning_rate": 4.3920032976092336e-05, + "loss": 4.8424, + "step": 3229 + }, + { + "epoch": 1.81, + "learning_rate": 4.389942291838417e-05, + "loss": 4.6179, + "step": 3230 + }, + { + "epoch": 1.81, + "learning_rate": 4.387881286067601e-05, + "loss": 4.2164, + "step": 3231 + }, + { + "epoch": 1.81, + "learning_rate": 4.385820280296785e-05, + "loss": 4.4672, + "step": 3232 + }, + { + "epoch": 1.81, + "learning_rate": 4.383759274525969e-05, + "loss": 4.1475, + "step": 3233 + }, + { + "epoch": 1.81, + "learning_rate": 4.3816982687551526e-05, + "loss": 4.0489, + "step": 3234 + }, + { + "epoch": 1.81, + "learning_rate": 4.379637262984336e-05, + "loss": 6.2986, + "step": 3235 + }, + { + "epoch": 1.81, + "learning_rate": 4.3775762572135205e-05, + "loss": 6.12, + "step": 3236 + }, + { + "epoch": 1.81, + "learning_rate": 4.375515251442704e-05, + "loss": 5.937, + "step": 3237 + }, + { + "epoch": 1.82, + "learning_rate": 4.373454245671888e-05, + "loss": 5.7298, + "step": 3238 + }, + { + "epoch": 1.82, + "learning_rate": 4.371393239901072e-05, + "loss": 5.5001, + "step": 3239 + }, + { + "epoch": 1.82, + "learning_rate": 4.369332234130256e-05, + "loss": 5.6899, + "step": 3240 + }, + { + "epoch": 1.82, + "learning_rate": 4.3672712283594395e-05, + "loss": 5.709, + "step": 3241 + }, + { + "epoch": 1.82, + "learning_rate": 4.365210222588623e-05, + "loss": 5.6731, + "step": 3242 + }, + { + "epoch": 1.82, + "learning_rate": 4.3631492168178075e-05, + "loss": 5.8171, + "step": 3243 + }, + { + "epoch": 1.82, + "learning_rate": 4.361088211046991e-05, + "loss": 5.8501, + "step": 3244 + }, + { + "epoch": 1.82, + "learning_rate": 4.359027205276175e-05, + "loss": 5.5596, + "step": 3245 + }, + { + "epoch": 1.82, + "learning_rate": 4.356966199505359e-05, + "loss": 5.6844, + "step": 3246 + }, + { + "epoch": 1.82, + "learning_rate": 4.354905193734543e-05, + "loss": 5.671, + "step": 3247 + }, + { + "epoch": 1.82, + "learning_rate": 4.3528441879637265e-05, + "loss": 5.4187, + "step": 3248 + }, + { + "epoch": 1.82, + "learning_rate": 4.35078318219291e-05, + "loss": 5.3147, + "step": 3249 + }, + { + "epoch": 1.82, + "learning_rate": 4.3487221764220945e-05, + "loss": 5.5356, + "step": 3250 + }, + { + "epoch": 1.82, + "learning_rate": 4.3466611706512775e-05, + "loss": 5.3978, + "step": 3251 + }, + { + "epoch": 1.82, + "learning_rate": 4.344600164880462e-05, + "loss": 5.6248, + "step": 3252 + }, + { + "epoch": 1.82, + "learning_rate": 4.342539159109646e-05, + "loss": 5.5547, + "step": 3253 + }, + { + "epoch": 1.82, + "learning_rate": 4.340478153338829e-05, + "loss": 5.4911, + "step": 3254 + }, + { + "epoch": 1.82, + "learning_rate": 4.3384171475680135e-05, + "loss": 5.8001, + "step": 3255 + }, + { + "epoch": 1.83, + "learning_rate": 4.336356141797197e-05, + "loss": 5.4475, + "step": 3256 + }, + { + "epoch": 1.83, + "learning_rate": 4.3342951360263815e-05, + "loss": 5.4783, + "step": 3257 + }, + { + "epoch": 1.83, + "learning_rate": 4.3322341302555645e-05, + "loss": 5.7017, + "step": 3258 + }, + { + "epoch": 1.83, + "learning_rate": 4.330173124484749e-05, + "loss": 5.5363, + "step": 3259 + }, + { + "epoch": 1.83, + "learning_rate": 4.328112118713933e-05, + "loss": 5.7294, + "step": 3260 + }, + { + "epoch": 1.83, + "learning_rate": 4.326051112943116e-05, + "loss": 5.5237, + "step": 3261 + }, + { + "epoch": 1.83, + "learning_rate": 4.3239901071723005e-05, + "loss": 5.4946, + "step": 3262 + }, + { + "epoch": 1.83, + "learning_rate": 4.321929101401484e-05, + "loss": 5.5063, + "step": 3263 + }, + { + "epoch": 1.83, + "learning_rate": 4.319868095630668e-05, + "loss": 5.2962, + "step": 3264 + }, + { + "epoch": 1.83, + "learning_rate": 4.3178070898598514e-05, + "loss": 5.2847, + "step": 3265 + }, + { + "epoch": 1.83, + "learning_rate": 4.315746084089036e-05, + "loss": 5.2221, + "step": 3266 + }, + { + "epoch": 1.83, + "learning_rate": 4.3136850783182194e-05, + "loss": 5.2168, + "step": 3267 + }, + { + "epoch": 1.83, + "learning_rate": 4.311624072547403e-05, + "loss": 5.3174, + "step": 3268 + }, + { + "epoch": 1.83, + "learning_rate": 4.3095630667765874e-05, + "loss": 5.402, + "step": 3269 + }, + { + "epoch": 1.83, + "learning_rate": 4.307502061005771e-05, + "loss": 5.5608, + "step": 3270 + }, + { + "epoch": 1.83, + "learning_rate": 4.305441055234955e-05, + "loss": 5.263, + "step": 3271 + }, + { + "epoch": 1.83, + "learning_rate": 4.3033800494641384e-05, + "loss": 5.2796, + "step": 3272 + }, + { + "epoch": 1.83, + "learning_rate": 4.301319043693323e-05, + "loss": 4.9098, + "step": 3273 + }, + { + "epoch": 1.84, + "learning_rate": 4.2992580379225064e-05, + "loss": 5.4077, + "step": 3274 + }, + { + "epoch": 1.84, + "learning_rate": 4.29719703215169e-05, + "loss": 5.3724, + "step": 3275 + }, + { + "epoch": 1.84, + "learning_rate": 4.2951360263808744e-05, + "loss": 5.1926, + "step": 3276 + }, + { + "epoch": 1.84, + "learning_rate": 4.293075020610058e-05, + "loss": 4.8654, + "step": 3277 + }, + { + "epoch": 1.84, + "learning_rate": 4.291014014839242e-05, + "loss": 4.7527, + "step": 3278 + }, + { + "epoch": 1.84, + "learning_rate": 4.2889530090684254e-05, + "loss": 4.9271, + "step": 3279 + }, + { + "epoch": 1.84, + "learning_rate": 4.28689200329761e-05, + "loss": 4.5649, + "step": 3280 + }, + { + "epoch": 1.84, + "learning_rate": 4.2848309975267934e-05, + "loss": 5.0093, + "step": 3281 + }, + { + "epoch": 1.84, + "learning_rate": 4.282769991755977e-05, + "loss": 4.381, + "step": 3282 + }, + { + "epoch": 1.84, + "learning_rate": 4.2807089859851614e-05, + "loss": 4.2148, + "step": 3283 + }, + { + "epoch": 1.84, + "learning_rate": 4.278647980214345e-05, + "loss": 3.9339, + "step": 3284 + }, + { + "epoch": 1.84, + "learning_rate": 4.276586974443529e-05, + "loss": 6.1312, + "step": 3285 + }, + { + "epoch": 1.84, + "learning_rate": 4.2745259686727124e-05, + "loss": 6.5256, + "step": 3286 + }, + { + "epoch": 1.84, + "learning_rate": 4.272464962901897e-05, + "loss": 6.3133, + "step": 3287 + }, + { + "epoch": 1.84, + "learning_rate": 4.27040395713108e-05, + "loss": 6.0509, + "step": 3288 + }, + { + "epoch": 1.84, + "learning_rate": 4.268342951360264e-05, + "loss": 6.3869, + "step": 3289 + }, + { + "epoch": 1.84, + "learning_rate": 4.266281945589448e-05, + "loss": 6.0576, + "step": 3290 + }, + { + "epoch": 1.84, + "learning_rate": 4.2642209398186314e-05, + "loss": 5.9814, + "step": 3291 + }, + { + "epoch": 1.85, + "learning_rate": 4.262159934047816e-05, + "loss": 5.9296, + "step": 3292 + }, + { + "epoch": 1.85, + "learning_rate": 4.2600989282769994e-05, + "loss": 5.7038, + "step": 3293 + }, + { + "epoch": 1.85, + "learning_rate": 4.258037922506184e-05, + "loss": 5.3696, + "step": 3294 + }, + { + "epoch": 1.85, + "learning_rate": 4.255976916735367e-05, + "loss": 5.8631, + "step": 3295 + }, + { + "epoch": 1.85, + "learning_rate": 4.253915910964551e-05, + "loss": 5.7216, + "step": 3296 + }, + { + "epoch": 1.85, + "learning_rate": 4.251854905193735e-05, + "loss": 5.769, + "step": 3297 + }, + { + "epoch": 1.85, + "learning_rate": 4.249793899422918e-05, + "loss": 6.0181, + "step": 3298 + }, + { + "epoch": 1.85, + "learning_rate": 4.247732893652103e-05, + "loss": 5.9639, + "step": 3299 + }, + { + "epoch": 1.85, + "learning_rate": 4.245671887881286e-05, + "loss": 5.8763, + "step": 3300 + }, + { + "epoch": 1.85, + "learning_rate": 4.24361088211047e-05, + "loss": 5.4752, + "step": 3301 + }, + { + "epoch": 1.85, + "learning_rate": 4.2415498763396537e-05, + "loss": 5.6417, + "step": 3302 + }, + { + "epoch": 1.85, + "learning_rate": 4.239488870568838e-05, + "loss": 5.7145, + "step": 3303 + }, + { + "epoch": 1.85, + "learning_rate": 4.2374278647980216e-05, + "loss": 5.595, + "step": 3304 + }, + { + "epoch": 1.85, + "learning_rate": 4.235366859027205e-05, + "loss": 5.6083, + "step": 3305 + }, + { + "epoch": 1.85, + "learning_rate": 4.233305853256389e-05, + "loss": 5.5918, + "step": 3306 + }, + { + "epoch": 1.85, + "learning_rate": 4.231244847485573e-05, + "loss": 5.4666, + "step": 3307 + }, + { + "epoch": 1.85, + "learning_rate": 4.229183841714757e-05, + "loss": 5.4354, + "step": 3308 + }, + { + "epoch": 1.85, + "learning_rate": 4.2271228359439406e-05, + "loss": 5.3463, + "step": 3309 + }, + { + "epoch": 1.86, + "learning_rate": 4.225061830173125e-05, + "loss": 5.7034, + "step": 3310 + }, + { + "epoch": 1.86, + "learning_rate": 4.2230008244023086e-05, + "loss": 5.6103, + "step": 3311 + }, + { + "epoch": 1.86, + "learning_rate": 4.220939818631492e-05, + "loss": 5.5189, + "step": 3312 + }, + { + "epoch": 1.86, + "learning_rate": 4.218878812860676e-05, + "loss": 5.3411, + "step": 3313 + }, + { + "epoch": 1.86, + "learning_rate": 4.21681780708986e-05, + "loss": 5.8469, + "step": 3314 + }, + { + "epoch": 1.86, + "learning_rate": 4.214756801319044e-05, + "loss": 5.1504, + "step": 3315 + }, + { + "epoch": 1.86, + "learning_rate": 4.2126957955482276e-05, + "loss": 5.587, + "step": 3316 + }, + { + "epoch": 1.86, + "learning_rate": 4.210634789777412e-05, + "loss": 5.3031, + "step": 3317 + }, + { + "epoch": 1.86, + "learning_rate": 4.2085737840065956e-05, + "loss": 5.379, + "step": 3318 + }, + { + "epoch": 1.86, + "learning_rate": 4.206512778235779e-05, + "loss": 5.1322, + "step": 3319 + }, + { + "epoch": 1.86, + "learning_rate": 4.204451772464963e-05, + "loss": 5.2721, + "step": 3320 + }, + { + "epoch": 1.86, + "learning_rate": 4.202390766694147e-05, + "loss": 5.3163, + "step": 3321 + }, + { + "epoch": 1.86, + "learning_rate": 4.20032976092333e-05, + "loss": 5.3586, + "step": 3322 + }, + { + "epoch": 1.86, + "learning_rate": 4.1982687551525146e-05, + "loss": 5.2086, + "step": 3323 + }, + { + "epoch": 1.86, + "learning_rate": 4.196207749381699e-05, + "loss": 5.403, + "step": 3324 + }, + { + "epoch": 1.86, + "learning_rate": 4.194146743610882e-05, + "loss": 4.9347, + "step": 3325 + }, + { + "epoch": 1.86, + "learning_rate": 4.192085737840066e-05, + "loss": 4.8861, + "step": 3326 + }, + { + "epoch": 1.86, + "learning_rate": 4.19002473206925e-05, + "loss": 4.9488, + "step": 3327 + }, + { + "epoch": 1.87, + "learning_rate": 4.187963726298434e-05, + "loss": 4.7301, + "step": 3328 + }, + { + "epoch": 1.87, + "learning_rate": 4.185902720527617e-05, + "loss": 4.8365, + "step": 3329 + }, + { + "epoch": 1.87, + "learning_rate": 4.1838417147568016e-05, + "loss": 4.8502, + "step": 3330 + }, + { + "epoch": 1.87, + "learning_rate": 4.181780708985986e-05, + "loss": 4.8659, + "step": 3331 + }, + { + "epoch": 1.87, + "learning_rate": 4.179719703215169e-05, + "loss": 4.4798, + "step": 3332 + }, + { + "epoch": 1.87, + "learning_rate": 4.177658697444353e-05, + "loss": 3.9956, + "step": 3333 + }, + { + "epoch": 1.87, + "learning_rate": 4.175597691673537e-05, + "loss": 3.7229, + "step": 3334 + }, + { + "epoch": 1.87, + "learning_rate": 4.1735366859027205e-05, + "loss": 6.0699, + "step": 3335 + }, + { + "epoch": 1.87, + "learning_rate": 4.171475680131904e-05, + "loss": 6.0446, + "step": 3336 + }, + { + "epoch": 1.87, + "learning_rate": 4.1694146743610885e-05, + "loss": 5.776, + "step": 3337 + }, + { + "epoch": 1.87, + "learning_rate": 4.167353668590272e-05, + "loss": 5.8673, + "step": 3338 + }, + { + "epoch": 1.87, + "learning_rate": 4.165292662819456e-05, + "loss": 5.7402, + "step": 3339 + }, + { + "epoch": 1.87, + "learning_rate": 4.16323165704864e-05, + "loss": 5.5649, + "step": 3340 + }, + { + "epoch": 1.87, + "learning_rate": 4.161170651277824e-05, + "loss": 5.6119, + "step": 3341 + }, + { + "epoch": 1.87, + "learning_rate": 4.1591096455070075e-05, + "loss": 5.6371, + "step": 3342 + }, + { + "epoch": 1.87, + "learning_rate": 4.157048639736191e-05, + "loss": 5.6995, + "step": 3343 + }, + { + "epoch": 1.87, + "learning_rate": 4.1549876339653755e-05, + "loss": 5.7293, + "step": 3344 + }, + { + "epoch": 1.88, + "learning_rate": 4.152926628194559e-05, + "loss": 5.9956, + "step": 3345 + }, + { + "epoch": 1.88, + "learning_rate": 4.150865622423743e-05, + "loss": 5.5443, + "step": 3346 + }, + { + "epoch": 1.88, + "learning_rate": 4.148804616652927e-05, + "loss": 5.5409, + "step": 3347 + }, + { + "epoch": 1.88, + "learning_rate": 4.146743610882111e-05, + "loss": 5.6532, + "step": 3348 + }, + { + "epoch": 1.88, + "learning_rate": 4.1446826051112945e-05, + "loss": 5.4659, + "step": 3349 + }, + { + "epoch": 1.88, + "learning_rate": 4.142621599340478e-05, + "loss": 5.4347, + "step": 3350 + }, + { + "epoch": 1.88, + "learning_rate": 4.1405605935696625e-05, + "loss": 5.7067, + "step": 3351 + }, + { + "epoch": 1.88, + "learning_rate": 4.138499587798846e-05, + "loss": 5.4366, + "step": 3352 + }, + { + "epoch": 1.88, + "learning_rate": 4.13643858202803e-05, + "loss": 5.5702, + "step": 3353 + }, + { + "epoch": 1.88, + "learning_rate": 4.1343775762572135e-05, + "loss": 5.6123, + "step": 3354 + }, + { + "epoch": 1.88, + "learning_rate": 4.132316570486398e-05, + "loss": 5.8279, + "step": 3355 + }, + { + "epoch": 1.88, + "learning_rate": 4.1302555647155815e-05, + "loss": 5.3833, + "step": 3356 + }, + { + "epoch": 1.88, + "learning_rate": 4.128194558944765e-05, + "loss": 5.6998, + "step": 3357 + }, + { + "epoch": 1.88, + "learning_rate": 4.1261335531739495e-05, + "loss": 5.5354, + "step": 3358 + }, + { + "epoch": 1.88, + "learning_rate": 4.1240725474031325e-05, + "loss": 5.4652, + "step": 3359 + }, + { + "epoch": 1.88, + "learning_rate": 4.122011541632317e-05, + "loss": 5.313, + "step": 3360 + }, + { + "epoch": 1.88, + "learning_rate": 4.1199505358615005e-05, + "loss": 5.7661, + "step": 3361 + }, + { + "epoch": 1.88, + "learning_rate": 4.117889530090685e-05, + "loss": 5.4502, + "step": 3362 + }, + { + "epoch": 1.89, + "learning_rate": 4.1158285243198685e-05, + "loss": 5.6141, + "step": 3363 + }, + { + "epoch": 1.89, + "learning_rate": 4.113767518549052e-05, + "loss": 5.3409, + "step": 3364 + }, + { + "epoch": 1.89, + "learning_rate": 4.1117065127782365e-05, + "loss": 5.5752, + "step": 3365 + }, + { + "epoch": 1.89, + "learning_rate": 4.1096455070074194e-05, + "loss": 5.3241, + "step": 3366 + }, + { + "epoch": 1.89, + "learning_rate": 4.107584501236604e-05, + "loss": 5.6452, + "step": 3367 + }, + { + "epoch": 1.89, + "learning_rate": 4.1055234954657874e-05, + "loss": 5.6178, + "step": 3368 + }, + { + "epoch": 1.89, + "learning_rate": 4.103462489694971e-05, + "loss": 5.4709, + "step": 3369 + }, + { + "epoch": 1.89, + "learning_rate": 4.101401483924155e-05, + "loss": 5.1829, + "step": 3370 + }, + { + "epoch": 1.89, + "learning_rate": 4.099340478153339e-05, + "loss": 5.489, + "step": 3371 + }, + { + "epoch": 1.89, + "learning_rate": 4.097279472382523e-05, + "loss": 5.3858, + "step": 3372 + }, + { + "epoch": 1.89, + "learning_rate": 4.0952184666117064e-05, + "loss": 5.0831, + "step": 3373 + }, + { + "epoch": 1.89, + "learning_rate": 4.093157460840891e-05, + "loss": 5.3024, + "step": 3374 + }, + { + "epoch": 1.89, + "learning_rate": 4.0910964550700744e-05, + "loss": 5.3974, + "step": 3375 + }, + { + "epoch": 1.89, + "learning_rate": 4.089035449299258e-05, + "loss": 5.1107, + "step": 3376 + }, + { + "epoch": 1.89, + "learning_rate": 4.086974443528442e-05, + "loss": 4.9754, + "step": 3377 + }, + { + "epoch": 1.89, + "learning_rate": 4.084913437757626e-05, + "loss": 4.6871, + "step": 3378 + }, + { + "epoch": 1.89, + "learning_rate": 4.08285243198681e-05, + "loss": 4.8203, + "step": 3379 + }, + { + "epoch": 1.89, + "learning_rate": 4.0807914262159934e-05, + "loss": 4.2289, + "step": 3380 + }, + { + "epoch": 1.9, + "learning_rate": 4.078730420445178e-05, + "loss": 4.5062, + "step": 3381 + }, + { + "epoch": 1.9, + "learning_rate": 4.0766694146743614e-05, + "loss": 4.8398, + "step": 3382 + }, + { + "epoch": 1.9, + "learning_rate": 4.074608408903545e-05, + "loss": 4.6227, + "step": 3383 + }, + { + "epoch": 1.9, + "learning_rate": 4.072547403132729e-05, + "loss": 3.7491, + "step": 3384 + }, + { + "epoch": 1.9, + "learning_rate": 4.070486397361913e-05, + "loss": 6.2525, + "step": 3385 + }, + { + "epoch": 1.9, + "learning_rate": 4.068425391591097e-05, + "loss": 6.0752, + "step": 3386 + }, + { + "epoch": 1.9, + "learning_rate": 4.0663643858202804e-05, + "loss": 6.1343, + "step": 3387 + }, + { + "epoch": 1.9, + "learning_rate": 4.064303380049465e-05, + "loss": 6.1431, + "step": 3388 + }, + { + "epoch": 1.9, + "learning_rate": 4.0622423742786484e-05, + "loss": 5.9386, + "step": 3389 + }, + { + "epoch": 1.9, + "learning_rate": 4.060181368507832e-05, + "loss": 5.9311, + "step": 3390 + }, + { + "epoch": 1.9, + "learning_rate": 4.058120362737016e-05, + "loss": 5.8697, + "step": 3391 + }, + { + "epoch": 1.9, + "learning_rate": 4.0560593569662e-05, + "loss": 5.3034, + "step": 3392 + }, + { + "epoch": 1.9, + "learning_rate": 4.053998351195383e-05, + "loss": 5.6131, + "step": 3393 + }, + { + "epoch": 1.9, + "learning_rate": 4.0519373454245673e-05, + "loss": 5.6489, + "step": 3394 + }, + { + "epoch": 1.9, + "learning_rate": 4.049876339653752e-05, + "loss": 5.6986, + "step": 3395 + }, + { + "epoch": 1.9, + "learning_rate": 4.047815333882935e-05, + "loss": 5.8621, + "step": 3396 + }, + { + "epoch": 1.9, + "learning_rate": 4.045754328112119e-05, + "loss": 5.6194, + "step": 3397 + }, + { + "epoch": 1.9, + "learning_rate": 4.043693322341303e-05, + "loss": 5.5137, + "step": 3398 + }, + { + "epoch": 1.91, + "learning_rate": 4.041632316570487e-05, + "loss": 5.503, + "step": 3399 + }, + { + "epoch": 1.91, + "learning_rate": 4.03957131079967e-05, + "loss": 5.5253, + "step": 3400 + }, + { + "epoch": 1.91, + "learning_rate": 4.037510305028854e-05, + "loss": 5.6719, + "step": 3401 + }, + { + "epoch": 1.91, + "learning_rate": 4.0354492992580387e-05, + "loss": 5.2867, + "step": 3402 + }, + { + "epoch": 1.91, + "learning_rate": 4.0333882934872216e-05, + "loss": 5.6767, + "step": 3403 + }, + { + "epoch": 1.91, + "learning_rate": 4.031327287716406e-05, + "loss": 5.4734, + "step": 3404 + }, + { + "epoch": 1.91, + "learning_rate": 4.0292662819455896e-05, + "loss": 5.3024, + "step": 3405 + }, + { + "epoch": 1.91, + "learning_rate": 4.027205276174773e-05, + "loss": 5.6079, + "step": 3406 + }, + { + "epoch": 1.91, + "learning_rate": 4.025144270403957e-05, + "loss": 5.3155, + "step": 3407 + }, + { + "epoch": 1.91, + "learning_rate": 4.023083264633141e-05, + "loss": 5.3534, + "step": 3408 + }, + { + "epoch": 1.91, + "learning_rate": 4.021022258862325e-05, + "loss": 5.5757, + "step": 3409 + }, + { + "epoch": 1.91, + "learning_rate": 4.0189612530915086e-05, + "loss": 5.6358, + "step": 3410 + }, + { + "epoch": 1.91, + "learning_rate": 4.016900247320693e-05, + "loss": 5.3268, + "step": 3411 + }, + { + "epoch": 1.91, + "learning_rate": 4.0148392415498766e-05, + "loss": 5.4113, + "step": 3412 + }, + { + "epoch": 1.91, + "learning_rate": 4.01277823577906e-05, + "loss": 5.3119, + "step": 3413 + }, + { + "epoch": 1.91, + "learning_rate": 4.010717230008244e-05, + "loss": 5.4045, + "step": 3414 + }, + { + "epoch": 1.91, + "learning_rate": 4.008656224237428e-05, + "loss": 5.3918, + "step": 3415 + }, + { + "epoch": 1.91, + "learning_rate": 4.006595218466612e-05, + "loss": 5.2643, + "step": 3416 + }, + { + "epoch": 1.92, + "learning_rate": 4.0045342126957956e-05, + "loss": 5.6131, + "step": 3417 + }, + { + "epoch": 1.92, + "learning_rate": 4.00247320692498e-05, + "loss": 5.2872, + "step": 3418 + }, + { + "epoch": 1.92, + "learning_rate": 4.0004122011541636e-05, + "loss": 5.2959, + "step": 3419 + }, + { + "epoch": 1.92, + "learning_rate": 3.998351195383347e-05, + "loss": 5.2153, + "step": 3420 + }, + { + "epoch": 1.92, + "learning_rate": 3.996290189612531e-05, + "loss": 5.3143, + "step": 3421 + }, + { + "epoch": 1.92, + "learning_rate": 3.994229183841715e-05, + "loss": 5.2001, + "step": 3422 + }, + { + "epoch": 1.92, + "learning_rate": 3.992168178070899e-05, + "loss": 5.0732, + "step": 3423 + }, + { + "epoch": 1.92, + "learning_rate": 3.9901071723000826e-05, + "loss": 5.1598, + "step": 3424 + }, + { + "epoch": 1.92, + "learning_rate": 3.988046166529266e-05, + "loss": 5.0808, + "step": 3425 + }, + { + "epoch": 1.92, + "learning_rate": 3.9859851607584506e-05, + "loss": 4.8424, + "step": 3426 + }, + { + "epoch": 1.92, + "learning_rate": 3.983924154987634e-05, + "loss": 4.7635, + "step": 3427 + }, + { + "epoch": 1.92, + "learning_rate": 3.981863149216818e-05, + "loss": 5.0744, + "step": 3428 + }, + { + "epoch": 1.92, + "learning_rate": 3.979802143446002e-05, + "loss": 4.4816, + "step": 3429 + }, + { + "epoch": 1.92, + "learning_rate": 3.977741137675185e-05, + "loss": 4.8335, + "step": 3430 + }, + { + "epoch": 1.92, + "learning_rate": 3.9756801319043696e-05, + "loss": 4.186, + "step": 3431 + }, + { + "epoch": 1.92, + "learning_rate": 3.973619126133553e-05, + "loss": 4.4019, + "step": 3432 + }, + { + "epoch": 1.92, + "learning_rate": 3.9715581203627376e-05, + "loss": 3.7319, + "step": 3433 + }, + { + "epoch": 1.92, + "learning_rate": 3.969497114591921e-05, + "loss": 4.0751, + "step": 3434 + }, + { + "epoch": 1.93, + "learning_rate": 3.967436108821105e-05, + "loss": 5.9855, + "step": 3435 + }, + { + "epoch": 1.93, + "learning_rate": 3.965375103050289e-05, + "loss": 5.953, + "step": 3436 + }, + { + "epoch": 1.93, + "learning_rate": 3.963314097279472e-05, + "loss": 5.7745, + "step": 3437 + }, + { + "epoch": 1.93, + "learning_rate": 3.9612530915086565e-05, + "loss": 5.8006, + "step": 3438 + }, + { + "epoch": 1.93, + "learning_rate": 3.95919208573784e-05, + "loss": 5.6913, + "step": 3439 + }, + { + "epoch": 1.93, + "learning_rate": 3.957131079967024e-05, + "loss": 5.5104, + "step": 3440 + }, + { + "epoch": 1.93, + "learning_rate": 3.9550700741962075e-05, + "loss": 5.6275, + "step": 3441 + }, + { + "epoch": 1.93, + "learning_rate": 3.953009068425392e-05, + "loss": 5.5625, + "step": 3442 + }, + { + "epoch": 1.93, + "learning_rate": 3.9509480626545755e-05, + "loss": 5.5603, + "step": 3443 + }, + { + "epoch": 1.93, + "learning_rate": 3.948887056883759e-05, + "loss": 5.7096, + "step": 3444 + }, + { + "epoch": 1.93, + "learning_rate": 3.9468260511129435e-05, + "loss": 5.5242, + "step": 3445 + }, + { + "epoch": 1.93, + "learning_rate": 3.944765045342127e-05, + "loss": 5.5425, + "step": 3446 + }, + { + "epoch": 1.93, + "learning_rate": 3.942704039571311e-05, + "loss": 5.4492, + "step": 3447 + }, + { + "epoch": 1.93, + "learning_rate": 3.9406430338004945e-05, + "loss": 5.4982, + "step": 3448 + }, + { + "epoch": 1.93, + "learning_rate": 3.938582028029679e-05, + "loss": 5.559, + "step": 3449 + }, + { + "epoch": 1.93, + "learning_rate": 3.9365210222588625e-05, + "loss": 5.5101, + "step": 3450 + }, + { + "epoch": 1.93, + "learning_rate": 3.934460016488046e-05, + "loss": 5.6185, + "step": 3451 + }, + { + "epoch": 1.93, + "learning_rate": 3.9323990107172305e-05, + "loss": 5.7636, + "step": 3452 + }, + { + "epoch": 1.94, + "learning_rate": 3.930338004946414e-05, + "loss": 5.4604, + "step": 3453 + }, + { + "epoch": 1.94, + "learning_rate": 3.928276999175598e-05, + "loss": 5.4974, + "step": 3454 + }, + { + "epoch": 1.94, + "learning_rate": 3.9262159934047815e-05, + "loss": 5.5244, + "step": 3455 + }, + { + "epoch": 1.94, + "learning_rate": 3.924154987633966e-05, + "loss": 5.0976, + "step": 3456 + }, + { + "epoch": 1.94, + "learning_rate": 3.9220939818631495e-05, + "loss": 5.6602, + "step": 3457 + }, + { + "epoch": 1.94, + "learning_rate": 3.920032976092333e-05, + "loss": 5.4898, + "step": 3458 + }, + { + "epoch": 1.94, + "learning_rate": 3.9179719703215175e-05, + "loss": 5.3281, + "step": 3459 + }, + { + "epoch": 1.94, + "learning_rate": 3.915910964550701e-05, + "loss": 5.4928, + "step": 3460 + }, + { + "epoch": 1.94, + "learning_rate": 3.913849958779885e-05, + "loss": 5.2609, + "step": 3461 + }, + { + "epoch": 1.94, + "learning_rate": 3.9117889530090684e-05, + "loss": 5.0064, + "step": 3462 + }, + { + "epoch": 1.94, + "learning_rate": 3.909727947238253e-05, + "loss": 5.5969, + "step": 3463 + }, + { + "epoch": 1.94, + "learning_rate": 3.907666941467436e-05, + "loss": 5.4982, + "step": 3464 + }, + { + "epoch": 1.94, + "learning_rate": 3.90560593569662e-05, + "loss": 5.3903, + "step": 3465 + }, + { + "epoch": 1.94, + "learning_rate": 3.9035449299258044e-05, + "loss": 5.4929, + "step": 3466 + }, + { + "epoch": 1.94, + "learning_rate": 3.9014839241549874e-05, + "loss": 5.4419, + "step": 3467 + }, + { + "epoch": 1.94, + "learning_rate": 3.899422918384172e-05, + "loss": 5.0764, + "step": 3468 + }, + { + "epoch": 1.94, + "learning_rate": 3.8973619126133554e-05, + "loss": 5.1773, + "step": 3469 + }, + { + "epoch": 1.95, + "learning_rate": 3.89530090684254e-05, + "loss": 5.4131, + "step": 3470 + }, + { + "epoch": 1.95, + "learning_rate": 3.893239901071723e-05, + "loss": 4.9462, + "step": 3471 + }, + { + "epoch": 1.95, + "learning_rate": 3.891178895300907e-05, + "loss": 5.1302, + "step": 3472 + }, + { + "epoch": 1.95, + "learning_rate": 3.889117889530091e-05, + "loss": 5.3741, + "step": 3473 + }, + { + "epoch": 1.95, + "learning_rate": 3.8870568837592744e-05, + "loss": 5.7522, + "step": 3474 + }, + { + "epoch": 1.95, + "learning_rate": 3.884995877988459e-05, + "loss": 5.1779, + "step": 3475 + }, + { + "epoch": 1.95, + "learning_rate": 3.8829348722176424e-05, + "loss": 4.596, + "step": 3476 + }, + { + "epoch": 1.95, + "learning_rate": 3.880873866446826e-05, + "loss": 5.3504, + "step": 3477 + }, + { + "epoch": 1.95, + "learning_rate": 3.87881286067601e-05, + "loss": 4.9981, + "step": 3478 + }, + { + "epoch": 1.95, + "learning_rate": 3.876751854905194e-05, + "loss": 5.2599, + "step": 3479 + }, + { + "epoch": 1.95, + "learning_rate": 3.874690849134378e-05, + "loss": 4.8636, + "step": 3480 + }, + { + "epoch": 1.95, + "learning_rate": 3.8726298433635614e-05, + "loss": 4.3372, + "step": 3481 + }, + { + "epoch": 1.95, + "learning_rate": 3.870568837592746e-05, + "loss": 4.3269, + "step": 3482 + }, + { + "epoch": 1.95, + "learning_rate": 3.8685078318219294e-05, + "loss": 3.8859, + "step": 3483 + }, + { + "epoch": 1.95, + "learning_rate": 3.866446826051113e-05, + "loss": 4.1612, + "step": 3484 + }, + { + "epoch": 1.95, + "learning_rate": 3.864385820280297e-05, + "loss": 6.6559, + "step": 3485 + }, + { + "epoch": 1.95, + "learning_rate": 3.862324814509481e-05, + "loss": 6.3815, + "step": 3486 + }, + { + "epoch": 1.95, + "learning_rate": 3.860263808738665e-05, + "loss": 6.2367, + "step": 3487 + }, + { + "epoch": 1.96, + "learning_rate": 3.8582028029678484e-05, + "loss": 6.2754, + "step": 3488 + }, + { + "epoch": 1.96, + "learning_rate": 3.856141797197033e-05, + "loss": 6.1662, + "step": 3489 + }, + { + "epoch": 1.96, + "learning_rate": 3.8540807914262164e-05, + "loss": 6.1569, + "step": 3490 + }, + { + "epoch": 1.96, + "learning_rate": 3.8520197856554e-05, + "loss": 5.7571, + "step": 3491 + }, + { + "epoch": 1.96, + "learning_rate": 3.849958779884584e-05, + "loss": 5.6761, + "step": 3492 + }, + { + "epoch": 1.96, + "learning_rate": 3.847897774113768e-05, + "loss": 5.7094, + "step": 3493 + }, + { + "epoch": 1.96, + "learning_rate": 3.845836768342952e-05, + "loss": 5.7374, + "step": 3494 + }, + { + "epoch": 1.96, + "learning_rate": 3.843775762572135e-05, + "loss": 5.858, + "step": 3495 + }, + { + "epoch": 1.96, + "learning_rate": 3.841714756801319e-05, + "loss": 6.0393, + "step": 3496 + }, + { + "epoch": 1.96, + "learning_rate": 3.839653751030503e-05, + "loss": 6.157, + "step": 3497 + }, + { + "epoch": 1.96, + "learning_rate": 3.837592745259687e-05, + "loss": 6.1623, + "step": 3498 + }, + { + "epoch": 1.96, + "learning_rate": 3.8355317394888707e-05, + "loss": 5.7901, + "step": 3499 + }, + { + "epoch": 1.96, + "learning_rate": 3.833470733718055e-05, + "loss": 5.73, + "step": 3500 + }, + { + "epoch": 1.96, + "eval_loss": 18.985713958740234, + "eval_runtime": 1332.9397, + "eval_samples_per_second": 1.982, + "eval_steps_per_second": 0.248, + "eval_wer": 1.0003966679888934, + "step": 3500 + }, + { + "epoch": 1.96, + "learning_rate": 3.831409727947238e-05, + "loss": 5.8867, + "step": 3501 + }, + { + "epoch": 1.96, + "learning_rate": 3.829348722176422e-05, + "loss": 5.4725, + "step": 3502 + }, + { + "epoch": 1.96, + "learning_rate": 3.827287716405606e-05, + "loss": 5.4952, + "step": 3503 + }, + { + "epoch": 1.96, + "learning_rate": 3.82522671063479e-05, + "loss": 5.4118, + "step": 3504 + }, + { + "epoch": 1.96, + "learning_rate": 3.823165704863974e-05, + "loss": 5.2445, + "step": 3505 + }, + { + "epoch": 1.97, + "learning_rate": 3.8211046990931576e-05, + "loss": 5.486, + "step": 3506 + }, + { + "epoch": 1.97, + "learning_rate": 3.819043693322342e-05, + "loss": 5.1649, + "step": 3507 + }, + { + "epoch": 1.97, + "learning_rate": 3.816982687551525e-05, + "loss": 5.6425, + "step": 3508 + }, + { + "epoch": 1.97, + "learning_rate": 3.814921681780709e-05, + "loss": 5.3597, + "step": 3509 + }, + { + "epoch": 1.97, + "learning_rate": 3.812860676009893e-05, + "loss": 5.3209, + "step": 3510 + }, + { + "epoch": 1.97, + "learning_rate": 3.8107996702390766e-05, + "loss": 5.1255, + "step": 3511 + }, + { + "epoch": 1.97, + "learning_rate": 3.80873866446826e-05, + "loss": 5.2896, + "step": 3512 + }, + { + "epoch": 1.97, + "learning_rate": 3.8066776586974446e-05, + "loss": 5.2099, + "step": 3513 + }, + { + "epoch": 1.97, + "learning_rate": 3.804616652926629e-05, + "loss": 5.3256, + "step": 3514 + }, + { + "epoch": 1.97, + "learning_rate": 3.802555647155812e-05, + "loss": 5.2819, + "step": 3515 + }, + { + "epoch": 1.97, + "learning_rate": 3.800494641384996e-05, + "loss": 4.9691, + "step": 3516 + }, + { + "epoch": 1.97, + "learning_rate": 3.79843363561418e-05, + "loss": 5.4512, + "step": 3517 + }, + { + "epoch": 1.97, + "learning_rate": 3.7963726298433636e-05, + "loss": 5.4823, + "step": 3518 + }, + { + "epoch": 1.97, + "learning_rate": 3.794311624072547e-05, + "loss": 5.3446, + "step": 3519 + }, + { + "epoch": 1.97, + "learning_rate": 3.7922506183017316e-05, + "loss": 5.2289, + "step": 3520 + }, + { + "epoch": 1.97, + "learning_rate": 3.790189612530915e-05, + "loss": 5.3252, + "step": 3521 + }, + { + "epoch": 1.97, + "learning_rate": 3.788128606760099e-05, + "loss": 5.4441, + "step": 3522 + }, + { + "epoch": 1.97, + "learning_rate": 3.786067600989283e-05, + "loss": 5.253, + "step": 3523 + }, + { + "epoch": 1.98, + "learning_rate": 3.784006595218467e-05, + "loss": 5.012, + "step": 3524 + }, + { + "epoch": 1.98, + "learning_rate": 3.7819455894476506e-05, + "loss": 5.1485, + "step": 3525 + }, + { + "epoch": 1.98, + "learning_rate": 3.779884583676834e-05, + "loss": 5.2651, + "step": 3526 + }, + { + "epoch": 1.98, + "learning_rate": 3.7778235779060186e-05, + "loss": 4.9457, + "step": 3527 + }, + { + "epoch": 1.98, + "learning_rate": 3.775762572135202e-05, + "loss": 5.3019, + "step": 3528 + }, + { + "epoch": 1.98, + "learning_rate": 3.773701566364386e-05, + "loss": 4.9211, + "step": 3529 + }, + { + "epoch": 1.98, + "learning_rate": 3.77164056059357e-05, + "loss": 4.4118, + "step": 3530 + }, + { + "epoch": 1.98, + "learning_rate": 3.769579554822754e-05, + "loss": 4.6741, + "step": 3531 + }, + { + "epoch": 1.98, + "learning_rate": 3.7675185490519375e-05, + "loss": 4.5633, + "step": 3532 + }, + { + "epoch": 1.98, + "learning_rate": 3.765457543281121e-05, + "loss": 4.0043, + "step": 3533 + }, + { + "epoch": 1.98, + "learning_rate": 3.7633965375103055e-05, + "loss": 3.8232, + "step": 3534 + }, + { + "epoch": 1.98, + "learning_rate": 3.7613355317394885e-05, + "loss": 5.9907, + "step": 3535 + }, + { + "epoch": 1.98, + "learning_rate": 3.759274525968673e-05, + "loss": 6.2248, + "step": 3536 + }, + { + "epoch": 1.98, + "learning_rate": 3.757213520197857e-05, + "loss": 6.2092, + "step": 3537 + }, + { + "epoch": 1.98, + "learning_rate": 3.755152514427041e-05, + "loss": 5.8029, + "step": 3538 + }, + { + "epoch": 1.98, + "learning_rate": 3.7530915086562245e-05, + "loss": 5.7797, + "step": 3539 + }, + { + "epoch": 1.98, + "learning_rate": 3.751030502885408e-05, + "loss": 5.593, + "step": 3540 + }, + { + "epoch": 1.98, + "learning_rate": 3.7489694971145925e-05, + "loss": 5.8408, + "step": 3541 + }, + { + "epoch": 1.99, + "learning_rate": 3.7469084913437755e-05, + "loss": 5.4401, + "step": 3542 + }, + { + "epoch": 1.99, + "learning_rate": 3.74484748557296e-05, + "loss": 5.7795, + "step": 3543 + }, + { + "epoch": 1.99, + "learning_rate": 3.7427864798021435e-05, + "loss": 5.4889, + "step": 3544 + }, + { + "epoch": 1.99, + "learning_rate": 3.740725474031327e-05, + "loss": 5.367, + "step": 3545 + }, + { + "epoch": 1.99, + "learning_rate": 3.7386644682605115e-05, + "loss": 5.5118, + "step": 3546 + }, + { + "epoch": 1.99, + "learning_rate": 3.736603462489695e-05, + "loss": 5.577, + "step": 3547 + }, + { + "epoch": 1.99, + "learning_rate": 3.734542456718879e-05, + "loss": 5.8672, + "step": 3548 + }, + { + "epoch": 1.99, + "learning_rate": 3.7324814509480625e-05, + "loss": 5.2326, + "step": 3549 + }, + { + "epoch": 1.99, + "learning_rate": 3.730420445177247e-05, + "loss": 5.3383, + "step": 3550 + }, + { + "epoch": 1.99, + "learning_rate": 3.7283594394064305e-05, + "loss": 5.6227, + "step": 3551 + }, + { + "epoch": 1.99, + "learning_rate": 3.726298433635614e-05, + "loss": 5.2739, + "step": 3552 + }, + { + "epoch": 1.99, + "learning_rate": 3.7242374278647985e-05, + "loss": 5.5119, + "step": 3553 + }, + { + "epoch": 1.99, + "learning_rate": 3.722176422093982e-05, + "loss": 5.8369, + "step": 3554 + }, + { + "epoch": 1.99, + "learning_rate": 3.720115416323166e-05, + "loss": 5.4672, + "step": 3555 + }, + { + "epoch": 1.99, + "learning_rate": 3.7180544105523495e-05, + "loss": 5.7119, + "step": 3556 + }, + { + "epoch": 1.99, + "learning_rate": 3.715993404781534e-05, + "loss": 5.1124, + "step": 3557 + }, + { + "epoch": 1.99, + "learning_rate": 3.7139323990107175e-05, + "loss": 5.4202, + "step": 3558 + }, + { + "epoch": 1.99, + "learning_rate": 3.711871393239901e-05, + "loss": 5.1007, + "step": 3559 + }, + { + "epoch": 2.0, + "learning_rate": 3.709810387469085e-05, + "loss": 5.051, + "step": 3560 + }, + { + "epoch": 2.0, + "learning_rate": 3.707749381698269e-05, + "loss": 5.4202, + "step": 3561 + }, + { + "epoch": 2.0, + "learning_rate": 3.705688375927453e-05, + "loss": 5.1877, + "step": 3562 + }, + { + "epoch": 2.0, + "learning_rate": 3.7036273701566364e-05, + "loss": 5.1967, + "step": 3563 + }, + { + "epoch": 2.0, + "learning_rate": 3.701566364385821e-05, + "loss": 5.206, + "step": 3564 + }, + { + "epoch": 2.0, + "learning_rate": 3.6995053586150044e-05, + "loss": 4.4709, + "step": 3565 + }, + { + "epoch": 2.0, + "learning_rate": 3.697444352844188e-05, + "loss": 4.7975, + "step": 3566 + }, + { + "epoch": 2.0, + "learning_rate": 3.695383347073372e-05, + "loss": 5.2066, + "step": 3567 + }, + { + "epoch": 2.0, + "learning_rate": 3.693322341302556e-05, + "loss": 3.105, + "step": 3568 + }, + { + "epoch": 2.0, + "learning_rate": 3.69126133553174e-05, + "loss": 6.127, + "step": 3569 + }, + { + "epoch": 2.0, + "learning_rate": 3.6892003297609234e-05, + "loss": 5.883, + "step": 3570 + }, + { + "epoch": 2.0, + "learning_rate": 3.687139323990108e-05, + "loss": 6.1041, + "step": 3571 + }, + { + "epoch": 2.0, + "learning_rate": 3.685078318219291e-05, + "loss": 6.0245, + "step": 3572 + }, + { + "epoch": 2.0, + "learning_rate": 3.683017312448475e-05, + "loss": 6.0736, + "step": 3573 + }, + { + "epoch": 2.0, + "learning_rate": 3.680956306677659e-05, + "loss": 5.984, + "step": 3574 + }, + { + "epoch": 2.0, + "learning_rate": 3.678895300906843e-05, + "loss": 5.8976, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 3.676834295136026e-05, + "loss": 5.6771, + "step": 3576 + }, + { + "epoch": 2.01, + "learning_rate": 3.6747732893652104e-05, + "loss": 5.5385, + "step": 3577 + }, + { + "epoch": 2.01, + "learning_rate": 3.672712283594395e-05, + "loss": 5.5155, + "step": 3578 + }, + { + "epoch": 2.01, + "learning_rate": 3.670651277823578e-05, + "loss": 5.556, + "step": 3579 + }, + { + "epoch": 2.01, + "learning_rate": 3.668590272052762e-05, + "loss": 5.4327, + "step": 3580 + }, + { + "epoch": 2.01, + "learning_rate": 3.666529266281946e-05, + "loss": 5.6209, + "step": 3581 + }, + { + "epoch": 2.01, + "learning_rate": 3.6644682605111294e-05, + "loss": 5.4614, + "step": 3582 + }, + { + "epoch": 2.01, + "learning_rate": 3.662407254740313e-05, + "loss": 5.565, + "step": 3583 + }, + { + "epoch": 2.01, + "learning_rate": 3.6603462489694974e-05, + "loss": 5.4472, + "step": 3584 + }, + { + "epoch": 2.01, + "learning_rate": 3.658285243198682e-05, + "loss": 5.5381, + "step": 3585 + }, + { + "epoch": 2.01, + "learning_rate": 3.656224237427865e-05, + "loss": 5.4924, + "step": 3586 + }, + { + "epoch": 2.01, + "learning_rate": 3.654163231657049e-05, + "loss": 5.3181, + "step": 3587 + }, + { + "epoch": 2.01, + "learning_rate": 3.652102225886233e-05, + "loss": 5.1983, + "step": 3588 + }, + { + "epoch": 2.01, + "learning_rate": 3.6500412201154163e-05, + "loss": 5.2494, + "step": 3589 + }, + { + "epoch": 2.01, + "learning_rate": 3.6479802143446e-05, + "loss": 5.4013, + "step": 3590 + }, + { + "epoch": 2.01, + "learning_rate": 3.6459192085737843e-05, + "loss": 5.228, + "step": 3591 + }, + { + "epoch": 2.01, + "learning_rate": 3.643858202802968e-05, + "loss": 5.4497, + "step": 3592 + }, + { + "epoch": 2.01, + "learning_rate": 3.641797197032152e-05, + "loss": 5.3885, + "step": 3593 + }, + { + "epoch": 2.01, + "learning_rate": 3.639736191261336e-05, + "loss": 5.2425, + "step": 3594 + }, + { + "epoch": 2.02, + "learning_rate": 3.63767518549052e-05, + "loss": 5.2752, + "step": 3595 + }, + { + "epoch": 2.02, + "learning_rate": 3.635614179719703e-05, + "loss": 5.0694, + "step": 3596 + }, + { + "epoch": 2.02, + "learning_rate": 3.633553173948887e-05, + "loss": 5.3092, + "step": 3597 + }, + { + "epoch": 2.02, + "learning_rate": 3.631492168178071e-05, + "loss": 5.2803, + "step": 3598 + }, + { + "epoch": 2.02, + "learning_rate": 3.629431162407255e-05, + "loss": 5.5403, + "step": 3599 + }, + { + "epoch": 2.02, + "learning_rate": 3.6273701566364386e-05, + "loss": 5.3674, + "step": 3600 + }, + { + "epoch": 2.02, + "learning_rate": 3.625309150865623e-05, + "loss": 5.1751, + "step": 3601 + }, + { + "epoch": 2.02, + "learning_rate": 3.6232481450948066e-05, + "loss": 4.9461, + "step": 3602 + }, + { + "epoch": 2.02, + "learning_rate": 3.62118713932399e-05, + "loss": 4.881, + "step": 3603 + }, + { + "epoch": 2.02, + "learning_rate": 3.619126133553174e-05, + "loss": 4.9013, + "step": 3604 + }, + { + "epoch": 2.02, + "learning_rate": 3.617065127782358e-05, + "loss": 5.384, + "step": 3605 + }, + { + "epoch": 2.02, + "learning_rate": 3.615004122011541e-05, + "loss": 5.2077, + "step": 3606 + }, + { + "epoch": 2.02, + "learning_rate": 3.6129431162407256e-05, + "loss": 4.933, + "step": 3607 + }, + { + "epoch": 2.02, + "learning_rate": 3.61088211046991e-05, + "loss": 5.1936, + "step": 3608 + }, + { + "epoch": 2.02, + "learning_rate": 3.6088211046990936e-05, + "loss": 5.3849, + "step": 3609 + }, + { + "epoch": 2.02, + "learning_rate": 3.606760098928277e-05, + "loss": 4.9283, + "step": 3610 + }, + { + "epoch": 2.02, + "learning_rate": 3.604699093157461e-05, + "loss": 4.5964, + "step": 3611 + }, + { + "epoch": 2.02, + "learning_rate": 3.602638087386645e-05, + "loss": 4.8913, + "step": 3612 + }, + { + "epoch": 2.03, + "learning_rate": 3.600577081615828e-05, + "loss": 4.4793, + "step": 3613 + }, + { + "epoch": 2.03, + "learning_rate": 3.5985160758450126e-05, + "loss": 4.0519, + "step": 3614 + }, + { + "epoch": 2.03, + "learning_rate": 3.596455070074196e-05, + "loss": 3.8786, + "step": 3615 + }, + { + "epoch": 2.03, + "learning_rate": 3.59439406430338e-05, + "loss": 4.2617, + "step": 3616 + }, + { + "epoch": 2.03, + "learning_rate": 3.592333058532564e-05, + "loss": 3.8635, + "step": 3617 + }, + { + "epoch": 2.03, + "learning_rate": 3.590272052761748e-05, + "loss": 3.0257, + "step": 3618 + }, + { + "epoch": 2.03, + "learning_rate": 3.5882110469909316e-05, + "loss": 6.024, + "step": 3619 + }, + { + "epoch": 2.03, + "learning_rate": 3.586150041220115e-05, + "loss": 5.4739, + "step": 3620 + }, + { + "epoch": 2.03, + "learning_rate": 3.5840890354492996e-05, + "loss": 5.881, + "step": 3621 + }, + { + "epoch": 2.03, + "learning_rate": 3.582028029678483e-05, + "loss": 5.7769, + "step": 3622 + }, + { + "epoch": 2.03, + "learning_rate": 3.579967023907667e-05, + "loss": 5.5308, + "step": 3623 + }, + { + "epoch": 2.03, + "learning_rate": 3.577906018136851e-05, + "loss": 5.5062, + "step": 3624 + }, + { + "epoch": 2.03, + "learning_rate": 3.575845012366035e-05, + "loss": 5.8769, + "step": 3625 + }, + { + "epoch": 2.03, + "learning_rate": 3.5737840065952186e-05, + "loss": 5.63, + "step": 3626 + }, + { + "epoch": 2.03, + "learning_rate": 3.571723000824402e-05, + "loss": 5.4859, + "step": 3627 + }, + { + "epoch": 2.03, + "learning_rate": 3.5696619950535866e-05, + "loss": 5.8555, + "step": 3628 + }, + { + "epoch": 2.03, + "learning_rate": 3.56760098928277e-05, + "loss": 5.5083, + "step": 3629 + }, + { + "epoch": 2.03, + "learning_rate": 3.565539983511954e-05, + "loss": 5.3481, + "step": 3630 + }, + { + "epoch": 2.04, + "learning_rate": 3.5634789777411375e-05, + "loss": 5.424, + "step": 3631 + }, + { + "epoch": 2.04, + "learning_rate": 3.561417971970322e-05, + "loss": 5.3468, + "step": 3632 + }, + { + "epoch": 2.04, + "learning_rate": 3.5593569661995055e-05, + "loss": 5.6152, + "step": 3633 + }, + { + "epoch": 2.04, + "learning_rate": 3.557295960428689e-05, + "loss": 5.4798, + "step": 3634 + }, + { + "epoch": 2.04, + "learning_rate": 3.5552349546578735e-05, + "loss": 5.524, + "step": 3635 + }, + { + "epoch": 2.04, + "learning_rate": 3.553173948887057e-05, + "loss": 5.7306, + "step": 3636 + }, + { + "epoch": 2.04, + "learning_rate": 3.551112943116241e-05, + "loss": 5.2901, + "step": 3637 + }, + { + "epoch": 2.04, + "learning_rate": 3.5490519373454245e-05, + "loss": 5.4819, + "step": 3638 + }, + { + "epoch": 2.04, + "learning_rate": 3.546990931574609e-05, + "loss": 5.206, + "step": 3639 + }, + { + "epoch": 2.04, + "learning_rate": 3.5449299258037925e-05, + "loss": 5.3303, + "step": 3640 + }, + { + "epoch": 2.04, + "learning_rate": 3.542868920032976e-05, + "loss": 5.5385, + "step": 3641 + }, + { + "epoch": 2.04, + "learning_rate": 3.5408079142621605e-05, + "loss": 5.344, + "step": 3642 + }, + { + "epoch": 2.04, + "learning_rate": 3.5387469084913435e-05, + "loss": 5.3553, + "step": 3643 + }, + { + "epoch": 2.04, + "learning_rate": 3.536685902720528e-05, + "loss": 5.2297, + "step": 3644 + }, + { + "epoch": 2.04, + "learning_rate": 3.5346248969497115e-05, + "loss": 5.7073, + "step": 3645 + }, + { + "epoch": 2.04, + "learning_rate": 3.532563891178896e-05, + "loss": 5.1669, + "step": 3646 + }, + { + "epoch": 2.04, + "learning_rate": 3.530502885408079e-05, + "loss": 5.4904, + "step": 3647 + }, + { + "epoch": 2.04, + "learning_rate": 3.528441879637263e-05, + "loss": 5.3136, + "step": 3648 + }, + { + "epoch": 2.05, + "learning_rate": 3.5263808738664475e-05, + "loss": 5.1276, + "step": 3649 + }, + { + "epoch": 2.05, + "learning_rate": 3.5243198680956305e-05, + "loss": 4.8803, + "step": 3650 + }, + { + "epoch": 2.05, + "learning_rate": 3.522258862324815e-05, + "loss": 5.3768, + "step": 3651 + }, + { + "epoch": 2.05, + "learning_rate": 3.5201978565539985e-05, + "loss": 5.1056, + "step": 3652 + }, + { + "epoch": 2.05, + "learning_rate": 3.518136850783182e-05, + "loss": 5.1066, + "step": 3653 + }, + { + "epoch": 2.05, + "learning_rate": 3.516075845012366e-05, + "loss": 5.0517, + "step": 3654 + }, + { + "epoch": 2.05, + "learning_rate": 3.51401483924155e-05, + "loss": 5.0335, + "step": 3655 + }, + { + "epoch": 2.05, + "learning_rate": 3.5119538334707345e-05, + "loss": 5.0176, + "step": 3656 + }, + { + "epoch": 2.05, + "learning_rate": 3.5098928276999174e-05, + "loss": 4.9724, + "step": 3657 + }, + { + "epoch": 2.05, + "learning_rate": 3.507831821929102e-05, + "loss": 5.1814, + "step": 3658 + }, + { + "epoch": 2.05, + "learning_rate": 3.5057708161582854e-05, + "loss": 4.8925, + "step": 3659 + }, + { + "epoch": 2.05, + "learning_rate": 3.503709810387469e-05, + "loss": 5.175, + "step": 3660 + }, + { + "epoch": 2.05, + "learning_rate": 3.501648804616653e-05, + "loss": 4.674, + "step": 3661 + }, + { + "epoch": 2.05, + "learning_rate": 3.499587798845837e-05, + "loss": 4.6165, + "step": 3662 + }, + { + "epoch": 2.05, + "learning_rate": 3.497526793075021e-05, + "loss": 4.652, + "step": 3663 + }, + { + "epoch": 2.05, + "learning_rate": 3.4954657873042044e-05, + "loss": 4.423, + "step": 3664 + }, + { + "epoch": 2.05, + "learning_rate": 3.493404781533389e-05, + "loss": 4.4095, + "step": 3665 + }, + { + "epoch": 2.05, + "learning_rate": 3.4913437757625724e-05, + "loss": 4.3349, + "step": 3666 + }, + { + "epoch": 2.06, + "learning_rate": 3.489282769991756e-05, + "loss": 4.0616, + "step": 3667 + }, + { + "epoch": 2.06, + "learning_rate": 3.48722176422094e-05, + "loss": 2.8314, + "step": 3668 + }, + { + "epoch": 2.06, + "learning_rate": 3.485160758450124e-05, + "loss": 5.9588, + "step": 3669 + }, + { + "epoch": 2.06, + "learning_rate": 3.483099752679308e-05, + "loss": 6.396, + "step": 3670 + }, + { + "epoch": 2.06, + "learning_rate": 3.4810387469084914e-05, + "loss": 5.8509, + "step": 3671 + }, + { + "epoch": 2.06, + "learning_rate": 3.478977741137676e-05, + "loss": 5.8099, + "step": 3672 + }, + { + "epoch": 2.06, + "learning_rate": 3.4769167353668594e-05, + "loss": 5.6465, + "step": 3673 + }, + { + "epoch": 2.06, + "learning_rate": 3.474855729596043e-05, + "loss": 5.1601, + "step": 3674 + }, + { + "epoch": 2.06, + "learning_rate": 3.472794723825227e-05, + "loss": 5.57, + "step": 3675 + }, + { + "epoch": 2.06, + "learning_rate": 3.470733718054411e-05, + "loss": 5.3305, + "step": 3676 + }, + { + "epoch": 2.06, + "learning_rate": 3.468672712283594e-05, + "loss": 5.6513, + "step": 3677 + }, + { + "epoch": 2.06, + "learning_rate": 3.4666117065127784e-05, + "loss": 5.5599, + "step": 3678 + }, + { + "epoch": 2.06, + "learning_rate": 3.464550700741962e-05, + "loss": 5.6709, + "step": 3679 + }, + { + "epoch": 2.06, + "learning_rate": 3.4624896949711464e-05, + "loss": 5.627, + "step": 3680 + }, + { + "epoch": 2.06, + "learning_rate": 3.46042868920033e-05, + "loss": 5.3791, + "step": 3681 + }, + { + "epoch": 2.06, + "learning_rate": 3.458367683429514e-05, + "loss": 5.453, + "step": 3682 + }, + { + "epoch": 2.06, + "learning_rate": 3.456306677658698e-05, + "loss": 5.4888, + "step": 3683 + }, + { + "epoch": 2.07, + "learning_rate": 3.454245671887881e-05, + "loss": 5.4331, + "step": 3684 + }, + { + "epoch": 2.07, + "learning_rate": 3.4521846661170654e-05, + "loss": 5.2723, + "step": 3685 + }, + { + "epoch": 2.07, + "learning_rate": 3.450123660346249e-05, + "loss": 5.3436, + "step": 3686 + }, + { + "epoch": 2.07, + "learning_rate": 3.448062654575433e-05, + "loss": 5.2062, + "step": 3687 + }, + { + "epoch": 2.07, + "learning_rate": 3.446001648804617e-05, + "loss": 5.4847, + "step": 3688 + }, + { + "epoch": 2.07, + "learning_rate": 3.443940643033801e-05, + "loss": 5.1766, + "step": 3689 + }, + { + "epoch": 2.07, + "learning_rate": 3.441879637262985e-05, + "loss": 5.2443, + "step": 3690 + }, + { + "epoch": 2.07, + "learning_rate": 3.439818631492168e-05, + "loss": 5.3263, + "step": 3691 + }, + { + "epoch": 2.07, + "learning_rate": 3.437757625721352e-05, + "loss": 5.2855, + "step": 3692 + }, + { + "epoch": 2.07, + "learning_rate": 3.435696619950536e-05, + "loss": 5.0108, + "step": 3693 + }, + { + "epoch": 2.07, + "learning_rate": 3.4336356141797197e-05, + "loss": 5.3299, + "step": 3694 + }, + { + "epoch": 2.07, + "learning_rate": 3.431574608408903e-05, + "loss": 5.5701, + "step": 3695 + }, + { + "epoch": 2.07, + "learning_rate": 3.4295136026380877e-05, + "loss": 5.1422, + "step": 3696 + }, + { + "epoch": 2.07, + "learning_rate": 3.427452596867271e-05, + "loss": 5.1912, + "step": 3697 + }, + { + "epoch": 2.07, + "learning_rate": 3.425391591096455e-05, + "loss": 5.0496, + "step": 3698 + }, + { + "epoch": 2.07, + "learning_rate": 3.423330585325639e-05, + "loss": 5.188, + "step": 3699 + }, + { + "epoch": 2.07, + "learning_rate": 3.421269579554823e-05, + "loss": 5.132, + "step": 3700 + }, + { + "epoch": 2.07, + "learning_rate": 3.4192085737840066e-05, + "loss": 5.2242, + "step": 3701 + }, + { + "epoch": 2.08, + "learning_rate": 3.41714756801319e-05, + "loss": 5.0612, + "step": 3702 + }, + { + "epoch": 2.08, + "learning_rate": 3.4150865622423746e-05, + "loss": 4.8958, + "step": 3703 + }, + { + "epoch": 2.08, + "learning_rate": 3.413025556471558e-05, + "loss": 5.1424, + "step": 3704 + }, + { + "epoch": 2.08, + "learning_rate": 3.410964550700742e-05, + "loss": 5.1985, + "step": 3705 + }, + { + "epoch": 2.08, + "learning_rate": 3.408903544929926e-05, + "loss": 5.1018, + "step": 3706 + }, + { + "epoch": 2.08, + "learning_rate": 3.40684253915911e-05, + "loss": 5.1781, + "step": 3707 + }, + { + "epoch": 2.08, + "learning_rate": 3.4047815333882936e-05, + "loss": 5.0455, + "step": 3708 + }, + { + "epoch": 2.08, + "learning_rate": 3.402720527617477e-05, + "loss": 4.8446, + "step": 3709 + }, + { + "epoch": 2.08, + "learning_rate": 3.4006595218466616e-05, + "loss": 5.2548, + "step": 3710 + }, + { + "epoch": 2.08, + "learning_rate": 3.398598516075845e-05, + "loss": 4.7819, + "step": 3711 + }, + { + "epoch": 2.08, + "learning_rate": 3.396537510305029e-05, + "loss": 4.3011, + "step": 3712 + }, + { + "epoch": 2.08, + "learning_rate": 3.394476504534213e-05, + "loss": 4.3467, + "step": 3713 + }, + { + "epoch": 2.08, + "learning_rate": 3.392415498763397e-05, + "loss": 4.6985, + "step": 3714 + }, + { + "epoch": 2.08, + "learning_rate": 3.3903544929925806e-05, + "loss": 3.9453, + "step": 3715 + }, + { + "epoch": 2.08, + "learning_rate": 3.388293487221764e-05, + "loss": 3.9671, + "step": 3716 + }, + { + "epoch": 2.08, + "learning_rate": 3.3862324814509486e-05, + "loss": 3.5639, + "step": 3717 + }, + { + "epoch": 2.08, + "learning_rate": 3.3841714756801316e-05, + "loss": 3.3643, + "step": 3718 + }, + { + "epoch": 2.08, + "learning_rate": 3.382110469909316e-05, + "loss": 6.1308, + "step": 3719 + }, + { + "epoch": 2.09, + "learning_rate": 3.3800494641385e-05, + "loss": 6.3318, + "step": 3720 + }, + { + "epoch": 2.09, + "learning_rate": 3.377988458367683e-05, + "loss": 5.7818, + "step": 3721 + }, + { + "epoch": 2.09, + "learning_rate": 3.3759274525968676e-05, + "loss": 6.0105, + "step": 3722 + }, + { + "epoch": 2.09, + "learning_rate": 3.373866446826051e-05, + "loss": 5.9744, + "step": 3723 + }, + { + "epoch": 2.09, + "learning_rate": 3.371805441055235e-05, + "loss": 5.8121, + "step": 3724 + }, + { + "epoch": 2.09, + "learning_rate": 3.3697444352844185e-05, + "loss": 5.4698, + "step": 3725 + }, + { + "epoch": 2.09, + "learning_rate": 3.367683429513603e-05, + "loss": 5.4952, + "step": 3726 + }, + { + "epoch": 2.09, + "learning_rate": 3.365622423742787e-05, + "loss": 5.1861, + "step": 3727 + }, + { + "epoch": 2.09, + "learning_rate": 3.36356141797197e-05, + "loss": 5.471, + "step": 3728 + }, + { + "epoch": 2.09, + "learning_rate": 3.3615004122011545e-05, + "loss": 5.804, + "step": 3729 + }, + { + "epoch": 2.09, + "learning_rate": 3.359439406430338e-05, + "loss": 5.7317, + "step": 3730 + }, + { + "epoch": 2.09, + "learning_rate": 3.357378400659522e-05, + "loss": 5.5114, + "step": 3731 + }, + { + "epoch": 2.09, + "learning_rate": 3.3553173948887055e-05, + "loss": 5.5317, + "step": 3732 + }, + { + "epoch": 2.09, + "learning_rate": 3.35325638911789e-05, + "loss": 6.036, + "step": 3733 + }, + { + "epoch": 2.09, + "learning_rate": 3.3511953833470735e-05, + "loss": 5.978, + "step": 3734 + }, + { + "epoch": 2.09, + "learning_rate": 3.349134377576257e-05, + "loss": 5.3144, + "step": 3735 + }, + { + "epoch": 2.09, + "learning_rate": 3.3470733718054415e-05, + "loss": 5.3651, + "step": 3736 + }, + { + "epoch": 2.09, + "learning_rate": 3.345012366034625e-05, + "loss": 5.3305, + "step": 3737 + }, + { + "epoch": 2.1, + "learning_rate": 3.342951360263809e-05, + "loss": 5.2955, + "step": 3738 + }, + { + "epoch": 2.1, + "learning_rate": 3.3408903544929925e-05, + "loss": 5.5505, + "step": 3739 + }, + { + "epoch": 2.1, + "learning_rate": 3.338829348722177e-05, + "loss": 5.3005, + "step": 3740 + }, + { + "epoch": 2.1, + "learning_rate": 3.3367683429513605e-05, + "loss": 5.4158, + "step": 3741 + }, + { + "epoch": 2.1, + "learning_rate": 3.334707337180544e-05, + "loss": 5.4036, + "step": 3742 + }, + { + "epoch": 2.1, + "learning_rate": 3.3326463314097285e-05, + "loss": 5.1308, + "step": 3743 + }, + { + "epoch": 2.1, + "learning_rate": 3.330585325638912e-05, + "loss": 5.7601, + "step": 3744 + }, + { + "epoch": 2.1, + "learning_rate": 3.328524319868096e-05, + "loss": 5.3097, + "step": 3745 + }, + { + "epoch": 2.1, + "learning_rate": 3.3264633140972795e-05, + "loss": 5.2356, + "step": 3746 + }, + { + "epoch": 2.1, + "learning_rate": 3.324402308326464e-05, + "loss": 5.1494, + "step": 3747 + }, + { + "epoch": 2.1, + "learning_rate": 3.322341302555647e-05, + "loss": 5.1521, + "step": 3748 + }, + { + "epoch": 2.1, + "learning_rate": 3.320280296784831e-05, + "loss": 5.2293, + "step": 3749 + }, + { + "epoch": 2.1, + "learning_rate": 3.318219291014015e-05, + "loss": 5.34, + "step": 3750 + }, + { + "epoch": 2.1, + "learning_rate": 3.316158285243199e-05, + "loss": 5.191, + "step": 3751 + }, + { + "epoch": 2.1, + "learning_rate": 3.314097279472383e-05, + "loss": 5.3764, + "step": 3752 + }, + { + "epoch": 2.1, + "learning_rate": 3.3120362737015665e-05, + "loss": 4.9735, + "step": 3753 + }, + { + "epoch": 2.1, + "learning_rate": 3.309975267930751e-05, + "loss": 5.4452, + "step": 3754 + }, + { + "epoch": 2.1, + "learning_rate": 3.307914262159934e-05, + "loss": 5.3847, + "step": 3755 + }, + { + "epoch": 2.11, + "learning_rate": 3.305853256389118e-05, + "loss": 5.1075, + "step": 3756 + }, + { + "epoch": 2.11, + "learning_rate": 3.303792250618302e-05, + "loss": 4.8739, + "step": 3757 + }, + { + "epoch": 2.11, + "learning_rate": 3.3017312448474854e-05, + "loss": 5.2622, + "step": 3758 + }, + { + "epoch": 2.11, + "learning_rate": 3.29967023907667e-05, + "loss": 4.9111, + "step": 3759 + }, + { + "epoch": 2.11, + "learning_rate": 3.2976092333058534e-05, + "loss": 4.9147, + "step": 3760 + }, + { + "epoch": 2.11, + "learning_rate": 3.295548227535038e-05, + "loss": 4.9901, + "step": 3761 + }, + { + "epoch": 2.11, + "learning_rate": 3.293487221764221e-05, + "loss": 4.8524, + "step": 3762 + }, + { + "epoch": 2.11, + "learning_rate": 3.291426215993405e-05, + "loss": 4.7337, + "step": 3763 + }, + { + "epoch": 2.11, + "learning_rate": 3.289365210222589e-05, + "loss": 4.3966, + "step": 3764 + }, + { + "epoch": 2.11, + "learning_rate": 3.2873042044517724e-05, + "loss": 4.3567, + "step": 3765 + }, + { + "epoch": 2.11, + "learning_rate": 3.285243198680956e-05, + "loss": 4.3291, + "step": 3766 + }, + { + "epoch": 2.11, + "learning_rate": 3.2831821929101404e-05, + "loss": 3.6785, + "step": 3767 + }, + { + "epoch": 2.11, + "learning_rate": 3.281121187139324e-05, + "loss": 4.2036, + "step": 3768 + }, + { + "epoch": 2.11, + "learning_rate": 3.279060181368508e-05, + "loss": 6.0295, + "step": 3769 + }, + { + "epoch": 2.11, + "learning_rate": 3.276999175597692e-05, + "loss": 5.6817, + "step": 3770 + }, + { + "epoch": 2.11, + "learning_rate": 3.274938169826876e-05, + "loss": 5.7622, + "step": 3771 + }, + { + "epoch": 2.11, + "learning_rate": 3.2728771640560594e-05, + "loss": 5.7484, + "step": 3772 + }, + { + "epoch": 2.11, + "learning_rate": 3.270816158285243e-05, + "loss": 5.8812, + "step": 3773 + }, + { + "epoch": 2.12, + "learning_rate": 3.2687551525144274e-05, + "loss": 5.3487, + "step": 3774 + }, + { + "epoch": 2.12, + "learning_rate": 3.266694146743611e-05, + "loss": 5.3863, + "step": 3775 + }, + { + "epoch": 2.12, + "learning_rate": 3.264633140972795e-05, + "loss": 5.8218, + "step": 3776 + }, + { + "epoch": 2.12, + "learning_rate": 3.262572135201979e-05, + "loss": 5.5258, + "step": 3777 + }, + { + "epoch": 2.12, + "learning_rate": 3.260511129431163e-05, + "loss": 5.6251, + "step": 3778 + }, + { + "epoch": 2.12, + "learning_rate": 3.2584501236603464e-05, + "loss": 5.4199, + "step": 3779 + }, + { + "epoch": 2.12, + "learning_rate": 3.25638911788953e-05, + "loss": 5.4895, + "step": 3780 + }, + { + "epoch": 2.12, + "learning_rate": 3.2543281121187144e-05, + "loss": 5.4357, + "step": 3781 + }, + { + "epoch": 2.12, + "learning_rate": 3.2522671063478974e-05, + "loss": 5.561, + "step": 3782 + }, + { + "epoch": 2.12, + "learning_rate": 3.250206100577082e-05, + "loss": 4.9942, + "step": 3783 + }, + { + "epoch": 2.12, + "learning_rate": 3.248145094806266e-05, + "loss": 5.3038, + "step": 3784 + }, + { + "epoch": 2.12, + "learning_rate": 3.24608408903545e-05, + "loss": 5.3436, + "step": 3785 + }, + { + "epoch": 2.12, + "learning_rate": 3.2440230832646333e-05, + "loss": 5.3524, + "step": 3786 + }, + { + "epoch": 2.12, + "learning_rate": 3.241962077493817e-05, + "loss": 5.5513, + "step": 3787 + }, + { + "epoch": 2.12, + "learning_rate": 3.2399010717230013e-05, + "loss": 5.1925, + "step": 3788 + }, + { + "epoch": 2.12, + "learning_rate": 3.237840065952184e-05, + "loss": 5.2149, + "step": 3789 + }, + { + "epoch": 2.12, + "learning_rate": 3.235779060181369e-05, + "loss": 5.3527, + "step": 3790 + }, + { + "epoch": 2.12, + "learning_rate": 3.233718054410553e-05, + "loss": 5.3624, + "step": 3791 + }, + { + "epoch": 2.13, + "learning_rate": 3.231657048639736e-05, + "loss": 5.4406, + "step": 3792 + }, + { + "epoch": 2.13, + "learning_rate": 3.22959604286892e-05, + "loss": 5.356, + "step": 3793 + }, + { + "epoch": 2.13, + "learning_rate": 3.227535037098104e-05, + "loss": 5.3362, + "step": 3794 + }, + { + "epoch": 2.13, + "learning_rate": 3.2254740313272876e-05, + "loss": 5.3934, + "step": 3795 + }, + { + "epoch": 2.13, + "learning_rate": 3.223413025556471e-05, + "loss": 5.5152, + "step": 3796 + }, + { + "epoch": 2.13, + "learning_rate": 3.2213520197856556e-05, + "loss": 5.1382, + "step": 3797 + }, + { + "epoch": 2.13, + "learning_rate": 3.219291014014839e-05, + "loss": 5.1583, + "step": 3798 + }, + { + "epoch": 2.13, + "learning_rate": 3.217230008244023e-05, + "loss": 5.0157, + "step": 3799 + }, + { + "epoch": 2.13, + "learning_rate": 3.215169002473207e-05, + "loss": 5.317, + "step": 3800 + }, + { + "epoch": 2.13, + "learning_rate": 3.213107996702391e-05, + "loss": 4.9174, + "step": 3801 + }, + { + "epoch": 2.13, + "learning_rate": 3.2110469909315746e-05, + "loss": 5.2968, + "step": 3802 + }, + { + "epoch": 2.13, + "learning_rate": 3.208985985160758e-05, + "loss": 5.0331, + "step": 3803 + }, + { + "epoch": 2.13, + "learning_rate": 3.2069249793899426e-05, + "loss": 5.2858, + "step": 3804 + }, + { + "epoch": 2.13, + "learning_rate": 3.204863973619126e-05, + "loss": 4.7689, + "step": 3805 + }, + { + "epoch": 2.13, + "learning_rate": 3.20280296784831e-05, + "loss": 5.0334, + "step": 3806 + }, + { + "epoch": 2.13, + "learning_rate": 3.200741962077494e-05, + "loss": 5.1767, + "step": 3807 + }, + { + "epoch": 2.13, + "learning_rate": 3.198680956306678e-05, + "loss": 4.8017, + "step": 3808 + }, + { + "epoch": 2.14, + "learning_rate": 3.1966199505358616e-05, + "loss": 4.9128, + "step": 3809 + }, + { + "epoch": 2.14, + "learning_rate": 3.194558944765045e-05, + "loss": 4.7754, + "step": 3810 + }, + { + "epoch": 2.14, + "learning_rate": 3.1924979389942296e-05, + "loss": 4.8946, + "step": 3811 + }, + { + "epoch": 2.14, + "learning_rate": 3.190436933223413e-05, + "loss": 4.5352, + "step": 3812 + }, + { + "epoch": 2.14, + "learning_rate": 3.188375927452597e-05, + "loss": 4.1154, + "step": 3813 + }, + { + "epoch": 2.14, + "learning_rate": 3.1863149216817806e-05, + "loss": 4.3339, + "step": 3814 + }, + { + "epoch": 2.14, + "learning_rate": 3.184253915910965e-05, + "loss": 4.0952, + "step": 3815 + }, + { + "epoch": 2.14, + "learning_rate": 3.1821929101401486e-05, + "loss": 3.558, + "step": 3816 + }, + { + "epoch": 2.14, + "learning_rate": 3.180131904369332e-05, + "loss": 3.3002, + "step": 3817 + }, + { + "epoch": 2.14, + "learning_rate": 3.1780708985985166e-05, + "loss": 3.7926, + "step": 3818 + }, + { + "epoch": 2.14, + "learning_rate": 3.1760098928276996e-05, + "loss": 6.0818, + "step": 3819 + }, + { + "epoch": 2.14, + "learning_rate": 3.173948887056884e-05, + "loss": 5.8426, + "step": 3820 + }, + { + "epoch": 2.14, + "learning_rate": 3.1718878812860676e-05, + "loss": 5.9946, + "step": 3821 + }, + { + "epoch": 2.14, + "learning_rate": 3.169826875515252e-05, + "loss": 5.6831, + "step": 3822 + }, + { + "epoch": 2.14, + "learning_rate": 3.1677658697444356e-05, + "loss": 5.8113, + "step": 3823 + }, + { + "epoch": 2.14, + "learning_rate": 3.165704863973619e-05, + "loss": 5.3997, + "step": 3824 + }, + { + "epoch": 2.14, + "learning_rate": 3.1636438582028036e-05, + "loss": 5.4732, + "step": 3825 + }, + { + "epoch": 2.14, + "learning_rate": 3.1615828524319865e-05, + "loss": 5.1668, + "step": 3826 + }, + { + "epoch": 2.15, + "learning_rate": 3.159521846661171e-05, + "loss": 5.9104, + "step": 3827 + }, + { + "epoch": 2.15, + "learning_rate": 3.1574608408903545e-05, + "loss": 5.5768, + "step": 3828 + }, + { + "epoch": 2.15, + "learning_rate": 3.155399835119538e-05, + "loss": 5.4835, + "step": 3829 + }, + { + "epoch": 2.15, + "learning_rate": 3.1533388293487225e-05, + "loss": 5.3707, + "step": 3830 + }, + { + "epoch": 2.15, + "learning_rate": 3.151277823577906e-05, + "loss": 5.7151, + "step": 3831 + }, + { + "epoch": 2.15, + "learning_rate": 3.1492168178070905e-05, + "loss": 5.2318, + "step": 3832 + }, + { + "epoch": 2.15, + "learning_rate": 3.1471558120362735e-05, + "loss": 5.1811, + "step": 3833 + }, + { + "epoch": 2.15, + "learning_rate": 3.145094806265458e-05, + "loss": 5.1067, + "step": 3834 + }, + { + "epoch": 2.15, + "learning_rate": 3.1430338004946415e-05, + "loss": 5.2139, + "step": 3835 + }, + { + "epoch": 2.15, + "learning_rate": 3.140972794723825e-05, + "loss": 5.3414, + "step": 3836 + }, + { + "epoch": 2.15, + "learning_rate": 3.138911788953009e-05, + "loss": 5.558, + "step": 3837 + }, + { + "epoch": 2.15, + "learning_rate": 3.136850783182193e-05, + "loss": 5.2758, + "step": 3838 + }, + { + "epoch": 2.15, + "learning_rate": 3.134789777411377e-05, + "loss": 5.1165, + "step": 3839 + }, + { + "epoch": 2.15, + "learning_rate": 3.1327287716405605e-05, + "loss": 5.2383, + "step": 3840 + }, + { + "epoch": 2.15, + "learning_rate": 3.130667765869745e-05, + "loss": 5.2664, + "step": 3841 + }, + { + "epoch": 2.15, + "learning_rate": 3.1286067600989285e-05, + "loss": 5.0617, + "step": 3842 + }, + { + "epoch": 2.15, + "learning_rate": 3.126545754328112e-05, + "loss": 5.102, + "step": 3843 + }, + { + "epoch": 2.15, + "learning_rate": 3.124484748557296e-05, + "loss": 5.125, + "step": 3844 + }, + { + "epoch": 2.16, + "learning_rate": 3.12242374278648e-05, + "loss": 4.9934, + "step": 3845 + }, + { + "epoch": 2.16, + "learning_rate": 3.120362737015664e-05, + "loss": 5.1845, + "step": 3846 + }, + { + "epoch": 2.16, + "learning_rate": 3.1183017312448475e-05, + "loss": 5.3169, + "step": 3847 + }, + { + "epoch": 2.16, + "learning_rate": 3.116240725474032e-05, + "loss": 5.3992, + "step": 3848 + }, + { + "epoch": 2.16, + "learning_rate": 3.1141797197032155e-05, + "loss": 5.0736, + "step": 3849 + }, + { + "epoch": 2.16, + "learning_rate": 3.112118713932399e-05, + "loss": 4.9542, + "step": 3850 + }, + { + "epoch": 2.16, + "learning_rate": 3.110057708161583e-05, + "loss": 4.9805, + "step": 3851 + }, + { + "epoch": 2.16, + "learning_rate": 3.107996702390767e-05, + "loss": 4.9067, + "step": 3852 + }, + { + "epoch": 2.16, + "learning_rate": 3.10593569661995e-05, + "loss": 5.0144, + "step": 3853 + }, + { + "epoch": 2.16, + "learning_rate": 3.1038746908491344e-05, + "loss": 5.2052, + "step": 3854 + }, + { + "epoch": 2.16, + "learning_rate": 3.101813685078319e-05, + "loss": 5.1837, + "step": 3855 + }, + { + "epoch": 2.16, + "learning_rate": 3.0997526793075024e-05, + "loss": 4.9921, + "step": 3856 + }, + { + "epoch": 2.16, + "learning_rate": 3.097691673536686e-05, + "loss": 5.4291, + "step": 3857 + }, + { + "epoch": 2.16, + "learning_rate": 3.09563066776587e-05, + "loss": 4.9635, + "step": 3858 + }, + { + "epoch": 2.16, + "learning_rate": 3.093569661995054e-05, + "loss": 5.2906, + "step": 3859 + }, + { + "epoch": 2.16, + "learning_rate": 3.091508656224237e-05, + "loss": 4.7043, + "step": 3860 + }, + { + "epoch": 2.16, + "learning_rate": 3.0894476504534214e-05, + "loss": 4.5993, + "step": 3861 + }, + { + "epoch": 2.16, + "learning_rate": 3.087386644682606e-05, + "loss": 4.4296, + "step": 3862 + }, + { + "epoch": 2.17, + "learning_rate": 3.085325638911789e-05, + "loss": 4.3902, + "step": 3863 + }, + { + "epoch": 2.17, + "learning_rate": 3.083264633140973e-05, + "loss": 3.9293, + "step": 3864 + }, + { + "epoch": 2.17, + "learning_rate": 3.081203627370157e-05, + "loss": 3.8899, + "step": 3865 + }, + { + "epoch": 2.17, + "learning_rate": 3.079142621599341e-05, + "loss": 3.5067, + "step": 3866 + }, + { + "epoch": 2.17, + "learning_rate": 3.077081615828524e-05, + "loss": 4.6393, + "step": 3867 + }, + { + "epoch": 2.17, + "learning_rate": 3.0750206100577084e-05, + "loss": 3.4679, + "step": 3868 + }, + { + "epoch": 2.17, + "learning_rate": 3.072959604286892e-05, + "loss": 6.391, + "step": 3869 + }, + { + "epoch": 2.17, + "learning_rate": 3.070898598516076e-05, + "loss": 6.5247, + "step": 3870 + }, + { + "epoch": 2.17, + "learning_rate": 3.06883759274526e-05, + "loss": 6.3184, + "step": 3871 + }, + { + "epoch": 2.17, + "learning_rate": 3.066776586974444e-05, + "loss": 6.328, + "step": 3872 + }, + { + "epoch": 2.17, + "learning_rate": 3.0647155812036274e-05, + "loss": 6.0418, + "step": 3873 + }, + { + "epoch": 2.17, + "learning_rate": 3.062654575432811e-05, + "loss": 6.0158, + "step": 3874 + }, + { + "epoch": 2.17, + "learning_rate": 3.0605935696619954e-05, + "loss": 6.0142, + "step": 3875 + }, + { + "epoch": 2.17, + "learning_rate": 3.058532563891179e-05, + "loss": 5.4299, + "step": 3876 + }, + { + "epoch": 2.17, + "learning_rate": 3.056471558120363e-05, + "loss": 5.3817, + "step": 3877 + }, + { + "epoch": 2.17, + "learning_rate": 3.054410552349547e-05, + "loss": 5.5534, + "step": 3878 + }, + { + "epoch": 2.17, + "learning_rate": 3.052349546578731e-05, + "loss": 5.7637, + "step": 3879 + }, + { + "epoch": 2.17, + "learning_rate": 3.0502885408079147e-05, + "loss": 5.6737, + "step": 3880 + }, + { + "epoch": 2.18, + "learning_rate": 3.048227535037098e-05, + "loss": 5.5298, + "step": 3881 + }, + { + "epoch": 2.18, + "learning_rate": 3.0461665292662824e-05, + "loss": 5.5723, + "step": 3882 + }, + { + "epoch": 2.18, + "learning_rate": 3.0441055234954657e-05, + "loss": 5.9206, + "step": 3883 + }, + { + "epoch": 2.18, + "learning_rate": 3.0420445177246497e-05, + "loss": 5.5486, + "step": 3884 + }, + { + "epoch": 2.18, + "learning_rate": 3.0399835119538333e-05, + "loss": 5.2267, + "step": 3885 + }, + { + "epoch": 2.18, + "learning_rate": 3.0379225061830173e-05, + "loss": 5.2051, + "step": 3886 + }, + { + "epoch": 2.18, + "learning_rate": 3.0358615004122017e-05, + "loss": 5.2276, + "step": 3887 + }, + { + "epoch": 2.18, + "learning_rate": 3.033800494641385e-05, + "loss": 5.3668, + "step": 3888 + }, + { + "epoch": 2.18, + "learning_rate": 3.031739488870569e-05, + "loss": 5.4339, + "step": 3889 + }, + { + "epoch": 2.18, + "learning_rate": 3.0296784830997527e-05, + "loss": 5.4779, + "step": 3890 + }, + { + "epoch": 2.18, + "learning_rate": 3.0276174773289367e-05, + "loss": 5.591, + "step": 3891 + }, + { + "epoch": 2.18, + "learning_rate": 3.0255564715581203e-05, + "loss": 5.379, + "step": 3892 + }, + { + "epoch": 2.18, + "learning_rate": 3.0234954657873043e-05, + "loss": 5.2998, + "step": 3893 + }, + { + "epoch": 2.18, + "learning_rate": 3.0214344600164883e-05, + "loss": 5.2557, + "step": 3894 + }, + { + "epoch": 2.18, + "learning_rate": 3.019373454245672e-05, + "loss": 5.3758, + "step": 3895 + }, + { + "epoch": 2.18, + "learning_rate": 3.017312448474856e-05, + "loss": 5.1167, + "step": 3896 + }, + { + "epoch": 2.18, + "learning_rate": 3.0152514427040396e-05, + "loss": 5.1509, + "step": 3897 + }, + { + "epoch": 2.18, + "learning_rate": 3.0131904369332236e-05, + "loss": 5.2189, + "step": 3898 + }, + { + "epoch": 2.19, + "learning_rate": 3.0111294311624073e-05, + "loss": 4.8377, + "step": 3899 + }, + { + "epoch": 2.19, + "learning_rate": 3.0090684253915913e-05, + "loss": 5.3997, + "step": 3900 + }, + { + "epoch": 2.19, + "learning_rate": 3.007007419620775e-05, + "loss": 5.3032, + "step": 3901 + }, + { + "epoch": 2.19, + "learning_rate": 3.004946413849959e-05, + "loss": 5.3173, + "step": 3902 + }, + { + "epoch": 2.19, + "learning_rate": 3.002885408079143e-05, + "loss": 5.6327, + "step": 3903 + }, + { + "epoch": 2.19, + "learning_rate": 3.0008244023083266e-05, + "loss": 5.2228, + "step": 3904 + }, + { + "epoch": 2.19, + "learning_rate": 2.9987633965375106e-05, + "loss": 5.0053, + "step": 3905 + }, + { + "epoch": 2.19, + "learning_rate": 2.9967023907666943e-05, + "loss": 4.9627, + "step": 3906 + }, + { + "epoch": 2.19, + "learning_rate": 2.9946413849958783e-05, + "loss": 4.9666, + "step": 3907 + }, + { + "epoch": 2.19, + "learning_rate": 2.9925803792250616e-05, + "loss": 5.0445, + "step": 3908 + }, + { + "epoch": 2.19, + "learning_rate": 2.990519373454246e-05, + "loss": 5.1057, + "step": 3909 + }, + { + "epoch": 2.19, + "learning_rate": 2.98845836768343e-05, + "loss": 5.0148, + "step": 3910 + }, + { + "epoch": 2.19, + "learning_rate": 2.9863973619126136e-05, + "loss": 4.9458, + "step": 3911 + }, + { + "epoch": 2.19, + "learning_rate": 2.9843363561417976e-05, + "loss": 4.4856, + "step": 3912 + }, + { + "epoch": 2.19, + "learning_rate": 2.982275350370981e-05, + "loss": 4.4098, + "step": 3913 + }, + { + "epoch": 2.19, + "learning_rate": 2.9802143446001652e-05, + "loss": 4.0592, + "step": 3914 + }, + { + "epoch": 2.19, + "learning_rate": 2.9781533388293486e-05, + "loss": 4.4005, + "step": 3915 + }, + { + "epoch": 2.2, + "learning_rate": 2.976092333058533e-05, + "loss": 3.8026, + "step": 3916 + }, + { + "epoch": 2.2, + "learning_rate": 2.9740313272877162e-05, + "loss": 3.9945, + "step": 3917 + }, + { + "epoch": 2.2, + "learning_rate": 2.9719703215169002e-05, + "loss": 3.5634, + "step": 3918 + }, + { + "epoch": 2.2, + "learning_rate": 2.9699093157460846e-05, + "loss": 6.1392, + "step": 3919 + }, + { + "epoch": 2.2, + "learning_rate": 2.967848309975268e-05, + "loss": 6.0442, + "step": 3920 + }, + { + "epoch": 2.2, + "learning_rate": 2.965787304204452e-05, + "loss": 5.8057, + "step": 3921 + }, + { + "epoch": 2.2, + "learning_rate": 2.9637262984336355e-05, + "loss": 5.7901, + "step": 3922 + }, + { + "epoch": 2.2, + "learning_rate": 2.9616652926628195e-05, + "loss": 6.1277, + "step": 3923 + }, + { + "epoch": 2.2, + "learning_rate": 2.9596042868920032e-05, + "loss": 5.7292, + "step": 3924 + }, + { + "epoch": 2.2, + "learning_rate": 2.9575432811211872e-05, + "loss": 5.4492, + "step": 3925 + }, + { + "epoch": 2.2, + "learning_rate": 2.9554822753503712e-05, + "loss": 5.381, + "step": 3926 + }, + { + "epoch": 2.2, + "learning_rate": 2.953421269579555e-05, + "loss": 5.226, + "step": 3927 + }, + { + "epoch": 2.2, + "learning_rate": 2.951360263808739e-05, + "loss": 5.2521, + "step": 3928 + }, + { + "epoch": 2.2, + "learning_rate": 2.9492992580379225e-05, + "loss": 5.4909, + "step": 3929 + }, + { + "epoch": 2.2, + "learning_rate": 2.9472382522671065e-05, + "loss": 5.2135, + "step": 3930 + }, + { + "epoch": 2.2, + "learning_rate": 2.9451772464962902e-05, + "loss": 5.6963, + "step": 3931 + }, + { + "epoch": 2.2, + "learning_rate": 2.9431162407254742e-05, + "loss": 5.4955, + "step": 3932 + }, + { + "epoch": 2.2, + "learning_rate": 2.9410552349546582e-05, + "loss": 5.7687, + "step": 3933 + }, + { + "epoch": 2.21, + "learning_rate": 2.938994229183842e-05, + "loss": 5.4701, + "step": 3934 + }, + { + "epoch": 2.21, + "learning_rate": 2.936933223413026e-05, + "loss": 5.2873, + "step": 3935 + }, + { + "epoch": 2.21, + "learning_rate": 2.9348722176422095e-05, + "loss": 5.054, + "step": 3936 + }, + { + "epoch": 2.21, + "learning_rate": 2.9328112118713935e-05, + "loss": 5.5048, + "step": 3937 + }, + { + "epoch": 2.21, + "learning_rate": 2.930750206100577e-05, + "loss": 5.3579, + "step": 3938 + }, + { + "epoch": 2.21, + "learning_rate": 2.928689200329761e-05, + "loss": 5.4576, + "step": 3939 + }, + { + "epoch": 2.21, + "learning_rate": 2.9266281945589448e-05, + "loss": 5.1547, + "step": 3940 + }, + { + "epoch": 2.21, + "learning_rate": 2.9245671887881288e-05, + "loss": 5.2438, + "step": 3941 + }, + { + "epoch": 2.21, + "learning_rate": 2.9225061830173128e-05, + "loss": 5.0162, + "step": 3942 + }, + { + "epoch": 2.21, + "learning_rate": 2.9204451772464965e-05, + "loss": 5.2216, + "step": 3943 + }, + { + "epoch": 2.21, + "learning_rate": 2.9183841714756805e-05, + "loss": 5.2949, + "step": 3944 + }, + { + "epoch": 2.21, + "learning_rate": 2.9163231657048638e-05, + "loss": 5.0976, + "step": 3945 + }, + { + "epoch": 2.21, + "learning_rate": 2.914262159934048e-05, + "loss": 5.1048, + "step": 3946 + }, + { + "epoch": 2.21, + "learning_rate": 2.9122011541632315e-05, + "loss": 5.1052, + "step": 3947 + }, + { + "epoch": 2.21, + "learning_rate": 2.9101401483924158e-05, + "loss": 5.2757, + "step": 3948 + }, + { + "epoch": 2.21, + "learning_rate": 2.9080791426215998e-05, + "loss": 5.2104, + "step": 3949 + }, + { + "epoch": 2.21, + "learning_rate": 2.906018136850783e-05, + "loss": 4.9563, + "step": 3950 + }, + { + "epoch": 2.21, + "learning_rate": 2.9039571310799675e-05, + "loss": 4.9768, + "step": 3951 + }, + { + "epoch": 2.22, + "learning_rate": 2.9018961253091508e-05, + "loss": 5.4829, + "step": 3952 + }, + { + "epoch": 2.22, + "learning_rate": 2.899835119538335e-05, + "loss": 4.9419, + "step": 3953 + }, + { + "epoch": 2.22, + "learning_rate": 2.8977741137675184e-05, + "loss": 4.9943, + "step": 3954 + }, + { + "epoch": 2.22, + "learning_rate": 2.8957131079967024e-05, + "loss": 4.8308, + "step": 3955 + }, + { + "epoch": 2.22, + "learning_rate": 2.893652102225886e-05, + "loss": 4.7126, + "step": 3956 + }, + { + "epoch": 2.22, + "learning_rate": 2.89159109645507e-05, + "loss": 4.7118, + "step": 3957 + }, + { + "epoch": 2.22, + "learning_rate": 2.8895300906842544e-05, + "loss": 4.7946, + "step": 3958 + }, + { + "epoch": 2.22, + "learning_rate": 2.8874690849134378e-05, + "loss": 4.7423, + "step": 3959 + }, + { + "epoch": 2.22, + "learning_rate": 2.8854080791426218e-05, + "loss": 4.6509, + "step": 3960 + }, + { + "epoch": 2.22, + "learning_rate": 2.8833470733718054e-05, + "loss": 4.3876, + "step": 3961 + }, + { + "epoch": 2.22, + "learning_rate": 2.8812860676009894e-05, + "loss": 4.3694, + "step": 3962 + }, + { + "epoch": 2.22, + "learning_rate": 2.879225061830173e-05, + "loss": 4.2582, + "step": 3963 + }, + { + "epoch": 2.22, + "learning_rate": 2.877164056059357e-05, + "loss": 4.3259, + "step": 3964 + }, + { + "epoch": 2.22, + "learning_rate": 2.875103050288541e-05, + "loss": 4.1985, + "step": 3965 + }, + { + "epoch": 2.22, + "learning_rate": 2.8730420445177247e-05, + "loss": 4.2072, + "step": 3966 + }, + { + "epoch": 2.22, + "learning_rate": 2.8709810387469087e-05, + "loss": 3.7347, + "step": 3967 + }, + { + "epoch": 2.22, + "learning_rate": 2.8689200329760924e-05, + "loss": 3.2268, + "step": 3968 + }, + { + "epoch": 2.22, + "learning_rate": 2.8668590272052764e-05, + "loss": 6.2438, + "step": 3969 + }, + { + "epoch": 2.23, + "learning_rate": 2.86479802143446e-05, + "loss": 6.1856, + "step": 3970 + }, + { + "epoch": 2.23, + "learning_rate": 2.862737015663644e-05, + "loss": 6.2211, + "step": 3971 + }, + { + "epoch": 2.23, + "learning_rate": 2.8606760098928277e-05, + "loss": 5.7582, + "step": 3972 + }, + { + "epoch": 2.23, + "learning_rate": 2.8586150041220117e-05, + "loss": 6.2252, + "step": 3973 + }, + { + "epoch": 2.23, + "learning_rate": 2.8565539983511957e-05, + "loss": 5.6915, + "step": 3974 + }, + { + "epoch": 2.23, + "learning_rate": 2.8544929925803794e-05, + "loss": 5.6016, + "step": 3975 + }, + { + "epoch": 2.23, + "learning_rate": 2.8524319868095634e-05, + "loss": 5.4556, + "step": 3976 + }, + { + "epoch": 2.23, + "learning_rate": 2.850370981038747e-05, + "loss": 5.2926, + "step": 3977 + }, + { + "epoch": 2.23, + "learning_rate": 2.848309975267931e-05, + "loss": 5.2015, + "step": 3978 + }, + { + "epoch": 2.23, + "learning_rate": 2.8462489694971144e-05, + "loss": 5.8511, + "step": 3979 + }, + { + "epoch": 2.23, + "learning_rate": 2.8441879637262987e-05, + "loss": 5.5824, + "step": 3980 + }, + { + "epoch": 2.23, + "learning_rate": 2.8421269579554827e-05, + "loss": 5.2944, + "step": 3981 + }, + { + "epoch": 2.23, + "learning_rate": 2.8400659521846663e-05, + "loss": 5.3682, + "step": 3982 + }, + { + "epoch": 2.23, + "learning_rate": 2.8380049464138503e-05, + "loss": 5.5538, + "step": 3983 + }, + { + "epoch": 2.23, + "learning_rate": 2.8359439406430337e-05, + "loss": 5.6107, + "step": 3984 + }, + { + "epoch": 2.23, + "learning_rate": 2.833882934872218e-05, + "loss": 5.4116, + "step": 3985 + }, + { + "epoch": 2.23, + "learning_rate": 2.8318219291014013e-05, + "loss": 5.1445, + "step": 3986 + }, + { + "epoch": 2.23, + "learning_rate": 2.8297609233305857e-05, + "loss": 5.2883, + "step": 3987 + }, + { + "epoch": 2.24, + "learning_rate": 2.827699917559769e-05, + "loss": 5.2516, + "step": 3988 + }, + { + "epoch": 2.24, + "learning_rate": 2.825638911788953e-05, + "loss": 5.1569, + "step": 3989 + }, + { + "epoch": 2.24, + "learning_rate": 2.8235779060181373e-05, + "loss": 5.5986, + "step": 3990 + }, + { + "epoch": 2.24, + "learning_rate": 2.8215169002473206e-05, + "loss": 5.3054, + "step": 3991 + }, + { + "epoch": 2.24, + "learning_rate": 2.819455894476505e-05, + "loss": 5.4124, + "step": 3992 + }, + { + "epoch": 2.24, + "learning_rate": 2.8173948887056883e-05, + "loss": 5.3458, + "step": 3993 + }, + { + "epoch": 2.24, + "learning_rate": 2.8153338829348723e-05, + "loss": 4.9954, + "step": 3994 + }, + { + "epoch": 2.24, + "learning_rate": 2.813272877164056e-05, + "loss": 5.3204, + "step": 3995 + }, + { + "epoch": 2.24, + "learning_rate": 2.81121187139324e-05, + "loss": 5.0079, + "step": 3996 + }, + { + "epoch": 2.24, + "learning_rate": 2.809150865622424e-05, + "loss": 5.1027, + "step": 3997 + }, + { + "epoch": 2.24, + "learning_rate": 2.8070898598516076e-05, + "loss": 5.2928, + "step": 3998 + }, + { + "epoch": 2.24, + "learning_rate": 2.8050288540807916e-05, + "loss": 5.0121, + "step": 3999 + }, + { + "epoch": 2.24, + "learning_rate": 2.8029678483099753e-05, + "loss": 4.9793, + "step": 4000 + }, + { + "epoch": 2.24, + "eval_loss": 18.32020378112793, + "eval_runtime": 1340.7601, + "eval_samples_per_second": 1.971, + "eval_steps_per_second": 0.247, + "eval_wer": 1.000654502181674, + "step": 4000 + }, + { + "epoch": 2.24, + "learning_rate": 2.8009068425391593e-05, + "loss": 5.1098, + "step": 4001 + }, + { + "epoch": 2.24, + "learning_rate": 2.798845836768343e-05, + "loss": 5.0272, + "step": 4002 + }, + { + "epoch": 2.24, + "learning_rate": 2.796784830997527e-05, + "loss": 5.0944, + "step": 4003 + }, + { + "epoch": 2.24, + "learning_rate": 2.7947238252267106e-05, + "loss": 4.9297, + "step": 4004 + }, + { + "epoch": 2.24, + "learning_rate": 2.7926628194558946e-05, + "loss": 5.3979, + "step": 4005 + }, + { + "epoch": 2.25, + "learning_rate": 2.7906018136850786e-05, + "loss": 5.1523, + "step": 4006 + }, + { + "epoch": 2.25, + "learning_rate": 2.7885408079142623e-05, + "loss": 4.8398, + "step": 4007 + }, + { + "epoch": 2.25, + "learning_rate": 2.7864798021434463e-05, + "loss": 4.6662, + "step": 4008 + }, + { + "epoch": 2.25, + "learning_rate": 2.78441879637263e-05, + "loss": 4.6269, + "step": 4009 + }, + { + "epoch": 2.25, + "learning_rate": 2.782357790601814e-05, + "loss": 4.6741, + "step": 4010 + }, + { + "epoch": 2.25, + "learning_rate": 2.7802967848309976e-05, + "loss": 4.5262, + "step": 4011 + }, + { + "epoch": 2.25, + "learning_rate": 2.7782357790601816e-05, + "loss": 4.5681, + "step": 4012 + }, + { + "epoch": 2.25, + "learning_rate": 2.7761747732893656e-05, + "loss": 4.0361, + "step": 4013 + }, + { + "epoch": 2.25, + "learning_rate": 2.7741137675185492e-05, + "loss": 3.8852, + "step": 4014 + }, + { + "epoch": 2.25, + "learning_rate": 2.7720527617477332e-05, + "loss": 3.6479, + "step": 4015 + }, + { + "epoch": 2.25, + "learning_rate": 2.769991755976917e-05, + "loss": 4.1979, + "step": 4016 + }, + { + "epoch": 2.25, + "learning_rate": 2.767930750206101e-05, + "loss": 3.4106, + "step": 4017 + }, + { + "epoch": 2.25, + "learning_rate": 2.7658697444352842e-05, + "loss": 3.6541, + "step": 4018 + }, + { + "epoch": 2.25, + "learning_rate": 2.7638087386644686e-05, + "loss": 5.8831, + "step": 4019 + }, + { + "epoch": 2.25, + "learning_rate": 2.761747732893652e-05, + "loss": 5.681, + "step": 4020 + }, + { + "epoch": 2.25, + "learning_rate": 2.759686727122836e-05, + "loss": 5.4914, + "step": 4021 + }, + { + "epoch": 2.25, + "learning_rate": 2.7576257213520202e-05, + "loss": 5.7874, + "step": 4022 + }, + { + "epoch": 2.26, + "learning_rate": 2.7555647155812035e-05, + "loss": 5.2653, + "step": 4023 + }, + { + "epoch": 2.26, + "learning_rate": 2.753503709810388e-05, + "loss": 5.5166, + "step": 4024 + }, + { + "epoch": 2.26, + "learning_rate": 2.7514427040395712e-05, + "loss": 5.4618, + "step": 4025 + }, + { + "epoch": 2.26, + "learning_rate": 2.7493816982687552e-05, + "loss": 5.3301, + "step": 4026 + }, + { + "epoch": 2.26, + "learning_rate": 2.747320692497939e-05, + "loss": 5.4028, + "step": 4027 + }, + { + "epoch": 2.26, + "learning_rate": 2.745259686727123e-05, + "loss": 5.4027, + "step": 4028 + }, + { + "epoch": 2.26, + "learning_rate": 2.7431986809563072e-05, + "loss": 5.3485, + "step": 4029 + }, + { + "epoch": 2.26, + "learning_rate": 2.7411376751854905e-05, + "loss": 5.2613, + "step": 4030 + }, + { + "epoch": 2.26, + "learning_rate": 2.7390766694146745e-05, + "loss": 5.4898, + "step": 4031 + }, + { + "epoch": 2.26, + "learning_rate": 2.7370156636438582e-05, + "loss": 5.0799, + "step": 4032 + }, + { + "epoch": 2.26, + "learning_rate": 2.7349546578730422e-05, + "loss": 5.4537, + "step": 4033 + }, + { + "epoch": 2.26, + "learning_rate": 2.732893652102226e-05, + "loss": 5.0737, + "step": 4034 + }, + { + "epoch": 2.26, + "learning_rate": 2.73083264633141e-05, + "loss": 5.1496, + "step": 4035 + }, + { + "epoch": 2.26, + "learning_rate": 2.7287716405605935e-05, + "loss": 4.9672, + "step": 4036 + }, + { + "epoch": 2.26, + "learning_rate": 2.7267106347897775e-05, + "loss": 5.3236, + "step": 4037 + }, + { + "epoch": 2.26, + "learning_rate": 2.7246496290189615e-05, + "loss": 5.1814, + "step": 4038 + }, + { + "epoch": 2.26, + "learning_rate": 2.722588623248145e-05, + "loss": 4.9978, + "step": 4039 + }, + { + "epoch": 2.26, + "learning_rate": 2.720527617477329e-05, + "loss": 5.0351, + "step": 4040 + }, + { + "epoch": 2.27, + "learning_rate": 2.7184666117065128e-05, + "loss": 5.1337, + "step": 4041 + }, + { + "epoch": 2.27, + "learning_rate": 2.7164056059356968e-05, + "loss": 4.9615, + "step": 4042 + }, + { + "epoch": 2.27, + "learning_rate": 2.7143446001648805e-05, + "loss": 5.0481, + "step": 4043 + }, + { + "epoch": 2.27, + "learning_rate": 2.7122835943940645e-05, + "loss": 5.0776, + "step": 4044 + }, + { + "epoch": 2.27, + "learning_rate": 2.7102225886232485e-05, + "loss": 5.2648, + "step": 4045 + }, + { + "epoch": 2.27, + "learning_rate": 2.708161582852432e-05, + "loss": 5.2063, + "step": 4046 + }, + { + "epoch": 2.27, + "learning_rate": 2.706100577081616e-05, + "loss": 4.9996, + "step": 4047 + }, + { + "epoch": 2.27, + "learning_rate": 2.7040395713107998e-05, + "loss": 5.0965, + "step": 4048 + }, + { + "epoch": 2.27, + "learning_rate": 2.7019785655399838e-05, + "loss": 5.0997, + "step": 4049 + }, + { + "epoch": 2.27, + "learning_rate": 2.699917559769167e-05, + "loss": 4.9985, + "step": 4050 + }, + { + "epoch": 2.27, + "learning_rate": 2.6978565539983514e-05, + "loss": 4.8428, + "step": 4051 + }, + { + "epoch": 2.27, + "learning_rate": 2.6957955482275354e-05, + "loss": 5.2227, + "step": 4052 + }, + { + "epoch": 2.27, + "learning_rate": 2.693734542456719e-05, + "loss": 5.2121, + "step": 4053 + }, + { + "epoch": 2.27, + "learning_rate": 2.691673536685903e-05, + "loss": 4.8507, + "step": 4054 + }, + { + "epoch": 2.27, + "learning_rate": 2.6896125309150864e-05, + "loss": 4.8432, + "step": 4055 + }, + { + "epoch": 2.27, + "learning_rate": 2.6875515251442708e-05, + "loss": 4.9215, + "step": 4056 + }, + { + "epoch": 2.27, + "learning_rate": 2.685490519373454e-05, + "loss": 4.9624, + "step": 4057 + }, + { + "epoch": 2.27, + "learning_rate": 2.6834295136026384e-05, + "loss": 5.0619, + "step": 4058 + }, + { + "epoch": 2.28, + "learning_rate": 2.6813685078318217e-05, + "loss": 4.8239, + "step": 4059 + }, + { + "epoch": 2.28, + "learning_rate": 2.6793075020610057e-05, + "loss": 4.8018, + "step": 4060 + }, + { + "epoch": 2.28, + "learning_rate": 2.67724649629019e-05, + "loss": 4.5594, + "step": 4061 + }, + { + "epoch": 2.28, + "learning_rate": 2.6751854905193734e-05, + "loss": 4.222, + "step": 4062 + }, + { + "epoch": 2.28, + "learning_rate": 2.6731244847485577e-05, + "loss": 4.845, + "step": 4063 + }, + { + "epoch": 2.28, + "learning_rate": 2.671063478977741e-05, + "loss": 5.0746, + "step": 4064 + }, + { + "epoch": 2.28, + "learning_rate": 2.669002473206925e-05, + "loss": 3.9455, + "step": 4065 + }, + { + "epoch": 2.28, + "learning_rate": 2.6669414674361087e-05, + "loss": 3.6605, + "step": 4066 + }, + { + "epoch": 2.28, + "learning_rate": 2.6648804616652927e-05, + "loss": 3.3944, + "step": 4067 + }, + { + "epoch": 2.28, + "learning_rate": 2.662819455894477e-05, + "loss": 3.5655, + "step": 4068 + }, + { + "epoch": 2.28, + "learning_rate": 2.6607584501236604e-05, + "loss": 5.851, + "step": 4069 + }, + { + "epoch": 2.28, + "learning_rate": 2.6586974443528444e-05, + "loss": 5.7427, + "step": 4070 + }, + { + "epoch": 2.28, + "learning_rate": 2.656636438582028e-05, + "loss": 5.6979, + "step": 4071 + }, + { + "epoch": 2.28, + "learning_rate": 2.654575432811212e-05, + "loss": 5.7124, + "step": 4072 + }, + { + "epoch": 2.28, + "learning_rate": 2.6525144270403957e-05, + "loss": 5.4221, + "step": 4073 + }, + { + "epoch": 2.28, + "learning_rate": 2.6504534212695797e-05, + "loss": 5.367, + "step": 4074 + }, + { + "epoch": 2.28, + "learning_rate": 2.6483924154987634e-05, + "loss": 5.5586, + "step": 4075 + }, + { + "epoch": 2.28, + "learning_rate": 2.6463314097279474e-05, + "loss": 5.3854, + "step": 4076 + }, + { + "epoch": 2.29, + "learning_rate": 2.6442704039571314e-05, + "loss": 5.284, + "step": 4077 + }, + { + "epoch": 2.29, + "learning_rate": 2.642209398186315e-05, + "loss": 5.303, + "step": 4078 + }, + { + "epoch": 2.29, + "learning_rate": 2.640148392415499e-05, + "loss": 5.4532, + "step": 4079 + }, + { + "epoch": 2.29, + "learning_rate": 2.6380873866446827e-05, + "loss": 5.1557, + "step": 4080 + }, + { + "epoch": 2.29, + "learning_rate": 2.6360263808738667e-05, + "loss": 5.1029, + "step": 4081 + }, + { + "epoch": 2.29, + "learning_rate": 2.6339653751030503e-05, + "loss": 5.1088, + "step": 4082 + }, + { + "epoch": 2.29, + "learning_rate": 2.6319043693322343e-05, + "loss": 5.4688, + "step": 4083 + }, + { + "epoch": 2.29, + "learning_rate": 2.6298433635614183e-05, + "loss": 5.4663, + "step": 4084 + }, + { + "epoch": 2.29, + "learning_rate": 2.627782357790602e-05, + "loss": 5.2992, + "step": 4085 + }, + { + "epoch": 2.29, + "learning_rate": 2.625721352019786e-05, + "loss": 5.1202, + "step": 4086 + }, + { + "epoch": 2.29, + "learning_rate": 2.6236603462489697e-05, + "loss": 5.2048, + "step": 4087 + }, + { + "epoch": 2.29, + "learning_rate": 2.6215993404781537e-05, + "loss": 5.1285, + "step": 4088 + }, + { + "epoch": 2.29, + "learning_rate": 2.619538334707337e-05, + "loss": 5.47, + "step": 4089 + }, + { + "epoch": 2.29, + "learning_rate": 2.6174773289365213e-05, + "loss": 5.2477, + "step": 4090 + }, + { + "epoch": 2.29, + "learning_rate": 2.6154163231657046e-05, + "loss": 5.4747, + "step": 4091 + }, + { + "epoch": 2.29, + "learning_rate": 2.613355317394889e-05, + "loss": 5.3384, + "step": 4092 + }, + { + "epoch": 2.29, + "learning_rate": 2.611294311624073e-05, + "loss": 5.5069, + "step": 4093 + }, + { + "epoch": 2.29, + "learning_rate": 2.6092333058532563e-05, + "loss": 5.2542, + "step": 4094 + }, + { + "epoch": 2.3, + "learning_rate": 2.6071723000824406e-05, + "loss": 5.2138, + "step": 4095 + }, + { + "epoch": 2.3, + "learning_rate": 2.605111294311624e-05, + "loss": 5.2769, + "step": 4096 + }, + { + "epoch": 2.3, + "learning_rate": 2.603050288540808e-05, + "loss": 5.181, + "step": 4097 + }, + { + "epoch": 2.3, + "learning_rate": 2.6009892827699916e-05, + "loss": 5.2214, + "step": 4098 + }, + { + "epoch": 2.3, + "learning_rate": 2.5989282769991756e-05, + "loss": 4.9507, + "step": 4099 + }, + { + "epoch": 2.3, + "learning_rate": 2.59686727122836e-05, + "loss": 5.2979, + "step": 4100 + }, + { + "epoch": 2.3, + "learning_rate": 2.5948062654575433e-05, + "loss": 4.8309, + "step": 4101 + }, + { + "epoch": 2.3, + "learning_rate": 2.5927452596867273e-05, + "loss": 5.0383, + "step": 4102 + }, + { + "epoch": 2.3, + "learning_rate": 2.590684253915911e-05, + "loss": 4.7602, + "step": 4103 + }, + { + "epoch": 2.3, + "learning_rate": 2.588623248145095e-05, + "loss": 5.1394, + "step": 4104 + }, + { + "epoch": 2.3, + "learning_rate": 2.5865622423742786e-05, + "loss": 4.8453, + "step": 4105 + }, + { + "epoch": 2.3, + "learning_rate": 2.5845012366034626e-05, + "loss": 4.861, + "step": 4106 + }, + { + "epoch": 2.3, + "learning_rate": 2.5824402308326463e-05, + "loss": 4.8054, + "step": 4107 + }, + { + "epoch": 2.3, + "learning_rate": 2.5803792250618303e-05, + "loss": 4.9264, + "step": 4108 + }, + { + "epoch": 2.3, + "learning_rate": 2.5783182192910143e-05, + "loss": 4.7961, + "step": 4109 + }, + { + "epoch": 2.3, + "learning_rate": 2.576257213520198e-05, + "loss": 4.7316, + "step": 4110 + }, + { + "epoch": 2.3, + "learning_rate": 2.574196207749382e-05, + "loss": 4.6066, + "step": 4111 + }, + { + "epoch": 2.3, + "learning_rate": 2.5721352019785656e-05, + "loss": 4.35, + "step": 4112 + }, + { + "epoch": 2.31, + "learning_rate": 2.5700741962077496e-05, + "loss": 3.8463, + "step": 4113 + }, + { + "epoch": 2.31, + "learning_rate": 2.5680131904369332e-05, + "loss": 4.1212, + "step": 4114 + }, + { + "epoch": 2.31, + "learning_rate": 2.5659521846661172e-05, + "loss": 3.798, + "step": 4115 + }, + { + "epoch": 2.31, + "learning_rate": 2.5638911788953012e-05, + "loss": 3.6899, + "step": 4116 + }, + { + "epoch": 2.31, + "learning_rate": 2.561830173124485e-05, + "loss": 3.0788, + "step": 4117 + }, + { + "epoch": 2.31, + "learning_rate": 2.559769167353669e-05, + "loss": 2.9089, + "step": 4118 + }, + { + "epoch": 2.31, + "learning_rate": 2.5577081615828525e-05, + "loss": 5.7189, + "step": 4119 + }, + { + "epoch": 2.31, + "learning_rate": 2.5556471558120365e-05, + "loss": 6.112, + "step": 4120 + }, + { + "epoch": 2.31, + "learning_rate": 2.55358615004122e-05, + "loss": 5.7605, + "step": 4121 + }, + { + "epoch": 2.31, + "learning_rate": 2.5515251442704042e-05, + "loss": 5.728, + "step": 4122 + }, + { + "epoch": 2.31, + "learning_rate": 2.5494641384995875e-05, + "loss": 5.6969, + "step": 4123 + }, + { + "epoch": 2.31, + "learning_rate": 2.547403132728772e-05, + "loss": 5.5737, + "step": 4124 + }, + { + "epoch": 2.31, + "learning_rate": 2.545342126957956e-05, + "loss": 5.3179, + "step": 4125 + }, + { + "epoch": 2.31, + "learning_rate": 2.5432811211871392e-05, + "loss": 5.1955, + "step": 4126 + }, + { + "epoch": 2.31, + "learning_rate": 2.5412201154163235e-05, + "loss": 5.4512, + "step": 4127 + }, + { + "epoch": 2.31, + "learning_rate": 2.539159109645507e-05, + "loss": 5.2849, + "step": 4128 + }, + { + "epoch": 2.31, + "learning_rate": 2.5370981038746912e-05, + "loss": 5.2183, + "step": 4129 + }, + { + "epoch": 2.32, + "learning_rate": 2.5350370981038745e-05, + "loss": 5.307, + "step": 4130 + }, + { + "epoch": 2.32, + "learning_rate": 2.5329760923330585e-05, + "loss": 5.477, + "step": 4131 + }, + { + "epoch": 2.32, + "learning_rate": 2.530915086562243e-05, + "loss": 5.2423, + "step": 4132 + }, + { + "epoch": 2.32, + "learning_rate": 2.528854080791426e-05, + "loss": 5.6013, + "step": 4133 + }, + { + "epoch": 2.32, + "learning_rate": 2.5267930750206105e-05, + "loss": 5.2593, + "step": 4134 + }, + { + "epoch": 2.32, + "learning_rate": 2.5247320692497938e-05, + "loss": 5.2904, + "step": 4135 + }, + { + "epoch": 2.32, + "learning_rate": 2.5226710634789778e-05, + "loss": 5.2257, + "step": 4136 + }, + { + "epoch": 2.32, + "learning_rate": 2.5206100577081615e-05, + "loss": 5.4024, + "step": 4137 + }, + { + "epoch": 2.32, + "learning_rate": 2.5185490519373455e-05, + "loss": 5.3906, + "step": 4138 + }, + { + "epoch": 2.32, + "learning_rate": 2.516488046166529e-05, + "loss": 5.3712, + "step": 4139 + }, + { + "epoch": 2.32, + "learning_rate": 2.514427040395713e-05, + "loss": 5.1657, + "step": 4140 + }, + { + "epoch": 2.32, + "learning_rate": 2.512366034624897e-05, + "loss": 5.0435, + "step": 4141 + }, + { + "epoch": 2.32, + "learning_rate": 2.5103050288540808e-05, + "loss": 5.0295, + "step": 4142 + }, + { + "epoch": 2.32, + "learning_rate": 2.5082440230832648e-05, + "loss": 4.9462, + "step": 4143 + }, + { + "epoch": 2.32, + "learning_rate": 2.5061830173124485e-05, + "loss": 5.2205, + "step": 4144 + }, + { + "epoch": 2.32, + "learning_rate": 2.5041220115416325e-05, + "loss": 5.2407, + "step": 4145 + }, + { + "epoch": 2.32, + "learning_rate": 2.502061005770816e-05, + "loss": 5.6528, + "step": 4146 + }, + { + "epoch": 2.32, + "learning_rate": 2.5e-05, + "loss": 5.2118, + "step": 4147 + }, + { + "epoch": 2.33, + "learning_rate": 2.4979389942291838e-05, + "loss": 5.0411, + "step": 4148 + }, + { + "epoch": 2.33, + "learning_rate": 2.4958779884583678e-05, + "loss": 4.8284, + "step": 4149 + }, + { + "epoch": 2.33, + "learning_rate": 2.4938169826875514e-05, + "loss": 5.0396, + "step": 4150 + }, + { + "epoch": 2.33, + "learning_rate": 2.4917559769167358e-05, + "loss": 4.9942, + "step": 4151 + }, + { + "epoch": 2.33, + "learning_rate": 2.4896949711459194e-05, + "loss": 5.0745, + "step": 4152 + }, + { + "epoch": 2.33, + "learning_rate": 2.487633965375103e-05, + "loss": 4.9857, + "step": 4153 + }, + { + "epoch": 2.33, + "learning_rate": 2.485572959604287e-05, + "loss": 5.118, + "step": 4154 + }, + { + "epoch": 2.33, + "learning_rate": 2.4835119538334708e-05, + "loss": 4.5677, + "step": 4155 + }, + { + "epoch": 2.33, + "learning_rate": 2.4814509480626548e-05, + "loss": 4.9855, + "step": 4156 + }, + { + "epoch": 2.33, + "learning_rate": 2.4793899422918384e-05, + "loss": 4.7544, + "step": 4157 + }, + { + "epoch": 2.33, + "learning_rate": 2.4773289365210224e-05, + "loss": 4.8542, + "step": 4158 + }, + { + "epoch": 2.33, + "learning_rate": 2.4752679307502064e-05, + "loss": 4.8212, + "step": 4159 + }, + { + "epoch": 2.33, + "learning_rate": 2.47320692497939e-05, + "loss": 4.5307, + "step": 4160 + }, + { + "epoch": 2.33, + "learning_rate": 2.471145919208574e-05, + "loss": 4.6089, + "step": 4161 + }, + { + "epoch": 2.33, + "learning_rate": 2.4690849134377577e-05, + "loss": 4.4883, + "step": 4162 + }, + { + "epoch": 2.33, + "learning_rate": 2.4670239076669417e-05, + "loss": 3.8925, + "step": 4163 + }, + { + "epoch": 2.33, + "learning_rate": 2.4649629018961254e-05, + "loss": 4.4617, + "step": 4164 + }, + { + "epoch": 2.33, + "learning_rate": 2.462901896125309e-05, + "loss": 3.8613, + "step": 4165 + }, + { + "epoch": 2.34, + "learning_rate": 2.460840890354493e-05, + "loss": 3.6588, + "step": 4166 + }, + { + "epoch": 2.34, + "learning_rate": 2.458779884583677e-05, + "loss": 3.2388, + "step": 4167 + }, + { + "epoch": 2.34, + "learning_rate": 2.456718878812861e-05, + "loss": 3.1461, + "step": 4168 + }, + { + "epoch": 2.34, + "learning_rate": 2.4546578730420447e-05, + "loss": 5.7497, + "step": 4169 + }, + { + "epoch": 2.34, + "learning_rate": 2.4525968672712284e-05, + "loss": 5.5748, + "step": 4170 + }, + { + "epoch": 2.34, + "learning_rate": 2.4505358615004124e-05, + "loss": 5.5658, + "step": 4171 + }, + { + "epoch": 2.34, + "learning_rate": 2.448474855729596e-05, + "loss": 5.3672, + "step": 4172 + }, + { + "epoch": 2.34, + "learning_rate": 2.44641384995878e-05, + "loss": 5.5171, + "step": 4173 + }, + { + "epoch": 2.34, + "learning_rate": 2.4443528441879637e-05, + "loss": 5.368, + "step": 4174 + }, + { + "epoch": 2.34, + "learning_rate": 2.4422918384171477e-05, + "loss": 5.5314, + "step": 4175 + }, + { + "epoch": 2.34, + "learning_rate": 2.4402308326463317e-05, + "loss": 5.4965, + "step": 4176 + }, + { + "epoch": 2.34, + "learning_rate": 2.4381698268755154e-05, + "loss": 5.3976, + "step": 4177 + }, + { + "epoch": 2.34, + "learning_rate": 2.4361088211046994e-05, + "loss": 5.3817, + "step": 4178 + }, + { + "epoch": 2.34, + "learning_rate": 2.434047815333883e-05, + "loss": 5.154, + "step": 4179 + }, + { + "epoch": 2.34, + "learning_rate": 2.431986809563067e-05, + "loss": 5.4107, + "step": 4180 + }, + { + "epoch": 2.34, + "learning_rate": 2.4299258037922507e-05, + "loss": 5.4311, + "step": 4181 + }, + { + "epoch": 2.34, + "learning_rate": 2.4278647980214343e-05, + "loss": 5.3246, + "step": 4182 + }, + { + "epoch": 2.34, + "learning_rate": 2.4258037922506187e-05, + "loss": 5.3993, + "step": 4183 + }, + { + "epoch": 2.35, + "learning_rate": 2.4237427864798023e-05, + "loss": 5.1767, + "step": 4184 + }, + { + "epoch": 2.35, + "learning_rate": 2.4216817807089863e-05, + "loss": 5.0747, + "step": 4185 + }, + { + "epoch": 2.35, + "learning_rate": 2.41962077493817e-05, + "loss": 5.3067, + "step": 4186 + }, + { + "epoch": 2.35, + "learning_rate": 2.4175597691673536e-05, + "loss": 5.2692, + "step": 4187 + }, + { + "epoch": 2.35, + "learning_rate": 2.4154987633965376e-05, + "loss": 5.0494, + "step": 4188 + }, + { + "epoch": 2.35, + "learning_rate": 2.4134377576257213e-05, + "loss": 5.3409, + "step": 4189 + }, + { + "epoch": 2.35, + "learning_rate": 2.4113767518549053e-05, + "loss": 5.1526, + "step": 4190 + }, + { + "epoch": 2.35, + "learning_rate": 2.4093157460840893e-05, + "loss": 4.9631, + "step": 4191 + }, + { + "epoch": 2.35, + "learning_rate": 2.407254740313273e-05, + "loss": 4.8898, + "step": 4192 + }, + { + "epoch": 2.35, + "learning_rate": 2.405193734542457e-05, + "loss": 5.0209, + "step": 4193 + }, + { + "epoch": 2.35, + "learning_rate": 2.4031327287716406e-05, + "loss": 5.1938, + "step": 4194 + }, + { + "epoch": 2.35, + "learning_rate": 2.4010717230008246e-05, + "loss": 5.0939, + "step": 4195 + }, + { + "epoch": 2.35, + "learning_rate": 2.3990107172300083e-05, + "loss": 4.7577, + "step": 4196 + }, + { + "epoch": 2.35, + "learning_rate": 2.3969497114591923e-05, + "loss": 4.9789, + "step": 4197 + }, + { + "epoch": 2.35, + "learning_rate": 2.394888705688376e-05, + "loss": 5.164, + "step": 4198 + }, + { + "epoch": 2.35, + "learning_rate": 2.39282769991756e-05, + "loss": 5.0084, + "step": 4199 + }, + { + "epoch": 2.35, + "learning_rate": 2.390766694146744e-05, + "loss": 4.8951, + "step": 4200 + }, + { + "epoch": 2.35, + "learning_rate": 2.3887056883759276e-05, + "loss": 5.219, + "step": 4201 + }, + { + "epoch": 2.36, + "learning_rate": 2.3866446826051113e-05, + "loss": 5.1004, + "step": 4202 + }, + { + "epoch": 2.36, + "learning_rate": 2.3845836768342953e-05, + "loss": 5.1595, + "step": 4203 + }, + { + "epoch": 2.36, + "learning_rate": 2.382522671063479e-05, + "loss": 4.8579, + "step": 4204 + }, + { + "epoch": 2.36, + "learning_rate": 2.380461665292663e-05, + "loss": 4.9814, + "step": 4205 + }, + { + "epoch": 2.36, + "learning_rate": 2.3784006595218466e-05, + "loss": 4.8938, + "step": 4206 + }, + { + "epoch": 2.36, + "learning_rate": 2.3763396537510306e-05, + "loss": 4.8044, + "step": 4207 + }, + { + "epoch": 2.36, + "learning_rate": 2.3742786479802146e-05, + "loss": 4.662, + "step": 4208 + }, + { + "epoch": 2.36, + "learning_rate": 2.3722176422093982e-05, + "loss": 4.7572, + "step": 4209 + }, + { + "epoch": 2.36, + "learning_rate": 2.3701566364385822e-05, + "loss": 4.615, + "step": 4210 + }, + { + "epoch": 2.36, + "learning_rate": 2.368095630667766e-05, + "loss": 4.562, + "step": 4211 + }, + { + "epoch": 2.36, + "learning_rate": 2.36603462489695e-05, + "loss": 4.4754, + "step": 4212 + }, + { + "epoch": 2.36, + "learning_rate": 2.3639736191261336e-05, + "loss": 4.115, + "step": 4213 + }, + { + "epoch": 2.36, + "learning_rate": 2.3619126133553172e-05, + "loss": 3.7926, + "step": 4214 + }, + { + "epoch": 2.36, + "learning_rate": 2.3598516075845016e-05, + "loss": 4.1234, + "step": 4215 + }, + { + "epoch": 2.36, + "learning_rate": 2.3577906018136852e-05, + "loss": 3.874, + "step": 4216 + }, + { + "epoch": 2.36, + "learning_rate": 2.3557295960428692e-05, + "loss": 3.6244, + "step": 4217 + }, + { + "epoch": 2.36, + "learning_rate": 2.353668590272053e-05, + "loss": 3.0319, + "step": 4218 + }, + { + "epoch": 2.36, + "learning_rate": 2.3516075845012365e-05, + "loss": 5.9924, + "step": 4219 + }, + { + "epoch": 2.37, + "learning_rate": 2.3495465787304205e-05, + "loss": 6.1216, + "step": 4220 + }, + { + "epoch": 2.37, + "learning_rate": 2.3474855729596042e-05, + "loss": 6.1324, + "step": 4221 + }, + { + "epoch": 2.37, + "learning_rate": 2.3454245671887882e-05, + "loss": 5.8872, + "step": 4222 + }, + { + "epoch": 2.37, + "learning_rate": 2.3433635614179722e-05, + "loss": 5.5875, + "step": 4223 + }, + { + "epoch": 2.37, + "learning_rate": 2.341302555647156e-05, + "loss": 5.4947, + "step": 4224 + }, + { + "epoch": 2.37, + "learning_rate": 2.33924154987634e-05, + "loss": 5.1355, + "step": 4225 + }, + { + "epoch": 2.37, + "learning_rate": 2.3371805441055235e-05, + "loss": 5.1328, + "step": 4226 + }, + { + "epoch": 2.37, + "learning_rate": 2.3351195383347075e-05, + "loss": 5.4122, + "step": 4227 + }, + { + "epoch": 2.37, + "learning_rate": 2.3330585325638912e-05, + "loss": 5.6085, + "step": 4228 + }, + { + "epoch": 2.37, + "learning_rate": 2.3309975267930752e-05, + "loss": 5.6609, + "step": 4229 + }, + { + "epoch": 2.37, + "learning_rate": 2.328936521022259e-05, + "loss": 5.7647, + "step": 4230 + }, + { + "epoch": 2.37, + "learning_rate": 2.326875515251443e-05, + "loss": 5.7039, + "step": 4231 + }, + { + "epoch": 2.37, + "learning_rate": 2.324814509480627e-05, + "loss": 5.562, + "step": 4232 + }, + { + "epoch": 2.37, + "learning_rate": 2.3227535037098105e-05, + "loss": 5.1901, + "step": 4233 + }, + { + "epoch": 2.37, + "learning_rate": 2.3206924979389945e-05, + "loss": 5.3084, + "step": 4234 + }, + { + "epoch": 2.37, + "learning_rate": 2.318631492168178e-05, + "loss": 4.8797, + "step": 4235 + }, + { + "epoch": 2.37, + "learning_rate": 2.3165704863973618e-05, + "loss": 5.2335, + "step": 4236 + }, + { + "epoch": 2.38, + "learning_rate": 2.3145094806265458e-05, + "loss": 5.2223, + "step": 4237 + }, + { + "epoch": 2.38, + "learning_rate": 2.3124484748557295e-05, + "loss": 5.0863, + "step": 4238 + }, + { + "epoch": 2.38, + "learning_rate": 2.3103874690849138e-05, + "loss": 5.2302, + "step": 4239 + }, + { + "epoch": 2.38, + "learning_rate": 2.3083264633140975e-05, + "loss": 5.1929, + "step": 4240 + }, + { + "epoch": 2.38, + "learning_rate": 2.306265457543281e-05, + "loss": 5.39, + "step": 4241 + }, + { + "epoch": 2.38, + "learning_rate": 2.304204451772465e-05, + "loss": 5.1904, + "step": 4242 + }, + { + "epoch": 2.38, + "learning_rate": 2.3021434460016488e-05, + "loss": 4.8571, + "step": 4243 + }, + { + "epoch": 2.38, + "learning_rate": 2.3000824402308328e-05, + "loss": 5.0474, + "step": 4244 + }, + { + "epoch": 2.38, + "learning_rate": 2.2980214344600165e-05, + "loss": 4.9549, + "step": 4245 + }, + { + "epoch": 2.38, + "learning_rate": 2.2959604286892005e-05, + "loss": 5.1377, + "step": 4246 + }, + { + "epoch": 2.38, + "learning_rate": 2.2938994229183844e-05, + "loss": 5.5412, + "step": 4247 + }, + { + "epoch": 2.38, + "learning_rate": 2.291838417147568e-05, + "loss": 4.9601, + "step": 4248 + }, + { + "epoch": 2.38, + "learning_rate": 2.289777411376752e-05, + "loss": 5.182, + "step": 4249 + }, + { + "epoch": 2.38, + "learning_rate": 2.2877164056059358e-05, + "loss": 4.6843, + "step": 4250 + }, + { + "epoch": 2.38, + "learning_rate": 2.2856553998351198e-05, + "loss": 4.9547, + "step": 4251 + }, + { + "epoch": 2.38, + "learning_rate": 2.2835943940643034e-05, + "loss": 4.5669, + "step": 4252 + }, + { + "epoch": 2.38, + "learning_rate": 2.281533388293487e-05, + "loss": 5.126, + "step": 4253 + }, + { + "epoch": 2.38, + "learning_rate": 2.279472382522671e-05, + "loss": 4.9312, + "step": 4254 + }, + { + "epoch": 2.39, + "learning_rate": 2.277411376751855e-05, + "loss": 5.0565, + "step": 4255 + }, + { + "epoch": 2.39, + "learning_rate": 2.275350370981039e-05, + "loss": 4.7092, + "step": 4256 + }, + { + "epoch": 2.39, + "learning_rate": 2.2732893652102227e-05, + "loss": 4.7441, + "step": 4257 + }, + { + "epoch": 2.39, + "learning_rate": 2.2712283594394064e-05, + "loss": 4.7731, + "step": 4258 + }, + { + "epoch": 2.39, + "learning_rate": 2.2691673536685904e-05, + "loss": 4.9253, + "step": 4259 + }, + { + "epoch": 2.39, + "learning_rate": 2.267106347897774e-05, + "loss": 4.7753, + "step": 4260 + }, + { + "epoch": 2.39, + "learning_rate": 2.265045342126958e-05, + "loss": 4.4365, + "step": 4261 + }, + { + "epoch": 2.39, + "learning_rate": 2.262984336356142e-05, + "loss": 4.7487, + "step": 4262 + }, + { + "epoch": 2.39, + "learning_rate": 2.2609233305853257e-05, + "loss": 4.4326, + "step": 4263 + }, + { + "epoch": 2.39, + "learning_rate": 2.2588623248145097e-05, + "loss": 4.5492, + "step": 4264 + }, + { + "epoch": 2.39, + "learning_rate": 2.2568013190436934e-05, + "loss": 3.5039, + "step": 4265 + }, + { + "epoch": 2.39, + "learning_rate": 2.2547403132728774e-05, + "loss": 3.3502, + "step": 4266 + }, + { + "epoch": 2.39, + "learning_rate": 2.252679307502061e-05, + "loss": 3.4128, + "step": 4267 + }, + { + "epoch": 2.39, + "learning_rate": 2.250618301731245e-05, + "loss": 3.1935, + "step": 4268 + }, + { + "epoch": 2.39, + "learning_rate": 2.2485572959604287e-05, + "loss": 6.2947, + "step": 4269 + }, + { + "epoch": 2.39, + "learning_rate": 2.2464962901896127e-05, + "loss": 6.1014, + "step": 4270 + }, + { + "epoch": 2.39, + "learning_rate": 2.2444352844187967e-05, + "loss": 6.1289, + "step": 4271 + }, + { + "epoch": 2.39, + "learning_rate": 2.2423742786479804e-05, + "loss": 5.7206, + "step": 4272 + }, + { + "epoch": 2.4, + "learning_rate": 2.2403132728771644e-05, + "loss": 5.8594, + "step": 4273 + }, + { + "epoch": 2.4, + "learning_rate": 2.238252267106348e-05, + "loss": 5.7024, + "step": 4274 + }, + { + "epoch": 2.4, + "learning_rate": 2.2361912613355317e-05, + "loss": 5.5052, + "step": 4275 + }, + { + "epoch": 2.4, + "learning_rate": 2.2341302555647157e-05, + "loss": 5.643, + "step": 4276 + }, + { + "epoch": 2.4, + "learning_rate": 2.2320692497938993e-05, + "loss": 5.4368, + "step": 4277 + }, + { + "epoch": 2.4, + "learning_rate": 2.2300082440230833e-05, + "loss": 5.1624, + "step": 4278 + }, + { + "epoch": 2.4, + "learning_rate": 2.2279472382522673e-05, + "loss": 5.2466, + "step": 4279 + }, + { + "epoch": 2.4, + "learning_rate": 2.225886232481451e-05, + "loss": 5.6408, + "step": 4280 + }, + { + "epoch": 2.4, + "learning_rate": 2.223825226710635e-05, + "loss": 5.522, + "step": 4281 + }, + { + "epoch": 2.4, + "learning_rate": 2.2217642209398187e-05, + "loss": 5.0769, + "step": 4282 + }, + { + "epoch": 2.4, + "learning_rate": 2.2197032151690027e-05, + "loss": 5.366, + "step": 4283 + }, + { + "epoch": 2.4, + "learning_rate": 2.2176422093981863e-05, + "loss": 5.6432, + "step": 4284 + }, + { + "epoch": 2.4, + "learning_rate": 2.2155812036273703e-05, + "loss": 5.2993, + "step": 4285 + }, + { + "epoch": 2.4, + "learning_rate": 2.2135201978565543e-05, + "loss": 5.1372, + "step": 4286 + }, + { + "epoch": 2.4, + "learning_rate": 2.211459192085738e-05, + "loss": 5.1979, + "step": 4287 + }, + { + "epoch": 2.4, + "learning_rate": 2.209398186314922e-05, + "loss": 5.3735, + "step": 4288 + }, + { + "epoch": 2.4, + "learning_rate": 2.2073371805441056e-05, + "loss": 5.3382, + "step": 4289 + }, + { + "epoch": 2.4, + "learning_rate": 2.2052761747732893e-05, + "loss": 5.2439, + "step": 4290 + }, + { + "epoch": 2.41, + "learning_rate": 2.2032151690024733e-05, + "loss": 5.2073, + "step": 4291 + }, + { + "epoch": 2.41, + "learning_rate": 2.201154163231657e-05, + "loss": 5.212, + "step": 4292 + }, + { + "epoch": 2.41, + "learning_rate": 2.199093157460841e-05, + "loss": 5.1937, + "step": 4293 + }, + { + "epoch": 2.41, + "learning_rate": 2.197032151690025e-05, + "loss": 5.2774, + "step": 4294 + }, + { + "epoch": 2.41, + "learning_rate": 2.1949711459192086e-05, + "loss": 5.1807, + "step": 4295 + }, + { + "epoch": 2.41, + "learning_rate": 2.1929101401483926e-05, + "loss": 5.0505, + "step": 4296 + }, + { + "epoch": 2.41, + "learning_rate": 2.1908491343775763e-05, + "loss": 5.4344, + "step": 4297 + }, + { + "epoch": 2.41, + "learning_rate": 2.1887881286067603e-05, + "loss": 5.0043, + "step": 4298 + }, + { + "epoch": 2.41, + "learning_rate": 2.186727122835944e-05, + "loss": 5.0872, + "step": 4299 + }, + { + "epoch": 2.41, + "learning_rate": 2.184666117065128e-05, + "loss": 5.229, + "step": 4300 + }, + { + "epoch": 2.41, + "learning_rate": 2.1826051112943116e-05, + "loss": 5.0606, + "step": 4301 + }, + { + "epoch": 2.41, + "learning_rate": 2.1805441055234956e-05, + "loss": 4.9316, + "step": 4302 + }, + { + "epoch": 2.41, + "learning_rate": 2.1784830997526796e-05, + "loss": 5.107, + "step": 4303 + }, + { + "epoch": 2.41, + "learning_rate": 2.1764220939818633e-05, + "loss": 5.0591, + "step": 4304 + }, + { + "epoch": 2.41, + "learning_rate": 2.1743610882110473e-05, + "loss": 5.198, + "step": 4305 + }, + { + "epoch": 2.41, + "learning_rate": 2.172300082440231e-05, + "loss": 4.8996, + "step": 4306 + }, + { + "epoch": 2.41, + "learning_rate": 2.1702390766694146e-05, + "loss": 5.0202, + "step": 4307 + }, + { + "epoch": 2.41, + "learning_rate": 2.1681780708985986e-05, + "loss": 4.7587, + "step": 4308 + }, + { + "epoch": 2.42, + "learning_rate": 2.1661170651277822e-05, + "loss": 4.7843, + "step": 4309 + }, + { + "epoch": 2.42, + "learning_rate": 2.1640560593569666e-05, + "loss": 4.6884, + "step": 4310 + }, + { + "epoch": 2.42, + "learning_rate": 2.1619950535861502e-05, + "loss": 4.6272, + "step": 4311 + }, + { + "epoch": 2.42, + "learning_rate": 2.159934047815334e-05, + "loss": 4.7039, + "step": 4312 + }, + { + "epoch": 2.42, + "learning_rate": 2.157873042044518e-05, + "loss": 4.5821, + "step": 4313 + }, + { + "epoch": 2.42, + "learning_rate": 2.1558120362737016e-05, + "loss": 4.286, + "step": 4314 + }, + { + "epoch": 2.42, + "learning_rate": 2.1537510305028855e-05, + "loss": 4.2771, + "step": 4315 + }, + { + "epoch": 2.42, + "learning_rate": 2.1516900247320692e-05, + "loss": 4.0421, + "step": 4316 + }, + { + "epoch": 2.42, + "learning_rate": 2.1496290189612532e-05, + "loss": 2.9828, + "step": 4317 + }, + { + "epoch": 2.42, + "learning_rate": 2.1475680131904372e-05, + "loss": 4.0162, + "step": 4318 + }, + { + "epoch": 2.42, + "learning_rate": 2.145507007419621e-05, + "loss": 6.1461, + "step": 4319 + }, + { + "epoch": 2.42, + "learning_rate": 2.143446001648805e-05, + "loss": 6.2737, + "step": 4320 + }, + { + "epoch": 2.42, + "learning_rate": 2.1413849958779885e-05, + "loss": 5.8952, + "step": 4321 + }, + { + "epoch": 2.42, + "learning_rate": 2.1393239901071725e-05, + "loss": 6.1974, + "step": 4322 + }, + { + "epoch": 2.42, + "learning_rate": 2.1372629843363562e-05, + "loss": 6.1323, + "step": 4323 + }, + { + "epoch": 2.42, + "learning_rate": 2.13520197856554e-05, + "loss": 5.7696, + "step": 4324 + }, + { + "epoch": 2.42, + "learning_rate": 2.133140972794724e-05, + "loss": 5.4925, + "step": 4325 + }, + { + "epoch": 2.42, + "learning_rate": 2.131079967023908e-05, + "loss": 5.8638, + "step": 4326 + }, + { + "epoch": 2.43, + "learning_rate": 2.129018961253092e-05, + "loss": 5.9042, + "step": 4327 + }, + { + "epoch": 2.43, + "learning_rate": 2.1269579554822755e-05, + "loss": 5.19, + "step": 4328 + }, + { + "epoch": 2.43, + "learning_rate": 2.124896949711459e-05, + "loss": 5.2944, + "step": 4329 + }, + { + "epoch": 2.43, + "learning_rate": 2.122835943940643e-05, + "loss": 4.9809, + "step": 4330 + }, + { + "epoch": 2.43, + "learning_rate": 2.1207749381698268e-05, + "loss": 5.2796, + "step": 4331 + }, + { + "epoch": 2.43, + "learning_rate": 2.1187139323990108e-05, + "loss": 5.439, + "step": 4332 + }, + { + "epoch": 2.43, + "learning_rate": 2.1166529266281945e-05, + "loss": 5.4381, + "step": 4333 + }, + { + "epoch": 2.43, + "learning_rate": 2.1145919208573785e-05, + "loss": 5.2467, + "step": 4334 + }, + { + "epoch": 2.43, + "learning_rate": 2.1125309150865625e-05, + "loss": 5.1523, + "step": 4335 + }, + { + "epoch": 2.43, + "learning_rate": 2.110469909315746e-05, + "loss": 5.4934, + "step": 4336 + }, + { + "epoch": 2.43, + "learning_rate": 2.10840890354493e-05, + "loss": 5.4688, + "step": 4337 + }, + { + "epoch": 2.43, + "learning_rate": 2.1063478977741138e-05, + "loss": 5.4184, + "step": 4338 + }, + { + "epoch": 2.43, + "learning_rate": 2.1042868920032978e-05, + "loss": 5.2806, + "step": 4339 + }, + { + "epoch": 2.43, + "learning_rate": 2.1022258862324815e-05, + "loss": 5.0691, + "step": 4340 + }, + { + "epoch": 2.43, + "learning_rate": 2.100164880461665e-05, + "loss": 5.0744, + "step": 4341 + }, + { + "epoch": 2.43, + "learning_rate": 2.0981038746908495e-05, + "loss": 5.0286, + "step": 4342 + }, + { + "epoch": 2.43, + "learning_rate": 2.096042868920033e-05, + "loss": 5.4378, + "step": 4343 + }, + { + "epoch": 2.43, + "learning_rate": 2.093981863149217e-05, + "loss": 5.2294, + "step": 4344 + }, + { + "epoch": 2.44, + "learning_rate": 2.0919208573784008e-05, + "loss": 5.3632, + "step": 4345 + }, + { + "epoch": 2.44, + "learning_rate": 2.0898598516075844e-05, + "loss": 4.7122, + "step": 4346 + }, + { + "epoch": 2.44, + "learning_rate": 2.0877988458367684e-05, + "loss": 5.2394, + "step": 4347 + }, + { + "epoch": 2.44, + "learning_rate": 2.085737840065952e-05, + "loss": 4.9127, + "step": 4348 + }, + { + "epoch": 2.44, + "learning_rate": 2.083676834295136e-05, + "loss": 5.3218, + "step": 4349 + }, + { + "epoch": 2.44, + "learning_rate": 2.08161582852432e-05, + "loss": 4.9575, + "step": 4350 + }, + { + "epoch": 2.44, + "learning_rate": 2.0795548227535038e-05, + "loss": 4.7738, + "step": 4351 + }, + { + "epoch": 2.44, + "learning_rate": 2.0774938169826878e-05, + "loss": 4.8735, + "step": 4352 + }, + { + "epoch": 2.44, + "learning_rate": 2.0754328112118714e-05, + "loss": 5.2518, + "step": 4353 + }, + { + "epoch": 2.44, + "learning_rate": 2.0733718054410554e-05, + "loss": 4.8533, + "step": 4354 + }, + { + "epoch": 2.44, + "learning_rate": 2.071310799670239e-05, + "loss": 5.031, + "step": 4355 + }, + { + "epoch": 2.44, + "learning_rate": 2.069249793899423e-05, + "loss": 5.0403, + "step": 4356 + }, + { + "epoch": 2.44, + "learning_rate": 2.0671887881286067e-05, + "loss": 5.1853, + "step": 4357 + }, + { + "epoch": 2.44, + "learning_rate": 2.0651277823577907e-05, + "loss": 4.63, + "step": 4358 + }, + { + "epoch": 2.44, + "learning_rate": 2.0630667765869747e-05, + "loss": 4.8955, + "step": 4359 + }, + { + "epoch": 2.44, + "learning_rate": 2.0610057708161584e-05, + "loss": 4.7766, + "step": 4360 + }, + { + "epoch": 2.44, + "learning_rate": 2.0589447650453424e-05, + "loss": 4.8705, + "step": 4361 + }, + { + "epoch": 2.45, + "learning_rate": 2.056883759274526e-05, + "loss": 4.6676, + "step": 4362 + }, + { + "epoch": 2.45, + "learning_rate": 2.0548227535037097e-05, + "loss": 4.4722, + "step": 4363 + }, + { + "epoch": 2.45, + "learning_rate": 2.0527617477328937e-05, + "loss": 4.5993, + "step": 4364 + }, + { + "epoch": 2.45, + "learning_rate": 2.0507007419620774e-05, + "loss": 4.172, + "step": 4365 + }, + { + "epoch": 2.45, + "learning_rate": 2.0486397361912614e-05, + "loss": 3.7304, + "step": 4366 + }, + { + "epoch": 2.45, + "learning_rate": 2.0465787304204454e-05, + "loss": 2.9871, + "step": 4367 + }, + { + "epoch": 2.45, + "learning_rate": 2.044517724649629e-05, + "loss": 3.7096, + "step": 4368 + }, + { + "epoch": 2.45, + "learning_rate": 2.042456718878813e-05, + "loss": 5.9517, + "step": 4369 + }, + { + "epoch": 2.45, + "learning_rate": 2.0403957131079967e-05, + "loss": 5.7611, + "step": 4370 + }, + { + "epoch": 2.45, + "learning_rate": 2.0383347073371807e-05, + "loss": 5.768, + "step": 4371 + }, + { + "epoch": 2.45, + "learning_rate": 2.0362737015663644e-05, + "loss": 5.8462, + "step": 4372 + }, + { + "epoch": 2.45, + "learning_rate": 2.0342126957955484e-05, + "loss": 5.5798, + "step": 4373 + }, + { + "epoch": 2.45, + "learning_rate": 2.0321516900247324e-05, + "loss": 5.4721, + "step": 4374 + }, + { + "epoch": 2.45, + "learning_rate": 2.030090684253916e-05, + "loss": 5.2936, + "step": 4375 + }, + { + "epoch": 2.45, + "learning_rate": 2.0280296784831e-05, + "loss": 5.1809, + "step": 4376 + }, + { + "epoch": 2.45, + "learning_rate": 2.0259686727122837e-05, + "loss": 5.3037, + "step": 4377 + }, + { + "epoch": 2.45, + "learning_rate": 2.0239076669414673e-05, + "loss": 5.2882, + "step": 4378 + }, + { + "epoch": 2.45, + "learning_rate": 2.0218466611706513e-05, + "loss": 5.394, + "step": 4379 + }, + { + "epoch": 2.46, + "learning_rate": 2.019785655399835e-05, + "loss": 5.0617, + "step": 4380 + }, + { + "epoch": 2.46, + "learning_rate": 2.0177246496290193e-05, + "loss": 4.9391, + "step": 4381 + }, + { + "epoch": 2.46, + "learning_rate": 2.015663643858203e-05, + "loss": 5.3316, + "step": 4382 + }, + { + "epoch": 2.46, + "learning_rate": 2.0136026380873867e-05, + "loss": 5.415, + "step": 4383 + }, + { + "epoch": 2.46, + "learning_rate": 2.0115416323165706e-05, + "loss": 5.3648, + "step": 4384 + }, + { + "epoch": 2.46, + "learning_rate": 2.0094806265457543e-05, + "loss": 5.0399, + "step": 4385 + }, + { + "epoch": 2.46, + "learning_rate": 2.0074196207749383e-05, + "loss": 5.0292, + "step": 4386 + }, + { + "epoch": 2.46, + "learning_rate": 2.005358615004122e-05, + "loss": 5.1416, + "step": 4387 + }, + { + "epoch": 2.46, + "learning_rate": 2.003297609233306e-05, + "loss": 5.2037, + "step": 4388 + }, + { + "epoch": 2.46, + "learning_rate": 2.00123660346249e-05, + "loss": 5.3079, + "step": 4389 + }, + { + "epoch": 2.46, + "learning_rate": 1.9991755976916736e-05, + "loss": 5.1063, + "step": 4390 + }, + { + "epoch": 2.46, + "learning_rate": 1.9971145919208576e-05, + "loss": 5.1406, + "step": 4391 + }, + { + "epoch": 2.46, + "learning_rate": 1.9950535861500413e-05, + "loss": 5.1629, + "step": 4392 + }, + { + "epoch": 2.46, + "learning_rate": 1.9929925803792253e-05, + "loss": 4.9435, + "step": 4393 + }, + { + "epoch": 2.46, + "learning_rate": 1.990931574608409e-05, + "loss": 4.8435, + "step": 4394 + }, + { + "epoch": 2.46, + "learning_rate": 1.9888705688375926e-05, + "loss": 4.9584, + "step": 4395 + }, + { + "epoch": 2.46, + "learning_rate": 1.9868095630667766e-05, + "loss": 4.866, + "step": 4396 + }, + { + "epoch": 2.46, + "learning_rate": 1.9847485572959606e-05, + "loss": 5.293, + "step": 4397 + }, + { + "epoch": 2.47, + "learning_rate": 1.9826875515251446e-05, + "loss": 4.9339, + "step": 4398 + }, + { + "epoch": 2.47, + "learning_rate": 1.9806265457543283e-05, + "loss": 5.1405, + "step": 4399 + }, + { + "epoch": 2.47, + "learning_rate": 1.978565539983512e-05, + "loss": 5.2538, + "step": 4400 + }, + { + "epoch": 2.47, + "learning_rate": 1.976504534212696e-05, + "loss": 4.822, + "step": 4401 + }, + { + "epoch": 2.47, + "learning_rate": 1.9744435284418796e-05, + "loss": 4.7649, + "step": 4402 + }, + { + "epoch": 2.47, + "learning_rate": 1.9723825226710636e-05, + "loss": 4.807, + "step": 4403 + }, + { + "epoch": 2.47, + "learning_rate": 1.9703215169002472e-05, + "loss": 5.1051, + "step": 4404 + }, + { + "epoch": 2.47, + "learning_rate": 1.9682605111294312e-05, + "loss": 4.7506, + "step": 4405 + }, + { + "epoch": 2.47, + "learning_rate": 1.9661995053586152e-05, + "loss": 5.0901, + "step": 4406 + }, + { + "epoch": 2.47, + "learning_rate": 1.964138499587799e-05, + "loss": 4.8788, + "step": 4407 + }, + { + "epoch": 2.47, + "learning_rate": 1.962077493816983e-05, + "loss": 4.5705, + "step": 4408 + }, + { + "epoch": 2.47, + "learning_rate": 1.9600164880461666e-05, + "loss": 4.2101, + "step": 4409 + }, + { + "epoch": 2.47, + "learning_rate": 1.9579554822753506e-05, + "loss": 4.2901, + "step": 4410 + }, + { + "epoch": 2.47, + "learning_rate": 1.9558944765045342e-05, + "loss": 4.3712, + "step": 4411 + }, + { + "epoch": 2.47, + "learning_rate": 1.953833470733718e-05, + "loss": 4.4539, + "step": 4412 + }, + { + "epoch": 2.47, + "learning_rate": 1.9517724649629022e-05, + "loss": 3.8275, + "step": 4413 + }, + { + "epoch": 2.47, + "learning_rate": 1.949711459192086e-05, + "loss": 3.8815, + "step": 4414 + }, + { + "epoch": 2.47, + "learning_rate": 1.94765045342127e-05, + "loss": 4.0714, + "step": 4415 + }, + { + "epoch": 2.48, + "learning_rate": 1.9455894476504535e-05, + "loss": 3.5716, + "step": 4416 + }, + { + "epoch": 2.48, + "learning_rate": 1.9435284418796372e-05, + "loss": 3.5899, + "step": 4417 + }, + { + "epoch": 2.48, + "learning_rate": 1.9414674361088212e-05, + "loss": 3.9711, + "step": 4418 + }, + { + "epoch": 2.48, + "learning_rate": 1.939406430338005e-05, + "loss": 5.5772, + "step": 4419 + }, + { + "epoch": 2.48, + "learning_rate": 1.937345424567189e-05, + "loss": 5.4735, + "step": 4420 + }, + { + "epoch": 2.48, + "learning_rate": 1.935284418796373e-05, + "loss": 5.4202, + "step": 4421 + }, + { + "epoch": 2.48, + "learning_rate": 1.9332234130255565e-05, + "loss": 5.4024, + "step": 4422 + }, + { + "epoch": 2.48, + "learning_rate": 1.9311624072547405e-05, + "loss": 5.5597, + "step": 4423 + }, + { + "epoch": 2.48, + "learning_rate": 1.9291014014839242e-05, + "loss": 5.1182, + "step": 4424 + }, + { + "epoch": 2.48, + "learning_rate": 1.9270403957131082e-05, + "loss": 5.2608, + "step": 4425 + }, + { + "epoch": 2.48, + "learning_rate": 1.924979389942292e-05, + "loss": 5.3052, + "step": 4426 + }, + { + "epoch": 2.48, + "learning_rate": 1.922918384171476e-05, + "loss": 5.2307, + "step": 4427 + }, + { + "epoch": 2.48, + "learning_rate": 1.9208573784006595e-05, + "loss": 5.4074, + "step": 4428 + }, + { + "epoch": 2.48, + "learning_rate": 1.9187963726298435e-05, + "loss": 5.328, + "step": 4429 + }, + { + "epoch": 2.48, + "learning_rate": 1.9167353668590275e-05, + "loss": 5.4193, + "step": 4430 + }, + { + "epoch": 2.48, + "learning_rate": 1.914674361088211e-05, + "loss": 5.2507, + "step": 4431 + }, + { + "epoch": 2.48, + "learning_rate": 1.912613355317395e-05, + "loss": 5.4038, + "step": 4432 + }, + { + "epoch": 2.48, + "learning_rate": 1.9105523495465788e-05, + "loss": 5.1829, + "step": 4433 + }, + { + "epoch": 2.49, + "learning_rate": 1.9084913437757625e-05, + "loss": 4.9451, + "step": 4434 + }, + { + "epoch": 2.49, + "learning_rate": 1.9064303380049465e-05, + "loss": 5.4831, + "step": 4435 + }, + { + "epoch": 2.49, + "learning_rate": 1.90436933223413e-05, + "loss": 5.2358, + "step": 4436 + }, + { + "epoch": 2.49, + "learning_rate": 1.9023083264633145e-05, + "loss": 4.9509, + "step": 4437 + }, + { + "epoch": 2.49, + "learning_rate": 1.900247320692498e-05, + "loss": 5.3464, + "step": 4438 + }, + { + "epoch": 2.49, + "learning_rate": 1.8981863149216818e-05, + "loss": 5.2613, + "step": 4439 + }, + { + "epoch": 2.49, + "learning_rate": 1.8961253091508658e-05, + "loss": 5.2374, + "step": 4440 + }, + { + "epoch": 2.49, + "learning_rate": 1.8940643033800495e-05, + "loss": 5.1689, + "step": 4441 + }, + { + "epoch": 2.49, + "learning_rate": 1.8920032976092335e-05, + "loss": 4.9535, + "step": 4442 + }, + { + "epoch": 2.49, + "learning_rate": 1.889942291838417e-05, + "loss": 5.3084, + "step": 4443 + }, + { + "epoch": 2.49, + "learning_rate": 1.887881286067601e-05, + "loss": 4.9553, + "step": 4444 + }, + { + "epoch": 2.49, + "learning_rate": 1.885820280296785e-05, + "loss": 5.2971, + "step": 4445 + }, + { + "epoch": 2.49, + "learning_rate": 1.8837592745259688e-05, + "loss": 5.3733, + "step": 4446 + }, + { + "epoch": 2.49, + "learning_rate": 1.8816982687551528e-05, + "loss": 5.0168, + "step": 4447 + }, + { + "epoch": 2.49, + "learning_rate": 1.8796372629843364e-05, + "loss": 5.0924, + "step": 4448 + }, + { + "epoch": 2.49, + "learning_rate": 1.8775762572135204e-05, + "loss": 5.0578, + "step": 4449 + }, + { + "epoch": 2.49, + "learning_rate": 1.875515251442704e-05, + "loss": 5.0364, + "step": 4450 + }, + { + "epoch": 2.49, + "learning_rate": 1.8734542456718878e-05, + "loss": 5.1629, + "step": 4451 + }, + { + "epoch": 2.5, + "learning_rate": 1.8713932399010717e-05, + "loss": 4.8615, + "step": 4452 + }, + { + "epoch": 2.5, + "learning_rate": 1.8693322341302557e-05, + "loss": 4.9513, + "step": 4453 + }, + { + "epoch": 2.5, + "learning_rate": 1.8672712283594394e-05, + "loss": 4.8889, + "step": 4454 + }, + { + "epoch": 2.5, + "learning_rate": 1.8652102225886234e-05, + "loss": 4.9717, + "step": 4455 + }, + { + "epoch": 2.5, + "learning_rate": 1.863149216817807e-05, + "loss": 4.8979, + "step": 4456 + }, + { + "epoch": 2.5, + "learning_rate": 1.861088211046991e-05, + "loss": 5.5107, + "step": 4457 + }, + { + "epoch": 2.5, + "learning_rate": 1.8590272052761747e-05, + "loss": 4.4709, + "step": 4458 + }, + { + "epoch": 2.5, + "learning_rate": 1.8569661995053587e-05, + "loss": 4.7312, + "step": 4459 + }, + { + "epoch": 2.5, + "learning_rate": 1.8549051937345424e-05, + "loss": 4.7823, + "step": 4460 + }, + { + "epoch": 2.5, + "learning_rate": 1.8528441879637264e-05, + "loss": 4.097, + "step": 4461 + }, + { + "epoch": 2.5, + "learning_rate": 1.8507831821929104e-05, + "loss": 4.2814, + "step": 4462 + }, + { + "epoch": 2.5, + "learning_rate": 1.848722176422094e-05, + "loss": 4.2304, + "step": 4463 + }, + { + "epoch": 2.5, + "learning_rate": 1.846661170651278e-05, + "loss": 3.8894, + "step": 4464 + }, + { + "epoch": 2.5, + "learning_rate": 1.8446001648804617e-05, + "loss": 4.0412, + "step": 4465 + }, + { + "epoch": 2.5, + "learning_rate": 1.8425391591096454e-05, + "loss": 3.5995, + "step": 4466 + }, + { + "epoch": 2.5, + "learning_rate": 1.8404781533388294e-05, + "loss": 3.9243, + "step": 4467 + }, + { + "epoch": 2.5, + "learning_rate": 1.838417147568013e-05, + "loss": 3.2126, + "step": 4468 + }, + { + "epoch": 2.51, + "learning_rate": 1.8363561417971974e-05, + "loss": 5.942, + "step": 4469 + }, + { + "epoch": 2.51, + "learning_rate": 1.834295136026381e-05, + "loss": 5.7833, + "step": 4470 + }, + { + "epoch": 2.51, + "learning_rate": 1.8322341302555647e-05, + "loss": 5.8002, + "step": 4471 + }, + { + "epoch": 2.51, + "learning_rate": 1.8301731244847487e-05, + "loss": 5.8344, + "step": 4472 + }, + { + "epoch": 2.51, + "learning_rate": 1.8281121187139323e-05, + "loss": 5.348, + "step": 4473 + }, + { + "epoch": 2.51, + "learning_rate": 1.8260511129431163e-05, + "loss": 5.2916, + "step": 4474 + }, + { + "epoch": 2.51, + "learning_rate": 1.8239901071723e-05, + "loss": 5.2287, + "step": 4475 + }, + { + "epoch": 2.51, + "learning_rate": 1.821929101401484e-05, + "loss": 5.2813, + "step": 4476 + }, + { + "epoch": 2.51, + "learning_rate": 1.819868095630668e-05, + "loss": 5.2662, + "step": 4477 + }, + { + "epoch": 2.51, + "learning_rate": 1.8178070898598517e-05, + "loss": 5.2559, + "step": 4478 + }, + { + "epoch": 2.51, + "learning_rate": 1.8157460840890357e-05, + "loss": 5.0452, + "step": 4479 + }, + { + "epoch": 2.51, + "learning_rate": 1.8136850783182193e-05, + "loss": 5.2912, + "step": 4480 + }, + { + "epoch": 2.51, + "learning_rate": 1.8116240725474033e-05, + "loss": 5.4133, + "step": 4481 + }, + { + "epoch": 2.51, + "learning_rate": 1.809563066776587e-05, + "loss": 5.4443, + "step": 4482 + }, + { + "epoch": 2.51, + "learning_rate": 1.8075020610057706e-05, + "loss": 5.0242, + "step": 4483 + }, + { + "epoch": 2.51, + "learning_rate": 1.805441055234955e-05, + "loss": 4.9355, + "step": 4484 + }, + { + "epoch": 2.51, + "learning_rate": 1.8033800494641386e-05, + "loss": 5.0504, + "step": 4485 + }, + { + "epoch": 2.51, + "learning_rate": 1.8013190436933226e-05, + "loss": 5.0545, + "step": 4486 + }, + { + "epoch": 2.52, + "learning_rate": 1.7992580379225063e-05, + "loss": 4.7357, + "step": 4487 + }, + { + "epoch": 2.52, + "learning_rate": 1.79719703215169e-05, + "loss": 5.403, + "step": 4488 + }, + { + "epoch": 2.52, + "learning_rate": 1.795136026380874e-05, + "loss": 5.231, + "step": 4489 + }, + { + "epoch": 2.52, + "learning_rate": 1.7930750206100576e-05, + "loss": 5.2498, + "step": 4490 + }, + { + "epoch": 2.52, + "learning_rate": 1.7910140148392416e-05, + "loss": 5.0503, + "step": 4491 + }, + { + "epoch": 2.52, + "learning_rate": 1.7889530090684256e-05, + "loss": 4.8945, + "step": 4492 + }, + { + "epoch": 2.52, + "learning_rate": 1.7868920032976093e-05, + "loss": 4.9551, + "step": 4493 + }, + { + "epoch": 2.52, + "learning_rate": 1.7848309975267933e-05, + "loss": 5.1155, + "step": 4494 + }, + { + "epoch": 2.52, + "learning_rate": 1.782769991755977e-05, + "loss": 4.9611, + "step": 4495 + }, + { + "epoch": 2.52, + "learning_rate": 1.780708985985161e-05, + "loss": 4.9293, + "step": 4496 + }, + { + "epoch": 2.52, + "learning_rate": 1.7786479802143446e-05, + "loss": 5.0014, + "step": 4497 + }, + { + "epoch": 2.52, + "learning_rate": 1.7765869744435286e-05, + "loss": 4.871, + "step": 4498 + }, + { + "epoch": 2.52, + "learning_rate": 1.7745259686727123e-05, + "loss": 4.9855, + "step": 4499 + }, + { + "epoch": 2.52, + "learning_rate": 1.7724649629018963e-05, + "loss": 5.2332, + "step": 4500 + }, + { + "epoch": 2.52, + "eval_loss": 19.54161262512207, + "eval_runtime": 1336.7962, + "eval_samples_per_second": 1.976, + "eval_steps_per_second": 0.248, + "eval_wer": 1.0008131693772313, + "step": 4500 + }, + { + "epoch": 2.52, + "learning_rate": 1.7704039571310803e-05, + "loss": 5.123, + "step": 4501 + }, + { + "epoch": 2.52, + "learning_rate": 1.768342951360264e-05, + "loss": 5.1154, + "step": 4502 + }, + { + "epoch": 2.52, + "learning_rate": 1.766281945589448e-05, + "loss": 5.2048, + "step": 4503 + }, + { + "epoch": 2.52, + "learning_rate": 1.7642209398186316e-05, + "loss": 5.1437, + "step": 4504 + }, + { + "epoch": 2.53, + "learning_rate": 1.7621599340478152e-05, + "loss": 4.7699, + "step": 4505 + }, + { + "epoch": 2.53, + "learning_rate": 1.7600989282769992e-05, + "loss": 4.7523, + "step": 4506 + }, + { + "epoch": 2.53, + "learning_rate": 1.758037922506183e-05, + "loss": 4.9403, + "step": 4507 + }, + { + "epoch": 2.53, + "learning_rate": 1.7559769167353672e-05, + "loss": 4.5738, + "step": 4508 + }, + { + "epoch": 2.53, + "learning_rate": 1.7559769167353672e-05, + "loss": 5.0021, + "step": 4509 + }, + { + "epoch": 2.53, + "learning_rate": 1.753915910964551e-05, + "loss": 4.6907, + "step": 4510 + }, + { + "epoch": 2.53, + "learning_rate": 1.7518549051937346e-05, + "loss": 4.6278, + "step": 4511 + }, + { + "epoch": 2.53, + "learning_rate": 1.7497938994229186e-05, + "loss": 4.6609, + "step": 4512 + }, + { + "epoch": 2.53, + "learning_rate": 1.7477328936521022e-05, + "loss": 4.7447, + "step": 4513 + }, + { + "epoch": 2.53, + "learning_rate": 1.7456718878812862e-05, + "loss": 4.403, + "step": 4514 + }, + { + "epoch": 2.53, + "learning_rate": 1.74361088211047e-05, + "loss": 3.6336, + "step": 4515 + }, + { + "epoch": 2.53, + "learning_rate": 1.741549876339654e-05, + "loss": 3.7642, + "step": 4516 + }, + { + "epoch": 2.53, + "learning_rate": 1.739488870568838e-05, + "loss": 3.1187, + "step": 4517 + }, + { + "epoch": 2.53, + "learning_rate": 1.7374278647980215e-05, + "loss": 2.7915, + "step": 4518 + }, + { + "epoch": 2.53, + "learning_rate": 1.7353668590272055e-05, + "loss": 6.018, + "step": 4519 + }, + { + "epoch": 2.53, + "learning_rate": 1.7333058532563892e-05, + "loss": 5.9712, + "step": 4520 + }, + { + "epoch": 2.53, + "learning_rate": 1.7312448474855732e-05, + "loss": 6.0018, + "step": 4521 + }, + { + "epoch": 2.53, + "learning_rate": 1.729183841714757e-05, + "loss": 5.9353, + "step": 4522 + }, + { + "epoch": 2.54, + "learning_rate": 1.7271228359439405e-05, + "loss": 5.6326, + "step": 4523 + }, + { + "epoch": 2.54, + "learning_rate": 1.7250618301731245e-05, + "loss": 5.6639, + "step": 4524 + }, + { + "epoch": 2.54, + "learning_rate": 1.7230008244023085e-05, + "loss": 5.629, + "step": 4525 + }, + { + "epoch": 2.54, + "learning_rate": 1.7209398186314925e-05, + "loss": 5.3567, + "step": 4526 + }, + { + "epoch": 2.54, + "learning_rate": 1.718878812860676e-05, + "loss": 5.2605, + "step": 4527 + }, + { + "epoch": 2.54, + "learning_rate": 1.7168178070898598e-05, + "loss": 5.3579, + "step": 4528 + }, + { + "epoch": 2.54, + "learning_rate": 1.7147568013190438e-05, + "loss": 5.0081, + "step": 4529 + }, + { + "epoch": 2.54, + "learning_rate": 1.7126957955482275e-05, + "loss": 5.3893, + "step": 4530 + }, + { + "epoch": 2.54, + "learning_rate": 1.7106347897774115e-05, + "loss": 5.2728, + "step": 4531 + }, + { + "epoch": 2.54, + "learning_rate": 1.708573784006595e-05, + "loss": 5.3676, + "step": 4532 + }, + { + "epoch": 2.54, + "learning_rate": 1.706512778235779e-05, + "loss": 5.2521, + "step": 4533 + }, + { + "epoch": 2.54, + "learning_rate": 1.704451772464963e-05, + "loss": 5.1503, + "step": 4534 + }, + { + "epoch": 2.54, + "learning_rate": 1.7023907666941468e-05, + "loss": 4.9875, + "step": 4535 + }, + { + "epoch": 2.54, + "learning_rate": 1.7003297609233308e-05, + "loss": 5.2658, + "step": 4536 + }, + { + "epoch": 2.54, + "learning_rate": 1.6982687551525145e-05, + "loss": 5.2065, + "step": 4537 + }, + { + "epoch": 2.54, + "learning_rate": 1.6962077493816985e-05, + "loss": 4.8733, + "step": 4538 + }, + { + "epoch": 2.54, + "learning_rate": 1.694146743610882e-05, + "loss": 5.1496, + "step": 4539 + }, + { + "epoch": 2.54, + "learning_rate": 1.6920857378400658e-05, + "loss": 5.1355, + "step": 4540 + }, + { + "epoch": 2.55, + "learning_rate": 1.69002473206925e-05, + "loss": 5.0809, + "step": 4541 + }, + { + "epoch": 2.55, + "learning_rate": 1.6879637262984338e-05, + "loss": 4.6728, + "step": 4542 + }, + { + "epoch": 2.55, + "learning_rate": 1.6859027205276174e-05, + "loss": 5.0352, + "step": 4543 + }, + { + "epoch": 2.55, + "learning_rate": 1.6838417147568014e-05, + "loss": 5.0726, + "step": 4544 + }, + { + "epoch": 2.55, + "learning_rate": 1.681780708985985e-05, + "loss": 4.9712, + "step": 4545 + }, + { + "epoch": 2.55, + "learning_rate": 1.679719703215169e-05, + "loss": 4.802, + "step": 4546 + }, + { + "epoch": 2.55, + "learning_rate": 1.6776586974443528e-05, + "loss": 4.98, + "step": 4547 + }, + { + "epoch": 2.55, + "learning_rate": 1.6755976916735368e-05, + "loss": 5.4892, + "step": 4548 + }, + { + "epoch": 2.55, + "learning_rate": 1.6735366859027208e-05, + "loss": 4.9091, + "step": 4549 + }, + { + "epoch": 2.55, + "learning_rate": 1.6714756801319044e-05, + "loss": 4.9348, + "step": 4550 + }, + { + "epoch": 2.55, + "learning_rate": 1.6694146743610884e-05, + "loss": 4.8223, + "step": 4551 + }, + { + "epoch": 2.55, + "learning_rate": 1.667353668590272e-05, + "loss": 5.0853, + "step": 4552 + }, + { + "epoch": 2.55, + "learning_rate": 1.665292662819456e-05, + "loss": 4.5983, + "step": 4553 + }, + { + "epoch": 2.55, + "learning_rate": 1.6632316570486397e-05, + "loss": 5.2738, + "step": 4554 + }, + { + "epoch": 2.55, + "learning_rate": 1.6611706512778234e-05, + "loss": 4.7383, + "step": 4555 + }, + { + "epoch": 2.55, + "learning_rate": 1.6591096455070074e-05, + "loss": 4.6844, + "step": 4556 + }, + { + "epoch": 2.55, + "learning_rate": 1.6570486397361914e-05, + "loss": 4.8503, + "step": 4557 + }, + { + "epoch": 2.55, + "learning_rate": 1.6549876339653754e-05, + "loss": 5.3885, + "step": 4558 + }, + { + "epoch": 2.56, + "learning_rate": 1.652926628194559e-05, + "loss": 4.5072, + "step": 4559 + }, + { + "epoch": 2.56, + "learning_rate": 1.6508656224237427e-05, + "loss": 4.3443, + "step": 4560 + }, + { + "epoch": 2.56, + "learning_rate": 1.6488046166529267e-05, + "loss": 4.4435, + "step": 4561 + }, + { + "epoch": 2.56, + "learning_rate": 1.6467436108821104e-05, + "loss": 4.3293, + "step": 4562 + }, + { + "epoch": 2.56, + "learning_rate": 1.6446826051112944e-05, + "loss": 4.2233, + "step": 4563 + }, + { + "epoch": 2.56, + "learning_rate": 1.642621599340478e-05, + "loss": 3.9418, + "step": 4564 + }, + { + "epoch": 2.56, + "learning_rate": 1.640560593569662e-05, + "loss": 4.1555, + "step": 4565 + }, + { + "epoch": 2.56, + "learning_rate": 1.638499587798846e-05, + "loss": 3.6353, + "step": 4566 + }, + { + "epoch": 2.56, + "learning_rate": 1.6364385820280297e-05, + "loss": 3.3844, + "step": 4567 + }, + { + "epoch": 2.56, + "learning_rate": 1.6343775762572137e-05, + "loss": 3.3375, + "step": 4568 + }, + { + "epoch": 2.56, + "learning_rate": 1.6323165704863974e-05, + "loss": 5.9592, + "step": 4569 + }, + { + "epoch": 2.56, + "learning_rate": 1.6302555647155814e-05, + "loss": 5.8454, + "step": 4570 + }, + { + "epoch": 2.56, + "learning_rate": 1.628194558944765e-05, + "loss": 5.7155, + "step": 4571 + }, + { + "epoch": 2.56, + "learning_rate": 1.6261335531739487e-05, + "loss": 5.6593, + "step": 4572 + }, + { + "epoch": 2.56, + "learning_rate": 1.624072547403133e-05, + "loss": 5.7745, + "step": 4573 + }, + { + "epoch": 2.56, + "learning_rate": 1.6220115416323167e-05, + "loss": 5.3609, + "step": 4574 + }, + { + "epoch": 2.56, + "learning_rate": 1.6199505358615007e-05, + "loss": 5.0432, + "step": 4575 + }, + { + "epoch": 2.57, + "learning_rate": 1.6178895300906843e-05, + "loss": 4.9757, + "step": 4576 + }, + { + "epoch": 2.57, + "learning_rate": 1.615828524319868e-05, + "loss": 5.0136, + "step": 4577 + }, + { + "epoch": 2.57, + "learning_rate": 1.613767518549052e-05, + "loss": 5.5721, + "step": 4578 + }, + { + "epoch": 2.57, + "learning_rate": 1.6117065127782357e-05, + "loss": 5.3818, + "step": 4579 + }, + { + "epoch": 2.57, + "learning_rate": 1.6096455070074197e-05, + "loss": 5.3473, + "step": 4580 + }, + { + "epoch": 2.57, + "learning_rate": 1.6075845012366037e-05, + "loss": 5.8505, + "step": 4581 + }, + { + "epoch": 2.57, + "learning_rate": 1.6055234954657873e-05, + "loss": 5.5361, + "step": 4582 + }, + { + "epoch": 2.57, + "learning_rate": 1.6034624896949713e-05, + "loss": 5.4926, + "step": 4583 + }, + { + "epoch": 2.57, + "learning_rate": 1.601401483924155e-05, + "loss": 5.2687, + "step": 4584 + }, + { + "epoch": 2.57, + "learning_rate": 1.599340478153339e-05, + "loss": 5.5114, + "step": 4585 + }, + { + "epoch": 2.57, + "learning_rate": 1.5972794723825226e-05, + "loss": 4.9758, + "step": 4586 + }, + { + "epoch": 2.57, + "learning_rate": 1.5952184666117066e-05, + "loss": 5.1631, + "step": 4587 + }, + { + "epoch": 2.57, + "learning_rate": 1.5931574608408903e-05, + "loss": 4.846, + "step": 4588 + }, + { + "epoch": 2.57, + "learning_rate": 1.5910964550700743e-05, + "loss": 5.1495, + "step": 4589 + }, + { + "epoch": 2.57, + "learning_rate": 1.5890354492992583e-05, + "loss": 4.8411, + "step": 4590 + }, + { + "epoch": 2.57, + "learning_rate": 1.586974443528442e-05, + "loss": 5.2266, + "step": 4591 + }, + { + "epoch": 2.57, + "learning_rate": 1.584913437757626e-05, + "loss": 5.239, + "step": 4592 + }, + { + "epoch": 2.57, + "learning_rate": 1.5828524319868096e-05, + "loss": 5.1138, + "step": 4593 + }, + { + "epoch": 2.58, + "learning_rate": 1.5807914262159933e-05, + "loss": 4.7442, + "step": 4594 + }, + { + "epoch": 2.58, + "learning_rate": 1.5787304204451773e-05, + "loss": 4.8861, + "step": 4595 + }, + { + "epoch": 2.58, + "learning_rate": 1.5766694146743613e-05, + "loss": 4.7221, + "step": 4596 + }, + { + "epoch": 2.58, + "learning_rate": 1.5746084089035453e-05, + "loss": 5.0963, + "step": 4597 + }, + { + "epoch": 2.58, + "learning_rate": 1.572547403132729e-05, + "loss": 5.0493, + "step": 4598 + }, + { + "epoch": 2.58, + "learning_rate": 1.5704863973619126e-05, + "loss": 4.9951, + "step": 4599 + }, + { + "epoch": 2.58, + "learning_rate": 1.5684253915910966e-05, + "loss": 5.0208, + "step": 4600 + }, + { + "epoch": 2.58, + "learning_rate": 1.5663643858202802e-05, + "loss": 5.1159, + "step": 4601 + }, + { + "epoch": 2.58, + "learning_rate": 1.5643033800494642e-05, + "loss": 4.9198, + "step": 4602 + }, + { + "epoch": 2.58, + "learning_rate": 1.562242374278648e-05, + "loss": 5.1606, + "step": 4603 + }, + { + "epoch": 2.58, + "learning_rate": 1.560181368507832e-05, + "loss": 4.7225, + "step": 4604 + }, + { + "epoch": 2.58, + "learning_rate": 1.558120362737016e-05, + "loss": 4.9843, + "step": 4605 + }, + { + "epoch": 2.58, + "learning_rate": 1.5560593569661996e-05, + "loss": 4.9399, + "step": 4606 + }, + { + "epoch": 2.58, + "learning_rate": 1.5539983511953836e-05, + "loss": 4.8815, + "step": 4607 + }, + { + "epoch": 2.58, + "learning_rate": 1.5519373454245672e-05, + "loss": 4.6983, + "step": 4608 + }, + { + "epoch": 2.58, + "learning_rate": 1.5498763396537512e-05, + "loss": 4.8812, + "step": 4609 + }, + { + "epoch": 2.58, + "learning_rate": 1.547815333882935e-05, + "loss": 4.8366, + "step": 4610 + }, + { + "epoch": 2.58, + "learning_rate": 1.5457543281121185e-05, + "loss": 4.6595, + "step": 4611 + }, + { + "epoch": 2.59, + "learning_rate": 1.543693322341303e-05, + "loss": 4.3515, + "step": 4612 + }, + { + "epoch": 2.59, + "learning_rate": 1.5416323165704865e-05, + "loss": 4.4063, + "step": 4613 + }, + { + "epoch": 2.59, + "learning_rate": 1.5395713107996705e-05, + "loss": 4.4038, + "step": 4614 + }, + { + "epoch": 2.59, + "learning_rate": 1.5375103050288542e-05, + "loss": 3.2651, + "step": 4615 + }, + { + "epoch": 2.59, + "learning_rate": 1.535449299258038e-05, + "loss": 3.3817, + "step": 4616 + }, + { + "epoch": 2.59, + "learning_rate": 1.533388293487222e-05, + "loss": 3.2292, + "step": 4617 + }, + { + "epoch": 2.59, + "learning_rate": 1.5313272877164055e-05, + "loss": 2.5181, + "step": 4618 + }, + { + "epoch": 2.59, + "learning_rate": 1.5292662819455895e-05, + "loss": 6.1351, + "step": 4619 + }, + { + "epoch": 2.59, + "learning_rate": 1.5272052761747735e-05, + "loss": 6.0788, + "step": 4620 + }, + { + "epoch": 2.59, + "learning_rate": 1.5251442704039573e-05, + "loss": 5.8742, + "step": 4621 + }, + { + "epoch": 2.59, + "learning_rate": 1.5230832646331412e-05, + "loss": 6.1194, + "step": 4622 + }, + { + "epoch": 2.59, + "learning_rate": 1.5210222588623248e-05, + "loss": 5.8599, + "step": 4623 + }, + { + "epoch": 2.59, + "learning_rate": 1.5189612530915087e-05, + "loss": 5.8313, + "step": 4624 + }, + { + "epoch": 2.59, + "learning_rate": 1.5169002473206925e-05, + "loss": 5.8715, + "step": 4625 + }, + { + "epoch": 2.59, + "learning_rate": 1.5148392415498763e-05, + "loss": 5.6002, + "step": 4626 + }, + { + "epoch": 2.59, + "learning_rate": 1.5127782357790602e-05, + "loss": 5.6315, + "step": 4627 + }, + { + "epoch": 2.59, + "learning_rate": 1.5107172300082442e-05, + "loss": 5.2351, + "step": 4628 + }, + { + "epoch": 2.59, + "learning_rate": 1.508656224237428e-05, + "loss": 5.2491, + "step": 4629 + }, + { + "epoch": 2.6, + "learning_rate": 1.5065952184666118e-05, + "loss": 5.543, + "step": 4630 + }, + { + "epoch": 2.6, + "learning_rate": 1.5045342126957956e-05, + "loss": 5.0543, + "step": 4631 + }, + { + "epoch": 2.6, + "learning_rate": 1.5024732069249795e-05, + "loss": 5.4549, + "step": 4632 + }, + { + "epoch": 2.6, + "learning_rate": 1.5004122011541633e-05, + "loss": 5.2722, + "step": 4633 + }, + { + "epoch": 2.6, + "learning_rate": 1.4983511953833471e-05, + "loss": 5.1119, + "step": 4634 + }, + { + "epoch": 2.6, + "learning_rate": 1.4962901896125308e-05, + "loss": 5.3516, + "step": 4635 + }, + { + "epoch": 2.6, + "learning_rate": 1.494229183841715e-05, + "loss": 5.0637, + "step": 4636 + }, + { + "epoch": 2.6, + "learning_rate": 1.4921681780708988e-05, + "loss": 5.0638, + "step": 4637 + }, + { + "epoch": 2.6, + "learning_rate": 1.4901071723000826e-05, + "loss": 5.2384, + "step": 4638 + }, + { + "epoch": 2.6, + "learning_rate": 1.4880461665292665e-05, + "loss": 5.1097, + "step": 4639 + }, + { + "epoch": 2.6, + "learning_rate": 1.4859851607584501e-05, + "loss": 4.9293, + "step": 4640 + }, + { + "epoch": 2.6, + "learning_rate": 1.483924154987634e-05, + "loss": 5.1256, + "step": 4641 + }, + { + "epoch": 2.6, + "learning_rate": 1.4818631492168178e-05, + "loss": 5.0647, + "step": 4642 + }, + { + "epoch": 2.6, + "learning_rate": 1.4798021434460016e-05, + "loss": 5.2194, + "step": 4643 + }, + { + "epoch": 2.6, + "learning_rate": 1.4777411376751856e-05, + "loss": 5.2367, + "step": 4644 + }, + { + "epoch": 2.6, + "learning_rate": 1.4756801319043694e-05, + "loss": 4.9294, + "step": 4645 + }, + { + "epoch": 2.6, + "learning_rate": 1.4736191261335533e-05, + "loss": 5.2209, + "step": 4646 + }, + { + "epoch": 2.6, + "learning_rate": 1.4715581203627371e-05, + "loss": 5.3206, + "step": 4647 + }, + { + "epoch": 2.61, + "learning_rate": 1.469497114591921e-05, + "loss": 4.9015, + "step": 4648 + }, + { + "epoch": 2.61, + "learning_rate": 1.4674361088211048e-05, + "loss": 5.2153, + "step": 4649 + }, + { + "epoch": 2.61, + "learning_rate": 1.4653751030502886e-05, + "loss": 4.9614, + "step": 4650 + }, + { + "epoch": 2.61, + "learning_rate": 1.4633140972794724e-05, + "loss": 5.0746, + "step": 4651 + }, + { + "epoch": 2.61, + "learning_rate": 1.4612530915086564e-05, + "loss": 4.8597, + "step": 4652 + }, + { + "epoch": 2.61, + "learning_rate": 1.4591920857378402e-05, + "loss": 5.2064, + "step": 4653 + }, + { + "epoch": 2.61, + "learning_rate": 1.457131079967024e-05, + "loss": 5.0643, + "step": 4654 + }, + { + "epoch": 2.61, + "learning_rate": 1.4550700741962079e-05, + "loss": 5.009, + "step": 4655 + }, + { + "epoch": 2.61, + "learning_rate": 1.4530090684253916e-05, + "loss": 4.868, + "step": 4656 + }, + { + "epoch": 2.61, + "learning_rate": 1.4509480626545754e-05, + "loss": 4.8306, + "step": 4657 + }, + { + "epoch": 2.61, + "learning_rate": 1.4488870568837592e-05, + "loss": 4.8185, + "step": 4658 + }, + { + "epoch": 2.61, + "learning_rate": 1.446826051112943e-05, + "loss": 4.6676, + "step": 4659 + }, + { + "epoch": 2.61, + "learning_rate": 1.4447650453421272e-05, + "loss": 4.3897, + "step": 4660 + }, + { + "epoch": 2.61, + "learning_rate": 1.4427040395713109e-05, + "loss": 4.4645, + "step": 4661 + }, + { + "epoch": 2.61, + "learning_rate": 1.4406430338004947e-05, + "loss": 4.4046, + "step": 4662 + }, + { + "epoch": 2.61, + "learning_rate": 1.4385820280296785e-05, + "loss": 3.9317, + "step": 4663 + }, + { + "epoch": 2.61, + "learning_rate": 1.4365210222588624e-05, + "loss": 3.8195, + "step": 4664 + }, + { + "epoch": 2.61, + "learning_rate": 1.4344600164880462e-05, + "loss": 3.969, + "step": 4665 + }, + { + "epoch": 2.62, + "learning_rate": 1.43239901071723e-05, + "loss": 3.499, + "step": 4666 + }, + { + "epoch": 2.62, + "learning_rate": 1.4303380049464139e-05, + "loss": 3.862, + "step": 4667 + }, + { + "epoch": 2.62, + "learning_rate": 1.4282769991755979e-05, + "loss": 2.8942, + "step": 4668 + }, + { + "epoch": 2.62, + "learning_rate": 1.4262159934047817e-05, + "loss": 5.429, + "step": 4669 + }, + { + "epoch": 2.62, + "learning_rate": 1.4241549876339655e-05, + "loss": 5.6371, + "step": 4670 + }, + { + "epoch": 2.62, + "learning_rate": 1.4220939818631493e-05, + "loss": 5.3697, + "step": 4671 + }, + { + "epoch": 2.62, + "learning_rate": 1.4200329760923332e-05, + "loss": 5.306, + "step": 4672 + }, + { + "epoch": 2.62, + "learning_rate": 1.4179719703215168e-05, + "loss": 5.2006, + "step": 4673 + }, + { + "epoch": 2.62, + "learning_rate": 1.4159109645507007e-05, + "loss": 5.3761, + "step": 4674 + }, + { + "epoch": 2.62, + "learning_rate": 1.4138499587798845e-05, + "loss": 5.2137, + "step": 4675 + }, + { + "epoch": 2.62, + "learning_rate": 1.4117889530090687e-05, + "loss": 5.1829, + "step": 4676 + }, + { + "epoch": 2.62, + "learning_rate": 1.4097279472382525e-05, + "loss": 4.8796, + "step": 4677 + }, + { + "epoch": 2.62, + "learning_rate": 1.4076669414674362e-05, + "loss": 5.3649, + "step": 4678 + }, + { + "epoch": 2.62, + "learning_rate": 1.40560593569662e-05, + "loss": 5.3002, + "step": 4679 + }, + { + "epoch": 2.62, + "learning_rate": 1.4035449299258038e-05, + "loss": 5.4689, + "step": 4680 + }, + { + "epoch": 2.62, + "learning_rate": 1.4014839241549876e-05, + "loss": 5.1358, + "step": 4681 + }, + { + "epoch": 2.62, + "learning_rate": 1.3994229183841715e-05, + "loss": 4.8624, + "step": 4682 + }, + { + "epoch": 2.62, + "learning_rate": 1.3973619126133553e-05, + "loss": 5.409, + "step": 4683 + }, + { + "epoch": 2.63, + "learning_rate": 1.3953009068425393e-05, + "loss": 4.975, + "step": 4684 + }, + { + "epoch": 2.63, + "learning_rate": 1.3932399010717231e-05, + "loss": 5.449, + "step": 4685 + }, + { + "epoch": 2.63, + "learning_rate": 1.391178895300907e-05, + "loss": 5.1602, + "step": 4686 + }, + { + "epoch": 2.63, + "learning_rate": 1.3891178895300908e-05, + "loss": 5.2282, + "step": 4687 + }, + { + "epoch": 2.63, + "learning_rate": 1.3870568837592746e-05, + "loss": 4.8521, + "step": 4688 + }, + { + "epoch": 2.63, + "learning_rate": 1.3849958779884584e-05, + "loss": 4.8571, + "step": 4689 + }, + { + "epoch": 2.63, + "learning_rate": 1.3829348722176421e-05, + "loss": 4.9942, + "step": 4690 + }, + { + "epoch": 2.63, + "learning_rate": 1.380873866446826e-05, + "loss": 5.1394, + "step": 4691 + }, + { + "epoch": 2.63, + "learning_rate": 1.3788128606760101e-05, + "loss": 5.019, + "step": 4692 + }, + { + "epoch": 2.63, + "learning_rate": 1.376751854905194e-05, + "loss": 4.9152, + "step": 4693 + }, + { + "epoch": 2.63, + "learning_rate": 1.3746908491343776e-05, + "loss": 5.1694, + "step": 4694 + }, + { + "epoch": 2.63, + "learning_rate": 1.3726298433635614e-05, + "loss": 5.2256, + "step": 4695 + }, + { + "epoch": 2.63, + "learning_rate": 1.3705688375927453e-05, + "loss": 5.0988, + "step": 4696 + }, + { + "epoch": 2.63, + "learning_rate": 1.3685078318219291e-05, + "loss": 5.0182, + "step": 4697 + }, + { + "epoch": 2.63, + "learning_rate": 1.366446826051113e-05, + "loss": 4.9901, + "step": 4698 + }, + { + "epoch": 2.63, + "learning_rate": 1.3643858202802967e-05, + "loss": 5.3336, + "step": 4699 + }, + { + "epoch": 2.63, + "learning_rate": 1.3623248145094807e-05, + "loss": 4.7714, + "step": 4700 + }, + { + "epoch": 2.64, + "learning_rate": 1.3602638087386646e-05, + "loss": 5.1735, + "step": 4701 + }, + { + "epoch": 2.64, + "learning_rate": 1.3582028029678484e-05, + "loss": 4.7007, + "step": 4702 + }, + { + "epoch": 2.64, + "learning_rate": 1.3561417971970322e-05, + "loss": 4.927, + "step": 4703 + }, + { + "epoch": 2.64, + "learning_rate": 1.354080791426216e-05, + "loss": 4.9655, + "step": 4704 + }, + { + "epoch": 2.64, + "learning_rate": 1.3520197856553999e-05, + "loss": 4.7445, + "step": 4705 + }, + { + "epoch": 2.64, + "learning_rate": 1.3499587798845836e-05, + "loss": 4.9622, + "step": 4706 + }, + { + "epoch": 2.64, + "learning_rate": 1.3478977741137677e-05, + "loss": 4.7726, + "step": 4707 + }, + { + "epoch": 2.64, + "learning_rate": 1.3458367683429516e-05, + "loss": 4.4683, + "step": 4708 + }, + { + "epoch": 2.64, + "learning_rate": 1.3437757625721354e-05, + "loss": 4.6015, + "step": 4709 + }, + { + "epoch": 2.64, + "learning_rate": 1.3417147568013192e-05, + "loss": 4.7519, + "step": 4710 + }, + { + "epoch": 2.64, + "learning_rate": 1.3396537510305029e-05, + "loss": 4.6081, + "step": 4711 + }, + { + "epoch": 2.64, + "learning_rate": 1.3375927452596867e-05, + "loss": 4.6676, + "step": 4712 + }, + { + "epoch": 2.64, + "learning_rate": 1.3355317394888705e-05, + "loss": 3.8955, + "step": 4713 + }, + { + "epoch": 2.64, + "learning_rate": 1.3334707337180544e-05, + "loss": 3.8866, + "step": 4714 + }, + { + "epoch": 2.64, + "learning_rate": 1.3314097279472385e-05, + "loss": 4.2565, + "step": 4715 + }, + { + "epoch": 2.64, + "learning_rate": 1.3293487221764222e-05, + "loss": 4.0587, + "step": 4716 + }, + { + "epoch": 2.64, + "learning_rate": 1.327287716405606e-05, + "loss": 4.0142, + "step": 4717 + }, + { + "epoch": 2.64, + "learning_rate": 1.3252267106347899e-05, + "loss": 3.4287, + "step": 4718 + }, + { + "epoch": 2.65, + "learning_rate": 1.3231657048639737e-05, + "loss": 5.7851, + "step": 4719 + }, + { + "epoch": 2.65, + "learning_rate": 1.3211046990931575e-05, + "loss": 5.5089, + "step": 4720 + }, + { + "epoch": 2.65, + "learning_rate": 1.3190436933223413e-05, + "loss": 5.5559, + "step": 4721 + }, + { + "epoch": 2.65, + "learning_rate": 1.3169826875515252e-05, + "loss": 5.6857, + "step": 4722 + }, + { + "epoch": 2.65, + "learning_rate": 1.3149216817807092e-05, + "loss": 5.345, + "step": 4723 + }, + { + "epoch": 2.65, + "learning_rate": 1.312860676009893e-05, + "loss": 5.5466, + "step": 4724 + }, + { + "epoch": 2.65, + "learning_rate": 1.3107996702390768e-05, + "loss": 5.2777, + "step": 4725 + }, + { + "epoch": 2.65, + "learning_rate": 1.3087386644682607e-05, + "loss": 5.1564, + "step": 4726 + }, + { + "epoch": 2.65, + "learning_rate": 1.3066776586974445e-05, + "loss": 5.2438, + "step": 4727 + }, + { + "epoch": 2.65, + "learning_rate": 1.3046166529266281e-05, + "loss": 4.8076, + "step": 4728 + }, + { + "epoch": 2.65, + "learning_rate": 1.302555647155812e-05, + "loss": 5.2649, + "step": 4729 + }, + { + "epoch": 2.65, + "learning_rate": 1.3004946413849958e-05, + "loss": 5.2452, + "step": 4730 + }, + { + "epoch": 2.65, + "learning_rate": 1.29843363561418e-05, + "loss": 5.2303, + "step": 4731 + }, + { + "epoch": 2.65, + "learning_rate": 1.2963726298433636e-05, + "loss": 5.5413, + "step": 4732 + }, + { + "epoch": 2.65, + "learning_rate": 1.2943116240725475e-05, + "loss": 5.0739, + "step": 4733 + }, + { + "epoch": 2.65, + "learning_rate": 1.2922506183017313e-05, + "loss": 5.1729, + "step": 4734 + }, + { + "epoch": 2.65, + "learning_rate": 1.2901896125309151e-05, + "loss": 5.1729, + "step": 4735 + }, + { + "epoch": 2.65, + "learning_rate": 1.288128606760099e-05, + "loss": 5.0421, + "step": 4736 + }, + { + "epoch": 2.66, + "learning_rate": 1.2860676009892828e-05, + "loss": 5.0838, + "step": 4737 + }, + { + "epoch": 2.66, + "learning_rate": 1.2840065952184666e-05, + "loss": 5.0516, + "step": 4738 + }, + { + "epoch": 2.66, + "learning_rate": 1.2819455894476506e-05, + "loss": 5.1414, + "step": 4739 + }, + { + "epoch": 2.66, + "learning_rate": 1.2798845836768344e-05, + "loss": 4.9941, + "step": 4740 + }, + { + "epoch": 2.66, + "learning_rate": 1.2778235779060183e-05, + "loss": 4.9446, + "step": 4741 + }, + { + "epoch": 2.66, + "learning_rate": 1.2757625721352021e-05, + "loss": 4.8422, + "step": 4742 + }, + { + "epoch": 2.66, + "learning_rate": 1.273701566364386e-05, + "loss": 5.1293, + "step": 4743 + }, + { + "epoch": 2.66, + "learning_rate": 1.2716405605935696e-05, + "loss": 4.8858, + "step": 4744 + }, + { + "epoch": 2.66, + "learning_rate": 1.2695795548227534e-05, + "loss": 4.9495, + "step": 4745 + }, + { + "epoch": 2.66, + "learning_rate": 1.2675185490519373e-05, + "loss": 4.9746, + "step": 4746 + }, + { + "epoch": 2.66, + "learning_rate": 1.2654575432811214e-05, + "loss": 5.0871, + "step": 4747 + }, + { + "epoch": 2.66, + "learning_rate": 1.2633965375103053e-05, + "loss": 4.8242, + "step": 4748 + }, + { + "epoch": 2.66, + "learning_rate": 1.2613355317394889e-05, + "loss": 4.769, + "step": 4749 + }, + { + "epoch": 2.66, + "learning_rate": 1.2592745259686727e-05, + "loss": 5.2947, + "step": 4750 + }, + { + "epoch": 2.66, + "learning_rate": 1.2572135201978566e-05, + "loss": 5.0034, + "step": 4751 + }, + { + "epoch": 2.66, + "learning_rate": 1.2551525144270404e-05, + "loss": 4.7879, + "step": 4752 + }, + { + "epoch": 2.66, + "learning_rate": 1.2530915086562242e-05, + "loss": 4.7251, + "step": 4753 + }, + { + "epoch": 2.66, + "learning_rate": 1.251030502885408e-05, + "loss": 4.8182, + "step": 4754 + }, + { + "epoch": 2.67, + "learning_rate": 1.2489694971145919e-05, + "loss": 4.9844, + "step": 4755 + }, + { + "epoch": 2.67, + "learning_rate": 1.2469084913437757e-05, + "loss": 4.7295, + "step": 4756 + }, + { + "epoch": 2.67, + "learning_rate": 1.2448474855729597e-05, + "loss": 4.667, + "step": 4757 + }, + { + "epoch": 2.67, + "learning_rate": 1.2427864798021435e-05, + "loss": 4.9337, + "step": 4758 + }, + { + "epoch": 2.67, + "learning_rate": 1.2407254740313274e-05, + "loss": 4.8693, + "step": 4759 + }, + { + "epoch": 2.67, + "learning_rate": 1.2386644682605112e-05, + "loss": 4.8661, + "step": 4760 + }, + { + "epoch": 2.67, + "learning_rate": 1.236603462489695e-05, + "loss": 4.5056, + "step": 4761 + }, + { + "epoch": 2.67, + "learning_rate": 1.2345424567188789e-05, + "loss": 4.0508, + "step": 4762 + }, + { + "epoch": 2.67, + "learning_rate": 1.2324814509480627e-05, + "loss": 4.1134, + "step": 4763 + }, + { + "epoch": 2.67, + "learning_rate": 1.2304204451772465e-05, + "loss": 4.1416, + "step": 4764 + }, + { + "epoch": 2.67, + "learning_rate": 1.2283594394064305e-05, + "loss": 3.9043, + "step": 4765 + }, + { + "epoch": 2.67, + "learning_rate": 1.2262984336356142e-05, + "loss": 3.8532, + "step": 4766 + }, + { + "epoch": 2.67, + "learning_rate": 1.224237427864798e-05, + "loss": 3.521, + "step": 4767 + }, + { + "epoch": 2.67, + "learning_rate": 1.2221764220939818e-05, + "loss": 2.8608, + "step": 4768 + }, + { + "epoch": 2.67, + "learning_rate": 1.2201154163231658e-05, + "loss": 5.7447, + "step": 4769 + }, + { + "epoch": 2.67, + "learning_rate": 1.2180544105523497e-05, + "loss": 5.4452, + "step": 4770 + }, + { + "epoch": 2.67, + "learning_rate": 1.2159934047815335e-05, + "loss": 5.6244, + "step": 4771 + }, + { + "epoch": 2.67, + "learning_rate": 1.2139323990107172e-05, + "loss": 5.4044, + "step": 4772 + }, + { + "epoch": 2.68, + "learning_rate": 1.2118713932399012e-05, + "loss": 5.3677, + "step": 4773 + }, + { + "epoch": 2.68, + "learning_rate": 1.209810387469085e-05, + "loss": 5.3471, + "step": 4774 + }, + { + "epoch": 2.68, + "learning_rate": 1.2077493816982688e-05, + "loss": 5.1157, + "step": 4775 + }, + { + "epoch": 2.68, + "learning_rate": 1.2056883759274527e-05, + "loss": 5.0118, + "step": 4776 + }, + { + "epoch": 2.68, + "learning_rate": 1.2036273701566365e-05, + "loss": 5.0719, + "step": 4777 + }, + { + "epoch": 2.68, + "learning_rate": 1.2015663643858203e-05, + "loss": 4.6052, + "step": 4778 + }, + { + "epoch": 2.68, + "learning_rate": 1.1995053586150041e-05, + "loss": 5.53, + "step": 4779 + }, + { + "epoch": 2.68, + "learning_rate": 1.197444352844188e-05, + "loss": 5.1533, + "step": 4780 + }, + { + "epoch": 2.68, + "learning_rate": 1.195383347073372e-05, + "loss": 5.1219, + "step": 4781 + }, + { + "epoch": 2.68, + "learning_rate": 1.1933223413025556e-05, + "loss": 5.292, + "step": 4782 + }, + { + "epoch": 2.68, + "learning_rate": 1.1912613355317395e-05, + "loss": 4.8821, + "step": 4783 + }, + { + "epoch": 2.68, + "learning_rate": 1.1892003297609233e-05, + "loss": 4.7307, + "step": 4784 + }, + { + "epoch": 2.68, + "learning_rate": 1.1892003297609233e-05, + "loss": 5.5673, + "step": 4785 + }, + { + "epoch": 2.68, + "learning_rate": 1.1871393239901073e-05, + "loss": 4.9396, + "step": 4786 + }, + { + "epoch": 2.68, + "learning_rate": 1.1850783182192911e-05, + "loss": 5.1287, + "step": 4787 + }, + { + "epoch": 2.68, + "learning_rate": 1.183017312448475e-05, + "loss": 5.0082, + "step": 4788 + }, + { + "epoch": 2.68, + "learning_rate": 1.1809563066776586e-05, + "loss": 4.826, + "step": 4789 + }, + { + "epoch": 2.68, + "learning_rate": 1.1788953009068426e-05, + "loss": 5.0319, + "step": 4790 + }, + { + "epoch": 2.69, + "learning_rate": 1.1768342951360264e-05, + "loss": 4.9444, + "step": 4791 + }, + { + "epoch": 2.69, + "learning_rate": 1.1747732893652103e-05, + "loss": 4.9291, + "step": 4792 + }, + { + "epoch": 2.69, + "learning_rate": 1.1727122835943941e-05, + "loss": 4.8865, + "step": 4793 + }, + { + "epoch": 2.69, + "learning_rate": 1.170651277823578e-05, + "loss": 5.4163, + "step": 4794 + }, + { + "epoch": 2.69, + "learning_rate": 1.1685902720527618e-05, + "loss": 5.1865, + "step": 4795 + }, + { + "epoch": 2.69, + "learning_rate": 1.1665292662819456e-05, + "loss": 4.961, + "step": 4796 + }, + { + "epoch": 2.69, + "learning_rate": 1.1644682605111294e-05, + "loss": 4.9457, + "step": 4797 + }, + { + "epoch": 2.69, + "learning_rate": 1.1624072547403134e-05, + "loss": 5.0736, + "step": 4798 + }, + { + "epoch": 2.69, + "learning_rate": 1.1603462489694972e-05, + "loss": 4.8695, + "step": 4799 + }, + { + "epoch": 2.69, + "learning_rate": 1.1582852431986809e-05, + "loss": 5.1756, + "step": 4800 + }, + { + "epoch": 2.69, + "learning_rate": 1.1562242374278647e-05, + "loss": 4.8373, + "step": 4801 + }, + { + "epoch": 2.69, + "learning_rate": 1.1541632316570487e-05, + "loss": 4.8387, + "step": 4802 + }, + { + "epoch": 2.69, + "learning_rate": 1.1521022258862326e-05, + "loss": 4.5614, + "step": 4803 + }, + { + "epoch": 2.69, + "learning_rate": 1.1500412201154164e-05, + "loss": 4.8019, + "step": 4804 + }, + { + "epoch": 2.69, + "learning_rate": 1.1479802143446002e-05, + "loss": 5.3571, + "step": 4805 + }, + { + "epoch": 2.69, + "learning_rate": 1.145919208573784e-05, + "loss": 5.1733, + "step": 4806 + }, + { + "epoch": 2.69, + "learning_rate": 1.1438582028029679e-05, + "loss": 4.7448, + "step": 4807 + }, + { + "epoch": 2.7, + "learning_rate": 1.1417971970321517e-05, + "loss": 4.3336, + "step": 4808 + }, + { + "epoch": 2.7, + "learning_rate": 1.1397361912613355e-05, + "loss": 4.5375, + "step": 4809 + }, + { + "epoch": 2.7, + "learning_rate": 1.1376751854905195e-05, + "loss": 4.7866, + "step": 4810 + }, + { + "epoch": 2.7, + "learning_rate": 1.1356141797197032e-05, + "loss": 4.5435, + "step": 4811 + }, + { + "epoch": 2.7, + "learning_rate": 1.133553173948887e-05, + "loss": 4.4659, + "step": 4812 + }, + { + "epoch": 2.7, + "learning_rate": 1.131492168178071e-05, + "loss": 4.2504, + "step": 4813 + }, + { + "epoch": 2.7, + "learning_rate": 1.1294311624072549e-05, + "loss": 4.0033, + "step": 4814 + }, + { + "epoch": 2.7, + "learning_rate": 1.1273701566364387e-05, + "loss": 4.1132, + "step": 4815 + }, + { + "epoch": 2.7, + "learning_rate": 1.1253091508656225e-05, + "loss": 3.8681, + "step": 4816 + }, + { + "epoch": 2.7, + "learning_rate": 1.1232481450948064e-05, + "loss": 3.3735, + "step": 4817 + }, + { + "epoch": 2.7, + "learning_rate": 1.1211871393239902e-05, + "loss": 2.7238, + "step": 4818 + }, + { + "epoch": 2.7, + "learning_rate": 1.119126133553174e-05, + "loss": 5.5376, + "step": 4819 + }, + { + "epoch": 2.7, + "learning_rate": 1.1170651277823578e-05, + "loss": 5.3701, + "step": 4820 + }, + { + "epoch": 2.7, + "learning_rate": 1.1150041220115417e-05, + "loss": 5.4754, + "step": 4821 + }, + { + "epoch": 2.7, + "learning_rate": 1.1129431162407255e-05, + "loss": 5.3637, + "step": 4822 + }, + { + "epoch": 2.7, + "learning_rate": 1.1108821104699093e-05, + "loss": 5.2137, + "step": 4823 + }, + { + "epoch": 2.7, + "learning_rate": 1.1088211046990932e-05, + "loss": 5.0932, + "step": 4824 + }, + { + "epoch": 2.7, + "learning_rate": 1.1067600989282772e-05, + "loss": 5.2597, + "step": 4825 + }, + { + "epoch": 2.71, + "learning_rate": 1.104699093157461e-05, + "loss": 5.155, + "step": 4826 + }, + { + "epoch": 2.71, + "learning_rate": 1.1026380873866446e-05, + "loss": 5.5922, + "step": 4827 + }, + { + "epoch": 2.71, + "learning_rate": 1.1005770816158285e-05, + "loss": 5.058, + "step": 4828 + }, + { + "epoch": 2.71, + "learning_rate": 1.0985160758450125e-05, + "loss": 4.8975, + "step": 4829 + }, + { + "epoch": 2.71, + "learning_rate": 1.0964550700741963e-05, + "loss": 5.2986, + "step": 4830 + }, + { + "epoch": 2.71, + "learning_rate": 1.0943940643033801e-05, + "loss": 4.9683, + "step": 4831 + }, + { + "epoch": 2.71, + "learning_rate": 1.092333058532564e-05, + "loss": 5.1106, + "step": 4832 + }, + { + "epoch": 2.71, + "learning_rate": 1.0902720527617478e-05, + "loss": 4.9802, + "step": 4833 + }, + { + "epoch": 2.71, + "learning_rate": 1.0882110469909316e-05, + "loss": 5.067, + "step": 4834 + }, + { + "epoch": 2.71, + "learning_rate": 1.0861500412201155e-05, + "loss": 4.8896, + "step": 4835 + }, + { + "epoch": 2.71, + "learning_rate": 1.0840890354492993e-05, + "loss": 5.0764, + "step": 4836 + }, + { + "epoch": 2.71, + "learning_rate": 1.0820280296784833e-05, + "loss": 4.9161, + "step": 4837 + }, + { + "epoch": 2.71, + "learning_rate": 1.079967023907667e-05, + "loss": 5.2063, + "step": 4838 + }, + { + "epoch": 2.71, + "learning_rate": 1.0779060181368508e-05, + "loss": 4.8832, + "step": 4839 + }, + { + "epoch": 2.71, + "learning_rate": 1.0758450123660346e-05, + "loss": 5.2586, + "step": 4840 + }, + { + "epoch": 2.71, + "learning_rate": 1.0737840065952186e-05, + "loss": 5.1788, + "step": 4841 + }, + { + "epoch": 2.71, + "learning_rate": 1.0717230008244024e-05, + "loss": 5.0235, + "step": 4842 + }, + { + "epoch": 2.71, + "learning_rate": 1.0696619950535863e-05, + "loss": 4.8517, + "step": 4843 + }, + { + "epoch": 2.72, + "learning_rate": 1.06760098928277e-05, + "loss": 4.7311, + "step": 4844 + }, + { + "epoch": 2.72, + "learning_rate": 1.065539983511954e-05, + "loss": 5.1198, + "step": 4845 + }, + { + "epoch": 2.72, + "learning_rate": 1.0634789777411378e-05, + "loss": 4.7417, + "step": 4846 + }, + { + "epoch": 2.72, + "learning_rate": 1.0614179719703216e-05, + "loss": 4.691, + "step": 4847 + }, + { + "epoch": 2.72, + "learning_rate": 1.0593569661995054e-05, + "loss": 4.8982, + "step": 4848 + }, + { + "epoch": 2.72, + "learning_rate": 1.0572959604286892e-05, + "loss": 4.9373, + "step": 4849 + }, + { + "epoch": 2.72, + "learning_rate": 1.055234954657873e-05, + "loss": 5.4365, + "step": 4850 + }, + { + "epoch": 2.72, + "learning_rate": 1.0531739488870569e-05, + "loss": 4.7536, + "step": 4851 + }, + { + "epoch": 2.72, + "learning_rate": 1.0511129431162407e-05, + "loss": 4.9543, + "step": 4852 + }, + { + "epoch": 2.72, + "learning_rate": 1.0490519373454247e-05, + "loss": 4.8326, + "step": 4853 + }, + { + "epoch": 2.72, + "learning_rate": 1.0469909315746086e-05, + "loss": 5.4828, + "step": 4854 + }, + { + "epoch": 2.72, + "learning_rate": 1.0449299258037922e-05, + "loss": 5.0004, + "step": 4855 + }, + { + "epoch": 2.72, + "learning_rate": 1.042868920032976e-05, + "loss": 4.886, + "step": 4856 + }, + { + "epoch": 2.72, + "learning_rate": 1.04080791426216e-05, + "loss": 4.5349, + "step": 4857 + }, + { + "epoch": 2.72, + "learning_rate": 1.0387469084913439e-05, + "loss": 4.814, + "step": 4858 + }, + { + "epoch": 2.72, + "learning_rate": 1.0366859027205277e-05, + "loss": 4.6566, + "step": 4859 + }, + { + "epoch": 2.72, + "learning_rate": 1.0346248969497115e-05, + "loss": 4.8235, + "step": 4860 + }, + { + "epoch": 2.72, + "learning_rate": 1.0325638911788954e-05, + "loss": 4.4494, + "step": 4861 + }, + { + "epoch": 2.73, + "learning_rate": 1.0305028854080792e-05, + "loss": 4.6863, + "step": 4862 + }, + { + "epoch": 2.73, + "learning_rate": 1.028441879637263e-05, + "loss": 4.092, + "step": 4863 + }, + { + "epoch": 2.73, + "learning_rate": 1.0263808738664469e-05, + "loss": 4.0433, + "step": 4864 + }, + { + "epoch": 2.73, + "learning_rate": 1.0243198680956307e-05, + "loss": 3.8307, + "step": 4865 + }, + { + "epoch": 2.73, + "learning_rate": 1.0222588623248145e-05, + "loss": 3.6246, + "step": 4866 + }, + { + "epoch": 2.73, + "learning_rate": 1.0201978565539983e-05, + "loss": 2.9351, + "step": 4867 + }, + { + "epoch": 2.73, + "learning_rate": 1.0181368507831822e-05, + "loss": 2.486, + "step": 4868 + }, + { + "epoch": 2.73, + "learning_rate": 1.0160758450123662e-05, + "loss": 5.9463, + "step": 4869 + }, + { + "epoch": 2.73, + "learning_rate": 1.01401483924155e-05, + "loss": 5.6454, + "step": 4870 + }, + { + "epoch": 2.73, + "learning_rate": 1.0119538334707337e-05, + "loss": 5.6587, + "step": 4871 + }, + { + "epoch": 2.73, + "learning_rate": 1.0098928276999175e-05, + "loss": 5.4793, + "step": 4872 + }, + { + "epoch": 2.73, + "learning_rate": 1.0078318219291015e-05, + "loss": 5.6331, + "step": 4873 + }, + { + "epoch": 2.73, + "learning_rate": 1.0057708161582853e-05, + "loss": 5.4195, + "step": 4874 + }, + { + "epoch": 2.73, + "learning_rate": 1.0037098103874692e-05, + "loss": 5.4622, + "step": 4875 + }, + { + "epoch": 2.73, + "learning_rate": 1.001648804616653e-05, + "loss": 5.2223, + "step": 4876 + }, + { + "epoch": 2.73, + "learning_rate": 9.995877988458368e-06, + "loss": 5.295, + "step": 4877 + }, + { + "epoch": 2.73, + "learning_rate": 9.975267930750206e-06, + "loss": 5.1182, + "step": 4878 + }, + { + "epoch": 2.73, + "learning_rate": 9.954657873042045e-06, + "loss": 4.8489, + "step": 4879 + }, + { + "epoch": 2.74, + "learning_rate": 9.934047815333883e-06, + "loss": 5.3366, + "step": 4880 + }, + { + "epoch": 2.74, + "learning_rate": 9.913437757625723e-06, + "loss": 4.8296, + "step": 4881 + }, + { + "epoch": 2.74, + "learning_rate": 9.89282769991756e-06, + "loss": 5.1767, + "step": 4882 + }, + { + "epoch": 2.74, + "learning_rate": 9.872217642209398e-06, + "loss": 5.2692, + "step": 4883 + }, + { + "epoch": 2.74, + "learning_rate": 9.851607584501236e-06, + "loss": 4.968, + "step": 4884 + }, + { + "epoch": 2.74, + "learning_rate": 9.830997526793076e-06, + "loss": 5.5031, + "step": 4885 + }, + { + "epoch": 2.74, + "learning_rate": 9.810387469084915e-06, + "loss": 5.0568, + "step": 4886 + }, + { + "epoch": 2.74, + "learning_rate": 9.789777411376753e-06, + "loss": 5.263, + "step": 4887 + }, + { + "epoch": 2.74, + "learning_rate": 9.76916735366859e-06, + "loss": 4.8673, + "step": 4888 + }, + { + "epoch": 2.74, + "learning_rate": 9.74855729596043e-06, + "loss": 5.1399, + "step": 4889 + }, + { + "epoch": 2.74, + "learning_rate": 9.727947238252268e-06, + "loss": 4.9183, + "step": 4890 + }, + { + "epoch": 2.74, + "learning_rate": 9.707337180544106e-06, + "loss": 5.1612, + "step": 4891 + }, + { + "epoch": 2.74, + "learning_rate": 9.686727122835944e-06, + "loss": 4.9598, + "step": 4892 + }, + { + "epoch": 2.74, + "learning_rate": 9.666117065127783e-06, + "loss": 5.0708, + "step": 4893 + }, + { + "epoch": 2.74, + "learning_rate": 9.645507007419621e-06, + "loss": 4.799, + "step": 4894 + }, + { + "epoch": 2.74, + "learning_rate": 9.62489694971146e-06, + "loss": 5.0378, + "step": 4895 + }, + { + "epoch": 2.74, + "learning_rate": 9.604286892003297e-06, + "loss": 4.8376, + "step": 4896 + }, + { + "epoch": 2.74, + "learning_rate": 9.583676834295137e-06, + "loss": 4.9034, + "step": 4897 + }, + { + "epoch": 2.75, + "learning_rate": 9.563066776586976e-06, + "loss": 5.1397, + "step": 4898 + }, + { + "epoch": 2.75, + "learning_rate": 9.542456718878812e-06, + "loss": 4.8653, + "step": 4899 + }, + { + "epoch": 2.75, + "learning_rate": 9.52184666117065e-06, + "loss": 4.7822, + "step": 4900 + }, + { + "epoch": 2.75, + "learning_rate": 9.50123660346249e-06, + "loss": 4.7661, + "step": 4901 + }, + { + "epoch": 2.75, + "learning_rate": 9.480626545754329e-06, + "loss": 5.0049, + "step": 4902 + }, + { + "epoch": 2.75, + "learning_rate": 9.460016488046167e-06, + "loss": 4.8534, + "step": 4903 + }, + { + "epoch": 2.75, + "learning_rate": 9.439406430338006e-06, + "loss": 4.6969, + "step": 4904 + }, + { + "epoch": 2.75, + "learning_rate": 9.418796372629844e-06, + "loss": 4.6595, + "step": 4905 + }, + { + "epoch": 2.75, + "learning_rate": 9.398186314921682e-06, + "loss": 4.5619, + "step": 4906 + }, + { + "epoch": 2.75, + "learning_rate": 9.37757625721352e-06, + "loss": 4.5803, + "step": 4907 + }, + { + "epoch": 2.75, + "learning_rate": 9.356966199505359e-06, + "loss": 4.9269, + "step": 4908 + }, + { + "epoch": 2.75, + "learning_rate": 9.336356141797197e-06, + "loss": 4.8006, + "step": 4909 + }, + { + "epoch": 2.75, + "learning_rate": 9.315746084089035e-06, + "loss": 4.691, + "step": 4910 + }, + { + "epoch": 2.75, + "learning_rate": 9.295136026380874e-06, + "loss": 4.4799, + "step": 4911 + }, + { + "epoch": 2.75, + "learning_rate": 9.274525968672712e-06, + "loss": 4.2392, + "step": 4912 + }, + { + "epoch": 2.75, + "learning_rate": 9.253915910964552e-06, + "loss": 4.1713, + "step": 4913 + }, + { + "epoch": 2.75, + "learning_rate": 9.23330585325639e-06, + "loss": 3.8492, + "step": 4914 + }, + { + "epoch": 2.76, + "learning_rate": 9.212695795548227e-06, + "loss": 3.9258, + "step": 4915 + }, + { + "epoch": 2.76, + "learning_rate": 9.192085737840065e-06, + "loss": 3.633, + "step": 4916 + }, + { + "epoch": 2.76, + "learning_rate": 9.171475680131905e-06, + "loss": 3.5011, + "step": 4917 + }, + { + "epoch": 2.76, + "learning_rate": 9.150865622423743e-06, + "loss": 2.9365, + "step": 4918 + }, + { + "epoch": 2.76, + "learning_rate": 9.130255564715582e-06, + "loss": 5.7961, + "step": 4919 + }, + { + "epoch": 2.76, + "learning_rate": 9.10964550700742e-06, + "loss": 5.6237, + "step": 4920 + }, + { + "epoch": 2.76, + "learning_rate": 9.089035449299258e-06, + "loss": 5.6298, + "step": 4921 + }, + { + "epoch": 2.76, + "learning_rate": 9.068425391591097e-06, + "loss": 5.9162, + "step": 4922 + }, + { + "epoch": 2.76, + "learning_rate": 9.047815333882935e-06, + "loss": 5.534, + "step": 4923 + }, + { + "epoch": 2.76, + "learning_rate": 9.027205276174775e-06, + "loss": 5.5484, + "step": 4924 + }, + { + "epoch": 2.76, + "learning_rate": 9.006595218466613e-06, + "loss": 5.6853, + "step": 4925 + }, + { + "epoch": 2.76, + "learning_rate": 8.98598516075845e-06, + "loss": 5.5569, + "step": 4926 + }, + { + "epoch": 2.76, + "learning_rate": 8.965375103050288e-06, + "loss": 5.4454, + "step": 4927 + }, + { + "epoch": 2.76, + "learning_rate": 8.944765045342128e-06, + "loss": 5.4268, + "step": 4928 + }, + { + "epoch": 2.76, + "learning_rate": 8.924154987633966e-06, + "loss": 5.0813, + "step": 4929 + }, + { + "epoch": 2.76, + "learning_rate": 8.903544929925805e-06, + "loss": 4.9812, + "step": 4930 + }, + { + "epoch": 2.76, + "learning_rate": 8.882934872217643e-06, + "loss": 5.0007, + "step": 4931 + }, + { + "epoch": 2.76, + "learning_rate": 8.862324814509481e-06, + "loss": 5.3056, + "step": 4932 + }, + { + "epoch": 2.77, + "learning_rate": 8.84171475680132e-06, + "loss": 5.4851, + "step": 4933 + }, + { + "epoch": 2.77, + "learning_rate": 8.821104699093158e-06, + "loss": 5.1525, + "step": 4934 + }, + { + "epoch": 2.77, + "learning_rate": 8.800494641384996e-06, + "loss": 5.0617, + "step": 4935 + }, + { + "epoch": 2.77, + "learning_rate": 8.779884583676836e-06, + "loss": 5.216, + "step": 4936 + }, + { + "epoch": 2.77, + "learning_rate": 8.759274525968673e-06, + "loss": 5.4057, + "step": 4937 + }, + { + "epoch": 2.77, + "learning_rate": 8.738664468260511e-06, + "loss": 5.1924, + "step": 4938 + }, + { + "epoch": 2.77, + "learning_rate": 8.71805441055235e-06, + "loss": 5.105, + "step": 4939 + }, + { + "epoch": 2.77, + "learning_rate": 8.69744435284419e-06, + "loss": 5.0465, + "step": 4940 + }, + { + "epoch": 2.77, + "learning_rate": 8.676834295136028e-06, + "loss": 4.8918, + "step": 4941 + }, + { + "epoch": 2.77, + "learning_rate": 8.656224237427866e-06, + "loss": 4.911, + "step": 4942 + }, + { + "epoch": 2.77, + "learning_rate": 8.635614179719703e-06, + "loss": 4.9606, + "step": 4943 + }, + { + "epoch": 2.77, + "learning_rate": 8.615004122011543e-06, + "loss": 5.4342, + "step": 4944 + }, + { + "epoch": 2.77, + "learning_rate": 8.59439406430338e-06, + "loss": 4.8832, + "step": 4945 + }, + { + "epoch": 2.77, + "learning_rate": 8.573784006595219e-06, + "loss": 4.9707, + "step": 4946 + }, + { + "epoch": 2.77, + "learning_rate": 8.553173948887057e-06, + "loss": 4.9893, + "step": 4947 + }, + { + "epoch": 2.77, + "learning_rate": 8.532563891178896e-06, + "loss": 5.3547, + "step": 4948 + }, + { + "epoch": 2.77, + "learning_rate": 8.511953833470734e-06, + "loss": 5.0437, + "step": 4949 + }, + { + "epoch": 2.77, + "learning_rate": 8.491343775762572e-06, + "loss": 4.8788, + "step": 4950 + }, + { + "epoch": 2.78, + "learning_rate": 8.47073371805441e-06, + "loss": 5.1736, + "step": 4951 + }, + { + "epoch": 2.78, + "learning_rate": 8.45012366034625e-06, + "loss": 4.881, + "step": 4952 + }, + { + "epoch": 2.78, + "learning_rate": 8.429513602638087e-06, + "loss": 5.0827, + "step": 4953 + }, + { + "epoch": 2.78, + "learning_rate": 8.408903544929926e-06, + "loss": 4.5825, + "step": 4954 + }, + { + "epoch": 2.78, + "learning_rate": 8.388293487221764e-06, + "loss": 5.0815, + "step": 4955 + }, + { + "epoch": 2.78, + "learning_rate": 8.367683429513604e-06, + "loss": 4.7131, + "step": 4956 + }, + { + "epoch": 2.78, + "learning_rate": 8.347073371805442e-06, + "loss": 4.7564, + "step": 4957 + }, + { + "epoch": 2.78, + "learning_rate": 8.32646331409728e-06, + "loss": 4.764, + "step": 4958 + }, + { + "epoch": 2.78, + "learning_rate": 8.305853256389117e-06, + "loss": 4.3501, + "step": 4959 + }, + { + "epoch": 2.78, + "learning_rate": 8.285243198680957e-06, + "loss": 4.7096, + "step": 4960 + }, + { + "epoch": 2.78, + "learning_rate": 8.264633140972795e-06, + "loss": 4.1267, + "step": 4961 + }, + { + "epoch": 2.78, + "learning_rate": 8.244023083264634e-06, + "loss": 4.2533, + "step": 4962 + }, + { + "epoch": 2.78, + "learning_rate": 8.223413025556472e-06, + "loss": 3.9581, + "step": 4963 + }, + { + "epoch": 2.78, + "learning_rate": 8.20280296784831e-06, + "loss": 3.9697, + "step": 4964 + }, + { + "epoch": 2.78, + "learning_rate": 8.182192910140148e-06, + "loss": 3.5526, + "step": 4965 + }, + { + "epoch": 2.78, + "learning_rate": 8.161582852431987e-06, + "loss": 3.5424, + "step": 4966 + }, + { + "epoch": 2.78, + "learning_rate": 8.140972794723825e-06, + "loss": 3.0776, + "step": 4967 + }, + { + "epoch": 2.78, + "learning_rate": 8.120362737015665e-06, + "loss": 2.9366, + "step": 4968 + }, + { + "epoch": 2.79, + "learning_rate": 8.099752679307503e-06, + "loss": 5.6801, + "step": 4969 + }, + { + "epoch": 2.79, + "learning_rate": 8.07914262159934e-06, + "loss": 5.5831, + "step": 4970 + }, + { + "epoch": 2.79, + "learning_rate": 8.058532563891178e-06, + "loss": 5.7967, + "step": 4971 + }, + { + "epoch": 2.79, + "learning_rate": 8.037922506183018e-06, + "loss": 5.5855, + "step": 4972 + }, + { + "epoch": 2.79, + "learning_rate": 8.017312448474857e-06, + "loss": 5.5869, + "step": 4973 + }, + { + "epoch": 2.79, + "learning_rate": 7.996702390766695e-06, + "loss": 5.3305, + "step": 4974 + }, + { + "epoch": 2.79, + "learning_rate": 7.976092333058533e-06, + "loss": 5.3512, + "step": 4975 + }, + { + "epoch": 2.79, + "learning_rate": 7.955482275350371e-06, + "loss": 5.6472, + "step": 4976 + }, + { + "epoch": 2.79, + "learning_rate": 7.93487221764221e-06, + "loss": 5.3351, + "step": 4977 + }, + { + "epoch": 2.79, + "learning_rate": 7.914262159934048e-06, + "loss": 5.2073, + "step": 4978 + }, + { + "epoch": 2.79, + "learning_rate": 7.893652102225886e-06, + "loss": 5.0253, + "step": 4979 + }, + { + "epoch": 2.79, + "learning_rate": 7.873042044517726e-06, + "loss": 5.0729, + "step": 4980 + }, + { + "epoch": 2.79, + "learning_rate": 7.852431986809563e-06, + "loss": 5.3109, + "step": 4981 + }, + { + "epoch": 2.79, + "learning_rate": 7.831821929101401e-06, + "loss": 4.8816, + "step": 4982 + }, + { + "epoch": 2.79, + "learning_rate": 7.81121187139324e-06, + "loss": 5.2607, + "step": 4983 + }, + { + "epoch": 2.79, + "learning_rate": 7.79060181368508e-06, + "loss": 4.9707, + "step": 4984 + }, + { + "epoch": 2.79, + "learning_rate": 7.769991755976918e-06, + "loss": 5.2019, + "step": 4985 + }, + { + "epoch": 2.79, + "learning_rate": 7.749381698268756e-06, + "loss": 5.3264, + "step": 4986 + }, + { + "epoch": 2.8, + "learning_rate": 7.728771640560593e-06, + "loss": 4.9444, + "step": 4987 + }, + { + "epoch": 2.8, + "learning_rate": 7.708161582852433e-06, + "loss": 5.1415, + "step": 4988 + }, + { + "epoch": 2.8, + "learning_rate": 7.687551525144271e-06, + "loss": 4.9631, + "step": 4989 + }, + { + "epoch": 2.8, + "learning_rate": 7.66694146743611e-06, + "loss": 5.1895, + "step": 4990 + }, + { + "epoch": 2.8, + "learning_rate": 7.646331409727948e-06, + "loss": 5.0302, + "step": 4991 + }, + { + "epoch": 2.8, + "learning_rate": 7.625721352019787e-06, + "loss": 4.9718, + "step": 4992 + }, + { + "epoch": 2.8, + "learning_rate": 7.605111294311624e-06, + "loss": 5.0668, + "step": 4993 + }, + { + "epoch": 2.8, + "learning_rate": 7.5845012366034625e-06, + "loss": 4.7996, + "step": 4994 + }, + { + "epoch": 2.8, + "learning_rate": 7.563891178895301e-06, + "loss": 5.2613, + "step": 4995 + }, + { + "epoch": 2.8, + "learning_rate": 7.54328112118714e-06, + "loss": 4.7361, + "step": 4996 + }, + { + "epoch": 2.8, + "learning_rate": 7.522671063478978e-06, + "loss": 4.8266, + "step": 4997 + }, + { + "epoch": 2.8, + "learning_rate": 7.5020610057708165e-06, + "loss": 4.9903, + "step": 4998 + }, + { + "epoch": 2.8, + "learning_rate": 7.481450948062654e-06, + "loss": 4.9355, + "step": 4999 + }, + { + "epoch": 2.8, + "learning_rate": 7.460840890354494e-06, + "loss": 4.9792, + "step": 5000 + }, + { + "epoch": 2.8, + "eval_loss": 20.59589195251465, + "eval_runtime": 1341.2706, + "eval_samples_per_second": 1.97, + "eval_steps_per_second": 0.247, + "eval_wer": 1.0008131693772313, + "step": 5000 + }, + { + "epoch": 2.8, + "learning_rate": 7.440230832646332e-06, + "loss": 4.9942, + "step": 5001 + }, + { + "epoch": 2.8, + "learning_rate": 7.41962077493817e-06, + "loss": 5.224, + "step": 5002 + }, + { + "epoch": 2.8, + "learning_rate": 7.399010717230008e-06, + "loss": 4.8965, + "step": 5003 + }, + { + "epoch": 2.8, + "learning_rate": 7.378400659521847e-06, + "loss": 4.5096, + "step": 5004 + }, + { + "epoch": 2.81, + "learning_rate": 7.3577906018136855e-06, + "loss": 4.8244, + "step": 5005 + }, + { + "epoch": 2.81, + "learning_rate": 7.337180544105524e-06, + "loss": 4.8861, + "step": 5006 + }, + { + "epoch": 2.81, + "learning_rate": 7.316570486397362e-06, + "loss": 4.8351, + "step": 5007 + }, + { + "epoch": 2.81, + "learning_rate": 7.295960428689201e-06, + "loss": 4.6445, + "step": 5008 + }, + { + "epoch": 2.81, + "learning_rate": 7.2753503709810395e-06, + "loss": 4.5059, + "step": 5009 + }, + { + "epoch": 2.81, + "learning_rate": 7.254740313272877e-06, + "loss": 4.3818, + "step": 5010 + }, + { + "epoch": 2.81, + "learning_rate": 7.234130255564715e-06, + "loss": 4.6193, + "step": 5011 + }, + { + "epoch": 2.81, + "learning_rate": 7.213520197856554e-06, + "loss": 4.316, + "step": 5012 + }, + { + "epoch": 2.81, + "learning_rate": 7.192910140148393e-06, + "loss": 4.5101, + "step": 5013 + }, + { + "epoch": 2.81, + "learning_rate": 7.172300082440231e-06, + "loss": 4.2116, + "step": 5014 + }, + { + "epoch": 2.81, + "learning_rate": 7.151690024732069e-06, + "loss": 3.8023, + "step": 5015 + }, + { + "epoch": 2.81, + "learning_rate": 7.1310799670239084e-06, + "loss": 4.0554, + "step": 5016 + }, + { + "epoch": 2.81, + "learning_rate": 7.110469909315747e-06, + "loss": 3.2494, + "step": 5017 + }, + { + "epoch": 2.81, + "learning_rate": 7.089859851607584e-06, + "loss": 2.8905, + "step": 5018 + }, + { + "epoch": 2.81, + "learning_rate": 7.0692497938994225e-06, + "loss": 5.5251, + "step": 5019 + }, + { + "epoch": 2.81, + "learning_rate": 7.0486397361912625e-06, + "loss": 5.463, + "step": 5020 + }, + { + "epoch": 2.81, + "learning_rate": 7.0280296784831e-06, + "loss": 5.5152, + "step": 5021 + }, + { + "epoch": 2.82, + "learning_rate": 7.007419620774938e-06, + "loss": 5.6887, + "step": 5022 + }, + { + "epoch": 2.82, + "learning_rate": 6.9868095630667765e-06, + "loss": 5.6961, + "step": 5023 + }, + { + "epoch": 2.82, + "learning_rate": 6.966199505358616e-06, + "loss": 5.2732, + "step": 5024 + }, + { + "epoch": 2.82, + "learning_rate": 6.945589447650454e-06, + "loss": 5.328, + "step": 5025 + }, + { + "epoch": 2.82, + "learning_rate": 6.924979389942292e-06, + "loss": 5.3101, + "step": 5026 + }, + { + "epoch": 2.82, + "learning_rate": 6.90436933223413e-06, + "loss": 5.0332, + "step": 5027 + }, + { + "epoch": 2.82, + "learning_rate": 6.88375927452597e-06, + "loss": 5.2356, + "step": 5028 + }, + { + "epoch": 2.82, + "learning_rate": 6.863149216817807e-06, + "loss": 5.2781, + "step": 5029 + }, + { + "epoch": 2.82, + "learning_rate": 6.8425391591096454e-06, + "loss": 5.0614, + "step": 5030 + }, + { + "epoch": 2.82, + "learning_rate": 6.821929101401484e-06, + "loss": 5.044, + "step": 5031 + }, + { + "epoch": 2.82, + "learning_rate": 6.801319043693323e-06, + "loss": 4.7606, + "step": 5032 + }, + { + "epoch": 2.82, + "learning_rate": 6.780708985985161e-06, + "loss": 4.9385, + "step": 5033 + }, + { + "epoch": 2.82, + "learning_rate": 6.7600989282769995e-06, + "loss": 5.0088, + "step": 5034 + }, + { + "epoch": 2.82, + "learning_rate": 6.739488870568839e-06, + "loss": 4.9419, + "step": 5035 + }, + { + "epoch": 2.82, + "learning_rate": 6.718878812860677e-06, + "loss": 5.0568, + "step": 5036 + }, + { + "epoch": 2.82, + "learning_rate": 6.698268755152514e-06, + "loss": 4.9848, + "step": 5037 + }, + { + "epoch": 2.82, + "learning_rate": 6.677658697444353e-06, + "loss": 4.9024, + "step": 5038 + }, + { + "epoch": 2.82, + "learning_rate": 6.657048639736193e-06, + "loss": 5.2979, + "step": 5039 + }, + { + "epoch": 2.83, + "learning_rate": 6.63643858202803e-06, + "loss": 5.1915, + "step": 5040 + }, + { + "epoch": 2.83, + "learning_rate": 6.615828524319868e-06, + "loss": 4.9671, + "step": 5041 + }, + { + "epoch": 2.83, + "learning_rate": 6.595218466611707e-06, + "loss": 5.0166, + "step": 5042 + }, + { + "epoch": 2.83, + "learning_rate": 6.574608408903546e-06, + "loss": 4.8823, + "step": 5043 + }, + { + "epoch": 2.83, + "learning_rate": 6.553998351195384e-06, + "loss": 5.4211, + "step": 5044 + }, + { + "epoch": 2.83, + "learning_rate": 6.5333882934872224e-06, + "loss": 4.6098, + "step": 5045 + }, + { + "epoch": 2.83, + "learning_rate": 6.51277823577906e-06, + "loss": 5.0251, + "step": 5046 + }, + { + "epoch": 2.83, + "learning_rate": 6.4921681780709e-06, + "loss": 5.0019, + "step": 5047 + }, + { + "epoch": 2.83, + "learning_rate": 6.471558120362737e-06, + "loss": 4.9634, + "step": 5048 + }, + { + "epoch": 2.83, + "learning_rate": 6.450948062654576e-06, + "loss": 4.7749, + "step": 5049 + }, + { + "epoch": 2.83, + "learning_rate": 6.430338004946414e-06, + "loss": 4.8876, + "step": 5050 + }, + { + "epoch": 2.83, + "learning_rate": 6.409727947238253e-06, + "loss": 4.7889, + "step": 5051 + }, + { + "epoch": 2.83, + "learning_rate": 6.389117889530091e-06, + "loss": 4.6369, + "step": 5052 + }, + { + "epoch": 2.83, + "learning_rate": 6.36850783182193e-06, + "loss": 4.9409, + "step": 5053 + }, + { + "epoch": 2.83, + "learning_rate": 6.347897774113767e-06, + "loss": 4.6601, + "step": 5054 + }, + { + "epoch": 2.83, + "learning_rate": 6.327287716405607e-06, + "loss": 5.024, + "step": 5055 + }, + { + "epoch": 2.83, + "learning_rate": 6.3066776586974446e-06, + "loss": 4.8488, + "step": 5056 + }, + { + "epoch": 2.83, + "learning_rate": 6.286067600989283e-06, + "loss": 4.787, + "step": 5057 + }, + { + "epoch": 2.84, + "learning_rate": 6.265457543281121e-06, + "loss": 4.6067, + "step": 5058 + }, + { + "epoch": 2.84, + "learning_rate": 6.2448474855729595e-06, + "loss": 4.86, + "step": 5059 + }, + { + "epoch": 2.84, + "learning_rate": 6.224237427864799e-06, + "loss": 4.6003, + "step": 5060 + }, + { + "epoch": 2.84, + "learning_rate": 6.203627370156637e-06, + "loss": 4.5757, + "step": 5061 + }, + { + "epoch": 2.84, + "learning_rate": 6.183017312448475e-06, + "loss": 4.4151, + "step": 5062 + }, + { + "epoch": 2.84, + "learning_rate": 6.1624072547403135e-06, + "loss": 4.0246, + "step": 5063 + }, + { + "epoch": 2.84, + "learning_rate": 6.141797197032153e-06, + "loss": 4.3471, + "step": 5064 + }, + { + "epoch": 2.84, + "learning_rate": 6.12118713932399e-06, + "loss": 3.9279, + "step": 5065 + }, + { + "epoch": 2.84, + "learning_rate": 6.100577081615829e-06, + "loss": 3.3636, + "step": 5066 + }, + { + "epoch": 2.84, + "learning_rate": 6.0799670239076675e-06, + "loss": 3.729, + "step": 5067 + }, + { + "epoch": 2.84, + "learning_rate": 6.059356966199506e-06, + "loss": 3.9029, + "step": 5068 + }, + { + "epoch": 2.84, + "learning_rate": 6.038746908491344e-06, + "loss": 5.5911, + "step": 5069 + }, + { + "epoch": 2.84, + "learning_rate": 6.018136850783182e-06, + "loss": 5.9112, + "step": 5070 + }, + { + "epoch": 2.84, + "learning_rate": 5.997526793075021e-06, + "loss": 5.7745, + "step": 5071 + }, + { + "epoch": 2.84, + "learning_rate": 5.97691673536686e-06, + "loss": 5.5739, + "step": 5072 + }, + { + "epoch": 2.84, + "learning_rate": 5.956306677658697e-06, + "loss": 5.736, + "step": 5073 + }, + { + "epoch": 2.84, + "learning_rate": 5.9356966199505365e-06, + "loss": 5.7831, + "step": 5074 + }, + { + "epoch": 2.84, + "learning_rate": 5.915086562242375e-06, + "loss": 5.6374, + "step": 5075 + }, + { + "epoch": 2.85, + "learning_rate": 5.894476504534213e-06, + "loss": 5.4493, + "step": 5076 + }, + { + "epoch": 2.85, + "learning_rate": 5.873866446826051e-06, + "loss": 5.7552, + "step": 5077 + }, + { + "epoch": 2.85, + "learning_rate": 5.85325638911789e-06, + "loss": 5.4905, + "step": 5078 + }, + { + "epoch": 2.85, + "learning_rate": 5.832646331409728e-06, + "loss": 5.3928, + "step": 5079 + }, + { + "epoch": 2.85, + "learning_rate": 5.812036273701567e-06, + "loss": 5.3172, + "step": 5080 + }, + { + "epoch": 2.85, + "learning_rate": 5.7914262159934045e-06, + "loss": 5.0844, + "step": 5081 + }, + { + "epoch": 2.85, + "learning_rate": 5.770816158285244e-06, + "loss": 4.7566, + "step": 5082 + }, + { + "epoch": 2.85, + "learning_rate": 5.750206100577082e-06, + "loss": 5.2953, + "step": 5083 + }, + { + "epoch": 2.85, + "learning_rate": 5.72959604286892e-06, + "loss": 4.8955, + "step": 5084 + }, + { + "epoch": 2.85, + "learning_rate": 5.7089859851607586e-06, + "loss": 5.1817, + "step": 5085 + }, + { + "epoch": 2.85, + "learning_rate": 5.688375927452598e-06, + "loss": 4.8461, + "step": 5086 + }, + { + "epoch": 2.85, + "learning_rate": 5.667765869744435e-06, + "loss": 5.0782, + "step": 5087 + }, + { + "epoch": 2.85, + "learning_rate": 5.647155812036274e-06, + "loss": 4.8095, + "step": 5088 + }, + { + "epoch": 2.85, + "learning_rate": 5.626545754328113e-06, + "loss": 4.6509, + "step": 5089 + }, + { + "epoch": 2.85, + "learning_rate": 5.605935696619951e-06, + "loss": 4.9819, + "step": 5090 + }, + { + "epoch": 2.85, + "learning_rate": 5.585325638911789e-06, + "loss": 5.0318, + "step": 5091 + }, + { + "epoch": 2.85, + "learning_rate": 5.5647155812036275e-06, + "loss": 5.074, + "step": 5092 + }, + { + "epoch": 2.85, + "learning_rate": 5.544105523495466e-06, + "loss": 4.9083, + "step": 5093 + }, + { + "epoch": 2.86, + "learning_rate": 5.523495465787305e-06, + "loss": 4.975, + "step": 5094 + }, + { + "epoch": 2.86, + "learning_rate": 5.502885408079142e-06, + "loss": 4.7963, + "step": 5095 + }, + { + "epoch": 2.86, + "learning_rate": 5.4822753503709815e-06, + "loss": 4.9607, + "step": 5096 + }, + { + "epoch": 2.86, + "learning_rate": 5.46166529266282e-06, + "loss": 4.5474, + "step": 5097 + }, + { + "epoch": 2.86, + "learning_rate": 5.441055234954658e-06, + "loss": 4.8999, + "step": 5098 + }, + { + "epoch": 2.86, + "learning_rate": 5.4204451772464964e-06, + "loss": 4.9331, + "step": 5099 + }, + { + "epoch": 2.86, + "learning_rate": 5.399835119538335e-06, + "loss": 4.5656, + "step": 5100 + }, + { + "epoch": 2.86, + "learning_rate": 5.379225061830173e-06, + "loss": 4.4848, + "step": 5101 + }, + { + "epoch": 2.86, + "learning_rate": 5.358615004122012e-06, + "loss": 4.706, + "step": 5102 + }, + { + "epoch": 2.86, + "learning_rate": 5.33800494641385e-06, + "loss": 4.8986, + "step": 5103 + }, + { + "epoch": 2.86, + "learning_rate": 5.317394888705689e-06, + "loss": 4.7054, + "step": 5104 + }, + { + "epoch": 2.86, + "learning_rate": 5.296784830997527e-06, + "loss": 5.2068, + "step": 5105 + }, + { + "epoch": 2.86, + "learning_rate": 5.276174773289365e-06, + "loss": 4.733, + "step": 5106 + }, + { + "epoch": 2.86, + "learning_rate": 5.255564715581204e-06, + "loss": 4.512, + "step": 5107 + }, + { + "epoch": 2.86, + "learning_rate": 5.234954657873043e-06, + "loss": 4.4166, + "step": 5108 + }, + { + "epoch": 2.86, + "learning_rate": 5.21434460016488e-06, + "loss": 4.4602, + "step": 5109 + }, + { + "epoch": 2.86, + "learning_rate": 5.193734542456719e-06, + "loss": 4.5878, + "step": 5110 + }, + { + "epoch": 2.86, + "learning_rate": 5.173124484748558e-06, + "loss": 4.1263, + "step": 5111 + }, + { + "epoch": 2.87, + "learning_rate": 5.152514427040396e-06, + "loss": 4.0768, + "step": 5112 + }, + { + "epoch": 2.87, + "learning_rate": 5.131904369332234e-06, + "loss": 3.9025, + "step": 5113 + }, + { + "epoch": 2.87, + "learning_rate": 5.111294311624073e-06, + "loss": 3.6999, + "step": 5114 + }, + { + "epoch": 2.87, + "learning_rate": 5.090684253915911e-06, + "loss": 3.8803, + "step": 5115 + }, + { + "epoch": 2.87, + "learning_rate": 5.07007419620775e-06, + "loss": 3.4543, + "step": 5116 + }, + { + "epoch": 2.87, + "learning_rate": 5.0494641384995875e-06, + "loss": 3.8122, + "step": 5117 + }, + { + "epoch": 2.87, + "learning_rate": 5.028854080791427e-06, + "loss": 2.4936, + "step": 5118 + }, + { + "epoch": 2.87, + "learning_rate": 5.008244023083265e-06, + "loss": 5.6857, + "step": 5119 + }, + { + "epoch": 2.87, + "learning_rate": 4.987633965375103e-06, + "loss": 5.5729, + "step": 5120 + }, + { + "epoch": 2.87, + "learning_rate": 4.9670239076669415e-06, + "loss": 5.3622, + "step": 5121 + }, + { + "epoch": 2.87, + "learning_rate": 4.94641384995878e-06, + "loss": 5.6823, + "step": 5122 + }, + { + "epoch": 2.87, + "learning_rate": 4.925803792250618e-06, + "loss": 5.2408, + "step": 5123 + }, + { + "epoch": 2.87, + "learning_rate": 4.905193734542457e-06, + "loss": 5.38, + "step": 5124 + }, + { + "epoch": 2.87, + "learning_rate": 4.884583676834295e-06, + "loss": 5.6711, + "step": 5125 + }, + { + "epoch": 2.87, + "learning_rate": 4.863973619126134e-06, + "loss": 5.5224, + "step": 5126 + }, + { + "epoch": 2.87, + "learning_rate": 4.843363561417972e-06, + "loss": 5.2924, + "step": 5127 + }, + { + "epoch": 2.87, + "learning_rate": 4.8227535037098104e-06, + "loss": 5.4853, + "step": 5128 + }, + { + "epoch": 2.88, + "learning_rate": 4.802143446001649e-06, + "loss": 5.2362, + "step": 5129 + }, + { + "epoch": 2.88, + "learning_rate": 4.781533388293488e-06, + "loss": 4.9919, + "step": 5130 + }, + { + "epoch": 2.88, + "learning_rate": 4.760923330585325e-06, + "loss": 5.4204, + "step": 5131 + }, + { + "epoch": 2.88, + "learning_rate": 4.7403132728771645e-06, + "loss": 4.9496, + "step": 5132 + }, + { + "epoch": 2.88, + "learning_rate": 4.719703215169003e-06, + "loss": 5.1621, + "step": 5133 + }, + { + "epoch": 2.88, + "learning_rate": 4.699093157460841e-06, + "loss": 4.8392, + "step": 5134 + }, + { + "epoch": 2.88, + "learning_rate": 4.678483099752679e-06, + "loss": 5.1011, + "step": 5135 + }, + { + "epoch": 2.88, + "learning_rate": 4.657873042044518e-06, + "loss": 5.0214, + "step": 5136 + }, + { + "epoch": 2.88, + "learning_rate": 4.637262984336356e-06, + "loss": 5.0409, + "step": 5137 + }, + { + "epoch": 2.88, + "learning_rate": 4.616652926628195e-06, + "loss": 4.7765, + "step": 5138 + }, + { + "epoch": 2.88, + "learning_rate": 4.5960428689200326e-06, + "loss": 4.8654, + "step": 5139 + }, + { + "epoch": 2.88, + "learning_rate": 4.575432811211872e-06, + "loss": 5.0282, + "step": 5140 + }, + { + "epoch": 2.88, + "learning_rate": 4.55482275350371e-06, + "loss": 5.0863, + "step": 5141 + }, + { + "epoch": 2.88, + "learning_rate": 4.534212695795548e-06, + "loss": 5.0416, + "step": 5142 + }, + { + "epoch": 2.88, + "learning_rate": 4.5136026380873874e-06, + "loss": 4.7986, + "step": 5143 + }, + { + "epoch": 2.88, + "learning_rate": 4.492992580379225e-06, + "loss": 4.7688, + "step": 5144 + }, + { + "epoch": 2.88, + "learning_rate": 4.472382522671064e-06, + "loss": 4.9015, + "step": 5145 + }, + { + "epoch": 2.88, + "learning_rate": 4.451772464962902e-06, + "loss": 4.7375, + "step": 5146 + }, + { + "epoch": 2.89, + "learning_rate": 4.431162407254741e-06, + "loss": 5.1159, + "step": 5147 + }, + { + "epoch": 2.89, + "learning_rate": 4.410552349546579e-06, + "loss": 4.7153, + "step": 5148 + }, + { + "epoch": 2.89, + "learning_rate": 4.389942291838418e-06, + "loss": 4.8147, + "step": 5149 + }, + { + "epoch": 2.89, + "learning_rate": 4.3693322341302555e-06, + "loss": 4.8218, + "step": 5150 + }, + { + "epoch": 2.89, + "learning_rate": 4.348722176422095e-06, + "loss": 4.6215, + "step": 5151 + }, + { + "epoch": 2.89, + "learning_rate": 4.328112118713933e-06, + "loss": 4.7447, + "step": 5152 + }, + { + "epoch": 2.89, + "learning_rate": 4.307502061005771e-06, + "loss": 5.1067, + "step": 5153 + }, + { + "epoch": 2.89, + "learning_rate": 4.2868920032976096e-06, + "loss": 4.74, + "step": 5154 + }, + { + "epoch": 2.89, + "learning_rate": 4.266281945589448e-06, + "loss": 4.7101, + "step": 5155 + }, + { + "epoch": 2.89, + "learning_rate": 4.245671887881286e-06, + "loss": 4.5644, + "step": 5156 + }, + { + "epoch": 2.89, + "learning_rate": 4.225061830173125e-06, + "loss": 4.5044, + "step": 5157 + }, + { + "epoch": 2.89, + "learning_rate": 4.204451772464963e-06, + "loss": 4.9838, + "step": 5158 + }, + { + "epoch": 2.89, + "learning_rate": 4.183841714756802e-06, + "loss": 4.4263, + "step": 5159 + }, + { + "epoch": 2.89, + "learning_rate": 4.16323165704864e-06, + "loss": 4.4137, + "step": 5160 + }, + { + "epoch": 2.89, + "learning_rate": 4.1426215993404785e-06, + "loss": 4.5501, + "step": 5161 + }, + { + "epoch": 2.89, + "learning_rate": 4.122011541632317e-06, + "loss": 4.3387, + "step": 5162 + }, + { + "epoch": 2.89, + "learning_rate": 4.101401483924155e-06, + "loss": 4.1609, + "step": 5163 + }, + { + "epoch": 2.89, + "learning_rate": 4.080791426215993e-06, + "loss": 4.2749, + "step": 5164 + }, + { + "epoch": 2.9, + "learning_rate": 4.0601813685078325e-06, + "loss": 3.6598, + "step": 5165 + }, + { + "epoch": 2.9, + "learning_rate": 4.03957131079967e-06, + "loss": 4.0748, + "step": 5166 + }, + { + "epoch": 2.9, + "learning_rate": 4.018961253091509e-06, + "loss": 3.1914, + "step": 5167 + }, + { + "epoch": 2.9, + "learning_rate": 3.9983511953833474e-06, + "loss": 2.6383, + "step": 5168 + }, + { + "epoch": 2.9, + "learning_rate": 3.977741137675186e-06, + "loss": 5.6315, + "step": 5169 + }, + { + "epoch": 2.9, + "learning_rate": 3.957131079967024e-06, + "loss": 5.3108, + "step": 5170 + }, + { + "epoch": 2.9, + "learning_rate": 3.936521022258863e-06, + "loss": 5.2475, + "step": 5171 + }, + { + "epoch": 2.9, + "learning_rate": 3.915910964550701e-06, + "loss": 5.3597, + "step": 5172 + }, + { + "epoch": 2.9, + "learning_rate": 3.89530090684254e-06, + "loss": 5.0077, + "step": 5173 + }, + { + "epoch": 2.9, + "learning_rate": 3.874690849134378e-06, + "loss": 5.4304, + "step": 5174 + }, + { + "epoch": 2.9, + "learning_rate": 3.854080791426216e-06, + "loss": 5.2624, + "step": 5175 + }, + { + "epoch": 2.9, + "learning_rate": 3.833470733718055e-06, + "loss": 5.0721, + "step": 5176 + }, + { + "epoch": 2.9, + "learning_rate": 3.8128606760098934e-06, + "loss": 5.0394, + "step": 5177 + }, + { + "epoch": 2.9, + "learning_rate": 3.7922506183017312e-06, + "loss": 5.0931, + "step": 5178 + }, + { + "epoch": 2.9, + "learning_rate": 3.77164056059357e-06, + "loss": 4.9715, + "step": 5179 + }, + { + "epoch": 2.9, + "learning_rate": 3.7510305028854083e-06, + "loss": 5.3158, + "step": 5180 + }, + { + "epoch": 2.9, + "learning_rate": 3.730420445177247e-06, + "loss": 5.1167, + "step": 5181 + }, + { + "epoch": 2.9, + "learning_rate": 3.709810387469085e-06, + "loss": 4.8948, + "step": 5182 + }, + { + "epoch": 2.91, + "learning_rate": 3.6892003297609236e-06, + "loss": 4.8175, + "step": 5183 + }, + { + "epoch": 2.91, + "learning_rate": 3.668590272052762e-06, + "loss": 5.2443, + "step": 5184 + }, + { + "epoch": 2.91, + "learning_rate": 3.6479802143446006e-06, + "loss": 5.1076, + "step": 5185 + }, + { + "epoch": 2.91, + "learning_rate": 3.6273701566364385e-06, + "loss": 4.9619, + "step": 5186 + }, + { + "epoch": 2.91, + "learning_rate": 3.606760098928277e-06, + "loss": 5.2482, + "step": 5187 + }, + { + "epoch": 2.91, + "learning_rate": 3.5861500412201155e-06, + "loss": 4.9039, + "step": 5188 + }, + { + "epoch": 2.91, + "learning_rate": 3.5655399835119542e-06, + "loss": 4.6636, + "step": 5189 + }, + { + "epoch": 2.91, + "learning_rate": 3.544929925803792e-06, + "loss": 4.8549, + "step": 5190 + }, + { + "epoch": 2.91, + "learning_rate": 3.5243198680956312e-06, + "loss": 4.7344, + "step": 5191 + }, + { + "epoch": 2.91, + "learning_rate": 3.503709810387469e-06, + "loss": 4.7425, + "step": 5192 + }, + { + "epoch": 2.91, + "learning_rate": 3.483099752679308e-06, + "loss": 4.6267, + "step": 5193 + }, + { + "epoch": 2.91, + "learning_rate": 3.462489694971146e-06, + "loss": 4.9347, + "step": 5194 + }, + { + "epoch": 2.91, + "learning_rate": 3.441879637262985e-06, + "loss": 4.9913, + "step": 5195 + }, + { + "epoch": 2.91, + "learning_rate": 3.4212695795548227e-06, + "loss": 4.6617, + "step": 5196 + }, + { + "epoch": 2.91, + "learning_rate": 3.4006595218466614e-06, + "loss": 4.8447, + "step": 5197 + }, + { + "epoch": 2.91, + "learning_rate": 3.3800494641384997e-06, + "loss": 4.8013, + "step": 5198 + }, + { + "epoch": 2.91, + "learning_rate": 3.3594394064303385e-06, + "loss": 5.3862, + "step": 5199 + }, + { + "epoch": 2.91, + "learning_rate": 3.3388293487221763e-06, + "loss": 4.8574, + "step": 5200 + }, + { + "epoch": 2.92, + "learning_rate": 3.318219291014015e-06, + "loss": 4.7946, + "step": 5201 + }, + { + "epoch": 2.92, + "learning_rate": 3.2976092333058533e-06, + "loss": 4.7291, + "step": 5202 + }, + { + "epoch": 2.92, + "learning_rate": 3.276999175597692e-06, + "loss": 4.7675, + "step": 5203 + }, + { + "epoch": 2.92, + "learning_rate": 3.25638911788953e-06, + "loss": 4.991, + "step": 5204 + }, + { + "epoch": 2.92, + "learning_rate": 3.2357790601813687e-06, + "loss": 4.4183, + "step": 5205 + }, + { + "epoch": 2.92, + "learning_rate": 3.215169002473207e-06, + "loss": 4.5025, + "step": 5206 + }, + { + "epoch": 2.92, + "learning_rate": 3.1945589447650457e-06, + "loss": 4.6671, + "step": 5207 + }, + { + "epoch": 2.92, + "learning_rate": 3.1739488870568836e-06, + "loss": 5.0126, + "step": 5208 + }, + { + "epoch": 2.92, + "learning_rate": 3.1533388293487223e-06, + "loss": 4.6605, + "step": 5209 + }, + { + "epoch": 2.92, + "learning_rate": 3.1327287716405606e-06, + "loss": 4.4703, + "step": 5210 + }, + { + "epoch": 2.92, + "learning_rate": 3.1121187139323993e-06, + "loss": 4.4237, + "step": 5211 + }, + { + "epoch": 2.92, + "learning_rate": 3.0915086562242376e-06, + "loss": 4.5546, + "step": 5212 + }, + { + "epoch": 2.92, + "learning_rate": 3.0708985985160763e-06, + "loss": 3.8409, + "step": 5213 + }, + { + "epoch": 2.92, + "learning_rate": 3.0502885408079146e-06, + "loss": 4.0227, + "step": 5214 + }, + { + "epoch": 2.92, + "learning_rate": 3.029678483099753e-06, + "loss": 4.22, + "step": 5215 + }, + { + "epoch": 2.92, + "learning_rate": 3.009068425391591e-06, + "loss": 3.6168, + "step": 5216 + }, + { + "epoch": 2.92, + "learning_rate": 2.98845836768343e-06, + "loss": 3.33, + "step": 5217 + }, + { + "epoch": 2.92, + "learning_rate": 2.9678483099752682e-06, + "loss": 3.0703, + "step": 5218 + }, + { + "epoch": 2.93, + "learning_rate": 2.9472382522671065e-06, + "loss": 5.4585, + "step": 5219 + }, + { + "epoch": 2.93, + "learning_rate": 2.926628194558945e-06, + "loss": 5.4281, + "step": 5220 + }, + { + "epoch": 2.93, + "learning_rate": 2.9060181368507835e-06, + "loss": 5.4324, + "step": 5221 + }, + { + "epoch": 2.93, + "learning_rate": 2.885408079142622e-06, + "loss": 5.2781, + "step": 5222 + }, + { + "epoch": 2.93, + "learning_rate": 2.86479802143446e-06, + "loss": 5.3337, + "step": 5223 + }, + { + "epoch": 2.93, + "learning_rate": 2.844187963726299e-06, + "loss": 5.3403, + "step": 5224 + }, + { + "epoch": 2.93, + "learning_rate": 2.823577906018137e-06, + "loss": 5.4458, + "step": 5225 + }, + { + "epoch": 2.93, + "learning_rate": 2.8029678483099755e-06, + "loss": 5.2583, + "step": 5226 + }, + { + "epoch": 2.93, + "learning_rate": 2.7823577906018138e-06, + "loss": 5.2006, + "step": 5227 + }, + { + "epoch": 2.93, + "learning_rate": 2.7617477328936525e-06, + "loss": 5.088, + "step": 5228 + }, + { + "epoch": 2.93, + "learning_rate": 2.7411376751854908e-06, + "loss": 4.9515, + "step": 5229 + }, + { + "epoch": 2.93, + "learning_rate": 2.720527617477329e-06, + "loss": 5.2933, + "step": 5230 + }, + { + "epoch": 2.93, + "learning_rate": 2.6999175597691674e-06, + "loss": 4.9053, + "step": 5231 + }, + { + "epoch": 2.93, + "learning_rate": 2.679307502061006e-06, + "loss": 5.1863, + "step": 5232 + }, + { + "epoch": 2.93, + "learning_rate": 2.6586974443528444e-06, + "loss": 4.8394, + "step": 5233 + }, + { + "epoch": 2.93, + "learning_rate": 2.6380873866446827e-06, + "loss": 4.9371, + "step": 5234 + }, + { + "epoch": 2.93, + "learning_rate": 2.6174773289365214e-06, + "loss": 5.3673, + "step": 5235 + }, + { + "epoch": 2.93, + "learning_rate": 2.5968672712283597e-06, + "loss": 5.008, + "step": 5236 + }, + { + "epoch": 2.94, + "learning_rate": 2.576257213520198e-06, + "loss": 4.7417, + "step": 5237 + }, + { + "epoch": 2.94, + "learning_rate": 2.5556471558120363e-06, + "loss": 5.1261, + "step": 5238 + }, + { + "epoch": 2.94, + "learning_rate": 2.535037098103875e-06, + "loss": 5.1043, + "step": 5239 + }, + { + "epoch": 2.94, + "learning_rate": 2.5144270403957133e-06, + "loss": 4.8423, + "step": 5240 + }, + { + "epoch": 2.94, + "learning_rate": 2.4938169826875516e-06, + "loss": 4.8992, + "step": 5241 + }, + { + "epoch": 2.94, + "learning_rate": 2.47320692497939e-06, + "loss": 4.7645, + "step": 5242 + }, + { + "epoch": 2.94, + "learning_rate": 2.4525968672712286e-06, + "loss": 4.9741, + "step": 5243 + }, + { + "epoch": 2.94, + "learning_rate": 2.431986809563067e-06, + "loss": 4.8511, + "step": 5244 + }, + { + "epoch": 2.94, + "learning_rate": 2.4113767518549052e-06, + "loss": 4.9497, + "step": 5245 + }, + { + "epoch": 2.94, + "learning_rate": 2.390766694146744e-06, + "loss": 4.5519, + "step": 5246 + }, + { + "epoch": 2.94, + "learning_rate": 2.3701566364385822e-06, + "loss": 5.0908, + "step": 5247 + }, + { + "epoch": 2.94, + "learning_rate": 2.3495465787304205e-06, + "loss": 4.6203, + "step": 5248 + }, + { + "epoch": 2.94, + "learning_rate": 2.328936521022259e-06, + "loss": 4.7718, + "step": 5249 + }, + { + "epoch": 2.94, + "learning_rate": 2.3083264633140976e-06, + "loss": 4.851, + "step": 5250 + }, + { + "epoch": 2.94, + "learning_rate": 2.287716405605936e-06, + "loss": 4.6061, + "step": 5251 + }, + { + "epoch": 2.94, + "learning_rate": 2.267106347897774e-06, + "loss": 4.7297, + "step": 5252 + }, + { + "epoch": 2.94, + "learning_rate": 2.2464962901896124e-06, + "loss": 4.6439, + "step": 5253 + }, + { + "epoch": 2.95, + "learning_rate": 2.225886232481451e-06, + "loss": 4.8564, + "step": 5254 + }, + { + "epoch": 2.95, + "learning_rate": 2.2052761747732895e-06, + "loss": 4.5379, + "step": 5255 + }, + { + "epoch": 2.95, + "learning_rate": 2.1846661170651278e-06, + "loss": 4.6354, + "step": 5256 + }, + { + "epoch": 2.95, + "learning_rate": 2.1640560593569665e-06, + "loss": 4.819, + "step": 5257 + }, + { + "epoch": 2.95, + "learning_rate": 2.1434460016488048e-06, + "loss": 4.3441, + "step": 5258 + }, + { + "epoch": 2.95, + "learning_rate": 2.122835943940643e-06, + "loss": 4.3509, + "step": 5259 + }, + { + "epoch": 2.95, + "learning_rate": 2.1022258862324814e-06, + "loss": 4.4016, + "step": 5260 + }, + { + "epoch": 2.95, + "learning_rate": 2.08161582852432e-06, + "loss": 4.2381, + "step": 5261 + }, + { + "epoch": 2.95, + "learning_rate": 2.0610057708161584e-06, + "loss": 4.1698, + "step": 5262 + }, + { + "epoch": 2.95, + "learning_rate": 2.0403957131079967e-06, + "loss": 4.3645, + "step": 5263 + }, + { + "epoch": 2.95, + "learning_rate": 2.019785655399835e-06, + "loss": 3.8847, + "step": 5264 + }, + { + "epoch": 2.95, + "learning_rate": 1.9991755976916737e-06, + "loss": 3.3239, + "step": 5265 + }, + { + "epoch": 2.95, + "learning_rate": 1.978565539983512e-06, + "loss": 3.5787, + "step": 5266 + }, + { + "epoch": 2.95, + "learning_rate": 1.9579554822753503e-06, + "loss": 3.1489, + "step": 5267 + }, + { + "epoch": 2.95, + "learning_rate": 1.937345424567189e-06, + "loss": 3.192, + "step": 5268 + }, + { + "epoch": 2.95, + "learning_rate": 1.9167353668590273e-06, + "loss": 5.6025, + "step": 5269 + }, + { + "epoch": 2.95, + "learning_rate": 1.8961253091508656e-06, + "loss": 5.0864, + "step": 5270 + }, + { + "epoch": 2.95, + "learning_rate": 1.8755152514427041e-06, + "loss": 5.3036, + "step": 5271 + }, + { + "epoch": 2.96, + "learning_rate": 1.8549051937345424e-06, + "loss": 5.5223, + "step": 5272 + }, + { + "epoch": 2.96, + "learning_rate": 1.834295136026381e-06, + "loss": 5.2175, + "step": 5273 + }, + { + "epoch": 2.96, + "learning_rate": 1.8136850783182192e-06, + "loss": 4.9827, + "step": 5274 + }, + { + "epoch": 2.96, + "learning_rate": 1.7930750206100577e-06, + "loss": 5.0062, + "step": 5275 + }, + { + "epoch": 2.96, + "learning_rate": 1.772464962901896e-06, + "loss": 5.0195, + "step": 5276 + }, + { + "epoch": 2.96, + "learning_rate": 1.7518549051937346e-06, + "loss": 5.2456, + "step": 5277 + }, + { + "epoch": 2.96, + "learning_rate": 1.731244847485573e-06, + "loss": 5.1905, + "step": 5278 + }, + { + "epoch": 2.96, + "learning_rate": 1.7106347897774114e-06, + "loss": 5.0046, + "step": 5279 + }, + { + "epoch": 2.96, + "learning_rate": 1.6900247320692499e-06, + "loss": 5.1373, + "step": 5280 + }, + { + "epoch": 2.96, + "learning_rate": 1.6694146743610882e-06, + "loss": 4.76, + "step": 5281 + }, + { + "epoch": 2.96, + "learning_rate": 1.6488046166529267e-06, + "loss": 5.1101, + "step": 5282 + }, + { + "epoch": 2.96, + "learning_rate": 1.628194558944765e-06, + "loss": 4.9121, + "step": 5283 + }, + { + "epoch": 2.96, + "learning_rate": 1.6075845012366035e-06, + "loss": 5.1893, + "step": 5284 + }, + { + "epoch": 2.96, + "learning_rate": 1.5869744435284418e-06, + "loss": 4.9847, + "step": 5285 + }, + { + "epoch": 2.96, + "learning_rate": 1.5663643858202803e-06, + "loss": 4.9223, + "step": 5286 + }, + { + "epoch": 2.96, + "learning_rate": 1.5457543281121188e-06, + "loss": 5.0814, + "step": 5287 + }, + { + "epoch": 2.96, + "learning_rate": 1.5251442704039573e-06, + "loss": 4.8673, + "step": 5288 + }, + { + "epoch": 2.96, + "learning_rate": 1.5045342126957956e-06, + "loss": 4.7769, + "step": 5289 + }, + { + "epoch": 2.97, + "learning_rate": 1.4839241549876341e-06, + "loss": 4.7936, + "step": 5290 + }, + { + "epoch": 2.97, + "learning_rate": 1.4633140972794724e-06, + "loss": 4.8707, + "step": 5291 + }, + { + "epoch": 2.97, + "learning_rate": 1.442704039571311e-06, + "loss": 4.6411, + "step": 5292 + }, + { + "epoch": 2.97, + "learning_rate": 1.4220939818631494e-06, + "loss": 4.8262, + "step": 5293 + }, + { + "epoch": 2.97, + "learning_rate": 1.4014839241549877e-06, + "loss": 4.7487, + "step": 5294 + }, + { + "epoch": 2.97, + "learning_rate": 1.3808738664468262e-06, + "loss": 4.869, + "step": 5295 + }, + { + "epoch": 2.97, + "learning_rate": 1.3602638087386645e-06, + "loss": 5.1843, + "step": 5296 + }, + { + "epoch": 2.97, + "learning_rate": 1.339653751030503e-06, + "loss": 4.8043, + "step": 5297 + }, + { + "epoch": 2.97, + "learning_rate": 1.3190436933223413e-06, + "loss": 4.7771, + "step": 5298 + }, + { + "epoch": 2.97, + "learning_rate": 1.2984336356141798e-06, + "loss": 4.6642, + "step": 5299 + }, + { + "epoch": 2.97, + "learning_rate": 1.2778235779060181e-06, + "loss": 4.9434, + "step": 5300 + }, + { + "epoch": 2.97, + "learning_rate": 1.2572135201978567e-06, + "loss": 5.1796, + "step": 5301 + }, + { + "epoch": 2.97, + "learning_rate": 1.236603462489695e-06, + "loss": 4.8209, + "step": 5302 + }, + { + "epoch": 2.97, + "learning_rate": 1.2159934047815335e-06, + "loss": 5.1376, + "step": 5303 + }, + { + "epoch": 2.97, + "learning_rate": 1.195383347073372e-06, + "loss": 4.6316, + "step": 5304 + }, + { + "epoch": 2.97, + "learning_rate": 1.1747732893652103e-06, + "loss": 4.2954, + "step": 5305 + }, + { + "epoch": 2.97, + "learning_rate": 1.1541632316570488e-06, + "loss": 4.8585, + "step": 5306 + }, + { + "epoch": 2.97, + "learning_rate": 1.133553173948887e-06, + "loss": 4.6769, + "step": 5307 + }, + { + "epoch": 2.98, + "learning_rate": 1.1129431162407256e-06, + "loss": 4.6862, + "step": 5308 + }, + { + "epoch": 2.98, + "learning_rate": 1.0923330585325639e-06, + "loss": 4.7266, + "step": 5309 + }, + { + "epoch": 2.98, + "learning_rate": 1.0717230008244024e-06, + "loss": 4.699, + "step": 5310 + }, + { + "epoch": 2.98, + "learning_rate": 1.0511129431162407e-06, + "loss": 4.1216, + "step": 5311 + }, + { + "epoch": 2.98, + "learning_rate": 1.0305028854080792e-06, + "loss": 4.9831, + "step": 5312 + }, + { + "epoch": 2.98, + "learning_rate": 1.0098928276999175e-06, + "loss": 4.4008, + "step": 5313 + }, + { + "epoch": 2.98, + "learning_rate": 9.89282769991756e-07, + "loss": 3.9894, + "step": 5314 + }, + { + "epoch": 2.98, + "learning_rate": 9.686727122835945e-07, + "loss": 3.9423, + "step": 5315 + }, + { + "epoch": 2.98, + "learning_rate": 9.480626545754328e-07, + "loss": 3.214, + "step": 5316 + }, + { + "epoch": 2.98, + "learning_rate": 9.274525968672712e-07, + "loss": 3.733, + "step": 5317 + }, + { + "epoch": 2.98, + "learning_rate": 9.068425391591096e-07, + "loss": 3.2678, + "step": 5318 + }, + { + "epoch": 2.98, + "learning_rate": 8.86232481450948e-07, + "loss": 5.383, + "step": 5319 + }, + { + "epoch": 2.98, + "learning_rate": 8.656224237427865e-07, + "loss": 5.4783, + "step": 5320 + }, + { + "epoch": 2.98, + "learning_rate": 8.450123660346249e-07, + "loss": 5.1129, + "step": 5321 + }, + { + "epoch": 2.98, + "learning_rate": 8.244023083264633e-07, + "loss": 5.1164, + "step": 5322 + }, + { + "epoch": 2.98, + "learning_rate": 8.037922506183017e-07, + "loss": 5.0721, + "step": 5323 + }, + { + "epoch": 2.98, + "learning_rate": 7.831821929101401e-07, + "loss": 5.1312, + "step": 5324 + }, + { + "epoch": 2.98, + "learning_rate": 7.625721352019787e-07, + "loss": 5.1583, + "step": 5325 + }, + { + "epoch": 2.99, + "learning_rate": 7.419620774938171e-07, + "loss": 4.9809, + "step": 5326 + }, + { + "epoch": 2.99, + "learning_rate": 7.213520197856555e-07, + "loss": 5.3964, + "step": 5327 + }, + { + "epoch": 2.99, + "learning_rate": 7.007419620774939e-07, + "loss": 5.2341, + "step": 5328 + }, + { + "epoch": 2.99, + "learning_rate": 6.801319043693323e-07, + "loss": 5.3491, + "step": 5329 + }, + { + "epoch": 2.99, + "learning_rate": 6.595218466611707e-07, + "loss": 4.9862, + "step": 5330 + }, + { + "epoch": 2.99, + "learning_rate": 6.389117889530091e-07, + "loss": 5.1888, + "step": 5331 + }, + { + "epoch": 2.99, + "learning_rate": 6.183017312448475e-07, + "loss": 4.9862, + "step": 5332 + }, + { + "epoch": 2.99, + "learning_rate": 5.97691673536686e-07, + "loss": 5.0949, + "step": 5333 + }, + { + "epoch": 2.99, + "learning_rate": 5.770816158285244e-07, + "loss": 5.1073, + "step": 5334 + }, + { + "epoch": 2.99, + "learning_rate": 5.564715581203628e-07, + "loss": 5.2934, + "step": 5335 + }, + { + "epoch": 2.99, + "learning_rate": 5.358615004122012e-07, + "loss": 4.9402, + "step": 5336 + }, + { + "epoch": 2.99, + "learning_rate": 5.152514427040396e-07, + "loss": 5.1293, + "step": 5337 + }, + { + "epoch": 2.99, + "learning_rate": 4.94641384995878e-07, + "loss": 4.7629, + "step": 5338 + }, + { + "epoch": 2.99, + "learning_rate": 4.740313272877164e-07, + "loss": 4.8445, + "step": 5339 + }, + { + "epoch": 2.99, + "learning_rate": 4.534212695795548e-07, + "loss": 4.9831, + "step": 5340 + }, + { + "epoch": 2.99, + "learning_rate": 4.3281121187139327e-07, + "loss": 5.0559, + "step": 5341 + }, + { + "epoch": 2.99, + "learning_rate": 4.1220115416323167e-07, + "loss": 4.7817, + "step": 5342 + }, + { + "epoch": 2.99, + "learning_rate": 3.9159109645507007e-07, + "loss": 4.418, + "step": 5343 + }, + { + "epoch": 3.0, + "learning_rate": 3.7098103874690853e-07, + "loss": 4.5948, + "step": 5344 + }, + { + "epoch": 3.0, + "learning_rate": 3.5037098103874693e-07, + "loss": 4.681, + "step": 5345 + }, + { + "epoch": 3.0, + "learning_rate": 3.2976092333058533e-07, + "loss": 4.5751, + "step": 5346 + }, + { + "epoch": 3.0, + "learning_rate": 3.0915086562242374e-07, + "loss": 4.5757, + "step": 5347 + }, + { + "epoch": 3.0, + "learning_rate": 2.885408079142622e-07, + "loss": 4.5069, + "step": 5348 + }, + { + "epoch": 3.0, + "learning_rate": 2.679307502061006e-07, + "loss": 4.2085, + "step": 5349 + }, + { + "epoch": 3.0, + "learning_rate": 2.47320692497939e-07, + "loss": 4.1064, + "step": 5350 + }, + { + "epoch": 3.0, + "learning_rate": 2.267106347897774e-07, + "loss": 3.474, + "step": 5351 + }, + { + "epoch": 3.0, + "learning_rate": 2.0610057708161583e-07, + "loss": 2.5302, + "step": 5352 + }, + { + "epoch": 3.0, + "step": 5352, + "total_flos": 0.0, + "train_loss": 5.8440248690913075, + "train_runtime": 31164.8267, + "train_samples_per_second": 2.747, + "train_steps_per_second": 0.172 + } + ], + "max_steps": 5352, + "num_train_epochs": 3, + "total_flos": 0.0, + "trial_name": null, + "trial_params": null +}