{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2e-08, "loss": 0.2671, "step": 1 }, { "epoch": 0.0, "learning_rate": 4e-08, "loss": 0.2716, "step": 2 }, { "epoch": 0.0, "learning_rate": 6.000000000000001e-08, "loss": 0.2701, "step": 3 }, { "epoch": 0.0, "learning_rate": 8e-08, "loss": 0.2684, "step": 4 }, { "epoch": 0.0, "learning_rate": 1.0000000000000001e-07, "loss": 0.2631, "step": 5 }, { "epoch": 0.0, "learning_rate": 1.2000000000000002e-07, "loss": 0.2764, "step": 6 }, { "epoch": 0.0, "learning_rate": 1.4e-07, "loss": 0.2688, "step": 7 }, { "epoch": 0.0, "learning_rate": 1.6e-07, "loss": 0.2646, "step": 8 }, { "epoch": 0.0, "learning_rate": 1.8e-07, "loss": 0.2731, "step": 9 }, { "epoch": 0.0, "learning_rate": 2.0000000000000002e-07, "loss": 0.2742, "step": 10 }, { "epoch": 0.0, "learning_rate": 2.2e-07, "loss": 0.2771, "step": 11 }, { "epoch": 0.0, "learning_rate": 2.4000000000000003e-07, "loss": 0.273, "step": 12 }, { "epoch": 0.0, "learning_rate": 2.6e-07, "loss": 0.2545, "step": 13 }, { "epoch": 0.0, "learning_rate": 2.8e-07, "loss": 0.2706, "step": 14 }, { "epoch": 0.0, "learning_rate": 3.0000000000000004e-07, "loss": 0.2569, "step": 15 }, { "epoch": 0.0, "learning_rate": 3.2e-07, "loss": 0.2793, "step": 16 }, { "epoch": 0.0, "learning_rate": 3.4000000000000003e-07, "loss": 0.2765, "step": 17 }, { "epoch": 0.0, "learning_rate": 3.6e-07, "loss": 0.269, "step": 18 }, { "epoch": 0.0, "learning_rate": 3.8e-07, "loss": 0.2648, "step": 19 }, { "epoch": 0.0, "learning_rate": 4.0000000000000003e-07, "loss": 0.2736, "step": 20 }, { "epoch": 0.0, "learning_rate": 4.2000000000000006e-07, "loss": 0.2653, "step": 21 }, { "epoch": 0.0, "learning_rate": 4.4e-07, "loss": 0.2723, "step": 22 }, { "epoch": 0.0, "learning_rate": 4.6000000000000004e-07, "loss": 0.2717, "step": 23 }, { "epoch": 0.0, "learning_rate": 4.800000000000001e-07, "loss": 0.2651, "step": 24 }, { "epoch": 0.01, "learning_rate": 5.000000000000001e-07, "loss": 0.2371, "step": 25 }, { "epoch": 0.01, "learning_rate": 5.2e-07, "loss": 0.2661, "step": 26 }, { "epoch": 0.01, "learning_rate": 5.4e-07, "loss": 0.2702, "step": 27 }, { "epoch": 0.01, "learning_rate": 5.6e-07, "loss": 0.2386, "step": 28 }, { "epoch": 0.01, "learning_rate": 5.800000000000001e-07, "loss": 0.278, "step": 29 }, { "epoch": 0.01, "learning_rate": 6.000000000000001e-07, "loss": 0.2571, "step": 30 }, { "epoch": 0.01, "learning_rate": 6.200000000000001e-07, "loss": 0.2792, "step": 31 }, { "epoch": 0.01, "learning_rate": 6.4e-07, "loss": 0.2697, "step": 32 }, { "epoch": 0.01, "learning_rate": 6.6e-07, "loss": 0.2682, "step": 33 }, { "epoch": 0.01, "learning_rate": 6.800000000000001e-07, "loss": 0.2758, "step": 34 }, { "epoch": 0.01, "learning_rate": 7.000000000000001e-07, "loss": 0.2763, "step": 35 }, { "epoch": 0.01, "learning_rate": 7.2e-07, "loss": 0.2656, "step": 36 }, { "epoch": 0.01, "learning_rate": 7.4e-07, "loss": 0.2612, "step": 37 }, { "epoch": 0.01, "learning_rate": 7.6e-07, "loss": 0.26, "step": 38 }, { "epoch": 0.01, "learning_rate": 7.8e-07, "loss": 0.2725, "step": 39 }, { "epoch": 0.01, "learning_rate": 8.000000000000001e-07, "loss": 0.2702, "step": 40 }, { "epoch": 0.01, "learning_rate": 8.200000000000001e-07, "loss": 0.2567, "step": 41 }, { "epoch": 0.01, "learning_rate": 8.400000000000001e-07, "loss": 0.255, "step": 42 }, { "epoch": 0.01, "learning_rate": 8.6e-07, "loss": 0.2568, "step": 43 }, { "epoch": 0.01, "learning_rate": 8.8e-07, "loss": 0.2662, "step": 44 }, { "epoch": 0.01, "learning_rate": 9.000000000000001e-07, "loss": 0.2192, "step": 45 }, { "epoch": 0.01, "learning_rate": 9.200000000000001e-07, "loss": 0.2508, "step": 46 }, { "epoch": 0.01, "learning_rate": 9.400000000000001e-07, "loss": 0.2468, "step": 47 }, { "epoch": 0.01, "learning_rate": 9.600000000000001e-07, "loss": 0.2054, "step": 48 }, { "epoch": 0.01, "learning_rate": 9.800000000000001e-07, "loss": 0.2587, "step": 49 }, { "epoch": 0.01, "learning_rate": 1.0000000000000002e-06, "loss": 0.266, "step": 50 }, { "epoch": 0.01, "learning_rate": 1.02e-06, "loss": 0.2495, "step": 51 }, { "epoch": 0.01, "learning_rate": 1.04e-06, "loss": 0.2591, "step": 52 }, { "epoch": 0.01, "learning_rate": 1.06e-06, "loss": 0.2592, "step": 53 }, { "epoch": 0.01, "learning_rate": 1.08e-06, "loss": 0.2687, "step": 54 }, { "epoch": 0.01, "learning_rate": 1.1e-06, "loss": 0.2547, "step": 55 }, { "epoch": 0.01, "learning_rate": 1.12e-06, "loss": 0.2594, "step": 56 }, { "epoch": 0.01, "learning_rate": 1.14e-06, "loss": 0.2599, "step": 57 }, { "epoch": 0.01, "learning_rate": 1.1600000000000001e-06, "loss": 0.2563, "step": 58 }, { "epoch": 0.01, "learning_rate": 1.1800000000000001e-06, "loss": 0.2594, "step": 59 }, { "epoch": 0.01, "learning_rate": 1.2000000000000002e-06, "loss": 0.2501, "step": 60 }, { "epoch": 0.01, "learning_rate": 1.2200000000000002e-06, "loss": 0.2551, "step": 61 }, { "epoch": 0.01, "learning_rate": 1.2400000000000002e-06, "loss": 0.2187, "step": 62 }, { "epoch": 0.01, "learning_rate": 1.26e-06, "loss": 0.2113, "step": 63 }, { "epoch": 0.01, "learning_rate": 1.28e-06, "loss": 0.1707, "step": 64 }, { "epoch": 0.01, "learning_rate": 1.3e-06, "loss": 0.2402, "step": 65 }, { "epoch": 0.01, "learning_rate": 1.32e-06, "loss": 0.2417, "step": 66 }, { "epoch": 0.01, "learning_rate": 1.34e-06, "loss": 0.2303, "step": 67 }, { "epoch": 0.01, "learning_rate": 1.3600000000000001e-06, "loss": 0.2365, "step": 68 }, { "epoch": 0.01, "learning_rate": 1.3800000000000001e-06, "loss": 0.2569, "step": 69 }, { "epoch": 0.01, "learning_rate": 1.4000000000000001e-06, "loss": 0.2472, "step": 70 }, { "epoch": 0.01, "learning_rate": 1.42e-06, "loss": 0.2496, "step": 71 }, { "epoch": 0.01, "learning_rate": 1.44e-06, "loss": 0.2433, "step": 72 }, { "epoch": 0.01, "learning_rate": 1.46e-06, "loss": 0.2413, "step": 73 }, { "epoch": 0.01, "learning_rate": 1.48e-06, "loss": 0.2428, "step": 74 }, { "epoch": 0.01, "learning_rate": 1.5e-06, "loss": 0.2318, "step": 75 }, { "epoch": 0.02, "learning_rate": 1.52e-06, "loss": 0.2402, "step": 76 }, { "epoch": 0.02, "learning_rate": 1.54e-06, "loss": 0.2176, "step": 77 }, { "epoch": 0.02, "learning_rate": 1.56e-06, "loss": 0.247, "step": 78 }, { "epoch": 0.02, "learning_rate": 1.5800000000000001e-06, "loss": 0.2483, "step": 79 }, { "epoch": 0.02, "learning_rate": 1.6000000000000001e-06, "loss": 0.2408, "step": 80 }, { "epoch": 0.02, "learning_rate": 1.6200000000000002e-06, "loss": 0.2418, "step": 81 }, { "epoch": 0.02, "learning_rate": 1.6400000000000002e-06, "loss": 0.2405, "step": 82 }, { "epoch": 0.02, "learning_rate": 1.6600000000000002e-06, "loss": 0.2281, "step": 83 }, { "epoch": 0.02, "learning_rate": 1.6800000000000002e-06, "loss": 0.247, "step": 84 }, { "epoch": 0.02, "learning_rate": 1.7000000000000002e-06, "loss": 0.2403, "step": 85 }, { "epoch": 0.02, "learning_rate": 1.72e-06, "loss": 0.2287, "step": 86 }, { "epoch": 0.02, "learning_rate": 1.74e-06, "loss": 0.2375, "step": 87 }, { "epoch": 0.02, "learning_rate": 1.76e-06, "loss": 0.2339, "step": 88 }, { "epoch": 0.02, "learning_rate": 1.7800000000000001e-06, "loss": 0.2388, "step": 89 }, { "epoch": 0.02, "learning_rate": 1.8000000000000001e-06, "loss": 0.2327, "step": 90 }, { "epoch": 0.02, "learning_rate": 1.8200000000000002e-06, "loss": 0.2371, "step": 91 }, { "epoch": 0.02, "learning_rate": 1.8400000000000002e-06, "loss": 0.2166, "step": 92 }, { "epoch": 0.02, "learning_rate": 1.8600000000000002e-06, "loss": 0.2224, "step": 93 }, { "epoch": 0.02, "learning_rate": 1.8800000000000002e-06, "loss": 0.2263, "step": 94 }, { "epoch": 0.02, "learning_rate": 1.9000000000000002e-06, "loss": 0.2368, "step": 95 }, { "epoch": 0.02, "learning_rate": 1.9200000000000003e-06, "loss": 0.2316, "step": 96 }, { "epoch": 0.02, "learning_rate": 1.94e-06, "loss": 0.2267, "step": 97 }, { "epoch": 0.02, "learning_rate": 1.9600000000000003e-06, "loss": 0.2364, "step": 98 }, { "epoch": 0.02, "learning_rate": 1.98e-06, "loss": 0.2242, "step": 99 }, { "epoch": 0.02, "learning_rate": 2.0000000000000003e-06, "loss": 0.2246, "step": 100 }, { "epoch": 0.02, "learning_rate": 2.02e-06, "loss": 0.2265, "step": 101 }, { "epoch": 0.02, "learning_rate": 2.04e-06, "loss": 0.2356, "step": 102 }, { "epoch": 0.02, "learning_rate": 2.06e-06, "loss": 0.2316, "step": 103 }, { "epoch": 0.02, "learning_rate": 2.08e-06, "loss": 0.2326, "step": 104 }, { "epoch": 0.02, "learning_rate": 2.1000000000000002e-06, "loss": 0.2207, "step": 105 }, { "epoch": 0.02, "learning_rate": 2.12e-06, "loss": 0.222, "step": 106 }, { "epoch": 0.02, "learning_rate": 2.1400000000000003e-06, "loss": 0.2304, "step": 107 }, { "epoch": 0.02, "learning_rate": 2.16e-06, "loss": 0.2213, "step": 108 }, { "epoch": 0.02, "learning_rate": 2.1800000000000003e-06, "loss": 0.2253, "step": 109 }, { "epoch": 0.02, "learning_rate": 2.2e-06, "loss": 0.2214, "step": 110 }, { "epoch": 0.02, "learning_rate": 2.2200000000000003e-06, "loss": 0.21, "step": 111 }, { "epoch": 0.02, "learning_rate": 2.24e-06, "loss": 0.1648, "step": 112 }, { "epoch": 0.02, "learning_rate": 2.2600000000000004e-06, "loss": 0.2069, "step": 113 }, { "epoch": 0.02, "learning_rate": 2.28e-06, "loss": 0.221, "step": 114 }, { "epoch": 0.02, "learning_rate": 2.3000000000000004e-06, "loss": 0.2286, "step": 115 }, { "epoch": 0.02, "learning_rate": 2.3200000000000002e-06, "loss": 0.226, "step": 116 }, { "epoch": 0.02, "learning_rate": 2.3400000000000005e-06, "loss": 0.2244, "step": 117 }, { "epoch": 0.02, "learning_rate": 2.3600000000000003e-06, "loss": 0.2209, "step": 118 }, { "epoch": 0.02, "learning_rate": 2.38e-06, "loss": 0.2276, "step": 119 }, { "epoch": 0.02, "learning_rate": 2.4000000000000003e-06, "loss": 0.2216, "step": 120 }, { "epoch": 0.02, "learning_rate": 2.42e-06, "loss": 0.2165, "step": 121 }, { "epoch": 0.02, "learning_rate": 2.4400000000000004e-06, "loss": 0.2252, "step": 122 }, { "epoch": 0.02, "learning_rate": 2.46e-06, "loss": 0.2257, "step": 123 }, { "epoch": 0.02, "learning_rate": 2.4800000000000004e-06, "loss": 0.2261, "step": 124 }, { "epoch": 0.03, "learning_rate": 2.5e-06, "loss": 0.2197, "step": 125 }, { "epoch": 0.03, "learning_rate": 2.52e-06, "loss": 0.2077, "step": 126 }, { "epoch": 0.03, "learning_rate": 2.5400000000000002e-06, "loss": 0.2184, "step": 127 }, { "epoch": 0.03, "learning_rate": 2.56e-06, "loss": 0.2054, "step": 128 }, { "epoch": 0.03, "learning_rate": 2.5800000000000003e-06, "loss": 0.2147, "step": 129 }, { "epoch": 0.03, "learning_rate": 2.6e-06, "loss": 0.2254, "step": 130 }, { "epoch": 0.03, "learning_rate": 2.6200000000000003e-06, "loss": 0.2065, "step": 131 }, { "epoch": 0.03, "learning_rate": 2.64e-06, "loss": 0.2217, "step": 132 }, { "epoch": 0.03, "learning_rate": 2.6600000000000004e-06, "loss": 0.2158, "step": 133 }, { "epoch": 0.03, "learning_rate": 2.68e-06, "loss": 0.2202, "step": 134 }, { "epoch": 0.03, "learning_rate": 2.7000000000000004e-06, "loss": 0.2263, "step": 135 }, { "epoch": 0.03, "learning_rate": 2.7200000000000002e-06, "loss": 0.2127, "step": 136 }, { "epoch": 0.03, "learning_rate": 2.7400000000000004e-06, "loss": 0.2135, "step": 137 }, { "epoch": 0.03, "learning_rate": 2.7600000000000003e-06, "loss": 0.2132, "step": 138 }, { "epoch": 0.03, "learning_rate": 2.7800000000000005e-06, "loss": 0.2159, "step": 139 }, { "epoch": 0.03, "learning_rate": 2.8000000000000003e-06, "loss": 0.206, "step": 140 }, { "epoch": 0.03, "learning_rate": 2.82e-06, "loss": 0.2139, "step": 141 }, { "epoch": 0.03, "learning_rate": 2.84e-06, "loss": 0.2261, "step": 142 }, { "epoch": 0.03, "learning_rate": 2.86e-06, "loss": 0.2226, "step": 143 }, { "epoch": 0.03, "learning_rate": 2.88e-06, "loss": 0.2154, "step": 144 }, { "epoch": 0.03, "learning_rate": 2.9e-06, "loss": 0.2038, "step": 145 }, { "epoch": 0.03, "learning_rate": 2.92e-06, "loss": 0.214, "step": 146 }, { "epoch": 0.03, "learning_rate": 2.9400000000000002e-06, "loss": 0.2156, "step": 147 }, { "epoch": 0.03, "learning_rate": 2.96e-06, "loss": 0.2172, "step": 148 }, { "epoch": 0.03, "learning_rate": 2.9800000000000003e-06, "loss": 0.2292, "step": 149 }, { "epoch": 0.03, "learning_rate": 3e-06, "loss": 0.2106, "step": 150 }, { "epoch": 0.03, "learning_rate": 3.0200000000000003e-06, "loss": 0.2175, "step": 151 }, { "epoch": 0.03, "learning_rate": 3.04e-06, "loss": 0.2025, "step": 152 }, { "epoch": 0.03, "learning_rate": 3.0600000000000003e-06, "loss": 0.2074, "step": 153 }, { "epoch": 0.03, "learning_rate": 3.08e-06, "loss": 0.206, "step": 154 }, { "epoch": 0.03, "learning_rate": 3.1000000000000004e-06, "loss": 0.2113, "step": 155 }, { "epoch": 0.03, "learning_rate": 3.12e-06, "loss": 0.2157, "step": 156 }, { "epoch": 0.03, "learning_rate": 3.1400000000000004e-06, "loss": 0.2086, "step": 157 }, { "epoch": 0.03, "learning_rate": 3.1600000000000002e-06, "loss": 0.192, "step": 158 }, { "epoch": 0.03, "learning_rate": 3.1800000000000005e-06, "loss": 0.2188, "step": 159 }, { "epoch": 0.03, "learning_rate": 3.2000000000000003e-06, "loss": 0.2064, "step": 160 }, { "epoch": 0.03, "learning_rate": 3.2200000000000005e-06, "loss": 0.2048, "step": 161 }, { "epoch": 0.03, "learning_rate": 3.2400000000000003e-06, "loss": 0.205, "step": 162 }, { "epoch": 0.03, "learning_rate": 3.2600000000000006e-06, "loss": 0.2082, "step": 163 }, { "epoch": 0.03, "learning_rate": 3.2800000000000004e-06, "loss": 0.194, "step": 164 }, { "epoch": 0.03, "learning_rate": 3.3000000000000006e-06, "loss": 0.2097, "step": 165 }, { "epoch": 0.03, "learning_rate": 3.3200000000000004e-06, "loss": 0.1949, "step": 166 }, { "epoch": 0.03, "learning_rate": 3.3400000000000006e-06, "loss": 0.1896, "step": 167 }, { "epoch": 0.03, "learning_rate": 3.3600000000000004e-06, "loss": 0.2076, "step": 168 }, { "epoch": 0.03, "learning_rate": 3.3800000000000007e-06, "loss": 0.2063, "step": 169 }, { "epoch": 0.03, "learning_rate": 3.4000000000000005e-06, "loss": 0.2083, "step": 170 }, { "epoch": 0.03, "learning_rate": 3.4200000000000007e-06, "loss": 0.2098, "step": 171 }, { "epoch": 0.03, "learning_rate": 3.44e-06, "loss": 0.2003, "step": 172 }, { "epoch": 0.03, "learning_rate": 3.46e-06, "loss": 0.2016, "step": 173 }, { "epoch": 0.03, "learning_rate": 3.48e-06, "loss": 0.2137, "step": 174 }, { "epoch": 0.04, "learning_rate": 3.5e-06, "loss": 0.1981, "step": 175 }, { "epoch": 0.04, "learning_rate": 3.52e-06, "loss": 0.1942, "step": 176 }, { "epoch": 0.04, "learning_rate": 3.54e-06, "loss": 0.2053, "step": 177 }, { "epoch": 0.04, "learning_rate": 3.5600000000000002e-06, "loss": 0.1979, "step": 178 }, { "epoch": 0.04, "learning_rate": 3.58e-06, "loss": 0.1984, "step": 179 }, { "epoch": 0.04, "learning_rate": 3.6000000000000003e-06, "loss": 0.2078, "step": 180 }, { "epoch": 0.04, "learning_rate": 3.62e-06, "loss": 0.1971, "step": 181 }, { "epoch": 0.04, "learning_rate": 3.6400000000000003e-06, "loss": 0.1908, "step": 182 }, { "epoch": 0.04, "learning_rate": 3.66e-06, "loss": 0.1767, "step": 183 }, { "epoch": 0.04, "learning_rate": 3.6800000000000003e-06, "loss": 0.2003, "step": 184 }, { "epoch": 0.04, "learning_rate": 3.7e-06, "loss": 0.2065, "step": 185 }, { "epoch": 0.04, "learning_rate": 3.7200000000000004e-06, "loss": 0.1986, "step": 186 }, { "epoch": 0.04, "learning_rate": 3.74e-06, "loss": 0.2019, "step": 187 }, { "epoch": 0.04, "learning_rate": 3.7600000000000004e-06, "loss": 0.1964, "step": 188 }, { "epoch": 0.04, "learning_rate": 3.7800000000000002e-06, "loss": 0.1967, "step": 189 }, { "epoch": 0.04, "learning_rate": 3.8000000000000005e-06, "loss": 0.1991, "step": 190 }, { "epoch": 0.04, "learning_rate": 3.820000000000001e-06, "loss": 0.2094, "step": 191 }, { "epoch": 0.04, "learning_rate": 3.8400000000000005e-06, "loss": 0.1965, "step": 192 }, { "epoch": 0.04, "learning_rate": 3.86e-06, "loss": 0.1729, "step": 193 }, { "epoch": 0.04, "learning_rate": 3.88e-06, "loss": 0.1937, "step": 194 }, { "epoch": 0.04, "learning_rate": 3.900000000000001e-06, "loss": 0.1915, "step": 195 }, { "epoch": 0.04, "learning_rate": 3.920000000000001e-06, "loss": 0.1984, "step": 196 }, { "epoch": 0.04, "learning_rate": 3.94e-06, "loss": 0.1955, "step": 197 }, { "epoch": 0.04, "learning_rate": 3.96e-06, "loss": 0.1938, "step": 198 }, { "epoch": 0.04, "learning_rate": 3.980000000000001e-06, "loss": 0.1948, "step": 199 }, { "epoch": 0.04, "learning_rate": 4.000000000000001e-06, "loss": 0.1975, "step": 200 }, { "epoch": 0.04, "learning_rate": 4.0200000000000005e-06, "loss": 0.2027, "step": 201 }, { "epoch": 0.04, "learning_rate": 4.04e-06, "loss": 0.1884, "step": 202 }, { "epoch": 0.04, "learning_rate": 4.060000000000001e-06, "loss": 0.199, "step": 203 }, { "epoch": 0.04, "learning_rate": 4.08e-06, "loss": 0.1951, "step": 204 }, { "epoch": 0.04, "learning_rate": 4.1e-06, "loss": 0.1969, "step": 205 }, { "epoch": 0.04, "learning_rate": 4.12e-06, "loss": 0.1966, "step": 206 }, { "epoch": 0.04, "learning_rate": 4.14e-06, "loss": 0.1887, "step": 207 }, { "epoch": 0.04, "learning_rate": 4.16e-06, "loss": 0.1926, "step": 208 }, { "epoch": 0.04, "learning_rate": 4.18e-06, "loss": 0.1913, "step": 209 }, { "epoch": 0.04, "learning_rate": 4.2000000000000004e-06, "loss": 0.1857, "step": 210 }, { "epoch": 0.04, "learning_rate": 4.22e-06, "loss": 0.1858, "step": 211 }, { "epoch": 0.04, "learning_rate": 4.24e-06, "loss": 0.1795, "step": 212 }, { "epoch": 0.04, "learning_rate": 4.26e-06, "loss": 0.1783, "step": 213 }, { "epoch": 0.04, "learning_rate": 4.2800000000000005e-06, "loss": 0.1905, "step": 214 }, { "epoch": 0.04, "learning_rate": 4.3e-06, "loss": 0.1843, "step": 215 }, { "epoch": 0.04, "learning_rate": 4.32e-06, "loss": 0.1761, "step": 216 }, { "epoch": 0.04, "learning_rate": 4.34e-06, "loss": 0.1937, "step": 217 }, { "epoch": 0.04, "learning_rate": 4.360000000000001e-06, "loss": 0.1913, "step": 218 }, { "epoch": 0.04, "learning_rate": 4.38e-06, "loss": 0.1835, "step": 219 }, { "epoch": 0.04, "learning_rate": 4.4e-06, "loss": 0.1921, "step": 220 }, { "epoch": 0.04, "learning_rate": 4.42e-06, "loss": 0.1895, "step": 221 }, { "epoch": 0.04, "learning_rate": 4.440000000000001e-06, "loss": 0.1966, "step": 222 }, { "epoch": 0.04, "learning_rate": 4.4600000000000005e-06, "loss": 0.1808, "step": 223 }, { "epoch": 0.04, "learning_rate": 4.48e-06, "loss": 0.1977, "step": 224 }, { "epoch": 0.04, "learning_rate": 4.5e-06, "loss": 0.1843, "step": 225 }, { "epoch": 0.05, "learning_rate": 4.520000000000001e-06, "loss": 0.1862, "step": 226 }, { "epoch": 0.05, "learning_rate": 4.540000000000001e-06, "loss": 0.1751, "step": 227 }, { "epoch": 0.05, "learning_rate": 4.56e-06, "loss": 0.1908, "step": 228 }, { "epoch": 0.05, "learning_rate": 4.58e-06, "loss": 0.1909, "step": 229 }, { "epoch": 0.05, "learning_rate": 4.600000000000001e-06, "loss": 0.1951, "step": 230 }, { "epoch": 0.05, "learning_rate": 4.620000000000001e-06, "loss": 0.1841, "step": 231 }, { "epoch": 0.05, "learning_rate": 4.6400000000000005e-06, "loss": 0.1952, "step": 232 }, { "epoch": 0.05, "learning_rate": 4.66e-06, "loss": 0.1961, "step": 233 }, { "epoch": 0.05, "learning_rate": 4.680000000000001e-06, "loss": 0.1938, "step": 234 }, { "epoch": 0.05, "learning_rate": 4.7e-06, "loss": 0.1727, "step": 235 }, { "epoch": 0.05, "learning_rate": 4.7200000000000005e-06, "loss": 0.1997, "step": 236 }, { "epoch": 0.05, "learning_rate": 4.74e-06, "loss": 0.1861, "step": 237 }, { "epoch": 0.05, "learning_rate": 4.76e-06, "loss": 0.1824, "step": 238 }, { "epoch": 0.05, "learning_rate": 4.78e-06, "loss": 0.1946, "step": 239 }, { "epoch": 0.05, "learning_rate": 4.800000000000001e-06, "loss": 0.1981, "step": 240 }, { "epoch": 0.05, "learning_rate": 4.8200000000000004e-06, "loss": 0.1845, "step": 241 }, { "epoch": 0.05, "learning_rate": 4.84e-06, "loss": 0.1642, "step": 242 }, { "epoch": 0.05, "learning_rate": 4.86e-06, "loss": 0.194, "step": 243 }, { "epoch": 0.05, "learning_rate": 4.880000000000001e-06, "loss": 0.197, "step": 244 }, { "epoch": 0.05, "learning_rate": 4.9000000000000005e-06, "loss": 0.19, "step": 245 }, { "epoch": 0.05, "learning_rate": 4.92e-06, "loss": 0.1807, "step": 246 }, { "epoch": 0.05, "learning_rate": 4.94e-06, "loss": 0.1853, "step": 247 }, { "epoch": 0.05, "learning_rate": 4.960000000000001e-06, "loss": 0.1837, "step": 248 }, { "epoch": 0.05, "learning_rate": 4.980000000000001e-06, "loss": 0.1753, "step": 249 }, { "epoch": 0.05, "learning_rate": 5e-06, "loss": 0.1736, "step": 250 }, { "epoch": 0.05, "learning_rate": 5.02e-06, "loss": 0.1841, "step": 251 }, { "epoch": 0.05, "learning_rate": 5.04e-06, "loss": 0.1827, "step": 252 }, { "epoch": 0.05, "learning_rate": 5.060000000000001e-06, "loss": 0.1803, "step": 253 }, { "epoch": 0.05, "learning_rate": 5.0800000000000005e-06, "loss": 0.191, "step": 254 }, { "epoch": 0.05, "learning_rate": 5.1e-06, "loss": 0.1842, "step": 255 }, { "epoch": 0.05, "learning_rate": 5.12e-06, "loss": 0.1621, "step": 256 }, { "epoch": 0.05, "learning_rate": 5.140000000000001e-06, "loss": 0.1942, "step": 257 }, { "epoch": 0.05, "learning_rate": 5.1600000000000006e-06, "loss": 0.1868, "step": 258 }, { "epoch": 0.05, "learning_rate": 5.18e-06, "loss": 0.181, "step": 259 }, { "epoch": 0.05, "learning_rate": 5.2e-06, "loss": 0.1918, "step": 260 }, { "epoch": 0.05, "learning_rate": 5.220000000000001e-06, "loss": 0.1824, "step": 261 }, { "epoch": 0.05, "learning_rate": 5.240000000000001e-06, "loss": 0.1808, "step": 262 }, { "epoch": 0.05, "learning_rate": 5.2600000000000005e-06, "loss": 0.1803, "step": 263 }, { "epoch": 0.05, "learning_rate": 5.28e-06, "loss": 0.1961, "step": 264 }, { "epoch": 0.05, "learning_rate": 5.300000000000001e-06, "loss": 0.1936, "step": 265 }, { "epoch": 0.05, "learning_rate": 5.320000000000001e-06, "loss": 0.1869, "step": 266 }, { "epoch": 0.05, "learning_rate": 5.3400000000000005e-06, "loss": 0.1859, "step": 267 }, { "epoch": 0.05, "learning_rate": 5.36e-06, "loss": 0.1787, "step": 268 }, { "epoch": 0.05, "learning_rate": 5.380000000000001e-06, "loss": 0.1892, "step": 269 }, { "epoch": 0.05, "learning_rate": 5.400000000000001e-06, "loss": 0.1813, "step": 270 }, { "epoch": 0.05, "learning_rate": 5.420000000000001e-06, "loss": 0.1808, "step": 271 }, { "epoch": 0.05, "learning_rate": 5.4400000000000004e-06, "loss": 0.1901, "step": 272 }, { "epoch": 0.05, "learning_rate": 5.460000000000001e-06, "loss": 0.1814, "step": 273 }, { "epoch": 0.05, "learning_rate": 5.480000000000001e-06, "loss": 0.1842, "step": 274 }, { "epoch": 0.06, "learning_rate": 5.500000000000001e-06, "loss": 0.1756, "step": 275 }, { "epoch": 0.06, "learning_rate": 5.5200000000000005e-06, "loss": 0.1865, "step": 276 }, { "epoch": 0.06, "learning_rate": 5.540000000000001e-06, "loss": 0.1753, "step": 277 }, { "epoch": 0.06, "learning_rate": 5.560000000000001e-06, "loss": 0.1758, "step": 278 }, { "epoch": 0.06, "learning_rate": 5.580000000000001e-06, "loss": 0.1758, "step": 279 }, { "epoch": 0.06, "learning_rate": 5.600000000000001e-06, "loss": 0.1788, "step": 280 }, { "epoch": 0.06, "learning_rate": 5.620000000000001e-06, "loss": 0.1764, "step": 281 }, { "epoch": 0.06, "learning_rate": 5.64e-06, "loss": 0.1728, "step": 282 }, { "epoch": 0.06, "learning_rate": 5.66e-06, "loss": 0.1725, "step": 283 }, { "epoch": 0.06, "learning_rate": 5.68e-06, "loss": 0.181, "step": 284 }, { "epoch": 0.06, "learning_rate": 5.7e-06, "loss": 0.1806, "step": 285 }, { "epoch": 0.06, "learning_rate": 5.72e-06, "loss": 0.1811, "step": 286 }, { "epoch": 0.06, "learning_rate": 5.74e-06, "loss": 0.1736, "step": 287 }, { "epoch": 0.06, "learning_rate": 5.76e-06, "loss": 0.1874, "step": 288 }, { "epoch": 0.06, "learning_rate": 5.78e-06, "loss": 0.182, "step": 289 }, { "epoch": 0.06, "learning_rate": 5.8e-06, "loss": 0.1684, "step": 290 }, { "epoch": 0.06, "learning_rate": 5.82e-06, "loss": 0.169, "step": 291 }, { "epoch": 0.06, "learning_rate": 5.84e-06, "loss": 0.1805, "step": 292 }, { "epoch": 0.06, "learning_rate": 5.86e-06, "loss": 0.1773, "step": 293 }, { "epoch": 0.06, "learning_rate": 5.8800000000000005e-06, "loss": 0.176, "step": 294 }, { "epoch": 0.06, "learning_rate": 5.9e-06, "loss": 0.1753, "step": 295 }, { "epoch": 0.06, "learning_rate": 5.92e-06, "loss": 0.171, "step": 296 }, { "epoch": 0.06, "learning_rate": 5.94e-06, "loss": 0.1777, "step": 297 }, { "epoch": 0.06, "learning_rate": 5.9600000000000005e-06, "loss": 0.1619, "step": 298 }, { "epoch": 0.06, "learning_rate": 5.98e-06, "loss": 0.1803, "step": 299 }, { "epoch": 0.06, "learning_rate": 6e-06, "loss": 0.171, "step": 300 }, { "epoch": 0.06, "learning_rate": 6.02e-06, "loss": 0.1773, "step": 301 }, { "epoch": 0.06, "learning_rate": 6.040000000000001e-06, "loss": 0.1734, "step": 302 }, { "epoch": 0.06, "learning_rate": 6.0600000000000004e-06, "loss": 0.1766, "step": 303 }, { "epoch": 0.06, "learning_rate": 6.08e-06, "loss": 0.1727, "step": 304 }, { "epoch": 0.06, "learning_rate": 6.1e-06, "loss": 0.1767, "step": 305 }, { "epoch": 0.06, "learning_rate": 6.120000000000001e-06, "loss": 0.177, "step": 306 }, { "epoch": 0.06, "learning_rate": 6.1400000000000005e-06, "loss": 0.1734, "step": 307 }, { "epoch": 0.06, "learning_rate": 6.16e-06, "loss": 0.1689, "step": 308 }, { "epoch": 0.06, "learning_rate": 6.18e-06, "loss": 0.1726, "step": 309 }, { "epoch": 0.06, "learning_rate": 6.200000000000001e-06, "loss": 0.1814, "step": 310 }, { "epoch": 0.06, "learning_rate": 6.220000000000001e-06, "loss": 0.1844, "step": 311 }, { "epoch": 0.06, "learning_rate": 6.24e-06, "loss": 0.1779, "step": 312 }, { "epoch": 0.06, "learning_rate": 6.26e-06, "loss": 0.1815, "step": 313 }, { "epoch": 0.06, "learning_rate": 6.280000000000001e-06, "loss": 0.1717, "step": 314 }, { "epoch": 0.06, "learning_rate": 6.300000000000001e-06, "loss": 0.1717, "step": 315 }, { "epoch": 0.06, "learning_rate": 6.3200000000000005e-06, "loss": 0.1741, "step": 316 }, { "epoch": 0.06, "learning_rate": 6.34e-06, "loss": 0.1688, "step": 317 }, { "epoch": 0.06, "learning_rate": 6.360000000000001e-06, "loss": 0.178, "step": 318 }, { "epoch": 0.06, "learning_rate": 6.380000000000001e-06, "loss": 0.1694, "step": 319 }, { "epoch": 0.06, "learning_rate": 6.4000000000000006e-06, "loss": 0.1752, "step": 320 }, { "epoch": 0.06, "learning_rate": 6.42e-06, "loss": 0.1693, "step": 321 }, { "epoch": 0.06, "learning_rate": 6.440000000000001e-06, "loss": 0.1796, "step": 322 }, { "epoch": 0.06, "learning_rate": 6.460000000000001e-06, "loss": 0.1701, "step": 323 }, { "epoch": 0.06, "learning_rate": 6.480000000000001e-06, "loss": 0.1706, "step": 324 }, { "epoch": 0.07, "learning_rate": 6.5000000000000004e-06, "loss": 0.1773, "step": 325 }, { "epoch": 0.07, "learning_rate": 6.520000000000001e-06, "loss": 0.1742, "step": 326 }, { "epoch": 0.07, "learning_rate": 6.540000000000001e-06, "loss": 0.1675, "step": 327 }, { "epoch": 0.07, "learning_rate": 6.560000000000001e-06, "loss": 0.1712, "step": 328 }, { "epoch": 0.07, "learning_rate": 6.5800000000000005e-06, "loss": 0.1655, "step": 329 }, { "epoch": 0.07, "learning_rate": 6.600000000000001e-06, "loss": 0.1653, "step": 330 }, { "epoch": 0.07, "learning_rate": 6.620000000000001e-06, "loss": 0.1786, "step": 331 }, { "epoch": 0.07, "learning_rate": 6.640000000000001e-06, "loss": 0.1655, "step": 332 }, { "epoch": 0.07, "learning_rate": 6.660000000000001e-06, "loss": 0.1728, "step": 333 }, { "epoch": 0.07, "learning_rate": 6.680000000000001e-06, "loss": 0.1757, "step": 334 }, { "epoch": 0.07, "learning_rate": 6.700000000000001e-06, "loss": 0.1736, "step": 335 }, { "epoch": 0.07, "learning_rate": 6.720000000000001e-06, "loss": 0.1663, "step": 336 }, { "epoch": 0.07, "learning_rate": 6.740000000000001e-06, "loss": 0.162, "step": 337 }, { "epoch": 0.07, "learning_rate": 6.760000000000001e-06, "loss": 0.1758, "step": 338 }, { "epoch": 0.07, "learning_rate": 6.780000000000001e-06, "loss": 0.1722, "step": 339 }, { "epoch": 0.07, "learning_rate": 6.800000000000001e-06, "loss": 0.1561, "step": 340 }, { "epoch": 0.07, "learning_rate": 6.820000000000001e-06, "loss": 0.1736, "step": 341 }, { "epoch": 0.07, "learning_rate": 6.8400000000000014e-06, "loss": 0.166, "step": 342 }, { "epoch": 0.07, "learning_rate": 6.860000000000001e-06, "loss": 0.166, "step": 343 }, { "epoch": 0.07, "learning_rate": 6.88e-06, "loss": 0.1699, "step": 344 }, { "epoch": 0.07, "learning_rate": 6.9e-06, "loss": 0.1586, "step": 345 }, { "epoch": 0.07, "learning_rate": 6.92e-06, "loss": 0.1785, "step": 346 }, { "epoch": 0.07, "learning_rate": 6.9400000000000005e-06, "loss": 0.1898, "step": 347 }, { "epoch": 0.07, "learning_rate": 6.96e-06, "loss": 0.1585, "step": 348 }, { "epoch": 0.07, "learning_rate": 6.98e-06, "loss": 0.1616, "step": 349 }, { "epoch": 0.07, "learning_rate": 7e-06, "loss": 0.1643, "step": 350 }, { "epoch": 0.07, "learning_rate": 7.0200000000000006e-06, "loss": 0.1676, "step": 351 }, { "epoch": 0.07, "learning_rate": 7.04e-06, "loss": 0.1672, "step": 352 }, { "epoch": 0.07, "learning_rate": 7.06e-06, "loss": 0.1685, "step": 353 }, { "epoch": 0.07, "learning_rate": 7.08e-06, "loss": 0.167, "step": 354 }, { "epoch": 0.07, "learning_rate": 7.100000000000001e-06, "loss": 0.1333, "step": 355 }, { "epoch": 0.07, "learning_rate": 7.1200000000000004e-06, "loss": 0.169, "step": 356 }, { "epoch": 0.07, "learning_rate": 7.14e-06, "loss": 0.177, "step": 357 }, { "epoch": 0.07, "learning_rate": 7.16e-06, "loss": 0.1603, "step": 358 }, { "epoch": 0.07, "learning_rate": 7.180000000000001e-06, "loss": 0.1588, "step": 359 }, { "epoch": 0.07, "learning_rate": 7.2000000000000005e-06, "loss": 0.1608, "step": 360 }, { "epoch": 0.07, "learning_rate": 7.22e-06, "loss": 0.1691, "step": 361 }, { "epoch": 0.07, "learning_rate": 7.24e-06, "loss": 0.1585, "step": 362 }, { "epoch": 0.07, "learning_rate": 7.260000000000001e-06, "loss": 0.1533, "step": 363 }, { "epoch": 0.07, "learning_rate": 7.280000000000001e-06, "loss": 0.166, "step": 364 }, { "epoch": 0.07, "learning_rate": 7.3e-06, "loss": 0.1718, "step": 365 }, { "epoch": 0.07, "learning_rate": 7.32e-06, "loss": 0.1195, "step": 366 }, { "epoch": 0.07, "learning_rate": 7.340000000000001e-06, "loss": 0.1366, "step": 367 }, { "epoch": 0.07, "learning_rate": 7.360000000000001e-06, "loss": 0.162, "step": 368 }, { "epoch": 0.07, "learning_rate": 7.3800000000000005e-06, "loss": 0.1615, "step": 369 }, { "epoch": 0.07, "learning_rate": 7.4e-06, "loss": 0.1633, "step": 370 }, { "epoch": 0.07, "learning_rate": 7.420000000000001e-06, "loss": 0.169, "step": 371 }, { "epoch": 0.07, "learning_rate": 7.440000000000001e-06, "loss": 0.1559, "step": 372 }, { "epoch": 0.07, "learning_rate": 7.4600000000000006e-06, "loss": 0.1766, "step": 373 }, { "epoch": 0.07, "learning_rate": 7.48e-06, "loss": 0.1681, "step": 374 }, { "epoch": 0.07, "learning_rate": 7.500000000000001e-06, "loss": 0.1705, "step": 375 }, { "epoch": 0.08, "learning_rate": 7.520000000000001e-06, "loss": 0.1582, "step": 376 }, { "epoch": 0.08, "learning_rate": 7.540000000000001e-06, "loss": 0.1643, "step": 377 }, { "epoch": 0.08, "learning_rate": 7.5600000000000005e-06, "loss": 0.1484, "step": 378 }, { "epoch": 0.08, "learning_rate": 7.58e-06, "loss": 0.1453, "step": 379 }, { "epoch": 0.08, "learning_rate": 7.600000000000001e-06, "loss": 0.164, "step": 380 }, { "epoch": 0.08, "learning_rate": 7.620000000000001e-06, "loss": 0.161, "step": 381 }, { "epoch": 0.08, "learning_rate": 7.640000000000001e-06, "loss": 0.1733, "step": 382 }, { "epoch": 0.08, "learning_rate": 7.660000000000001e-06, "loss": 0.1626, "step": 383 }, { "epoch": 0.08, "learning_rate": 7.680000000000001e-06, "loss": 0.1694, "step": 384 }, { "epoch": 0.08, "learning_rate": 7.7e-06, "loss": 0.1651, "step": 385 }, { "epoch": 0.08, "learning_rate": 7.72e-06, "loss": 0.1616, "step": 386 }, { "epoch": 0.08, "learning_rate": 7.74e-06, "loss": 0.169, "step": 387 }, { "epoch": 0.08, "learning_rate": 7.76e-06, "loss": 0.1664, "step": 388 }, { "epoch": 0.08, "learning_rate": 7.78e-06, "loss": 0.1613, "step": 389 }, { "epoch": 0.08, "learning_rate": 7.800000000000002e-06, "loss": 0.1551, "step": 390 }, { "epoch": 0.08, "learning_rate": 7.820000000000001e-06, "loss": 0.162, "step": 391 }, { "epoch": 0.08, "learning_rate": 7.840000000000001e-06, "loss": 0.1565, "step": 392 }, { "epoch": 0.08, "learning_rate": 7.860000000000001e-06, "loss": 0.1396, "step": 393 }, { "epoch": 0.08, "learning_rate": 7.88e-06, "loss": 0.1559, "step": 394 }, { "epoch": 0.08, "learning_rate": 7.9e-06, "loss": 0.1699, "step": 395 }, { "epoch": 0.08, "learning_rate": 7.92e-06, "loss": 0.1627, "step": 396 }, { "epoch": 0.08, "learning_rate": 7.94e-06, "loss": 0.1714, "step": 397 }, { "epoch": 0.08, "learning_rate": 7.960000000000002e-06, "loss": 0.151, "step": 398 }, { "epoch": 0.08, "learning_rate": 7.980000000000002e-06, "loss": 0.1563, "step": 399 }, { "epoch": 0.08, "learning_rate": 8.000000000000001e-06, "loss": 0.1606, "step": 400 }, { "epoch": 0.08, "learning_rate": 8.020000000000001e-06, "loss": 0.1622, "step": 401 }, { "epoch": 0.08, "learning_rate": 8.040000000000001e-06, "loss": 0.1635, "step": 402 }, { "epoch": 0.08, "learning_rate": 8.06e-06, "loss": 0.1592, "step": 403 }, { "epoch": 0.08, "learning_rate": 8.08e-06, "loss": 0.151, "step": 404 }, { "epoch": 0.08, "learning_rate": 8.1e-06, "loss": 0.1604, "step": 405 }, { "epoch": 0.08, "learning_rate": 8.120000000000002e-06, "loss": 0.1571, "step": 406 }, { "epoch": 0.08, "learning_rate": 8.14e-06, "loss": 0.1561, "step": 407 }, { "epoch": 0.08, "learning_rate": 8.16e-06, "loss": 0.1618, "step": 408 }, { "epoch": 0.08, "learning_rate": 8.18e-06, "loss": 0.1593, "step": 409 }, { "epoch": 0.08, "learning_rate": 8.2e-06, "loss": 0.1681, "step": 410 }, { "epoch": 0.08, "learning_rate": 8.220000000000001e-06, "loss": 0.1594, "step": 411 }, { "epoch": 0.08, "learning_rate": 8.24e-06, "loss": 0.1591, "step": 412 }, { "epoch": 0.08, "learning_rate": 8.26e-06, "loss": 0.1586, "step": 413 }, { "epoch": 0.08, "learning_rate": 8.28e-06, "loss": 0.1627, "step": 414 }, { "epoch": 0.08, "learning_rate": 8.3e-06, "loss": 0.1589, "step": 415 }, { "epoch": 0.08, "learning_rate": 8.32e-06, "loss": 0.1531, "step": 416 }, { "epoch": 0.08, "learning_rate": 8.34e-06, "loss": 0.1609, "step": 417 }, { "epoch": 0.08, "learning_rate": 8.36e-06, "loss": 0.1506, "step": 418 }, { "epoch": 0.08, "learning_rate": 8.380000000000001e-06, "loss": 0.154, "step": 419 }, { "epoch": 0.08, "learning_rate": 8.400000000000001e-06, "loss": 0.1599, "step": 420 }, { "epoch": 0.08, "learning_rate": 8.42e-06, "loss": 0.1522, "step": 421 }, { "epoch": 0.08, "learning_rate": 8.44e-06, "loss": 0.1569, "step": 422 }, { "epoch": 0.08, "learning_rate": 8.46e-06, "loss": 0.1581, "step": 423 }, { "epoch": 0.08, "learning_rate": 8.48e-06, "loss": 0.1425, "step": 424 }, { "epoch": 0.09, "learning_rate": 8.5e-06, "loss": 0.159, "step": 425 }, { "epoch": 0.09, "learning_rate": 8.52e-06, "loss": 0.1567, "step": 426 }, { "epoch": 0.09, "learning_rate": 8.540000000000001e-06, "loss": 0.1619, "step": 427 }, { "epoch": 0.09, "learning_rate": 8.560000000000001e-06, "loss": 0.1597, "step": 428 }, { "epoch": 0.09, "learning_rate": 8.580000000000001e-06, "loss": 0.1641, "step": 429 }, { "epoch": 0.09, "learning_rate": 8.6e-06, "loss": 0.1615, "step": 430 }, { "epoch": 0.09, "learning_rate": 8.62e-06, "loss": 0.1669, "step": 431 }, { "epoch": 0.09, "learning_rate": 8.64e-06, "loss": 0.1728, "step": 432 }, { "epoch": 0.09, "learning_rate": 8.66e-06, "loss": 0.1556, "step": 433 }, { "epoch": 0.09, "learning_rate": 8.68e-06, "loss": 0.1621, "step": 434 }, { "epoch": 0.09, "learning_rate": 8.700000000000001e-06, "loss": 0.1534, "step": 435 }, { "epoch": 0.09, "learning_rate": 8.720000000000001e-06, "loss": 0.1566, "step": 436 }, { "epoch": 0.09, "learning_rate": 8.740000000000001e-06, "loss": 0.1527, "step": 437 }, { "epoch": 0.09, "learning_rate": 8.76e-06, "loss": 0.1609, "step": 438 }, { "epoch": 0.09, "learning_rate": 8.78e-06, "loss": 0.153, "step": 439 }, { "epoch": 0.09, "learning_rate": 8.8e-06, "loss": 0.1666, "step": 440 }, { "epoch": 0.09, "learning_rate": 8.82e-06, "loss": 0.1589, "step": 441 }, { "epoch": 0.09, "learning_rate": 8.84e-06, "loss": 0.1537, "step": 442 }, { "epoch": 0.09, "learning_rate": 8.860000000000002e-06, "loss": 0.147, "step": 443 }, { "epoch": 0.09, "learning_rate": 8.880000000000001e-06, "loss": 0.151, "step": 444 }, { "epoch": 0.09, "learning_rate": 8.900000000000001e-06, "loss": 0.1471, "step": 445 }, { "epoch": 0.09, "learning_rate": 8.920000000000001e-06, "loss": 0.1603, "step": 446 }, { "epoch": 0.09, "learning_rate": 8.94e-06, "loss": 0.1625, "step": 447 }, { "epoch": 0.09, "learning_rate": 8.96e-06, "loss": 0.1581, "step": 448 }, { "epoch": 0.09, "learning_rate": 8.98e-06, "loss": 0.1571, "step": 449 }, { "epoch": 0.09, "learning_rate": 9e-06, "loss": 0.1542, "step": 450 }, { "epoch": 0.09, "learning_rate": 9.020000000000002e-06, "loss": 0.1575, "step": 451 }, { "epoch": 0.09, "learning_rate": 9.040000000000002e-06, "loss": 0.1569, "step": 452 }, { "epoch": 0.09, "learning_rate": 9.060000000000001e-06, "loss": 0.1523, "step": 453 }, { "epoch": 0.09, "learning_rate": 9.080000000000001e-06, "loss": 0.1652, "step": 454 }, { "epoch": 0.09, "learning_rate": 9.100000000000001e-06, "loss": 0.1583, "step": 455 }, { "epoch": 0.09, "learning_rate": 9.12e-06, "loss": 0.1454, "step": 456 }, { "epoch": 0.09, "learning_rate": 9.14e-06, "loss": 0.1527, "step": 457 }, { "epoch": 0.09, "learning_rate": 9.16e-06, "loss": 0.1649, "step": 458 }, { "epoch": 0.09, "learning_rate": 9.180000000000002e-06, "loss": 0.1532, "step": 459 }, { "epoch": 0.09, "learning_rate": 9.200000000000002e-06, "loss": 0.1583, "step": 460 }, { "epoch": 0.09, "learning_rate": 9.220000000000002e-06, "loss": 0.1508, "step": 461 }, { "epoch": 0.09, "learning_rate": 9.240000000000001e-06, "loss": 0.1668, "step": 462 }, { "epoch": 0.09, "learning_rate": 9.260000000000001e-06, "loss": 0.1599, "step": 463 }, { "epoch": 0.09, "learning_rate": 9.280000000000001e-06, "loss": 0.1532, "step": 464 }, { "epoch": 0.09, "learning_rate": 9.3e-06, "loss": 0.1621, "step": 465 }, { "epoch": 0.09, "learning_rate": 9.32e-06, "loss": 0.1557, "step": 466 }, { "epoch": 0.09, "learning_rate": 9.340000000000002e-06, "loss": 0.145, "step": 467 }, { "epoch": 0.09, "learning_rate": 9.360000000000002e-06, "loss": 0.1584, "step": 468 }, { "epoch": 0.09, "learning_rate": 9.38e-06, "loss": 0.1567, "step": 469 }, { "epoch": 0.09, "learning_rate": 9.4e-06, "loss": 0.1463, "step": 470 }, { "epoch": 0.09, "learning_rate": 9.42e-06, "loss": 0.1494, "step": 471 }, { "epoch": 0.09, "learning_rate": 9.440000000000001e-06, "loss": 0.1565, "step": 472 }, { "epoch": 0.09, "learning_rate": 9.460000000000001e-06, "loss": 0.1546, "step": 473 }, { "epoch": 0.09, "learning_rate": 9.48e-06, "loss": 0.1636, "step": 474 }, { "epoch": 0.1, "learning_rate": 9.5e-06, "loss": 0.1493, "step": 475 }, { "epoch": 0.1, "learning_rate": 9.52e-06, "loss": 0.1567, "step": 476 }, { "epoch": 0.1, "learning_rate": 9.54e-06, "loss": 0.1357, "step": 477 }, { "epoch": 0.1, "learning_rate": 9.56e-06, "loss": 0.1471, "step": 478 }, { "epoch": 0.1, "learning_rate": 9.58e-06, "loss": 0.1546, "step": 479 }, { "epoch": 0.1, "learning_rate": 9.600000000000001e-06, "loss": 0.1588, "step": 480 }, { "epoch": 0.1, "learning_rate": 9.620000000000001e-06, "loss": 0.1691, "step": 481 }, { "epoch": 0.1, "learning_rate": 9.640000000000001e-06, "loss": 0.1457, "step": 482 }, { "epoch": 0.1, "learning_rate": 9.66e-06, "loss": 0.1485, "step": 483 }, { "epoch": 0.1, "learning_rate": 9.68e-06, "loss": 0.1512, "step": 484 }, { "epoch": 0.1, "learning_rate": 9.7e-06, "loss": 0.1523, "step": 485 }, { "epoch": 0.1, "learning_rate": 9.72e-06, "loss": 0.1522, "step": 486 }, { "epoch": 0.1, "learning_rate": 9.74e-06, "loss": 0.1439, "step": 487 }, { "epoch": 0.1, "learning_rate": 9.760000000000001e-06, "loss": 0.1535, "step": 488 }, { "epoch": 0.1, "learning_rate": 9.780000000000001e-06, "loss": 0.1623, "step": 489 }, { "epoch": 0.1, "learning_rate": 9.800000000000001e-06, "loss": 0.1429, "step": 490 }, { "epoch": 0.1, "learning_rate": 9.820000000000001e-06, "loss": 0.1507, "step": 491 }, { "epoch": 0.1, "learning_rate": 9.84e-06, "loss": 0.1487, "step": 492 }, { "epoch": 0.1, "learning_rate": 9.86e-06, "loss": 0.1493, "step": 493 }, { "epoch": 0.1, "learning_rate": 9.88e-06, "loss": 0.1465, "step": 494 }, { "epoch": 0.1, "learning_rate": 9.9e-06, "loss": 0.1652, "step": 495 }, { "epoch": 0.1, "learning_rate": 9.920000000000002e-06, "loss": 0.151, "step": 496 }, { "epoch": 0.1, "learning_rate": 9.940000000000001e-06, "loss": 0.1512, "step": 497 }, { "epoch": 0.1, "learning_rate": 9.960000000000001e-06, "loss": 0.1495, "step": 498 }, { "epoch": 0.1, "learning_rate": 9.980000000000001e-06, "loss": 0.148, "step": 499 }, { "epoch": 0.1, "learning_rate": 1e-05, "loss": 0.1434, "step": 500 }, { "epoch": 0.1, "learning_rate": 9.999998781530372e-06, "loss": 0.1451, "step": 501 }, { "epoch": 0.1, "learning_rate": 9.999995126122076e-06, "loss": 0.1554, "step": 502 }, { "epoch": 0.1, "learning_rate": 9.999989033776898e-06, "loss": 0.1527, "step": 503 }, { "epoch": 0.1, "learning_rate": 9.999980504497803e-06, "loss": 0.1532, "step": 504 }, { "epoch": 0.1, "learning_rate": 9.999969538288953e-06, "loss": 0.1526, "step": 505 }, { "epoch": 0.1, "learning_rate": 9.999956135155688e-06, "loss": 0.1497, "step": 506 }, { "epoch": 0.1, "learning_rate": 9.999940295104546e-06, "loss": 0.1446, "step": 507 }, { "epoch": 0.1, "learning_rate": 9.999922018143242e-06, "loss": 0.1613, "step": 508 }, { "epoch": 0.1, "learning_rate": 9.999901304280686e-06, "loss": 0.1533, "step": 509 }, { "epoch": 0.1, "learning_rate": 9.999878153526974e-06, "loss": 0.1585, "step": 510 }, { "epoch": 0.1, "learning_rate": 9.99985256589339e-06, "loss": 0.1533, "step": 511 }, { "epoch": 0.1, "learning_rate": 9.999824541392404e-06, "loss": 0.1429, "step": 512 }, { "epoch": 0.1, "learning_rate": 9.999794080037675e-06, "loss": 0.1457, "step": 513 }, { "epoch": 0.1, "learning_rate": 9.99976118184405e-06, "loss": 0.1429, "step": 514 }, { "epoch": 0.1, "learning_rate": 9.999725846827562e-06, "loss": 0.1405, "step": 515 }, { "epoch": 0.1, "learning_rate": 9.999688075005434e-06, "loss": 0.1426, "step": 516 }, { "epoch": 0.1, "learning_rate": 9.999647866396073e-06, "loss": 0.1462, "step": 517 }, { "epoch": 0.1, "learning_rate": 9.999605221019082e-06, "loss": 0.155, "step": 518 }, { "epoch": 0.1, "learning_rate": 9.999560138895238e-06, "loss": 0.1668, "step": 519 }, { "epoch": 0.1, "learning_rate": 9.999512620046523e-06, "loss": 0.1616, "step": 520 }, { "epoch": 0.1, "learning_rate": 9.999462664496088e-06, "loss": 0.1557, "step": 521 }, { "epoch": 0.1, "learning_rate": 9.999410272268285e-06, "loss": 0.1828, "step": 522 }, { "epoch": 0.1, "learning_rate": 9.999355443388649e-06, "loss": 0.143, "step": 523 }, { "epoch": 0.1, "learning_rate": 9.999298177883902e-06, "loss": 0.1531, "step": 524 }, { "epoch": 0.1, "learning_rate": 9.999238475781957e-06, "loss": 0.1451, "step": 525 }, { "epoch": 0.11, "learning_rate": 9.999176337111908e-06, "loss": 0.1599, "step": 526 }, { "epoch": 0.11, "learning_rate": 9.999111761904046e-06, "loss": 0.1483, "step": 527 }, { "epoch": 0.11, "learning_rate": 9.99904475018984e-06, "loss": 0.143, "step": 528 }, { "epoch": 0.11, "learning_rate": 9.99897530200195e-06, "loss": 0.1428, "step": 529 }, { "epoch": 0.11, "learning_rate": 9.998903417374228e-06, "loss": 0.1425, "step": 530 }, { "epoch": 0.11, "learning_rate": 9.998829096341706e-06, "loss": 0.1487, "step": 531 }, { "epoch": 0.11, "learning_rate": 9.998752338940612e-06, "loss": 0.1473, "step": 532 }, { "epoch": 0.11, "learning_rate": 9.998673145208351e-06, "loss": 0.1461, "step": 533 }, { "epoch": 0.11, "learning_rate": 9.998591515183524e-06, "loss": 0.151, "step": 534 }, { "epoch": 0.11, "learning_rate": 9.998507448905917e-06, "loss": 0.1813, "step": 535 }, { "epoch": 0.11, "learning_rate": 9.9984209464165e-06, "loss": 0.1506, "step": 536 }, { "epoch": 0.11, "learning_rate": 9.998332007757436e-06, "loss": 0.1436, "step": 537 }, { "epoch": 0.11, "learning_rate": 9.998240632972073e-06, "loss": 0.1499, "step": 538 }, { "epoch": 0.11, "learning_rate": 9.998146822104943e-06, "loss": 0.1521, "step": 539 }, { "epoch": 0.11, "learning_rate": 9.998050575201772e-06, "loss": 0.1425, "step": 540 }, { "epoch": 0.11, "learning_rate": 9.997951892309468e-06, "loss": 0.1471, "step": 541 }, { "epoch": 0.11, "learning_rate": 9.997850773476126e-06, "loss": 0.1456, "step": 542 }, { "epoch": 0.11, "learning_rate": 9.997747218751032e-06, "loss": 0.1462, "step": 543 }, { "epoch": 0.11, "learning_rate": 9.997641228184656e-06, "loss": 0.1446, "step": 544 }, { "epoch": 0.11, "learning_rate": 9.997532801828659e-06, "loss": 0.1411, "step": 545 }, { "epoch": 0.11, "learning_rate": 9.997421939735885e-06, "loss": 0.1514, "step": 546 }, { "epoch": 0.11, "learning_rate": 9.997308641960365e-06, "loss": 0.1395, "step": 547 }, { "epoch": 0.11, "learning_rate": 9.997192908557322e-06, "loss": 0.1427, "step": 548 }, { "epoch": 0.11, "learning_rate": 9.997074739583162e-06, "loss": 0.1474, "step": 549 }, { "epoch": 0.11, "learning_rate": 9.99695413509548e-06, "loss": 0.1537, "step": 550 }, { "epoch": 0.11, "learning_rate": 9.996831095153054e-06, "loss": 0.145, "step": 551 }, { "epoch": 0.11, "learning_rate": 9.996705619815857e-06, "loss": 0.1473, "step": 552 }, { "epoch": 0.11, "learning_rate": 9.99657770914504e-06, "loss": 0.1461, "step": 553 }, { "epoch": 0.11, "learning_rate": 9.996447363202947e-06, "loss": 0.1387, "step": 554 }, { "epoch": 0.11, "learning_rate": 9.996314582053106e-06, "loss": 0.1449, "step": 555 }, { "epoch": 0.11, "learning_rate": 9.996179365760235e-06, "loss": 0.1457, "step": 556 }, { "epoch": 0.11, "learning_rate": 9.996041714390235e-06, "loss": 0.1424, "step": 557 }, { "epoch": 0.11, "learning_rate": 9.995901628010196e-06, "loss": 0.1428, "step": 558 }, { "epoch": 0.11, "learning_rate": 9.995759106688394e-06, "loss": 0.149, "step": 559 }, { "epoch": 0.11, "learning_rate": 9.995614150494293e-06, "loss": 0.1479, "step": 560 }, { "epoch": 0.11, "learning_rate": 9.995466759498543e-06, "loss": 0.1408, "step": 561 }, { "epoch": 0.11, "learning_rate": 9.995316933772978e-06, "loss": 0.1382, "step": 562 }, { "epoch": 0.11, "learning_rate": 9.995164673390624e-06, "loss": 0.1424, "step": 563 }, { "epoch": 0.11, "learning_rate": 9.995009978425692e-06, "loss": 0.1316, "step": 564 }, { "epoch": 0.11, "learning_rate": 9.994852848953574e-06, "loss": 0.1366, "step": 565 }, { "epoch": 0.11, "learning_rate": 9.994693285050858e-06, "loss": 0.1417, "step": 566 }, { "epoch": 0.11, "learning_rate": 9.994531286795309e-06, "loss": 0.1456, "step": 567 }, { "epoch": 0.11, "learning_rate": 9.994366854265886e-06, "loss": 0.1435, "step": 568 }, { "epoch": 0.11, "learning_rate": 9.99419998754273e-06, "loss": 0.1901, "step": 569 }, { "epoch": 0.11, "learning_rate": 9.994030686707171e-06, "loss": 0.1453, "step": 570 }, { "epoch": 0.11, "learning_rate": 9.993858951841724e-06, "loss": 0.1431, "step": 571 }, { "epoch": 0.11, "learning_rate": 9.99368478303009e-06, "loss": 0.1406, "step": 572 }, { "epoch": 0.11, "learning_rate": 9.993508180357154e-06, "loss": 0.1438, "step": 573 }, { "epoch": 0.11, "learning_rate": 9.993329143908994e-06, "loss": 0.1528, "step": 574 }, { "epoch": 0.12, "learning_rate": 9.993147673772869e-06, "loss": 0.1412, "step": 575 }, { "epoch": 0.12, "learning_rate": 9.992963770037227e-06, "loss": 0.1446, "step": 576 }, { "epoch": 0.12, "learning_rate": 9.992777432791697e-06, "loss": 0.1479, "step": 577 }, { "epoch": 0.12, "learning_rate": 9.9925886621271e-06, "loss": 0.1639, "step": 578 }, { "epoch": 0.12, "learning_rate": 9.992397458135438e-06, "loss": 0.1416, "step": 579 }, { "epoch": 0.12, "learning_rate": 9.992203820909906e-06, "loss": 0.1568, "step": 580 }, { "epoch": 0.12, "learning_rate": 9.992007750544876e-06, "loss": 0.1336, "step": 581 }, { "epoch": 0.12, "learning_rate": 9.991809247135912e-06, "loss": 0.1424, "step": 582 }, { "epoch": 0.12, "learning_rate": 9.991608310779762e-06, "loss": 0.1371, "step": 583 }, { "epoch": 0.12, "learning_rate": 9.99140494157436e-06, "loss": 0.1367, "step": 584 }, { "epoch": 0.12, "learning_rate": 9.991199139618828e-06, "loss": 0.1426, "step": 585 }, { "epoch": 0.12, "learning_rate": 9.990990905013466e-06, "loss": 0.1349, "step": 586 }, { "epoch": 0.12, "learning_rate": 9.99078023785977e-06, "loss": 0.1426, "step": 587 }, { "epoch": 0.12, "learning_rate": 9.990567138260414e-06, "loss": 0.1379, "step": 588 }, { "epoch": 0.12, "learning_rate": 9.990351606319261e-06, "loss": 0.141, "step": 589 }, { "epoch": 0.12, "learning_rate": 9.990133642141359e-06, "loss": 0.1461, "step": 590 }, { "epoch": 0.12, "learning_rate": 9.98991324583294e-06, "loss": 0.1403, "step": 591 }, { "epoch": 0.12, "learning_rate": 9.989690417501423e-06, "loss": 0.1421, "step": 592 }, { "epoch": 0.12, "learning_rate": 9.989465157255413e-06, "loss": 0.146, "step": 593 }, { "epoch": 0.12, "learning_rate": 9.989237465204698e-06, "loss": 0.1658, "step": 594 }, { "epoch": 0.12, "learning_rate": 9.989007341460251e-06, "loss": 0.1375, "step": 595 }, { "epoch": 0.12, "learning_rate": 9.988774786134235e-06, "loss": 0.1437, "step": 596 }, { "epoch": 0.12, "learning_rate": 9.988539799339989e-06, "loss": 0.1379, "step": 597 }, { "epoch": 0.12, "learning_rate": 9.98830238119205e-06, "loss": 0.1578, "step": 598 }, { "epoch": 0.12, "learning_rate": 9.988062531806127e-06, "loss": 0.139, "step": 599 }, { "epoch": 0.12, "learning_rate": 9.987820251299121e-06, "loss": 0.1407, "step": 600 }, { "epoch": 0.12, "learning_rate": 9.987575539789119e-06, "loss": 0.136, "step": 601 }, { "epoch": 0.12, "learning_rate": 9.987328397395389e-06, "loss": 0.1404, "step": 602 }, { "epoch": 0.12, "learning_rate": 9.987078824238384e-06, "loss": 0.1413, "step": 603 }, { "epoch": 0.12, "learning_rate": 9.986826820439743e-06, "loss": 0.1466, "step": 604 }, { "epoch": 0.12, "learning_rate": 9.98657238612229e-06, "loss": 0.131, "step": 605 }, { "epoch": 0.12, "learning_rate": 9.986315521410035e-06, "loss": 0.142, "step": 606 }, { "epoch": 0.12, "learning_rate": 9.98605622642817e-06, "loss": 0.1455, "step": 607 }, { "epoch": 0.12, "learning_rate": 9.98579450130307e-06, "loss": 0.1479, "step": 608 }, { "epoch": 0.12, "learning_rate": 9.9855303461623e-06, "loss": 0.1359, "step": 609 }, { "epoch": 0.12, "learning_rate": 9.985263761134602e-06, "loss": 0.1384, "step": 610 }, { "epoch": 0.12, "learning_rate": 9.98499474634991e-06, "loss": 0.1427, "step": 611 }, { "epoch": 0.12, "learning_rate": 9.984723301939337e-06, "loss": 0.1464, "step": 612 }, { "epoch": 0.12, "learning_rate": 9.98444942803518e-06, "loss": 0.1438, "step": 613 }, { "epoch": 0.12, "learning_rate": 9.984173124770924e-06, "loss": 0.1403, "step": 614 }, { "epoch": 0.12, "learning_rate": 9.983894392281237e-06, "loss": 0.1461, "step": 615 }, { "epoch": 0.12, "learning_rate": 9.983613230701967e-06, "loss": 0.1425, "step": 616 }, { "epoch": 0.12, "learning_rate": 9.98332964017015e-06, "loss": 0.1456, "step": 617 }, { "epoch": 0.12, "learning_rate": 9.983043620824005e-06, "loss": 0.1337, "step": 618 }, { "epoch": 0.12, "learning_rate": 9.982755172802933e-06, "loss": 0.1348, "step": 619 }, { "epoch": 0.12, "learning_rate": 9.982464296247523e-06, "loss": 0.1462, "step": 620 }, { "epoch": 0.12, "learning_rate": 9.98217099129954e-06, "loss": 0.1444, "step": 621 }, { "epoch": 0.12, "learning_rate": 9.981875258101944e-06, "loss": 0.1353, "step": 622 }, { "epoch": 0.12, "learning_rate": 9.981577096798864e-06, "loss": 0.1401, "step": 623 }, { "epoch": 0.12, "learning_rate": 9.981276507535625e-06, "loss": 0.1372, "step": 624 }, { "epoch": 0.12, "learning_rate": 9.980973490458728e-06, "loss": 0.14, "step": 625 }, { "epoch": 0.13, "learning_rate": 9.980668045715864e-06, "loss": 0.1837, "step": 626 }, { "epoch": 0.13, "learning_rate": 9.980360173455899e-06, "loss": 0.1384, "step": 627 }, { "epoch": 0.13, "learning_rate": 9.980049873828887e-06, "loss": 0.1344, "step": 628 }, { "epoch": 0.13, "learning_rate": 9.979737146986064e-06, "loss": 0.1331, "step": 629 }, { "epoch": 0.13, "learning_rate": 9.979421993079853e-06, "loss": 0.1372, "step": 630 }, { "epoch": 0.13, "learning_rate": 9.979104412263851e-06, "loss": 0.1375, "step": 631 }, { "epoch": 0.13, "learning_rate": 9.978784404692847e-06, "loss": 0.1361, "step": 632 }, { "epoch": 0.13, "learning_rate": 9.978461970522807e-06, "loss": 0.1326, "step": 633 }, { "epoch": 0.13, "learning_rate": 9.97813710991088e-06, "loss": 0.1391, "step": 634 }, { "epoch": 0.13, "learning_rate": 9.9778098230154e-06, "loss": 0.1555, "step": 635 }, { "epoch": 0.13, "learning_rate": 9.977480109995886e-06, "loss": 0.1364, "step": 636 }, { "epoch": 0.13, "learning_rate": 9.977147971013033e-06, "loss": 0.1358, "step": 637 }, { "epoch": 0.13, "learning_rate": 9.97681340622872e-06, "loss": 0.1424, "step": 638 }, { "epoch": 0.13, "learning_rate": 9.976476415806013e-06, "loss": 0.1529, "step": 639 }, { "epoch": 0.13, "learning_rate": 9.976136999909156e-06, "loss": 0.1321, "step": 640 }, { "epoch": 0.13, "learning_rate": 9.975795158703576e-06, "loss": 0.1387, "step": 641 }, { "epoch": 0.13, "learning_rate": 9.975450892355882e-06, "loss": 0.1349, "step": 642 }, { "epoch": 0.13, "learning_rate": 9.975104201033868e-06, "loss": 0.1332, "step": 643 }, { "epoch": 0.13, "learning_rate": 9.974755084906503e-06, "loss": 0.1312, "step": 644 }, { "epoch": 0.13, "learning_rate": 9.974403544143942e-06, "loss": 0.1228, "step": 645 }, { "epoch": 0.13, "learning_rate": 9.974049578917524e-06, "loss": 0.1462, "step": 646 }, { "epoch": 0.13, "learning_rate": 9.973693189399767e-06, "loss": 0.1522, "step": 647 }, { "epoch": 0.13, "learning_rate": 9.973334375764372e-06, "loss": 0.1361, "step": 648 }, { "epoch": 0.13, "learning_rate": 9.972973138186217e-06, "loss": 0.1327, "step": 649 }, { "epoch": 0.13, "learning_rate": 9.972609476841368e-06, "loss": 0.1382, "step": 650 }, { "epoch": 0.13, "learning_rate": 9.972243391907068e-06, "loss": 0.1393, "step": 651 }, { "epoch": 0.13, "learning_rate": 9.97187488356174e-06, "loss": 0.1416, "step": 652 }, { "epoch": 0.13, "learning_rate": 9.971503951984996e-06, "loss": 0.1317, "step": 653 }, { "epoch": 0.13, "learning_rate": 9.971130597357618e-06, "loss": 0.1395, "step": 654 }, { "epoch": 0.13, "learning_rate": 9.970754819861577e-06, "loss": 0.16, "step": 655 }, { "epoch": 0.13, "learning_rate": 9.970376619680024e-06, "loss": 0.1448, "step": 656 }, { "epoch": 0.13, "learning_rate": 9.969995996997285e-06, "loss": 0.1483, "step": 657 }, { "epoch": 0.13, "learning_rate": 9.969612951998874e-06, "loss": 0.1373, "step": 658 }, { "epoch": 0.13, "learning_rate": 9.969227484871485e-06, "loss": 0.1431, "step": 659 }, { "epoch": 0.13, "learning_rate": 9.968839595802982e-06, "loss": 0.136, "step": 660 }, { "epoch": 0.13, "learning_rate": 9.968449284982424e-06, "loss": 0.2409, "step": 661 }, { "epoch": 0.13, "learning_rate": 9.968056552600043e-06, "loss": 0.147, "step": 662 }, { "epoch": 0.13, "learning_rate": 9.96766139884725e-06, "loss": 0.1402, "step": 663 }, { "epoch": 0.13, "learning_rate": 9.967263823916638e-06, "loss": 0.1342, "step": 664 }, { "epoch": 0.13, "learning_rate": 9.966863828001982e-06, "loss": 0.1337, "step": 665 }, { "epoch": 0.13, "learning_rate": 9.966461411298235e-06, "loss": 0.1363, "step": 666 }, { "epoch": 0.13, "learning_rate": 9.966056574001528e-06, "loss": 0.1369, "step": 667 }, { "epoch": 0.13, "learning_rate": 9.965649316309178e-06, "loss": 0.134, "step": 668 }, { "epoch": 0.13, "learning_rate": 9.965239638419673e-06, "loss": 0.1458, "step": 669 }, { "epoch": 0.13, "learning_rate": 9.964827540532685e-06, "loss": 0.1398, "step": 670 }, { "epoch": 0.13, "learning_rate": 9.964413022849069e-06, "loss": 0.1452, "step": 671 }, { "epoch": 0.13, "learning_rate": 9.963996085570854e-06, "loss": 0.1404, "step": 672 }, { "epoch": 0.13, "learning_rate": 9.96357672890125e-06, "loss": 0.1427, "step": 673 }, { "epoch": 0.13, "learning_rate": 9.963154953044646e-06, "loss": 0.1335, "step": 674 }, { "epoch": 0.14, "learning_rate": 9.962730758206612e-06, "loss": 0.1389, "step": 675 }, { "epoch": 0.14, "learning_rate": 9.962304144593893e-06, "loss": 0.1378, "step": 676 }, { "epoch": 0.14, "learning_rate": 9.961875112414417e-06, "loss": 0.1525, "step": 677 }, { "epoch": 0.14, "learning_rate": 9.96144366187729e-06, "loss": 0.1368, "step": 678 }, { "epoch": 0.14, "learning_rate": 9.961009793192793e-06, "loss": 0.1351, "step": 679 }, { "epoch": 0.14, "learning_rate": 9.960573506572391e-06, "loss": 0.1412, "step": 680 }, { "epoch": 0.14, "learning_rate": 9.960134802228722e-06, "loss": 0.135, "step": 681 }, { "epoch": 0.14, "learning_rate": 9.959693680375608e-06, "loss": 0.1399, "step": 682 }, { "epoch": 0.14, "learning_rate": 9.959250141228046e-06, "loss": 0.1337, "step": 683 }, { "epoch": 0.14, "learning_rate": 9.958804185002209e-06, "loss": 0.1336, "step": 684 }, { "epoch": 0.14, "learning_rate": 9.958355811915452e-06, "loss": 0.1254, "step": 685 }, { "epoch": 0.14, "learning_rate": 9.957905022186309e-06, "loss": 0.131, "step": 686 }, { "epoch": 0.14, "learning_rate": 9.957451816034487e-06, "loss": 0.1317, "step": 687 }, { "epoch": 0.14, "learning_rate": 9.956996193680874e-06, "loss": 0.1345, "step": 688 }, { "epoch": 0.14, "learning_rate": 9.956538155347534e-06, "loss": 0.1354, "step": 689 }, { "epoch": 0.14, "learning_rate": 9.95607770125771e-06, "loss": 0.1339, "step": 690 }, { "epoch": 0.14, "learning_rate": 9.95561483163582e-06, "loss": 0.1323, "step": 691 }, { "epoch": 0.14, "learning_rate": 9.955149546707465e-06, "loss": 0.1623, "step": 692 }, { "epoch": 0.14, "learning_rate": 9.954681846699414e-06, "loss": 0.1358, "step": 693 }, { "epoch": 0.14, "learning_rate": 9.954211731839623e-06, "loss": 0.1335, "step": 694 }, { "epoch": 0.14, "learning_rate": 9.953739202357219e-06, "loss": 0.1383, "step": 695 }, { "epoch": 0.14, "learning_rate": 9.953264258482505e-06, "loss": 0.13, "step": 696 }, { "epoch": 0.14, "learning_rate": 9.952786900446964e-06, "loss": 0.1277, "step": 697 }, { "epoch": 0.14, "learning_rate": 9.952307128483257e-06, "loss": 0.1411, "step": 698 }, { "epoch": 0.14, "learning_rate": 9.951824942825215e-06, "loss": 0.145, "step": 699 }, { "epoch": 0.14, "learning_rate": 9.951340343707852e-06, "loss": 0.1301, "step": 700 }, { "epoch": 0.14, "learning_rate": 9.950853331367356e-06, "loss": 0.1371, "step": 701 }, { "epoch": 0.14, "learning_rate": 9.950363906041089e-06, "loss": 0.1321, "step": 702 }, { "epoch": 0.14, "learning_rate": 9.94987206796759e-06, "loss": 0.1405, "step": 703 }, { "epoch": 0.14, "learning_rate": 9.94937781738658e-06, "loss": 0.133, "step": 704 }, { "epoch": 0.14, "learning_rate": 9.948881154538946e-06, "loss": 0.1681, "step": 705 }, { "epoch": 0.14, "learning_rate": 9.948382079666756e-06, "loss": 0.1475, "step": 706 }, { "epoch": 0.14, "learning_rate": 9.947880593013256e-06, "loss": 0.1189, "step": 707 }, { "epoch": 0.14, "learning_rate": 9.947376694822861e-06, "loss": 0.1312, "step": 708 }, { "epoch": 0.14, "learning_rate": 9.946870385341167e-06, "loss": 0.1378, "step": 709 }, { "epoch": 0.14, "learning_rate": 9.946361664814942e-06, "loss": 0.1275, "step": 710 }, { "epoch": 0.14, "learning_rate": 9.945850533492132e-06, "loss": 0.129, "step": 711 }, { "epoch": 0.14, "learning_rate": 9.945336991621854e-06, "loss": 0.1344, "step": 712 }, { "epoch": 0.14, "learning_rate": 9.944821039454403e-06, "loss": 0.133, "step": 713 }, { "epoch": 0.14, "learning_rate": 9.944302677241247e-06, "loss": 0.1225, "step": 714 }, { "epoch": 0.14, "learning_rate": 9.94378190523503e-06, "loss": 0.1326, "step": 715 }, { "epoch": 0.14, "learning_rate": 9.94325872368957e-06, "loss": 0.1331, "step": 716 }, { "epoch": 0.14, "learning_rate": 9.942733132859861e-06, "loss": 0.1388, "step": 717 }, { "epoch": 0.14, "learning_rate": 9.942205133002067e-06, "loss": 0.1327, "step": 718 }, { "epoch": 0.14, "learning_rate": 9.94167472437353e-06, "loss": 0.1359, "step": 719 }, { "epoch": 0.14, "learning_rate": 9.941141907232766e-06, "loss": 0.1315, "step": 720 }, { "epoch": 0.14, "learning_rate": 9.94060668183946e-06, "loss": 0.1328, "step": 721 }, { "epoch": 0.14, "learning_rate": 9.940069048454478e-06, "loss": 0.152, "step": 722 }, { "epoch": 0.14, "learning_rate": 9.939529007339852e-06, "loss": 0.1571, "step": 723 }, { "epoch": 0.14, "learning_rate": 9.938986558758795e-06, "loss": 0.1331, "step": 724 }, { "epoch": 0.14, "learning_rate": 9.938441702975689e-06, "loss": 0.1266, "step": 725 }, { "epoch": 0.15, "learning_rate": 9.937894440256091e-06, "loss": 0.1306, "step": 726 }, { "epoch": 0.15, "learning_rate": 9.937344770866727e-06, "loss": 0.1343, "step": 727 }, { "epoch": 0.15, "learning_rate": 9.936792695075502e-06, "loss": 0.135, "step": 728 }, { "epoch": 0.15, "learning_rate": 9.936238213151491e-06, "loss": 0.1611, "step": 729 }, { "epoch": 0.15, "learning_rate": 9.93568132536494e-06, "loss": 0.1267, "step": 730 }, { "epoch": 0.15, "learning_rate": 9.93512203198727e-06, "loss": 0.127, "step": 731 }, { "epoch": 0.15, "learning_rate": 9.934560333291077e-06, "loss": 0.1352, "step": 732 }, { "epoch": 0.15, "learning_rate": 9.93399622955012e-06, "loss": 0.1294, "step": 733 }, { "epoch": 0.15, "learning_rate": 9.93342972103934e-06, "loss": 0.1537, "step": 734 }, { "epoch": 0.15, "learning_rate": 9.932860808034847e-06, "loss": 0.1397, "step": 735 }, { "epoch": 0.15, "learning_rate": 9.932289490813922e-06, "loss": 0.1288, "step": 736 }, { "epoch": 0.15, "learning_rate": 9.931715769655017e-06, "loss": 0.1307, "step": 737 }, { "epoch": 0.15, "learning_rate": 9.931139644837755e-06, "loss": 0.1395, "step": 738 }, { "epoch": 0.15, "learning_rate": 9.930561116642936e-06, "loss": 0.125, "step": 739 }, { "epoch": 0.15, "learning_rate": 9.929980185352525e-06, "loss": 0.1312, "step": 740 }, { "epoch": 0.15, "learning_rate": 9.929396851249661e-06, "loss": 0.1365, "step": 741 }, { "epoch": 0.15, "learning_rate": 9.928811114618658e-06, "loss": 0.1328, "step": 742 }, { "epoch": 0.15, "learning_rate": 9.928222975744992e-06, "loss": 0.1315, "step": 743 }, { "epoch": 0.15, "learning_rate": 9.927632434915315e-06, "loss": 0.1351, "step": 744 }, { "epoch": 0.15, "learning_rate": 9.927039492417452e-06, "loss": 0.1346, "step": 745 }, { "epoch": 0.15, "learning_rate": 9.926444148540394e-06, "loss": 0.1281, "step": 746 }, { "epoch": 0.15, "learning_rate": 9.925846403574306e-06, "loss": 0.1558, "step": 747 }, { "epoch": 0.15, "learning_rate": 9.925246257810519e-06, "loss": 0.1252, "step": 748 }, { "epoch": 0.15, "learning_rate": 9.92464371154154e-06, "loss": 0.132, "step": 749 }, { "epoch": 0.15, "learning_rate": 9.924038765061042e-06, "loss": 0.1343, "step": 750 }, { "epoch": 0.15, "learning_rate": 9.923431418663866e-06, "loss": 0.137, "step": 751 }, { "epoch": 0.15, "learning_rate": 9.922821672646028e-06, "loss": 0.1311, "step": 752 }, { "epoch": 0.15, "learning_rate": 9.922209527304709e-06, "loss": 0.1299, "step": 753 }, { "epoch": 0.15, "learning_rate": 9.921594982938262e-06, "loss": 0.1541, "step": 754 }, { "epoch": 0.15, "learning_rate": 9.92097803984621e-06, "loss": 0.1286, "step": 755 }, { "epoch": 0.15, "learning_rate": 9.920358698329242e-06, "loss": 0.1432, "step": 756 }, { "epoch": 0.15, "learning_rate": 9.919736958689216e-06, "loss": 0.132, "step": 757 }, { "epoch": 0.15, "learning_rate": 9.919112821229165e-06, "loss": 0.1359, "step": 758 }, { "epoch": 0.15, "learning_rate": 9.918486286253279e-06, "loss": 0.1339, "step": 759 }, { "epoch": 0.15, "learning_rate": 9.91785735406693e-06, "loss": 0.1393, "step": 760 }, { "epoch": 0.15, "learning_rate": 9.91722602497665e-06, "loss": 0.1357, "step": 761 }, { "epoch": 0.15, "learning_rate": 9.91659229929014e-06, "loss": 0.1273, "step": 762 }, { "epoch": 0.15, "learning_rate": 9.915956177316269e-06, "loss": 0.1309, "step": 763 }, { "epoch": 0.15, "learning_rate": 9.915317659365078e-06, "loss": 0.132, "step": 764 }, { "epoch": 0.15, "learning_rate": 9.914676745747772e-06, "loss": 0.1291, "step": 765 }, { "epoch": 0.15, "learning_rate": 9.914033436776724e-06, "loss": 0.1297, "step": 766 }, { "epoch": 0.15, "learning_rate": 9.913387732765475e-06, "loss": 0.1342, "step": 767 }, { "epoch": 0.15, "learning_rate": 9.912739634028734e-06, "loss": 0.1226, "step": 768 }, { "epoch": 0.15, "learning_rate": 9.912089140882377e-06, "loss": 0.1611, "step": 769 }, { "epoch": 0.15, "learning_rate": 9.911436253643445e-06, "loss": 0.1294, "step": 770 }, { "epoch": 0.15, "learning_rate": 9.910780972630146e-06, "loss": 0.1195, "step": 771 }, { "epoch": 0.15, "learning_rate": 9.91012329816186e-06, "loss": 0.1372, "step": 772 }, { "epoch": 0.15, "learning_rate": 9.909463230559127e-06, "loss": 0.1489, "step": 773 }, { "epoch": 0.15, "learning_rate": 9.908800770143654e-06, "loss": 0.1319, "step": 774 }, { "epoch": 0.15, "learning_rate": 9.908135917238321e-06, "loss": 0.1367, "step": 775 }, { "epoch": 0.16, "learning_rate": 9.907468672167165e-06, "loss": 0.1351, "step": 776 }, { "epoch": 0.16, "learning_rate": 9.906799035255395e-06, "loss": 0.1279, "step": 777 }, { "epoch": 0.16, "learning_rate": 9.906127006829385e-06, "loss": 0.1306, "step": 778 }, { "epoch": 0.16, "learning_rate": 9.90545258721667e-06, "loss": 0.1224, "step": 779 }, { "epoch": 0.16, "learning_rate": 9.904775776745959e-06, "loss": 0.1271, "step": 780 }, { "epoch": 0.16, "learning_rate": 9.904096575747117e-06, "loss": 0.1294, "step": 781 }, { "epoch": 0.16, "learning_rate": 9.903414984551178e-06, "loss": 0.133, "step": 782 }, { "epoch": 0.16, "learning_rate": 9.902731003490344e-06, "loss": 0.1333, "step": 783 }, { "epoch": 0.16, "learning_rate": 9.90204463289798e-06, "loss": 0.135, "step": 784 }, { "epoch": 0.16, "learning_rate": 9.901355873108611e-06, "loss": 0.1318, "step": 785 }, { "epoch": 0.16, "learning_rate": 9.900664724457932e-06, "loss": 0.1957, "step": 786 }, { "epoch": 0.16, "learning_rate": 9.899971187282799e-06, "loss": 0.1281, "step": 787 }, { "epoch": 0.16, "learning_rate": 9.899275261921236e-06, "loss": 0.1335, "step": 788 }, { "epoch": 0.16, "learning_rate": 9.898576948712427e-06, "loss": 0.1441, "step": 789 }, { "epoch": 0.16, "learning_rate": 9.89787624799672e-06, "loss": 0.1286, "step": 790 }, { "epoch": 0.16, "learning_rate": 9.897173160115633e-06, "loss": 0.1282, "step": 791 }, { "epoch": 0.16, "learning_rate": 9.896467685411838e-06, "loss": 0.1243, "step": 792 }, { "epoch": 0.16, "learning_rate": 9.895759824229176e-06, "loss": 0.1276, "step": 793 }, { "epoch": 0.16, "learning_rate": 9.89504957691265e-06, "loss": 0.1376, "step": 794 }, { "epoch": 0.16, "learning_rate": 9.894336943808426e-06, "loss": 0.1318, "step": 795 }, { "epoch": 0.16, "learning_rate": 9.893621925263832e-06, "loss": 0.1285, "step": 796 }, { "epoch": 0.16, "learning_rate": 9.89290452162736e-06, "loss": 0.1296, "step": 797 }, { "epoch": 0.16, "learning_rate": 9.892184733248666e-06, "loss": 0.1346, "step": 798 }, { "epoch": 0.16, "learning_rate": 9.891462560478562e-06, "loss": 0.1387, "step": 799 }, { "epoch": 0.16, "learning_rate": 9.890738003669029e-06, "loss": 0.129, "step": 800 }, { "epoch": 0.16, "learning_rate": 9.890011063173207e-06, "loss": 0.1519, "step": 801 }, { "epoch": 0.16, "learning_rate": 9.889281739345395e-06, "loss": 0.1265, "step": 802 }, { "epoch": 0.16, "learning_rate": 9.88855003254106e-06, "loss": 0.1265, "step": 803 }, { "epoch": 0.16, "learning_rate": 9.887815943116827e-06, "loss": 0.1305, "step": 804 }, { "epoch": 0.16, "learning_rate": 9.887079471430481e-06, "loss": 0.13, "step": 805 }, { "epoch": 0.16, "learning_rate": 9.886340617840968e-06, "loss": 0.1749, "step": 806 }, { "epoch": 0.16, "learning_rate": 9.8855993827084e-06, "loss": 0.1274, "step": 807 }, { "epoch": 0.16, "learning_rate": 9.884855766394041e-06, "loss": 0.1347, "step": 808 }, { "epoch": 0.16, "learning_rate": 9.884109769260326e-06, "loss": 0.1324, "step": 809 }, { "epoch": 0.16, "learning_rate": 9.883361391670841e-06, "loss": 0.139, "step": 810 }, { "epoch": 0.16, "learning_rate": 9.882610633990337e-06, "loss": 0.135, "step": 811 }, { "epoch": 0.16, "learning_rate": 9.881857496584726e-06, "loss": 0.1286, "step": 812 }, { "epoch": 0.16, "learning_rate": 9.881101979821075e-06, "loss": 0.1249, "step": 813 }, { "epoch": 0.16, "learning_rate": 9.880344084067616e-06, "loss": 0.1365, "step": 814 }, { "epoch": 0.16, "learning_rate": 9.879583809693737e-06, "loss": 0.1288, "step": 815 }, { "epoch": 0.16, "learning_rate": 9.878821157069988e-06, "loss": 0.1456, "step": 816 }, { "epoch": 0.16, "learning_rate": 9.878056126568077e-06, "loss": 0.1239, "step": 817 }, { "epoch": 0.16, "learning_rate": 9.877288718560866e-06, "loss": 0.1354, "step": 818 }, { "epoch": 0.16, "learning_rate": 9.876518933422385e-06, "loss": 0.1342, "step": 819 }, { "epoch": 0.16, "learning_rate": 9.875746771527817e-06, "loss": 0.1314, "step": 820 }, { "epoch": 0.16, "learning_rate": 9.874972233253503e-06, "loss": 0.1369, "step": 821 }, { "epoch": 0.16, "learning_rate": 9.874195318976945e-06, "loss": 0.1377, "step": 822 }, { "epoch": 0.16, "learning_rate": 9.873416029076801e-06, "loss": 0.127, "step": 823 }, { "epoch": 0.16, "learning_rate": 9.872634363932887e-06, "loss": 0.1333, "step": 824 }, { "epoch": 0.17, "learning_rate": 9.871850323926178e-06, "loss": 0.1148, "step": 825 }, { "epoch": 0.17, "learning_rate": 9.871063909438803e-06, "loss": 0.125, "step": 826 }, { "epoch": 0.17, "learning_rate": 9.870275120854055e-06, "loss": 0.1287, "step": 827 }, { "epoch": 0.17, "learning_rate": 9.869483958556376e-06, "loss": 0.1197, "step": 828 }, { "epoch": 0.17, "learning_rate": 9.868690422931372e-06, "loss": 0.131, "step": 829 }, { "epoch": 0.17, "learning_rate": 9.867894514365802e-06, "loss": 0.1264, "step": 830 }, { "epoch": 0.17, "learning_rate": 9.867096233247581e-06, "loss": 0.1284, "step": 831 }, { "epoch": 0.17, "learning_rate": 9.866295579965782e-06, "loss": 0.1249, "step": 832 }, { "epoch": 0.17, "learning_rate": 9.865492554910634e-06, "loss": 0.129, "step": 833 }, { "epoch": 0.17, "learning_rate": 9.86468715847352e-06, "loss": 0.1296, "step": 834 }, { "epoch": 0.17, "learning_rate": 9.863879391046985e-06, "loss": 0.1289, "step": 835 }, { "epoch": 0.17, "learning_rate": 9.863069253024719e-06, "loss": 0.1304, "step": 836 }, { "epoch": 0.17, "learning_rate": 9.862256744801576e-06, "loss": 0.1296, "step": 837 }, { "epoch": 0.17, "learning_rate": 9.861441866773564e-06, "loss": 0.1282, "step": 838 }, { "epoch": 0.17, "learning_rate": 9.860624619337844e-06, "loss": 0.1276, "step": 839 }, { "epoch": 0.17, "learning_rate": 9.859805002892733e-06, "loss": 0.1335, "step": 840 }, { "epoch": 0.17, "learning_rate": 9.8589830178377e-06, "loss": 0.129, "step": 841 }, { "epoch": 0.17, "learning_rate": 9.85815866457337e-06, "loss": 0.1284, "step": 842 }, { "epoch": 0.17, "learning_rate": 9.857331943501527e-06, "loss": 0.1391, "step": 843 }, { "epoch": 0.17, "learning_rate": 9.856502855025101e-06, "loss": 0.1427, "step": 844 }, { "epoch": 0.17, "learning_rate": 9.85567139954818e-06, "loss": 0.133, "step": 845 }, { "epoch": 0.17, "learning_rate": 9.854837577476008e-06, "loss": 0.1302, "step": 846 }, { "epoch": 0.17, "learning_rate": 9.854001389214979e-06, "loss": 0.1168, "step": 847 }, { "epoch": 0.17, "learning_rate": 9.853162835172638e-06, "loss": 0.1288, "step": 848 }, { "epoch": 0.17, "learning_rate": 9.852321915757688e-06, "loss": 0.1271, "step": 849 }, { "epoch": 0.17, "learning_rate": 9.851478631379982e-06, "loss": 0.1244, "step": 850 }, { "epoch": 0.17, "learning_rate": 9.85063298245053e-06, "loss": 0.1224, "step": 851 }, { "epoch": 0.17, "learning_rate": 9.849784969381488e-06, "loss": 0.1356, "step": 852 }, { "epoch": 0.17, "learning_rate": 9.848934592586165e-06, "loss": 0.1232, "step": 853 }, { "epoch": 0.17, "learning_rate": 9.84808185247903e-06, "loss": 0.1424, "step": 854 }, { "epoch": 0.17, "learning_rate": 9.847226749475696e-06, "loss": 0.1348, "step": 855 }, { "epoch": 0.17, "learning_rate": 9.846369283992927e-06, "loss": 0.1255, "step": 856 }, { "epoch": 0.17, "learning_rate": 9.845509456448642e-06, "loss": 0.1334, "step": 857 }, { "epoch": 0.17, "learning_rate": 9.844647267261915e-06, "loss": 0.1567, "step": 858 }, { "epoch": 0.17, "learning_rate": 9.843782716852963e-06, "loss": 0.1307, "step": 859 }, { "epoch": 0.17, "learning_rate": 9.842915805643156e-06, "loss": 0.1259, "step": 860 }, { "epoch": 0.17, "learning_rate": 9.84204653405502e-06, "loss": 0.1279, "step": 861 }, { "epoch": 0.17, "learning_rate": 9.841174902512223e-06, "loss": 0.129, "step": 862 }, { "epoch": 0.17, "learning_rate": 9.84030091143959e-06, "loss": 0.1255, "step": 863 }, { "epoch": 0.17, "learning_rate": 9.839424561263094e-06, "loss": 0.1251, "step": 864 }, { "epoch": 0.17, "learning_rate": 9.838545852409857e-06, "loss": 0.1273, "step": 865 }, { "epoch": 0.17, "learning_rate": 9.83766478530815e-06, "loss": 0.1241, "step": 866 }, { "epoch": 0.17, "learning_rate": 9.836781360387396e-06, "loss": 0.1322, "step": 867 }, { "epoch": 0.17, "learning_rate": 9.835895578078165e-06, "loss": 0.1149, "step": 868 }, { "epoch": 0.17, "learning_rate": 9.835007438812177e-06, "loss": 0.1211, "step": 869 }, { "epoch": 0.17, "learning_rate": 9.834116943022299e-06, "loss": 0.125, "step": 870 }, { "epoch": 0.17, "learning_rate": 9.833224091142548e-06, "loss": 0.1304, "step": 871 }, { "epoch": 0.17, "learning_rate": 9.832328883608088e-06, "loss": 0.1358, "step": 872 }, { "epoch": 0.17, "learning_rate": 9.831431320855235e-06, "loss": 0.1333, "step": 873 }, { "epoch": 0.17, "learning_rate": 9.830531403321451e-06, "loss": 0.1256, "step": 874 }, { "epoch": 0.17, "learning_rate": 9.829629131445342e-06, "loss": 0.1234, "step": 875 }, { "epoch": 0.18, "learning_rate": 9.828724505666664e-06, "loss": 0.2638, "step": 876 }, { "epoch": 0.18, "learning_rate": 9.827817526426324e-06, "loss": 0.1654, "step": 877 }, { "epoch": 0.18, "learning_rate": 9.82690819416637e-06, "loss": 0.1257, "step": 878 }, { "epoch": 0.18, "learning_rate": 9.825996509330001e-06, "loss": 0.1278, "step": 879 }, { "epoch": 0.18, "learning_rate": 9.825082472361558e-06, "loss": 0.1236, "step": 880 }, { "epoch": 0.18, "learning_rate": 9.824166083706534e-06, "loss": 0.1259, "step": 881 }, { "epoch": 0.18, "learning_rate": 9.823247343811567e-06, "loss": 0.1712, "step": 882 }, { "epoch": 0.18, "learning_rate": 9.822326253124436e-06, "loss": 0.1285, "step": 883 }, { "epoch": 0.18, "learning_rate": 9.821402812094074e-06, "loss": 0.1415, "step": 884 }, { "epoch": 0.18, "learning_rate": 9.82047702117055e-06, "loss": 0.1327, "step": 885 }, { "epoch": 0.18, "learning_rate": 9.819548880805087e-06, "loss": 0.126, "step": 886 }, { "epoch": 0.18, "learning_rate": 9.81861839145005e-06, "loss": 0.1239, "step": 887 }, { "epoch": 0.18, "learning_rate": 9.817685553558945e-06, "loss": 0.1273, "step": 888 }, { "epoch": 0.18, "learning_rate": 9.816750367586424e-06, "loss": 0.1313, "step": 889 }, { "epoch": 0.18, "learning_rate": 9.815812833988292e-06, "loss": 0.1203, "step": 890 }, { "epoch": 0.18, "learning_rate": 9.814872953221487e-06, "loss": 0.1232, "step": 891 }, { "epoch": 0.18, "learning_rate": 9.813930725744095e-06, "loss": 0.1318, "step": 892 }, { "epoch": 0.18, "learning_rate": 9.812986152015349e-06, "loss": 0.1233, "step": 893 }, { "epoch": 0.18, "learning_rate": 9.81203923249562e-06, "loss": 0.1286, "step": 894 }, { "epoch": 0.18, "learning_rate": 9.811089967646427e-06, "loss": 0.1391, "step": 895 }, { "epoch": 0.18, "learning_rate": 9.81013835793043e-06, "loss": 0.1275, "step": 896 }, { "epoch": 0.18, "learning_rate": 9.809184403811432e-06, "loss": 0.1178, "step": 897 }, { "epoch": 0.18, "learning_rate": 9.808228105754378e-06, "loss": 0.1293, "step": 898 }, { "epoch": 0.18, "learning_rate": 9.807269464225355e-06, "loss": 0.1187, "step": 899 }, { "epoch": 0.18, "learning_rate": 9.806308479691595e-06, "loss": 0.1288, "step": 900 }, { "epoch": 0.18, "learning_rate": 9.80534515262147e-06, "loss": 0.1281, "step": 901 }, { "epoch": 0.18, "learning_rate": 9.804379483484493e-06, "loss": 0.1241, "step": 902 }, { "epoch": 0.18, "learning_rate": 9.803411472751321e-06, "loss": 0.1187, "step": 903 }, { "epoch": 0.18, "learning_rate": 9.80244112089375e-06, "loss": 0.1305, "step": 904 }, { "epoch": 0.18, "learning_rate": 9.801468428384716e-06, "loss": 0.1215, "step": 905 }, { "epoch": 0.18, "learning_rate": 9.8004933956983e-06, "loss": 0.1126, "step": 906 }, { "epoch": 0.18, "learning_rate": 9.799516023309719e-06, "loss": 0.1241, "step": 907 }, { "epoch": 0.18, "learning_rate": 9.798536311695334e-06, "loss": 0.1314, "step": 908 }, { "epoch": 0.18, "learning_rate": 9.797554261332644e-06, "loss": 0.1231, "step": 909 }, { "epoch": 0.18, "learning_rate": 9.796569872700287e-06, "loss": 0.1295, "step": 910 }, { "epoch": 0.18, "learning_rate": 9.795583146278047e-06, "loss": 0.1216, "step": 911 }, { "epoch": 0.18, "learning_rate": 9.794594082546835e-06, "loss": 0.1338, "step": 912 }, { "epoch": 0.18, "learning_rate": 9.793602681988714e-06, "loss": 0.1173, "step": 913 }, { "epoch": 0.18, "learning_rate": 9.79260894508688e-06, "loss": 0.1279, "step": 914 }, { "epoch": 0.18, "learning_rate": 9.791612872325667e-06, "loss": 0.1208, "step": 915 }, { "epoch": 0.18, "learning_rate": 9.79061446419055e-06, "loss": 0.1223, "step": 916 }, { "epoch": 0.18, "learning_rate": 9.789613721168138e-06, "loss": 0.1248, "step": 917 }, { "epoch": 0.18, "learning_rate": 9.788610643746184e-06, "loss": 0.1304, "step": 918 }, { "epoch": 0.18, "learning_rate": 9.787605232413575e-06, "loss": 0.1615, "step": 919 }, { "epoch": 0.18, "learning_rate": 9.786597487660336e-06, "loss": 0.1313, "step": 920 }, { "epoch": 0.18, "learning_rate": 9.785587409977632e-06, "loss": 0.1205, "step": 921 }, { "epoch": 0.18, "learning_rate": 9.784574999857757e-06, "loss": 0.1181, "step": 922 }, { "epoch": 0.18, "learning_rate": 9.783560257794153e-06, "loss": 0.1241, "step": 923 }, { "epoch": 0.18, "learning_rate": 9.78254318428139e-06, "loss": 0.128, "step": 924 }, { "epoch": 0.18, "learning_rate": 9.781523779815178e-06, "loss": 0.1216, "step": 925 }, { "epoch": 0.19, "learning_rate": 9.780502044892363e-06, "loss": 0.125, "step": 926 }, { "epoch": 0.19, "learning_rate": 9.779477980010924e-06, "loss": 0.1229, "step": 927 }, { "epoch": 0.19, "learning_rate": 9.778451585669982e-06, "loss": 0.1285, "step": 928 }, { "epoch": 0.19, "learning_rate": 9.777422862369782e-06, "loss": 0.124, "step": 929 }, { "epoch": 0.19, "learning_rate": 9.776391810611719e-06, "loss": 0.1244, "step": 930 }, { "epoch": 0.19, "learning_rate": 9.775358430898311e-06, "loss": 0.12, "step": 931 }, { "epoch": 0.19, "learning_rate": 9.774322723733216e-06, "loss": 0.1308, "step": 932 }, { "epoch": 0.19, "learning_rate": 9.773284689621223e-06, "loss": 0.2282, "step": 933 }, { "epoch": 0.19, "learning_rate": 9.772244329068261e-06, "loss": 0.1259, "step": 934 }, { "epoch": 0.19, "learning_rate": 9.771201642581384e-06, "loss": 0.1242, "step": 935 }, { "epoch": 0.19, "learning_rate": 9.77015663066879e-06, "loss": 0.125, "step": 936 }, { "epoch": 0.19, "learning_rate": 9.769109293839803e-06, "loss": 0.1278, "step": 937 }, { "epoch": 0.19, "learning_rate": 9.768059632604881e-06, "loss": 0.1257, "step": 938 }, { "epoch": 0.19, "learning_rate": 9.767007647475618e-06, "loss": 0.1222, "step": 939 }, { "epoch": 0.19, "learning_rate": 9.765953338964736e-06, "loss": 0.1296, "step": 940 }, { "epoch": 0.19, "learning_rate": 9.764896707586095e-06, "loss": 0.1193, "step": 941 }, { "epoch": 0.19, "learning_rate": 9.763837753854684e-06, "loss": 0.1374, "step": 942 }, { "epoch": 0.19, "learning_rate": 9.762776478286622e-06, "loss": 0.1232, "step": 943 }, { "epoch": 0.19, "learning_rate": 9.761712881399164e-06, "loss": 0.1143, "step": 944 }, { "epoch": 0.19, "learning_rate": 9.760646963710694e-06, "loss": 0.1244, "step": 945 }, { "epoch": 0.19, "learning_rate": 9.759578725740726e-06, "loss": 0.1297, "step": 946 }, { "epoch": 0.19, "learning_rate": 9.758508168009908e-06, "loss": 0.1286, "step": 947 }, { "epoch": 0.19, "learning_rate": 9.757435291040016e-06, "loss": 0.1186, "step": 948 }, { "epoch": 0.19, "learning_rate": 9.756360095353957e-06, "loss": 0.12, "step": 949 }, { "epoch": 0.19, "learning_rate": 9.755282581475769e-06, "loss": 0.1295, "step": 950 }, { "epoch": 0.19, "learning_rate": 9.754202749930618e-06, "loss": 0.1252, "step": 951 }, { "epoch": 0.19, "learning_rate": 9.7531206012448e-06, "loss": 0.1312, "step": 952 }, { "epoch": 0.19, "learning_rate": 9.752036135945743e-06, "loss": 0.1165, "step": 953 }, { "epoch": 0.19, "learning_rate": 9.750949354562006e-06, "loss": 0.1214, "step": 954 }, { "epoch": 0.19, "learning_rate": 9.749860257623262e-06, "loss": 0.1255, "step": 955 }, { "epoch": 0.19, "learning_rate": 9.748768845660335e-06, "loss": 0.1226, "step": 956 }, { "epoch": 0.19, "learning_rate": 9.74767511920516e-06, "loss": 0.1417, "step": 957 }, { "epoch": 0.19, "learning_rate": 9.746579078790808e-06, "loss": 0.1269, "step": 958 }, { "epoch": 0.19, "learning_rate": 9.745480724951473e-06, "loss": 0.1329, "step": 959 }, { "epoch": 0.19, "learning_rate": 9.744380058222483e-06, "loss": 0.1234, "step": 960 }, { "epoch": 0.19, "learning_rate": 9.743277079140288e-06, "loss": 0.1254, "step": 961 }, { "epoch": 0.19, "learning_rate": 9.742171788242468e-06, "loss": 0.1319, "step": 962 }, { "epoch": 0.19, "learning_rate": 9.741064186067723e-06, "loss": 0.1444, "step": 963 }, { "epoch": 0.19, "learning_rate": 9.739954273155892e-06, "loss": 0.1175, "step": 964 }, { "epoch": 0.19, "learning_rate": 9.73884205004793e-06, "loss": 0.1178, "step": 965 }, { "epoch": 0.19, "learning_rate": 9.73772751728592e-06, "loss": 0.1181, "step": 966 }, { "epoch": 0.19, "learning_rate": 9.736610675413073e-06, "loss": 0.1247, "step": 967 }, { "epoch": 0.19, "learning_rate": 9.735491524973723e-06, "loss": 0.1272, "step": 968 }, { "epoch": 0.19, "learning_rate": 9.73437006651333e-06, "loss": 0.1208, "step": 969 }, { "epoch": 0.19, "learning_rate": 9.733246300578482e-06, "loss": 0.1256, "step": 970 }, { "epoch": 0.19, "learning_rate": 9.732120227716887e-06, "loss": 0.1255, "step": 971 }, { "epoch": 0.19, "learning_rate": 9.73099184847738e-06, "loss": 0.1147, "step": 972 }, { "epoch": 0.19, "learning_rate": 9.72986116340992e-06, "loss": 0.1261, "step": 973 }, { "epoch": 0.19, "learning_rate": 9.728728173065584e-06, "loss": 0.1243, "step": 974 }, { "epoch": 0.2, "learning_rate": 9.727592877996585e-06, "loss": 0.1231, "step": 975 }, { "epoch": 0.2, "learning_rate": 9.726455278756249e-06, "loss": 0.1175, "step": 976 }, { "epoch": 0.2, "learning_rate": 9.725315375899025e-06, "loss": 0.1202, "step": 977 }, { "epoch": 0.2, "learning_rate": 9.724173169980492e-06, "loss": 0.1302, "step": 978 }, { "epoch": 0.2, "learning_rate": 9.723028661557345e-06, "loss": 0.1223, "step": 979 }, { "epoch": 0.2, "learning_rate": 9.721881851187406e-06, "loss": 0.1191, "step": 980 }, { "epoch": 0.2, "learning_rate": 9.720732739429614e-06, "loss": 0.1252, "step": 981 }, { "epoch": 0.2, "learning_rate": 9.719581326844033e-06, "loss": 0.1188, "step": 982 }, { "epoch": 0.2, "learning_rate": 9.718427613991848e-06, "loss": 0.1269, "step": 983 }, { "epoch": 0.2, "learning_rate": 9.717271601435363e-06, "loss": 0.1199, "step": 984 }, { "epoch": 0.2, "learning_rate": 9.716113289738005e-06, "loss": 0.1306, "step": 985 }, { "epoch": 0.2, "learning_rate": 9.714952679464324e-06, "loss": 0.1189, "step": 986 }, { "epoch": 0.2, "learning_rate": 9.713789771179983e-06, "loss": 0.1302, "step": 987 }, { "epoch": 0.2, "learning_rate": 9.712624565451772e-06, "loss": 0.1283, "step": 988 }, { "epoch": 0.2, "learning_rate": 9.711457062847596e-06, "loss": 0.1308, "step": 989 }, { "epoch": 0.2, "learning_rate": 9.710287263936485e-06, "loss": 0.12, "step": 990 }, { "epoch": 0.2, "learning_rate": 9.709115169288582e-06, "loss": 0.1363, "step": 991 }, { "epoch": 0.2, "learning_rate": 9.707940779475151e-06, "loss": 0.1198, "step": 992 }, { "epoch": 0.2, "learning_rate": 9.706764095068579e-06, "loss": 0.1252, "step": 993 }, { "epoch": 0.2, "learning_rate": 9.705585116642364e-06, "loss": 0.1229, "step": 994 }, { "epoch": 0.2, "learning_rate": 9.704403844771128e-06, "loss": 0.1221, "step": 995 }, { "epoch": 0.2, "learning_rate": 9.703220280030607e-06, "loss": 0.1177, "step": 996 }, { "epoch": 0.2, "learning_rate": 9.702034422997658e-06, "loss": 0.1205, "step": 997 }, { "epoch": 0.2, "learning_rate": 9.700846274250252e-06, "loss": 0.121, "step": 998 }, { "epoch": 0.2, "learning_rate": 9.699655834367479e-06, "loss": 0.1219, "step": 999 }, { "epoch": 0.2, "learning_rate": 9.698463103929542e-06, "loss": 0.12, "step": 1000 }, { "epoch": 0.2, "learning_rate": 9.697268083517767e-06, "loss": 0.129, "step": 1001 }, { "epoch": 0.2, "learning_rate": 9.696070773714592e-06, "loss": 0.1178, "step": 1002 }, { "epoch": 0.2, "learning_rate": 9.69487117510357e-06, "loss": 0.1173, "step": 1003 }, { "epoch": 0.2, "learning_rate": 9.693669288269371e-06, "loss": 0.1219, "step": 1004 }, { "epoch": 0.2, "learning_rate": 9.69246511379778e-06, "loss": 0.1217, "step": 1005 }, { "epoch": 0.2, "learning_rate": 9.691258652275698e-06, "loss": 0.1122, "step": 1006 }, { "epoch": 0.2, "learning_rate": 9.690049904291139e-06, "loss": 0.1234, "step": 1007 }, { "epoch": 0.2, "learning_rate": 9.68883887043323e-06, "loss": 0.1191, "step": 1008 }, { "epoch": 0.2, "learning_rate": 9.687625551292219e-06, "loss": 0.1391, "step": 1009 }, { "epoch": 0.2, "learning_rate": 9.68640994745946e-06, "loss": 0.126, "step": 1010 }, { "epoch": 0.2, "learning_rate": 9.68519205952742e-06, "loss": 0.126, "step": 1011 }, { "epoch": 0.2, "learning_rate": 9.68397188808969e-06, "loss": 0.2146, "step": 1012 }, { "epoch": 0.2, "learning_rate": 9.682749433740963e-06, "loss": 0.1178, "step": 1013 }, { "epoch": 0.2, "learning_rate": 9.681524697077047e-06, "loss": 0.1198, "step": 1014 }, { "epoch": 0.2, "learning_rate": 9.680297678694867e-06, "loss": 0.1197, "step": 1015 }, { "epoch": 0.2, "learning_rate": 9.679068379192455e-06, "loss": 0.1215, "step": 1016 }, { "epoch": 0.2, "learning_rate": 9.677836799168958e-06, "loss": 0.1146, "step": 1017 }, { "epoch": 0.2, "learning_rate": 9.67660293922463e-06, "loss": 0.1262, "step": 1018 }, { "epoch": 0.2, "learning_rate": 9.675366799960842e-06, "loss": 0.1165, "step": 1019 }, { "epoch": 0.2, "learning_rate": 9.674128381980073e-06, "loss": 0.1213, "step": 1020 }, { "epoch": 0.2, "learning_rate": 9.672887685885913e-06, "loss": 0.1136, "step": 1021 }, { "epoch": 0.2, "learning_rate": 9.671644712283061e-06, "loss": 0.1292, "step": 1022 }, { "epoch": 0.2, "learning_rate": 9.670399461777328e-06, "loss": 0.1146, "step": 1023 }, { "epoch": 0.2, "learning_rate": 9.669151934975635e-06, "loss": 0.1221, "step": 1024 }, { "epoch": 0.2, "learning_rate": 9.667902132486009e-06, "loss": 0.1232, "step": 1025 }, { "epoch": 0.21, "learning_rate": 9.666650054917591e-06, "loss": 0.102, "step": 1026 }, { "epoch": 0.21, "learning_rate": 9.665395702880627e-06, "loss": 0.1205, "step": 1027 }, { "epoch": 0.21, "learning_rate": 9.664139076986473e-06, "loss": 0.1443, "step": 1028 }, { "epoch": 0.21, "learning_rate": 9.662880177847595e-06, "loss": 0.124, "step": 1029 }, { "epoch": 0.21, "learning_rate": 9.661619006077562e-06, "loss": 0.128, "step": 1030 }, { "epoch": 0.21, "learning_rate": 9.660355562291055e-06, "loss": 0.1212, "step": 1031 }, { "epoch": 0.21, "learning_rate": 9.659089847103863e-06, "loss": 0.1301, "step": 1032 }, { "epoch": 0.21, "learning_rate": 9.65782186113288e-06, "loss": 0.1177, "step": 1033 }, { "epoch": 0.21, "learning_rate": 9.656551604996102e-06, "loss": 0.1054, "step": 1034 }, { "epoch": 0.21, "learning_rate": 9.655279079312643e-06, "loss": 0.1253, "step": 1035 }, { "epoch": 0.21, "learning_rate": 9.654004284702712e-06, "loss": 0.137, "step": 1036 }, { "epoch": 0.21, "learning_rate": 9.65272722178763e-06, "loss": 0.1587, "step": 1037 }, { "epoch": 0.21, "learning_rate": 9.651447891189824e-06, "loss": 0.1204, "step": 1038 }, { "epoch": 0.21, "learning_rate": 9.650166293532822e-06, "loss": 0.129, "step": 1039 }, { "epoch": 0.21, "learning_rate": 9.648882429441258e-06, "loss": 0.1153, "step": 1040 }, { "epoch": 0.21, "learning_rate": 9.647596299540874e-06, "loss": 0.127, "step": 1041 }, { "epoch": 0.21, "learning_rate": 9.646307904458513e-06, "loss": 0.1221, "step": 1042 }, { "epoch": 0.21, "learning_rate": 9.645017244822124e-06, "loss": 0.1228, "step": 1043 }, { "epoch": 0.21, "learning_rate": 9.643724321260757e-06, "loss": 0.1214, "step": 1044 }, { "epoch": 0.21, "learning_rate": 9.642429134404568e-06, "loss": 0.1182, "step": 1045 }, { "epoch": 0.21, "learning_rate": 9.641131684884817e-06, "loss": 0.1275, "step": 1046 }, { "epoch": 0.21, "learning_rate": 9.639831973333864e-06, "loss": 0.1272, "step": 1047 }, { "epoch": 0.21, "learning_rate": 9.638530000385171e-06, "loss": 0.1298, "step": 1048 }, { "epoch": 0.21, "learning_rate": 9.637225766673309e-06, "loss": 0.116, "step": 1049 }, { "epoch": 0.21, "learning_rate": 9.635919272833938e-06, "loss": 0.1155, "step": 1050 }, { "epoch": 0.21, "learning_rate": 9.634610519503833e-06, "loss": 0.1198, "step": 1051 }, { "epoch": 0.21, "learning_rate": 9.633299507320862e-06, "loss": 0.1103, "step": 1052 }, { "epoch": 0.21, "learning_rate": 9.631986236923998e-06, "loss": 0.0967, "step": 1053 }, { "epoch": 0.21, "learning_rate": 9.630670708953311e-06, "loss": 0.1285, "step": 1054 }, { "epoch": 0.21, "learning_rate": 9.629352924049975e-06, "loss": 0.1183, "step": 1055 }, { "epoch": 0.21, "learning_rate": 9.628032882856262e-06, "loss": 0.1181, "step": 1056 }, { "epoch": 0.21, "learning_rate": 9.626710586015543e-06, "loss": 0.1278, "step": 1057 }, { "epoch": 0.21, "learning_rate": 9.62538603417229e-06, "loss": 0.1278, "step": 1058 }, { "epoch": 0.21, "learning_rate": 9.624059227972077e-06, "loss": 0.1189, "step": 1059 }, { "epoch": 0.21, "learning_rate": 9.622730168061568e-06, "loss": 0.1196, "step": 1060 }, { "epoch": 0.21, "learning_rate": 9.62139885508853e-06, "loss": 0.1135, "step": 1061 }, { "epoch": 0.21, "learning_rate": 9.620065289701835e-06, "loss": 0.1264, "step": 1062 }, { "epoch": 0.21, "learning_rate": 9.61872947255144e-06, "loss": 0.1186, "step": 1063 }, { "epoch": 0.21, "learning_rate": 9.617391404288412e-06, "loss": 0.1295, "step": 1064 }, { "epoch": 0.21, "learning_rate": 9.616051085564905e-06, "loss": 0.1216, "step": 1065 }, { "epoch": 0.21, "learning_rate": 9.614708517034176e-06, "loss": 0.1169, "step": 1066 }, { "epoch": 0.21, "learning_rate": 9.613363699350575e-06, "loss": 0.1192, "step": 1067 }, { "epoch": 0.21, "learning_rate": 9.612016633169552e-06, "loss": 0.119, "step": 1068 }, { "epoch": 0.21, "learning_rate": 9.610667319147648e-06, "loss": 0.1207, "step": 1069 }, { "epoch": 0.21, "learning_rate": 9.609315757942504e-06, "loss": 0.118, "step": 1070 }, { "epoch": 0.21, "learning_rate": 9.607961950212855e-06, "loss": 0.1199, "step": 1071 }, { "epoch": 0.21, "learning_rate": 9.606605896618528e-06, "loss": 0.1171, "step": 1072 }, { "epoch": 0.21, "learning_rate": 9.605247597820448e-06, "loss": 0.1141, "step": 1073 }, { "epoch": 0.21, "learning_rate": 9.603887054480636e-06, "loss": 0.1195, "step": 1074 }, { "epoch": 0.21, "learning_rate": 9.602524267262202e-06, "loss": 0.1185, "step": 1075 }, { "epoch": 0.22, "learning_rate": 9.601159236829353e-06, "loss": 0.117, "step": 1076 }, { "epoch": 0.22, "learning_rate": 9.599791963847388e-06, "loss": 0.1225, "step": 1077 }, { "epoch": 0.22, "learning_rate": 9.598422448982697e-06, "loss": 0.1122, "step": 1078 }, { "epoch": 0.22, "learning_rate": 9.597050692902765e-06, "loss": 0.1401, "step": 1079 }, { "epoch": 0.22, "learning_rate": 9.595676696276173e-06, "loss": 0.1233, "step": 1080 }, { "epoch": 0.22, "learning_rate": 9.594300459772588e-06, "loss": 0.1244, "step": 1081 }, { "epoch": 0.22, "learning_rate": 9.592921984062771e-06, "loss": 0.1169, "step": 1082 }, { "epoch": 0.22, "learning_rate": 9.591541269818574e-06, "loss": 0.1252, "step": 1083 }, { "epoch": 0.22, "learning_rate": 9.590158317712941e-06, "loss": 0.1215, "step": 1084 }, { "epoch": 0.22, "learning_rate": 9.588773128419907e-06, "loss": 0.1183, "step": 1085 }, { "epoch": 0.22, "learning_rate": 9.587385702614593e-06, "loss": 0.1199, "step": 1086 }, { "epoch": 0.22, "learning_rate": 9.585996040973218e-06, "loss": 0.1134, "step": 1087 }, { "epoch": 0.22, "learning_rate": 9.584604144173084e-06, "loss": 0.115, "step": 1088 }, { "epoch": 0.22, "learning_rate": 9.583210012892582e-06, "loss": 0.1166, "step": 1089 }, { "epoch": 0.22, "learning_rate": 9.581813647811199e-06, "loss": 0.1356, "step": 1090 }, { "epoch": 0.22, "learning_rate": 9.580415049609503e-06, "loss": 0.1217, "step": 1091 }, { "epoch": 0.22, "learning_rate": 9.579014218969158e-06, "loss": 0.1227, "step": 1092 }, { "epoch": 0.22, "learning_rate": 9.577611156572908e-06, "loss": 0.1154, "step": 1093 }, { "epoch": 0.22, "learning_rate": 9.576205863104588e-06, "loss": 0.1284, "step": 1094 }, { "epoch": 0.22, "learning_rate": 9.574798339249124e-06, "loss": 0.1211, "step": 1095 }, { "epoch": 0.22, "learning_rate": 9.573388585692525e-06, "loss": 0.1137, "step": 1096 }, { "epoch": 0.22, "learning_rate": 9.571976603121889e-06, "loss": 0.1167, "step": 1097 }, { "epoch": 0.22, "learning_rate": 9.570562392225395e-06, "loss": 0.121, "step": 1098 }, { "epoch": 0.22, "learning_rate": 9.569145953692316e-06, "loss": 0.116, "step": 1099 }, { "epoch": 0.22, "learning_rate": 9.567727288213005e-06, "loss": 0.1166, "step": 1100 }, { "epoch": 0.22, "learning_rate": 9.566306396478904e-06, "loss": 0.1204, "step": 1101 }, { "epoch": 0.22, "learning_rate": 9.564883279182538e-06, "loss": 0.1128, "step": 1102 }, { "epoch": 0.22, "learning_rate": 9.563457937017514e-06, "loss": 0.143, "step": 1103 }, { "epoch": 0.22, "learning_rate": 9.562030370678533e-06, "loss": 0.1223, "step": 1104 }, { "epoch": 0.22, "learning_rate": 9.560600580861366e-06, "loss": 0.1261, "step": 1105 }, { "epoch": 0.22, "learning_rate": 9.55916856826288e-06, "loss": 0.1143, "step": 1106 }, { "epoch": 0.22, "learning_rate": 9.557734333581019e-06, "loss": 0.1131, "step": 1107 }, { "epoch": 0.22, "learning_rate": 9.556297877514812e-06, "loss": 0.1281, "step": 1108 }, { "epoch": 0.22, "learning_rate": 9.554859200764371e-06, "loss": 0.1167, "step": 1109 }, { "epoch": 0.22, "learning_rate": 9.553418304030886e-06, "loss": 0.117, "step": 1110 }, { "epoch": 0.22, "learning_rate": 9.551975188016638e-06, "loss": 0.1135, "step": 1111 }, { "epoch": 0.22, "learning_rate": 9.550529853424979e-06, "loss": 0.1157, "step": 1112 }, { "epoch": 0.22, "learning_rate": 9.549082300960351e-06, "loss": 0.1224, "step": 1113 }, { "epoch": 0.22, "learning_rate": 9.547632531328273e-06, "loss": 0.112, "step": 1114 }, { "epoch": 0.22, "learning_rate": 9.546180545235344e-06, "loss": 0.1186, "step": 1115 }, { "epoch": 0.22, "learning_rate": 9.544726343389245e-06, "loss": 0.1221, "step": 1116 }, { "epoch": 0.22, "learning_rate": 9.543269926498735e-06, "loss": 0.1237, "step": 1117 }, { "epoch": 0.22, "learning_rate": 9.541811295273657e-06, "loss": 0.1379, "step": 1118 }, { "epoch": 0.22, "learning_rate": 9.540350450424927e-06, "loss": 0.1192, "step": 1119 }, { "epoch": 0.22, "learning_rate": 9.538887392664544e-06, "loss": 0.1284, "step": 1120 }, { "epoch": 0.22, "learning_rate": 9.537422122705585e-06, "loss": 0.1111, "step": 1121 }, { "epoch": 0.22, "learning_rate": 9.535954641262206e-06, "loss": 0.1225, "step": 1122 }, { "epoch": 0.22, "learning_rate": 9.534484949049636e-06, "loss": 0.1183, "step": 1123 }, { "epoch": 0.22, "learning_rate": 9.53301304678419e-06, "loss": 0.1239, "step": 1124 }, { "epoch": 0.23, "learning_rate": 9.531538935183252e-06, "loss": 0.1105, "step": 1125 }, { "epoch": 0.23, "learning_rate": 9.530062614965286e-06, "loss": 0.1058, "step": 1126 }, { "epoch": 0.23, "learning_rate": 9.528584086849832e-06, "loss": 0.1196, "step": 1127 }, { "epoch": 0.23, "learning_rate": 9.52710335155751e-06, "loss": 0.1171, "step": 1128 }, { "epoch": 0.23, "learning_rate": 9.525620409810009e-06, "loss": 0.12, "step": 1129 }, { "epoch": 0.23, "learning_rate": 9.524135262330098e-06, "loss": 0.2945, "step": 1130 }, { "epoch": 0.23, "learning_rate": 9.52264790984162e-06, "loss": 0.1136, "step": 1131 }, { "epoch": 0.23, "learning_rate": 9.521158353069494e-06, "loss": 0.134, "step": 1132 }, { "epoch": 0.23, "learning_rate": 9.51966659273971e-06, "loss": 0.1156, "step": 1133 }, { "epoch": 0.23, "learning_rate": 9.518172629579334e-06, "loss": 0.1177, "step": 1134 }, { "epoch": 0.23, "learning_rate": 9.516676464316505e-06, "loss": 0.116, "step": 1135 }, { "epoch": 0.23, "learning_rate": 9.515178097680437e-06, "loss": 0.1267, "step": 1136 }, { "epoch": 0.23, "learning_rate": 9.513677530401415e-06, "loss": 0.1098, "step": 1137 }, { "epoch": 0.23, "learning_rate": 9.512174763210798e-06, "loss": 0.1252, "step": 1138 }, { "epoch": 0.23, "learning_rate": 9.510669796841014e-06, "loss": 0.1144, "step": 1139 }, { "epoch": 0.23, "learning_rate": 9.50916263202557e-06, "loss": 0.1189, "step": 1140 }, { "epoch": 0.23, "learning_rate": 9.507653269499035e-06, "loss": 0.1312, "step": 1141 }, { "epoch": 0.23, "learning_rate": 9.506141709997058e-06, "loss": 0.111, "step": 1142 }, { "epoch": 0.23, "learning_rate": 9.504627954256352e-06, "loss": 0.1029, "step": 1143 }, { "epoch": 0.23, "learning_rate": 9.503112003014702e-06, "loss": 0.1143, "step": 1144 }, { "epoch": 0.23, "learning_rate": 9.501593857010968e-06, "loss": 0.1097, "step": 1145 }, { "epoch": 0.23, "learning_rate": 9.500073516985074e-06, "loss": 0.1199, "step": 1146 }, { "epoch": 0.23, "learning_rate": 9.498550983678016e-06, "loss": 0.1122, "step": 1147 }, { "epoch": 0.23, "learning_rate": 9.497026257831856e-06, "loss": 0.1224, "step": 1148 }, { "epoch": 0.23, "learning_rate": 9.495499340189729e-06, "loss": 0.1151, "step": 1149 }, { "epoch": 0.23, "learning_rate": 9.493970231495836e-06, "loss": 0.1174, "step": 1150 }, { "epoch": 0.23, "learning_rate": 9.492438932495444e-06, "loss": 0.1207, "step": 1151 }, { "epoch": 0.23, "learning_rate": 9.490905443934892e-06, "loss": 0.1101, "step": 1152 }, { "epoch": 0.23, "learning_rate": 9.489369766561584e-06, "loss": 0.1191, "step": 1153 }, { "epoch": 0.23, "learning_rate": 9.487831901123989e-06, "loss": 0.1178, "step": 1154 }, { "epoch": 0.23, "learning_rate": 9.486291848371642e-06, "loss": 0.2089, "step": 1155 }, { "epoch": 0.23, "learning_rate": 9.484749609055151e-06, "loss": 0.1184, "step": 1156 }, { "epoch": 0.23, "learning_rate": 9.48320518392618e-06, "loss": 0.1409, "step": 1157 }, { "epoch": 0.23, "learning_rate": 9.481658573737465e-06, "loss": 0.1283, "step": 1158 }, { "epoch": 0.23, "learning_rate": 9.480109779242805e-06, "loss": 0.121, "step": 1159 }, { "epoch": 0.23, "learning_rate": 9.478558801197065e-06, "loss": 0.1121, "step": 1160 }, { "epoch": 0.23, "learning_rate": 9.47700564035617e-06, "loss": 0.1167, "step": 1161 }, { "epoch": 0.23, "learning_rate": 9.475450297477113e-06, "loss": 0.1235, "step": 1162 }, { "epoch": 0.23, "learning_rate": 9.473892773317952e-06, "loss": 0.116, "step": 1163 }, { "epoch": 0.23, "learning_rate": 9.4723330686378e-06, "loss": 0.1164, "step": 1164 }, { "epoch": 0.23, "learning_rate": 9.470771184196842e-06, "loss": 0.1119, "step": 1165 }, { "epoch": 0.23, "learning_rate": 9.46920712075632e-06, "loss": 0.1126, "step": 1166 }, { "epoch": 0.23, "learning_rate": 9.46764087907854e-06, "loss": 0.1159, "step": 1167 }, { "epoch": 0.23, "learning_rate": 9.46607245992687e-06, "loss": 0.1188, "step": 1168 }, { "epoch": 0.23, "learning_rate": 9.464501864065735e-06, "loss": 0.1201, "step": 1169 }, { "epoch": 0.23, "learning_rate": 9.46292909226063e-06, "loss": 0.11, "step": 1170 }, { "epoch": 0.23, "learning_rate": 9.461354145278098e-06, "loss": 0.1164, "step": 1171 }, { "epoch": 0.23, "learning_rate": 9.459777023885754e-06, "loss": 0.1151, "step": 1172 }, { "epoch": 0.23, "learning_rate": 9.458197728852268e-06, "loss": 0.1146, "step": 1173 }, { "epoch": 0.23, "learning_rate": 9.456616260947367e-06, "loss": 0.1154, "step": 1174 }, { "epoch": 0.23, "learning_rate": 9.45503262094184e-06, "loss": 0.1095, "step": 1175 }, { "epoch": 0.24, "learning_rate": 9.453446809607534e-06, "loss": 0.1169, "step": 1176 }, { "epoch": 0.24, "learning_rate": 9.451858827717354e-06, "loss": 0.1157, "step": 1177 }, { "epoch": 0.24, "learning_rate": 9.450268676045261e-06, "loss": 0.113, "step": 1178 }, { "epoch": 0.24, "learning_rate": 9.448676355366282e-06, "loss": 0.1193, "step": 1179 }, { "epoch": 0.24, "learning_rate": 9.44708186645649e-06, "loss": 0.1213, "step": 1180 }, { "epoch": 0.24, "learning_rate": 9.445485210093018e-06, "loss": 0.1202, "step": 1181 }, { "epoch": 0.24, "learning_rate": 9.443886387054058e-06, "loss": 0.1191, "step": 1182 }, { "epoch": 0.24, "learning_rate": 9.44228539811886e-06, "loss": 0.1098, "step": 1183 }, { "epoch": 0.24, "learning_rate": 9.440682244067724e-06, "loss": 0.1147, "step": 1184 }, { "epoch": 0.24, "learning_rate": 9.439076925682006e-06, "loss": 0.1158, "step": 1185 }, { "epoch": 0.24, "learning_rate": 9.437469443744124e-06, "loss": 0.1156, "step": 1186 }, { "epoch": 0.24, "learning_rate": 9.435859799037541e-06, "loss": 0.1171, "step": 1187 }, { "epoch": 0.24, "learning_rate": 9.43424799234678e-06, "loss": 0.1133, "step": 1188 }, { "epoch": 0.24, "learning_rate": 9.432634024457414e-06, "loss": 0.1126, "step": 1189 }, { "epoch": 0.24, "learning_rate": 9.431017896156074e-06, "loss": 0.1128, "step": 1190 }, { "epoch": 0.24, "learning_rate": 9.429399608230441e-06, "loss": 0.127, "step": 1191 }, { "epoch": 0.24, "learning_rate": 9.427779161469246e-06, "loss": 0.1189, "step": 1192 }, { "epoch": 0.24, "learning_rate": 9.426156556662276e-06, "loss": 0.1187, "step": 1193 }, { "epoch": 0.24, "learning_rate": 9.424531794600372e-06, "loss": 0.1196, "step": 1194 }, { "epoch": 0.24, "learning_rate": 9.42290487607542e-06, "loss": 0.1182, "step": 1195 }, { "epoch": 0.24, "learning_rate": 9.421275801880363e-06, "loss": 0.1226, "step": 1196 }, { "epoch": 0.24, "learning_rate": 9.419644572809189e-06, "loss": 0.1224, "step": 1197 }, { "epoch": 0.24, "learning_rate": 9.418011189656942e-06, "loss": 0.1143, "step": 1198 }, { "epoch": 0.24, "learning_rate": 9.41637565321971e-06, "loss": 0.1084, "step": 1199 }, { "epoch": 0.24, "learning_rate": 9.414737964294636e-06, "loss": 0.1141, "step": 1200 }, { "epoch": 0.24, "learning_rate": 9.41309812367991e-06, "loss": 0.1115, "step": 1201 }, { "epoch": 0.24, "learning_rate": 9.411456132174768e-06, "loss": 0.1088, "step": 1202 }, { "epoch": 0.24, "learning_rate": 9.409811990579498e-06, "loss": 0.1156, "step": 1203 }, { "epoch": 0.24, "learning_rate": 9.408165699695435e-06, "loss": 0.1293, "step": 1204 }, { "epoch": 0.24, "learning_rate": 9.406517260324962e-06, "loss": 0.1266, "step": 1205 }, { "epoch": 0.24, "learning_rate": 9.404866673271506e-06, "loss": 0.1175, "step": 1206 }, { "epoch": 0.24, "learning_rate": 9.403213939339546e-06, "loss": 0.1155, "step": 1207 }, { "epoch": 0.24, "learning_rate": 9.401559059334601e-06, "loss": 0.113, "step": 1208 }, { "epoch": 0.24, "learning_rate": 9.399902034063244e-06, "loss": 0.1173, "step": 1209 }, { "epoch": 0.24, "learning_rate": 9.398242864333084e-06, "loss": 0.1111, "step": 1210 }, { "epoch": 0.24, "learning_rate": 9.396581550952781e-06, "loss": 0.1121, "step": 1211 }, { "epoch": 0.24, "learning_rate": 9.394918094732044e-06, "loss": 0.1168, "step": 1212 }, { "epoch": 0.24, "learning_rate": 9.393252496481615e-06, "loss": 0.1195, "step": 1213 }, { "epoch": 0.24, "learning_rate": 9.39158475701329e-06, "loss": 0.118, "step": 1214 }, { "epoch": 0.24, "learning_rate": 9.389914877139903e-06, "loss": 0.1095, "step": 1215 }, { "epoch": 0.24, "learning_rate": 9.388242857675336e-06, "loss": 0.1133, "step": 1216 }, { "epoch": 0.24, "learning_rate": 9.386568699434509e-06, "loss": 0.1221, "step": 1217 }, { "epoch": 0.24, "learning_rate": 9.384892403233384e-06, "loss": 0.1169, "step": 1218 }, { "epoch": 0.24, "learning_rate": 9.383213969888972e-06, "loss": 0.1166, "step": 1219 }, { "epoch": 0.24, "learning_rate": 9.381533400219319e-06, "loss": 0.111, "step": 1220 }, { "epoch": 0.24, "learning_rate": 9.379850695043513e-06, "loss": 0.1026, "step": 1221 }, { "epoch": 0.24, "learning_rate": 9.378165855181687e-06, "loss": 0.122, "step": 1222 }, { "epoch": 0.24, "learning_rate": 9.376478881455008e-06, "loss": 0.1122, "step": 1223 }, { "epoch": 0.24, "learning_rate": 9.37478977468569e-06, "loss": 0.1177, "step": 1224 }, { "epoch": 0.24, "learning_rate": 9.37309853569698e-06, "loss": 0.1146, "step": 1225 }, { "epoch": 0.25, "learning_rate": 9.371405165313169e-06, "loss": 0.1154, "step": 1226 }, { "epoch": 0.25, "learning_rate": 9.369709664359585e-06, "loss": 0.1153, "step": 1227 }, { "epoch": 0.25, "learning_rate": 9.368012033662594e-06, "loss": 0.1113, "step": 1228 }, { "epoch": 0.25, "learning_rate": 9.366312274049602e-06, "loss": 0.1133, "step": 1229 }, { "epoch": 0.25, "learning_rate": 9.364610386349048e-06, "loss": 0.1198, "step": 1230 }, { "epoch": 0.25, "learning_rate": 9.362906371390416e-06, "loss": 0.1148, "step": 1231 }, { "epoch": 0.25, "learning_rate": 9.361200230004219e-06, "loss": 0.1129, "step": 1232 }, { "epoch": 0.25, "learning_rate": 9.35949196302201e-06, "loss": 0.1147, "step": 1233 }, { "epoch": 0.25, "learning_rate": 9.357781571276379e-06, "loss": 0.1084, "step": 1234 }, { "epoch": 0.25, "learning_rate": 9.356069055600949e-06, "loss": 0.116, "step": 1235 }, { "epoch": 0.25, "learning_rate": 9.354354416830377e-06, "loss": 0.1164, "step": 1236 }, { "epoch": 0.25, "learning_rate": 9.352637655800362e-06, "loss": 0.116, "step": 1237 }, { "epoch": 0.25, "learning_rate": 9.35091877334763e-06, "loss": 0.1086, "step": 1238 }, { "epoch": 0.25, "learning_rate": 9.349197770309942e-06, "loss": 0.1189, "step": 1239 }, { "epoch": 0.25, "learning_rate": 9.347474647526095e-06, "loss": 0.1142, "step": 1240 }, { "epoch": 0.25, "learning_rate": 9.34574940583592e-06, "loss": 0.1102, "step": 1241 }, { "epoch": 0.25, "learning_rate": 9.344022046080277e-06, "loss": 0.1179, "step": 1242 }, { "epoch": 0.25, "learning_rate": 9.342292569101061e-06, "loss": 0.1146, "step": 1243 }, { "epoch": 0.25, "learning_rate": 9.340560975741198e-06, "loss": 0.123, "step": 1244 }, { "epoch": 0.25, "learning_rate": 9.338827266844643e-06, "loss": 0.1099, "step": 1245 }, { "epoch": 0.25, "learning_rate": 9.337091443256388e-06, "loss": 0.1105, "step": 1246 }, { "epoch": 0.25, "learning_rate": 9.33535350582245e-06, "loss": 0.1191, "step": 1247 }, { "epoch": 0.25, "learning_rate": 9.333613455389883e-06, "loss": 0.1106, "step": 1248 }, { "epoch": 0.25, "learning_rate": 9.33187129280676e-06, "loss": 0.1185, "step": 1249 }, { "epoch": 0.25, "learning_rate": 9.330127018922195e-06, "loss": 0.1263, "step": 1250 }, { "epoch": 0.25, "learning_rate": 9.328380634586322e-06, "loss": 0.1129, "step": 1251 }, { "epoch": 0.25, "learning_rate": 9.326632140650311e-06, "loss": 0.1102, "step": 1252 }, { "epoch": 0.25, "learning_rate": 9.324881537966355e-06, "loss": 0.1122, "step": 1253 }, { "epoch": 0.25, "learning_rate": 9.323128827387675e-06, "loss": 0.1165, "step": 1254 }, { "epoch": 0.25, "learning_rate": 9.321374009768525e-06, "loss": 0.1181, "step": 1255 }, { "epoch": 0.25, "learning_rate": 9.319617085964177e-06, "loss": 0.109, "step": 1256 }, { "epoch": 0.25, "learning_rate": 9.317858056830938e-06, "loss": 0.1164, "step": 1257 }, { "epoch": 0.25, "learning_rate": 9.316096923226135e-06, "loss": 0.113, "step": 1258 }, { "epoch": 0.25, "learning_rate": 9.314333686008125e-06, "loss": 0.1086, "step": 1259 }, { "epoch": 0.25, "learning_rate": 9.312568346036288e-06, "loss": 0.1131, "step": 1260 }, { "epoch": 0.25, "learning_rate": 9.31080090417103e-06, "loss": 0.1187, "step": 1261 }, { "epoch": 0.25, "learning_rate": 9.309031361273775e-06, "loss": 0.1109, "step": 1262 }, { "epoch": 0.25, "learning_rate": 9.307259718206984e-06, "loss": 0.1136, "step": 1263 }, { "epoch": 0.25, "learning_rate": 9.305485975834132e-06, "loss": 0.1104, "step": 1264 }, { "epoch": 0.25, "learning_rate": 9.30371013501972e-06, "loss": 0.1103, "step": 1265 }, { "epoch": 0.25, "learning_rate": 9.301932196629267e-06, "loss": 0.1106, "step": 1266 }, { "epoch": 0.25, "learning_rate": 9.300152161529325e-06, "loss": 0.1101, "step": 1267 }, { "epoch": 0.25, "learning_rate": 9.298370030587456e-06, "loss": 0.1236, "step": 1268 }, { "epoch": 0.25, "learning_rate": 9.296585804672253e-06, "loss": 0.1169, "step": 1269 }, { "epoch": 0.25, "learning_rate": 9.294799484653323e-06, "loss": 0.1183, "step": 1270 }, { "epoch": 0.25, "learning_rate": 9.293011071401299e-06, "loss": 0.1142, "step": 1271 }, { "epoch": 0.25, "learning_rate": 9.291220565787829e-06, "loss": 0.1092, "step": 1272 }, { "epoch": 0.25, "learning_rate": 9.289427968685588e-06, "loss": 0.1151, "step": 1273 }, { "epoch": 0.25, "learning_rate": 9.287633280968263e-06, "loss": 0.1159, "step": 1274 }, { "epoch": 0.26, "learning_rate": 9.285836503510562e-06, "loss": 0.1172, "step": 1275 }, { "epoch": 0.26, "learning_rate": 9.284037637188215e-06, "loss": 0.1243, "step": 1276 }, { "epoch": 0.26, "learning_rate": 9.282236682877968e-06, "loss": 0.1125, "step": 1277 }, { "epoch": 0.26, "learning_rate": 9.280433641457582e-06, "loss": 0.1022, "step": 1278 }, { "epoch": 0.26, "learning_rate": 9.278628513805838e-06, "loss": 0.1168, "step": 1279 }, { "epoch": 0.26, "learning_rate": 9.276821300802535e-06, "loss": 0.1136, "step": 1280 }, { "epoch": 0.26, "learning_rate": 9.275012003328483e-06, "loss": 0.1039, "step": 1281 }, { "epoch": 0.26, "learning_rate": 9.273200622265516e-06, "loss": 0.1168, "step": 1282 }, { "epoch": 0.26, "learning_rate": 9.271387158496477e-06, "loss": 0.1194, "step": 1283 }, { "epoch": 0.26, "learning_rate": 9.269571612905227e-06, "loss": 0.1144, "step": 1284 }, { "epoch": 0.26, "learning_rate": 9.267753986376638e-06, "loss": 0.1155, "step": 1285 }, { "epoch": 0.26, "learning_rate": 9.265934279796602e-06, "loss": 0.1149, "step": 1286 }, { "epoch": 0.26, "learning_rate": 9.264112494052022e-06, "loss": 0.1097, "step": 1287 }, { "epoch": 0.26, "learning_rate": 9.262288630030814e-06, "loss": 0.1132, "step": 1288 }, { "epoch": 0.26, "learning_rate": 9.260462688621906e-06, "loss": 0.1116, "step": 1289 }, { "epoch": 0.26, "learning_rate": 9.25863467071524e-06, "loss": 0.1048, "step": 1290 }, { "epoch": 0.26, "learning_rate": 9.256804577201768e-06, "loss": 0.1097, "step": 1291 }, { "epoch": 0.26, "learning_rate": 9.25497240897346e-06, "loss": 0.1151, "step": 1292 }, { "epoch": 0.26, "learning_rate": 9.25313816692329e-06, "loss": 0.1109, "step": 1293 }, { "epoch": 0.26, "learning_rate": 9.251301851945244e-06, "loss": 0.111, "step": 1294 }, { "epoch": 0.26, "learning_rate": 9.24946346493432e-06, "loss": 0.1173, "step": 1295 }, { "epoch": 0.26, "learning_rate": 9.247623006786529e-06, "loss": 0.1091, "step": 1296 }, { "epoch": 0.26, "learning_rate": 9.245780478398883e-06, "loss": 0.111, "step": 1297 }, { "epoch": 0.26, "learning_rate": 9.24393588066941e-06, "loss": 0.1062, "step": 1298 }, { "epoch": 0.26, "learning_rate": 9.242089214497146e-06, "loss": 0.1124, "step": 1299 }, { "epoch": 0.26, "learning_rate": 9.24024048078213e-06, "loss": 0.1241, "step": 1300 }, { "epoch": 0.26, "learning_rate": 9.238389680425417e-06, "loss": 0.1188, "step": 1301 }, { "epoch": 0.26, "learning_rate": 9.236536814329062e-06, "loss": 0.107, "step": 1302 }, { "epoch": 0.26, "learning_rate": 9.234681883396129e-06, "loss": 0.1235, "step": 1303 }, { "epoch": 0.26, "learning_rate": 9.232824888530689e-06, "loss": 0.115, "step": 1304 }, { "epoch": 0.26, "learning_rate": 9.230965830637821e-06, "loss": 0.1174, "step": 1305 }, { "epoch": 0.26, "learning_rate": 9.229104710623604e-06, "loss": 0.1032, "step": 1306 }, { "epoch": 0.26, "learning_rate": 9.227241529395127e-06, "loss": 0.1124, "step": 1307 }, { "epoch": 0.26, "learning_rate": 9.225376287860484e-06, "loss": 0.1072, "step": 1308 }, { "epoch": 0.26, "learning_rate": 9.223508986928766e-06, "loss": 0.1077, "step": 1309 }, { "epoch": 0.26, "learning_rate": 9.221639627510076e-06, "loss": 0.1088, "step": 1310 }, { "epoch": 0.26, "learning_rate": 9.219768210515518e-06, "loss": 0.1323, "step": 1311 }, { "epoch": 0.26, "learning_rate": 9.217894736857195e-06, "loss": 0.1141, "step": 1312 }, { "epoch": 0.26, "learning_rate": 9.216019207448216e-06, "loss": 0.1146, "step": 1313 }, { "epoch": 0.26, "learning_rate": 9.214141623202694e-06, "loss": 0.1344, "step": 1314 }, { "epoch": 0.26, "learning_rate": 9.21226198503574e-06, "loss": 0.1145, "step": 1315 }, { "epoch": 0.26, "learning_rate": 9.210380293863462e-06, "loss": 0.1038, "step": 1316 }, { "epoch": 0.26, "learning_rate": 9.208496550602979e-06, "loss": 0.1104, "step": 1317 }, { "epoch": 0.26, "learning_rate": 9.206610756172402e-06, "loss": 0.1169, "step": 1318 }, { "epoch": 0.26, "learning_rate": 9.204722911490847e-06, "loss": 0.1082, "step": 1319 }, { "epoch": 0.26, "learning_rate": 9.202833017478421e-06, "loss": 0.1064, "step": 1320 }, { "epoch": 0.26, "learning_rate": 9.200941075056242e-06, "loss": 0.1091, "step": 1321 }, { "epoch": 0.26, "learning_rate": 9.199047085146415e-06, "loss": 0.1139, "step": 1322 }, { "epoch": 0.26, "learning_rate": 9.197151048672051e-06, "loss": 0.1307, "step": 1323 }, { "epoch": 0.26, "learning_rate": 9.195252966557252e-06, "loss": 0.1121, "step": 1324 }, { "epoch": 0.27, "learning_rate": 9.193352839727122e-06, "loss": 0.1145, "step": 1325 }, { "epoch": 0.27, "learning_rate": 9.191450669107758e-06, "loss": 0.1045, "step": 1326 }, { "epoch": 0.27, "learning_rate": 9.189546455626258e-06, "loss": 0.1186, "step": 1327 }, { "epoch": 0.27, "learning_rate": 9.18764020021071e-06, "loss": 0.1114, "step": 1328 }, { "epoch": 0.27, "learning_rate": 9.1857319037902e-06, "loss": 0.1082, "step": 1329 }, { "epoch": 0.27, "learning_rate": 9.18382156729481e-06, "loss": 0.1065, "step": 1330 }, { "epoch": 0.27, "learning_rate": 9.181909191655613e-06, "loss": 0.1168, "step": 1331 }, { "epoch": 0.27, "learning_rate": 9.179994777804677e-06, "loss": 0.115, "step": 1332 }, { "epoch": 0.27, "learning_rate": 9.178078326675069e-06, "loss": 0.1129, "step": 1333 }, { "epoch": 0.27, "learning_rate": 9.176159839200838e-06, "loss": 0.1108, "step": 1334 }, { "epoch": 0.27, "learning_rate": 9.174239316317034e-06, "loss": 0.1121, "step": 1335 }, { "epoch": 0.27, "learning_rate": 9.172316758959695e-06, "loss": 0.1144, "step": 1336 }, { "epoch": 0.27, "learning_rate": 9.170392168065858e-06, "loss": 0.1217, "step": 1337 }, { "epoch": 0.27, "learning_rate": 9.168465544573538e-06, "loss": 0.1143, "step": 1338 }, { "epoch": 0.27, "learning_rate": 9.16653688942175e-06, "loss": 0.1077, "step": 1339 }, { "epoch": 0.27, "learning_rate": 9.164606203550498e-06, "loss": 0.1155, "step": 1340 }, { "epoch": 0.27, "learning_rate": 9.162673487900775e-06, "loss": 0.1128, "step": 1341 }, { "epoch": 0.27, "learning_rate": 9.160738743414564e-06, "loss": 0.1178, "step": 1342 }, { "epoch": 0.27, "learning_rate": 9.158801971034832e-06, "loss": 0.1146, "step": 1343 }, { "epoch": 0.27, "learning_rate": 9.156863171705543e-06, "loss": 0.1137, "step": 1344 }, { "epoch": 0.27, "learning_rate": 9.154922346371641e-06, "loss": 0.1956, "step": 1345 }, { "epoch": 0.27, "learning_rate": 9.152979495979064e-06, "loss": 0.1027, "step": 1346 }, { "epoch": 0.27, "learning_rate": 9.15103462147473e-06, "loss": 0.1029, "step": 1347 }, { "epoch": 0.27, "learning_rate": 9.14908772380655e-06, "loss": 0.1164, "step": 1348 }, { "epoch": 0.27, "learning_rate": 9.147138803923417e-06, "loss": 0.1098, "step": 1349 }, { "epoch": 0.27, "learning_rate": 9.145187862775208e-06, "loss": 0.1508, "step": 1350 }, { "epoch": 0.27, "learning_rate": 9.143234901312794e-06, "loss": 0.1066, "step": 1351 }, { "epoch": 0.27, "learning_rate": 9.141279920488021e-06, "loss": 0.1125, "step": 1352 }, { "epoch": 0.27, "learning_rate": 9.139322921253724e-06, "loss": 0.1092, "step": 1353 }, { "epoch": 0.27, "learning_rate": 9.13736390456372e-06, "loss": 0.1186, "step": 1354 }, { "epoch": 0.27, "learning_rate": 9.13540287137281e-06, "loss": 0.1147, "step": 1355 }, { "epoch": 0.27, "learning_rate": 9.133439822636779e-06, "loss": 0.1168, "step": 1356 }, { "epoch": 0.27, "learning_rate": 9.13147475931239e-06, "loss": 0.1173, "step": 1357 }, { "epoch": 0.27, "learning_rate": 9.129507682357393e-06, "loss": 0.1093, "step": 1358 }, { "epoch": 0.27, "learning_rate": 9.12753859273052e-06, "loss": 0.1159, "step": 1359 }, { "epoch": 0.27, "learning_rate": 9.125567491391476e-06, "loss": 0.1127, "step": 1360 }, { "epoch": 0.27, "learning_rate": 9.123594379300956e-06, "loss": 0.1074, "step": 1361 }, { "epoch": 0.27, "learning_rate": 9.12161925742063e-06, "loss": 0.1087, "step": 1362 }, { "epoch": 0.27, "learning_rate": 9.119642126713147e-06, "loss": 0.1216, "step": 1363 }, { "epoch": 0.27, "learning_rate": 9.117662988142138e-06, "loss": 0.1047, "step": 1364 }, { "epoch": 0.27, "learning_rate": 9.115681842672211e-06, "loss": 0.1149, "step": 1365 }, { "epoch": 0.27, "learning_rate": 9.11369869126895e-06, "loss": 0.1059, "step": 1366 }, { "epoch": 0.27, "learning_rate": 9.111713534898923e-06, "loss": 0.1098, "step": 1367 }, { "epoch": 0.27, "learning_rate": 9.109726374529666e-06, "loss": 0.1106, "step": 1368 }, { "epoch": 0.27, "learning_rate": 9.107737211129702e-06, "loss": 0.1051, "step": 1369 }, { "epoch": 0.27, "learning_rate": 9.10574604566852e-06, "loss": 0.1169, "step": 1370 }, { "epoch": 0.27, "learning_rate": 9.103752879116595e-06, "loss": 0.1113, "step": 1371 }, { "epoch": 0.27, "learning_rate": 9.101757712445369e-06, "loss": 0.1164, "step": 1372 }, { "epoch": 0.27, "learning_rate": 9.099760546627262e-06, "loss": 0.1, "step": 1373 }, { "epoch": 0.27, "learning_rate": 9.09776138263567e-06, "loss": 0.0614, "step": 1374 }, { "epoch": 0.28, "learning_rate": 9.09576022144496e-06, "loss": 0.0718, "step": 1375 }, { "epoch": 0.28, "learning_rate": 9.093757064030473e-06, "loss": 0.1191, "step": 1376 }, { "epoch": 0.28, "learning_rate": 9.091751911368524e-06, "loss": 0.1249, "step": 1377 }, { "epoch": 0.28, "learning_rate": 9.089744764436404e-06, "loss": 0.1102, "step": 1378 }, { "epoch": 0.28, "learning_rate": 9.087735624212365e-06, "loss": 0.1117, "step": 1379 }, { "epoch": 0.28, "learning_rate": 9.085724491675642e-06, "loss": 0.1195, "step": 1380 }, { "epoch": 0.28, "learning_rate": 9.083711367806438e-06, "loss": 0.162, "step": 1381 }, { "epoch": 0.28, "learning_rate": 9.08169625358592e-06, "loss": 0.1183, "step": 1382 }, { "epoch": 0.28, "learning_rate": 9.079679149996235e-06, "loss": 0.1063, "step": 1383 }, { "epoch": 0.28, "learning_rate": 9.077660058020492e-06, "loss": 0.1066, "step": 1384 }, { "epoch": 0.28, "learning_rate": 9.07563897864277e-06, "loss": 0.1086, "step": 1385 }, { "epoch": 0.28, "learning_rate": 9.073615912848126e-06, "loss": 0.1107, "step": 1386 }, { "epoch": 0.28, "learning_rate": 9.07159086162257e-06, "loss": 0.1155, "step": 1387 }, { "epoch": 0.28, "learning_rate": 9.069563825953092e-06, "loss": 0.1219, "step": 1388 }, { "epoch": 0.28, "learning_rate": 9.06753480682764e-06, "loss": 0.1044, "step": 1389 }, { "epoch": 0.28, "learning_rate": 9.065503805235139e-06, "loss": 0.1116, "step": 1390 }, { "epoch": 0.28, "learning_rate": 9.06347082216547e-06, "loss": 0.1141, "step": 1391 }, { "epoch": 0.28, "learning_rate": 9.061435858609486e-06, "loss": 0.1083, "step": 1392 }, { "epoch": 0.28, "learning_rate": 9.059398915559005e-06, "loss": 0.1104, "step": 1393 }, { "epoch": 0.28, "learning_rate": 9.057359994006806e-06, "loss": 0.1294, "step": 1394 }, { "epoch": 0.28, "learning_rate": 9.055319094946633e-06, "loss": 0.1189, "step": 1395 }, { "epoch": 0.28, "learning_rate": 9.0532762193732e-06, "loss": 0.1109, "step": 1396 }, { "epoch": 0.28, "learning_rate": 9.051231368282177e-06, "loss": 0.1162, "step": 1397 }, { "epoch": 0.28, "learning_rate": 9.0491845426702e-06, "loss": 0.1152, "step": 1398 }, { "epoch": 0.28, "learning_rate": 9.047135743534866e-06, "loss": 0.1113, "step": 1399 }, { "epoch": 0.28, "learning_rate": 9.045084971874738e-06, "loss": 0.1154, "step": 1400 }, { "epoch": 0.28, "learning_rate": 9.043032228689333e-06, "loss": 0.1093, "step": 1401 }, { "epoch": 0.28, "learning_rate": 9.040977514979136e-06, "loss": 0.1101, "step": 1402 }, { "epoch": 0.28, "learning_rate": 9.038920831745587e-06, "loss": 0.1286, "step": 1403 }, { "epoch": 0.28, "learning_rate": 9.036862179991092e-06, "loss": 0.12, "step": 1404 }, { "epoch": 0.28, "learning_rate": 9.03480156071901e-06, "loss": 0.1119, "step": 1405 }, { "epoch": 0.28, "learning_rate": 9.032738974933663e-06, "loss": 0.1075, "step": 1406 }, { "epoch": 0.28, "learning_rate": 9.03067442364033e-06, "loss": 0.11, "step": 1407 }, { "epoch": 0.28, "learning_rate": 9.028607907845247e-06, "loss": 0.1164, "step": 1408 }, { "epoch": 0.28, "learning_rate": 9.026539428555609e-06, "loss": 0.1162, "step": 1409 }, { "epoch": 0.28, "learning_rate": 9.02446898677957e-06, "loss": 0.1043, "step": 1410 }, { "epoch": 0.28, "learning_rate": 9.022396583526238e-06, "loss": 0.3045, "step": 1411 }, { "epoch": 0.28, "learning_rate": 9.020322219805674e-06, "loss": 0.109, "step": 1412 }, { "epoch": 0.28, "learning_rate": 9.0182458966289e-06, "loss": 0.1293, "step": 1413 }, { "epoch": 0.28, "learning_rate": 9.01616761500789e-06, "loss": 0.2057, "step": 1414 }, { "epoch": 0.28, "learning_rate": 9.014087375955574e-06, "loss": 0.1107, "step": 1415 }, { "epoch": 0.28, "learning_rate": 9.012005180485834e-06, "loss": 0.1256, "step": 1416 }, { "epoch": 0.28, "learning_rate": 9.009921029613506e-06, "loss": 0.129, "step": 1417 }, { "epoch": 0.28, "learning_rate": 9.007834924354384e-06, "loss": 0.1089, "step": 1418 }, { "epoch": 0.28, "learning_rate": 9.005746865725206e-06, "loss": 0.1158, "step": 1419 }, { "epoch": 0.28, "learning_rate": 9.003656854743667e-06, "loss": 0.1057, "step": 1420 }, { "epoch": 0.28, "learning_rate": 9.001564892428416e-06, "loss": 0.1117, "step": 1421 }, { "epoch": 0.28, "learning_rate": 8.999470979799048e-06, "loss": 0.1206, "step": 1422 }, { "epoch": 0.28, "learning_rate": 8.99737511787611e-06, "loss": 0.1146, "step": 1423 }, { "epoch": 0.28, "learning_rate": 8.9952773076811e-06, "loss": 0.1208, "step": 1424 }, { "epoch": 0.28, "learning_rate": 8.993177550236464e-06, "loss": 0.1052, "step": 1425 }, { "epoch": 0.29, "learning_rate": 8.991075846565603e-06, "loss": 0.1139, "step": 1426 }, { "epoch": 0.29, "learning_rate": 8.988972197692857e-06, "loss": 0.1035, "step": 1427 }, { "epoch": 0.29, "learning_rate": 8.986866604643518e-06, "loss": 0.1043, "step": 1428 }, { "epoch": 0.29, "learning_rate": 8.984759068443832e-06, "loss": 0.106, "step": 1429 }, { "epoch": 0.29, "learning_rate": 8.982649590120982e-06, "loss": 0.1132, "step": 1430 }, { "epoch": 0.29, "learning_rate": 8.980538170703104e-06, "loss": 0.1082, "step": 1431 }, { "epoch": 0.29, "learning_rate": 8.978424811219277e-06, "loss": 0.1385, "step": 1432 }, { "epoch": 0.29, "learning_rate": 8.97630951269953e-06, "loss": 0.1057, "step": 1433 }, { "epoch": 0.29, "learning_rate": 8.97419227617483e-06, "loss": 0.083, "step": 1434 }, { "epoch": 0.29, "learning_rate": 8.972073102677091e-06, "loss": 0.1045, "step": 1435 }, { "epoch": 0.29, "learning_rate": 8.969951993239177e-06, "loss": 0.116, "step": 1436 }, { "epoch": 0.29, "learning_rate": 8.96782894889489e-06, "loss": 0.1263, "step": 1437 }, { "epoch": 0.29, "learning_rate": 8.965703970678974e-06, "loss": 0.1315, "step": 1438 }, { "epoch": 0.29, "learning_rate": 8.963577059627117e-06, "loss": 0.1076, "step": 1439 }, { "epoch": 0.29, "learning_rate": 8.961448216775955e-06, "loss": 0.1148, "step": 1440 }, { "epoch": 0.29, "learning_rate": 8.959317443163054e-06, "loss": 0.1028, "step": 1441 }, { "epoch": 0.29, "learning_rate": 8.957184739826929e-06, "loss": 0.1101, "step": 1442 }, { "epoch": 0.29, "learning_rate": 8.955050107807035e-06, "loss": 0.1053, "step": 1443 }, { "epoch": 0.29, "learning_rate": 8.952913548143766e-06, "loss": 0.1121, "step": 1444 }, { "epoch": 0.29, "learning_rate": 8.950775061878453e-06, "loss": 0.1133, "step": 1445 }, { "epoch": 0.29, "learning_rate": 8.94863465005337e-06, "loss": 0.1131, "step": 1446 }, { "epoch": 0.29, "learning_rate": 8.946492313711725e-06, "loss": 0.106, "step": 1447 }, { "epoch": 0.29, "learning_rate": 8.944348053897672e-06, "loss": 0.1105, "step": 1448 }, { "epoch": 0.29, "learning_rate": 8.942201871656292e-06, "loss": 0.1029, "step": 1449 }, { "epoch": 0.29, "learning_rate": 8.94005376803361e-06, "loss": 0.1125, "step": 1450 }, { "epoch": 0.29, "learning_rate": 8.937903744076587e-06, "loss": 0.106, "step": 1451 }, { "epoch": 0.29, "learning_rate": 8.935751800833117e-06, "loss": 0.1082, "step": 1452 }, { "epoch": 0.29, "learning_rate": 8.933597939352031e-06, "loss": 0.1071, "step": 1453 }, { "epoch": 0.29, "learning_rate": 8.931442160683094e-06, "loss": 0.1042, "step": 1454 }, { "epoch": 0.29, "learning_rate": 8.92928446587701e-06, "loss": 0.1077, "step": 1455 }, { "epoch": 0.29, "learning_rate": 8.92712485598541e-06, "loss": 0.1048, "step": 1456 }, { "epoch": 0.29, "learning_rate": 8.924963332060863e-06, "loss": 0.1165, "step": 1457 }, { "epoch": 0.29, "learning_rate": 8.922799895156868e-06, "loss": 0.1128, "step": 1458 }, { "epoch": 0.29, "learning_rate": 8.920634546327857e-06, "loss": 0.1428, "step": 1459 }, { "epoch": 0.29, "learning_rate": 8.9184672866292e-06, "loss": 0.1182, "step": 1460 }, { "epoch": 0.29, "learning_rate": 8.916298117117188e-06, "loss": 0.1152, "step": 1461 }, { "epoch": 0.29, "learning_rate": 8.91412703884905e-06, "loss": 0.1514, "step": 1462 }, { "epoch": 0.29, "learning_rate": 8.911954052882941e-06, "loss": 0.1093, "step": 1463 }, { "epoch": 0.29, "learning_rate": 8.909779160277951e-06, "loss": 0.1144, "step": 1464 }, { "epoch": 0.29, "learning_rate": 8.907602362094094e-06, "loss": 0.113, "step": 1465 }, { "epoch": 0.29, "learning_rate": 8.905423659392316e-06, "loss": 0.1417, "step": 1466 }, { "epoch": 0.29, "learning_rate": 8.903243053234492e-06, "loss": 0.1082, "step": 1467 }, { "epoch": 0.29, "learning_rate": 8.90106054468342e-06, "loss": 0.1034, "step": 1468 }, { "epoch": 0.29, "learning_rate": 8.898876134802827e-06, "loss": 0.1081, "step": 1469 }, { "epoch": 0.29, "learning_rate": 8.896689824657371e-06, "loss": 0.1281, "step": 1470 }, { "epoch": 0.29, "learning_rate": 8.894501615312633e-06, "loss": 0.1106, "step": 1471 }, { "epoch": 0.29, "learning_rate": 8.892311507835118e-06, "loss": 0.1292, "step": 1472 }, { "epoch": 0.29, "learning_rate": 8.890119503292258e-06, "loss": 0.121, "step": 1473 }, { "epoch": 0.29, "learning_rate": 8.887925602752411e-06, "loss": 0.113, "step": 1474 }, { "epoch": 0.29, "learning_rate": 8.885729807284855e-06, "loss": 0.1064, "step": 1475 }, { "epoch": 0.3, "learning_rate": 8.883532117959797e-06, "loss": 0.103, "step": 1476 }, { "epoch": 0.3, "learning_rate": 8.88133253584836e-06, "loss": 0.1144, "step": 1477 }, { "epoch": 0.3, "learning_rate": 8.879131062022598e-06, "loss": 0.1129, "step": 1478 }, { "epoch": 0.3, "learning_rate": 8.87692769755548e-06, "loss": 0.1106, "step": 1479 }, { "epoch": 0.3, "learning_rate": 8.874722443520898e-06, "loss": 0.1147, "step": 1480 }, { "epoch": 0.3, "learning_rate": 8.872515300993669e-06, "loss": 0.1048, "step": 1481 }, { "epoch": 0.3, "learning_rate": 8.870306271049527e-06, "loss": 0.1065, "step": 1482 }, { "epoch": 0.3, "learning_rate": 8.868095354765125e-06, "loss": 0.1135, "step": 1483 }, { "epoch": 0.3, "learning_rate": 8.865882553218036e-06, "loss": 0.1139, "step": 1484 }, { "epoch": 0.3, "learning_rate": 8.863667867486756e-06, "loss": 0.1104, "step": 1485 }, { "epoch": 0.3, "learning_rate": 8.861451298650692e-06, "loss": 0.1093, "step": 1486 }, { "epoch": 0.3, "learning_rate": 8.859232847790175e-06, "loss": 0.1103, "step": 1487 }, { "epoch": 0.3, "learning_rate": 8.857012515986452e-06, "loss": 0.1059, "step": 1488 }, { "epoch": 0.3, "learning_rate": 8.854790304321682e-06, "loss": 0.1057, "step": 1489 }, { "epoch": 0.3, "learning_rate": 8.852566213878947e-06, "loss": 0.1021, "step": 1490 }, { "epoch": 0.3, "learning_rate": 8.85034024574224e-06, "loss": 0.1051, "step": 1491 }, { "epoch": 0.3, "learning_rate": 8.848112400996473e-06, "loss": 0.1091, "step": 1492 }, { "epoch": 0.3, "learning_rate": 8.84588268072747e-06, "loss": 0.1021, "step": 1493 }, { "epoch": 0.3, "learning_rate": 8.843651086021966e-06, "loss": 0.1098, "step": 1494 }, { "epoch": 0.3, "learning_rate": 8.841417617967618e-06, "loss": 0.2254, "step": 1495 }, { "epoch": 0.3, "learning_rate": 8.83918227765299e-06, "loss": 0.1635, "step": 1496 }, { "epoch": 0.3, "learning_rate": 8.836945066167556e-06, "loss": 0.1439, "step": 1497 }, { "epoch": 0.3, "learning_rate": 8.834705984601708e-06, "loss": 0.1064, "step": 1498 }, { "epoch": 0.3, "learning_rate": 8.83246503404675e-06, "loss": 0.1194, "step": 1499 }, { "epoch": 0.3, "learning_rate": 8.83022221559489e-06, "loss": 0.1048, "step": 1500 }, { "epoch": 0.3, "learning_rate": 8.827977530339254e-06, "loss": 0.1145, "step": 1501 }, { "epoch": 0.3, "learning_rate": 8.825730979373873e-06, "loss": 0.105, "step": 1502 }, { "epoch": 0.3, "learning_rate": 8.823482563793687e-06, "loss": 0.1108, "step": 1503 }, { "epoch": 0.3, "learning_rate": 8.821232284694545e-06, "loss": 0.1158, "step": 1504 }, { "epoch": 0.3, "learning_rate": 8.818980143173212e-06, "loss": 0.1149, "step": 1505 }, { "epoch": 0.3, "learning_rate": 8.81672614032735e-06, "loss": 0.1229, "step": 1506 }, { "epoch": 0.3, "learning_rate": 8.814470277255532e-06, "loss": 0.1477, "step": 1507 }, { "epoch": 0.3, "learning_rate": 8.81221255505724e-06, "loss": 0.1201, "step": 1508 }, { "epoch": 0.3, "learning_rate": 8.80995297483286e-06, "loss": 0.1162, "step": 1509 }, { "epoch": 0.3, "learning_rate": 8.807691537683685e-06, "loss": 0.1329, "step": 1510 }, { "epoch": 0.3, "learning_rate": 8.80542824471191e-06, "loss": 0.1129, "step": 1511 }, { "epoch": 0.3, "learning_rate": 8.803163097020637e-06, "loss": 0.1046, "step": 1512 }, { "epoch": 0.3, "learning_rate": 8.80089609571387e-06, "loss": 0.1073, "step": 1513 }, { "epoch": 0.3, "learning_rate": 8.798627241896524e-06, "loss": 0.1196, "step": 1514 }, { "epoch": 0.3, "learning_rate": 8.796356536674404e-06, "loss": 0.113, "step": 1515 }, { "epoch": 0.3, "learning_rate": 8.794083981154229e-06, "loss": 0.1231, "step": 1516 }, { "epoch": 0.3, "learning_rate": 8.791809576443611e-06, "loss": 0.1008, "step": 1517 }, { "epoch": 0.3, "learning_rate": 8.789533323651067e-06, "loss": 0.1095, "step": 1518 }, { "epoch": 0.3, "learning_rate": 8.78725522388602e-06, "loss": 0.1122, "step": 1519 }, { "epoch": 0.3, "learning_rate": 8.784975278258783e-06, "loss": 0.1146, "step": 1520 }, { "epoch": 0.3, "learning_rate": 8.782693487880575e-06, "loss": 0.1061, "step": 1521 }, { "epoch": 0.3, "learning_rate": 8.780409853863517e-06, "loss": 0.0945, "step": 1522 }, { "epoch": 0.3, "learning_rate": 8.778124377320619e-06, "loss": 0.1135, "step": 1523 }, { "epoch": 0.3, "learning_rate": 8.775837059365796e-06, "loss": 0.1146, "step": 1524 }, { "epoch": 0.3, "learning_rate": 8.773547901113862e-06, "loss": 0.1184, "step": 1525 }, { "epoch": 0.31, "learning_rate": 8.77125690368052e-06, "loss": 0.1084, "step": 1526 }, { "epoch": 0.31, "learning_rate": 8.768964068182378e-06, "loss": 0.1365, "step": 1527 }, { "epoch": 0.31, "learning_rate": 8.766669395736936e-06, "loss": 0.106, "step": 1528 }, { "epoch": 0.31, "learning_rate": 8.764372887462587e-06, "loss": 0.1118, "step": 1529 }, { "epoch": 0.31, "learning_rate": 8.762074544478622e-06, "loss": 0.1089, "step": 1530 }, { "epoch": 0.31, "learning_rate": 8.759774367905228e-06, "loss": 0.1182, "step": 1531 }, { "epoch": 0.31, "learning_rate": 8.757472358863481e-06, "loss": 0.1143, "step": 1532 }, { "epoch": 0.31, "learning_rate": 8.755168518475351e-06, "loss": 0.1187, "step": 1533 }, { "epoch": 0.31, "learning_rate": 8.752862847863707e-06, "loss": 0.1108, "step": 1534 }, { "epoch": 0.31, "learning_rate": 8.750555348152299e-06, "loss": 0.1068, "step": 1535 }, { "epoch": 0.31, "learning_rate": 8.748246020465776e-06, "loss": 0.113, "step": 1536 }, { "epoch": 0.31, "learning_rate": 8.745934865929676e-06, "loss": 0.1023, "step": 1537 }, { "epoch": 0.31, "learning_rate": 8.743621885670431e-06, "loss": 0.1118, "step": 1538 }, { "epoch": 0.31, "learning_rate": 8.741307080815357e-06, "loss": 0.1226, "step": 1539 }, { "epoch": 0.31, "learning_rate": 8.73899045249266e-06, "loss": 0.1046, "step": 1540 }, { "epoch": 0.31, "learning_rate": 8.736672001831438e-06, "loss": 0.1064, "step": 1541 }, { "epoch": 0.31, "learning_rate": 8.73435172996168e-06, "loss": 0.1035, "step": 1542 }, { "epoch": 0.31, "learning_rate": 8.732029638014249e-06, "loss": 0.1066, "step": 1543 }, { "epoch": 0.31, "learning_rate": 8.729705727120911e-06, "loss": 0.1282, "step": 1544 }, { "epoch": 0.31, "learning_rate": 8.727379998414311e-06, "loss": 0.1082, "step": 1545 }, { "epoch": 0.31, "learning_rate": 8.725052453027982e-06, "loss": 0.1105, "step": 1546 }, { "epoch": 0.31, "learning_rate": 8.722723092096337e-06, "loss": 0.1091, "step": 1547 }, { "epoch": 0.31, "learning_rate": 8.720391916754683e-06, "loss": 0.1198, "step": 1548 }, { "epoch": 0.31, "learning_rate": 8.718058928139205e-06, "loss": 0.1018, "step": 1549 }, { "epoch": 0.31, "learning_rate": 8.715724127386971e-06, "loss": 0.1111, "step": 1550 }, { "epoch": 0.31, "learning_rate": 8.713387515635938e-06, "loss": 0.1108, "step": 1551 }, { "epoch": 0.31, "learning_rate": 8.711049094024942e-06, "loss": 0.1053, "step": 1552 }, { "epoch": 0.31, "learning_rate": 8.708708863693696e-06, "loss": 0.1054, "step": 1553 }, { "epoch": 0.31, "learning_rate": 8.706366825782805e-06, "loss": 0.1071, "step": 1554 }, { "epoch": 0.31, "learning_rate": 8.70402298143375e-06, "loss": 0.119, "step": 1555 }, { "epoch": 0.31, "learning_rate": 8.701677331788891e-06, "loss": 0.1311, "step": 1556 }, { "epoch": 0.31, "learning_rate": 8.699329877991469e-06, "loss": 0.1056, "step": 1557 }, { "epoch": 0.31, "learning_rate": 8.696980621185602e-06, "loss": 0.1097, "step": 1558 }, { "epoch": 0.31, "learning_rate": 8.694629562516295e-06, "loss": 0.1099, "step": 1559 }, { "epoch": 0.31, "learning_rate": 8.692276703129421e-06, "loss": 0.1071, "step": 1560 }, { "epoch": 0.31, "learning_rate": 8.689922044171735e-06, "loss": 0.1261, "step": 1561 }, { "epoch": 0.31, "learning_rate": 8.68756558679087e-06, "loss": 0.12, "step": 1562 }, { "epoch": 0.31, "learning_rate": 8.685207332135337e-06, "loss": 0.1047, "step": 1563 }, { "epoch": 0.31, "learning_rate": 8.682847281354517e-06, "loss": 0.0982, "step": 1564 }, { "epoch": 0.31, "learning_rate": 8.680485435598674e-06, "loss": 0.1248, "step": 1565 }, { "epoch": 0.31, "learning_rate": 8.678121796018938e-06, "loss": 0.1188, "step": 1566 }, { "epoch": 0.31, "learning_rate": 8.675756363767322e-06, "loss": 0.1013, "step": 1567 }, { "epoch": 0.31, "learning_rate": 8.673389139996708e-06, "loss": 0.2168, "step": 1568 }, { "epoch": 0.31, "learning_rate": 8.671020125860851e-06, "loss": 0.1109, "step": 1569 }, { "epoch": 0.31, "learning_rate": 8.668649322514382e-06, "loss": 0.1086, "step": 1570 }, { "epoch": 0.31, "learning_rate": 8.666276731112802e-06, "loss": 0.1165, "step": 1571 }, { "epoch": 0.31, "learning_rate": 8.66390235281248e-06, "loss": 0.1074, "step": 1572 }, { "epoch": 0.31, "learning_rate": 8.66152618877066e-06, "loss": 0.1079, "step": 1573 }, { "epoch": 0.31, "learning_rate": 8.659148240145456e-06, "loss": 0.1084, "step": 1574 }, { "epoch": 0.32, "learning_rate": 8.656768508095853e-06, "loss": 0.1051, "step": 1575 }, { "epoch": 0.32, "learning_rate": 8.654386993781703e-06, "loss": 0.1188, "step": 1576 }, { "epoch": 0.32, "learning_rate": 8.652003698363724e-06, "loss": 0.1196, "step": 1577 }, { "epoch": 0.32, "learning_rate": 8.649618623003509e-06, "loss": 0.111, "step": 1578 }, { "epoch": 0.32, "learning_rate": 8.647231768863513e-06, "loss": 0.1029, "step": 1579 }, { "epoch": 0.32, "learning_rate": 8.644843137107058e-06, "loss": 0.1104, "step": 1580 }, { "epoch": 0.32, "learning_rate": 8.642452728898339e-06, "loss": 0.1075, "step": 1581 }, { "epoch": 0.32, "learning_rate": 8.640060545402407e-06, "loss": 0.1057, "step": 1582 }, { "epoch": 0.32, "learning_rate": 8.637666587785185e-06, "loss": 0.107, "step": 1583 }, { "epoch": 0.32, "learning_rate": 8.63527085721346e-06, "loss": 0.1039, "step": 1584 }, { "epoch": 0.32, "learning_rate": 8.632873354854881e-06, "loss": 0.1103, "step": 1585 }, { "epoch": 0.32, "learning_rate": 8.630474081877959e-06, "loss": 0.1076, "step": 1586 }, { "epoch": 0.32, "learning_rate": 8.628073039452076e-06, "loss": 0.1209, "step": 1587 }, { "epoch": 0.32, "learning_rate": 8.625670228747467e-06, "loss": 0.1198, "step": 1588 }, { "epoch": 0.32, "learning_rate": 8.623265650935233e-06, "loss": 0.1059, "step": 1589 }, { "epoch": 0.32, "learning_rate": 8.620859307187339e-06, "loss": 0.115, "step": 1590 }, { "epoch": 0.32, "learning_rate": 8.618451198676602e-06, "loss": 0.1006, "step": 1591 }, { "epoch": 0.32, "learning_rate": 8.616041326576711e-06, "loss": 0.1146, "step": 1592 }, { "epoch": 0.32, "learning_rate": 8.613629692062204e-06, "loss": 0.1398, "step": 1593 }, { "epoch": 0.32, "learning_rate": 8.611216296308485e-06, "loss": 0.1079, "step": 1594 }, { "epoch": 0.32, "learning_rate": 8.608801140491811e-06, "loss": 0.1307, "step": 1595 }, { "epoch": 0.32, "learning_rate": 8.606384225789304e-06, "loss": 0.1094, "step": 1596 }, { "epoch": 0.32, "learning_rate": 8.603965553378934e-06, "loss": 0.1126, "step": 1597 }, { "epoch": 0.32, "learning_rate": 8.601545124439535e-06, "loss": 0.1144, "step": 1598 }, { "epoch": 0.32, "learning_rate": 8.599122940150795e-06, "loss": 0.1099, "step": 1599 }, { "epoch": 0.32, "learning_rate": 8.596699001693257e-06, "loss": 0.1103, "step": 1600 }, { "epoch": 0.32, "learning_rate": 8.594273310248317e-06, "loss": 0.1032, "step": 1601 }, { "epoch": 0.32, "learning_rate": 8.591845866998231e-06, "loss": 0.1109, "step": 1602 }, { "epoch": 0.32, "learning_rate": 8.589416673126104e-06, "loss": 0.1036, "step": 1603 }, { "epoch": 0.32, "learning_rate": 8.586985729815895e-06, "loss": 0.1248, "step": 1604 }, { "epoch": 0.32, "learning_rate": 8.584553038252415e-06, "loss": 0.1446, "step": 1605 }, { "epoch": 0.32, "learning_rate": 8.58211859962133e-06, "loss": 0.1288, "step": 1606 }, { "epoch": 0.32, "learning_rate": 8.579682415109156e-06, "loss": 0.1083, "step": 1607 }, { "epoch": 0.32, "learning_rate": 8.57724448590326e-06, "loss": 0.1056, "step": 1608 }, { "epoch": 0.32, "learning_rate": 8.574804813191859e-06, "loss": 0.1039, "step": 1609 }, { "epoch": 0.32, "learning_rate": 8.572363398164017e-06, "loss": 0.0992, "step": 1610 }, { "epoch": 0.32, "learning_rate": 8.569920242009655e-06, "loss": 0.1215, "step": 1611 }, { "epoch": 0.32, "learning_rate": 8.567475345919532e-06, "loss": 0.1034, "step": 1612 }, { "epoch": 0.32, "learning_rate": 8.565028711085266e-06, "loss": 0.1061, "step": 1613 }, { "epoch": 0.32, "learning_rate": 8.562580338699313e-06, "loss": 0.1072, "step": 1614 }, { "epoch": 0.32, "learning_rate": 8.560130229954985e-06, "loss": 0.1056, "step": 1615 }, { "epoch": 0.32, "learning_rate": 8.557678386046429e-06, "loss": 0.1072, "step": 1616 }, { "epoch": 0.32, "learning_rate": 8.555224808168644e-06, "loss": 0.1266, "step": 1617 }, { "epoch": 0.32, "learning_rate": 8.55276949751748e-06, "loss": 0.1302, "step": 1618 }, { "epoch": 0.32, "learning_rate": 8.550312455289624e-06, "loss": 0.1028, "step": 1619 }, { "epoch": 0.32, "learning_rate": 8.547853682682605e-06, "loss": 0.1032, "step": 1620 }, { "epoch": 0.32, "learning_rate": 8.545393180894801e-06, "loss": 0.1047, "step": 1621 }, { "epoch": 0.32, "learning_rate": 8.542930951125432e-06, "loss": 0.1101, "step": 1622 }, { "epoch": 0.32, "learning_rate": 8.540466994574556e-06, "loss": 0.1124, "step": 1623 }, { "epoch": 0.32, "learning_rate": 8.538001312443078e-06, "loss": 0.1033, "step": 1624 }, { "epoch": 0.33, "learning_rate": 8.535533905932739e-06, "loss": 0.1135, "step": 1625 }, { "epoch": 0.33, "learning_rate": 8.533064776246126e-06, "loss": 0.1037, "step": 1626 }, { "epoch": 0.33, "learning_rate": 8.530593924586659e-06, "loss": 0.1033, "step": 1627 }, { "epoch": 0.33, "learning_rate": 8.528121352158604e-06, "loss": 0.1117, "step": 1628 }, { "epoch": 0.33, "learning_rate": 8.525647060167063e-06, "loss": 0.1137, "step": 1629 }, { "epoch": 0.33, "learning_rate": 8.523171049817974e-06, "loss": 0.1031, "step": 1630 }, { "epoch": 0.33, "learning_rate": 8.520693322318116e-06, "loss": 0.11, "step": 1631 }, { "epoch": 0.33, "learning_rate": 8.518213878875103e-06, "loss": 0.1247, "step": 1632 }, { "epoch": 0.33, "learning_rate": 8.515732720697383e-06, "loss": 0.1116, "step": 1633 }, { "epoch": 0.33, "learning_rate": 8.513249848994248e-06, "loss": 0.1022, "step": 1634 }, { "epoch": 0.33, "learning_rate": 8.510765264975813e-06, "loss": 0.1003, "step": 1635 }, { "epoch": 0.33, "learning_rate": 8.508278969853037e-06, "loss": 0.1069, "step": 1636 }, { "epoch": 0.33, "learning_rate": 8.505790964837712e-06, "loss": 0.1049, "step": 1637 }, { "epoch": 0.33, "learning_rate": 8.50330125114246e-06, "loss": 0.0954, "step": 1638 }, { "epoch": 0.33, "learning_rate": 8.500809829980734e-06, "loss": 0.1084, "step": 1639 }, { "epoch": 0.33, "learning_rate": 8.498316702566828e-06, "loss": 0.1056, "step": 1640 }, { "epoch": 0.33, "learning_rate": 8.495821870115857e-06, "loss": 0.0986, "step": 1641 }, { "epoch": 0.33, "learning_rate": 8.493325333843776e-06, "loss": 0.1094, "step": 1642 }, { "epoch": 0.33, "learning_rate": 8.490827094967364e-06, "loss": 0.123, "step": 1643 }, { "epoch": 0.33, "learning_rate": 8.488327154704232e-06, "loss": 0.1072, "step": 1644 }, { "epoch": 0.33, "learning_rate": 8.485825514272824e-06, "loss": 0.102, "step": 1645 }, { "epoch": 0.33, "learning_rate": 8.483322174892404e-06, "loss": 0.1025, "step": 1646 }, { "epoch": 0.33, "learning_rate": 8.480817137783073e-06, "loss": 0.0982, "step": 1647 }, { "epoch": 0.33, "learning_rate": 8.478310404165756e-06, "loss": 0.1214, "step": 1648 }, { "epoch": 0.33, "learning_rate": 8.4758019752622e-06, "loss": 0.1032, "step": 1649 }, { "epoch": 0.33, "learning_rate": 8.473291852294986e-06, "loss": 0.1077, "step": 1650 }, { "epoch": 0.33, "learning_rate": 8.47078003648752e-06, "loss": 0.1062, "step": 1651 }, { "epoch": 0.33, "learning_rate": 8.468266529064025e-06, "loss": 0.1072, "step": 1652 }, { "epoch": 0.33, "learning_rate": 8.465751331249558e-06, "loss": 0.1099, "step": 1653 }, { "epoch": 0.33, "learning_rate": 8.463234444269994e-06, "loss": 0.1109, "step": 1654 }, { "epoch": 0.33, "learning_rate": 8.460715869352035e-06, "loss": 0.1151, "step": 1655 }, { "epoch": 0.33, "learning_rate": 8.458195607723201e-06, "loss": 0.1509, "step": 1656 }, { "epoch": 0.33, "learning_rate": 8.45567366061184e-06, "loss": 0.1078, "step": 1657 }, { "epoch": 0.33, "learning_rate": 8.453150029247115e-06, "loss": 0.1031, "step": 1658 }, { "epoch": 0.33, "learning_rate": 8.450624714859016e-06, "loss": 0.1366, "step": 1659 }, { "epoch": 0.33, "learning_rate": 8.44809771867835e-06, "loss": 0.1038, "step": 1660 }, { "epoch": 0.33, "learning_rate": 8.445569041936743e-06, "loss": 0.1053, "step": 1661 }, { "epoch": 0.33, "learning_rate": 8.443038685866643e-06, "loss": 0.1134, "step": 1662 }, { "epoch": 0.33, "learning_rate": 8.440506651701315e-06, "loss": 0.1081, "step": 1663 }, { "epoch": 0.33, "learning_rate": 8.437972940674838e-06, "loss": 0.0988, "step": 1664 }, { "epoch": 0.33, "learning_rate": 8.435437554022116e-06, "loss": 0.1332, "step": 1665 }, { "epoch": 0.33, "learning_rate": 8.432900492978864e-06, "loss": 0.1031, "step": 1666 }, { "epoch": 0.33, "learning_rate": 8.430361758781616e-06, "loss": 0.1071, "step": 1667 }, { "epoch": 0.33, "learning_rate": 8.427821352667719e-06, "loss": 0.1147, "step": 1668 }, { "epoch": 0.33, "learning_rate": 8.425279275875336e-06, "loss": 0.1059, "step": 1669 }, { "epoch": 0.33, "learning_rate": 8.422735529643445e-06, "loss": 0.1097, "step": 1670 }, { "epoch": 0.33, "learning_rate": 8.420190115211835e-06, "loss": 0.1092, "step": 1671 }, { "epoch": 0.33, "learning_rate": 8.417643033821114e-06, "loss": 0.1083, "step": 1672 }, { "epoch": 0.33, "learning_rate": 8.415094286712694e-06, "loss": 0.1117, "step": 1673 }, { "epoch": 0.33, "learning_rate": 8.412543875128809e-06, "loss": 0.1063, "step": 1674 }, { "epoch": 0.34, "learning_rate": 8.409991800312493e-06, "loss": 0.1069, "step": 1675 }, { "epoch": 0.34, "learning_rate": 8.4074380635076e-06, "loss": 0.0993, "step": 1676 }, { "epoch": 0.34, "learning_rate": 8.404882665958788e-06, "loss": 0.0966, "step": 1677 }, { "epoch": 0.34, "learning_rate": 8.402325608911527e-06, "loss": 0.1037, "step": 1678 }, { "epoch": 0.34, "learning_rate": 8.399766893612096e-06, "loss": 0.1084, "step": 1679 }, { "epoch": 0.34, "learning_rate": 8.397206521307584e-06, "loss": 0.1035, "step": 1680 }, { "epoch": 0.34, "learning_rate": 8.394644493245882e-06, "loss": 0.1047, "step": 1681 }, { "epoch": 0.34, "learning_rate": 8.392080810675692e-06, "loss": 0.1018, "step": 1682 }, { "epoch": 0.34, "learning_rate": 8.389515474846522e-06, "loss": 0.105, "step": 1683 }, { "epoch": 0.34, "learning_rate": 8.386948487008687e-06, "loss": 0.1202, "step": 1684 }, { "epoch": 0.34, "learning_rate": 8.384379848413304e-06, "loss": 0.1466, "step": 1685 }, { "epoch": 0.34, "learning_rate": 8.381809560312298e-06, "loss": 0.1398, "step": 1686 }, { "epoch": 0.34, "learning_rate": 8.379237623958393e-06, "loss": 0.1138, "step": 1687 }, { "epoch": 0.34, "learning_rate": 8.376664040605122e-06, "loss": 0.1319, "step": 1688 }, { "epoch": 0.34, "learning_rate": 8.374088811506819e-06, "loss": 0.1039, "step": 1689 }, { "epoch": 0.34, "learning_rate": 8.371511937918616e-06, "loss": 0.1035, "step": 1690 }, { "epoch": 0.34, "learning_rate": 8.368933421096454e-06, "loss": 0.1025, "step": 1691 }, { "epoch": 0.34, "learning_rate": 8.366353262297069e-06, "loss": 0.1056, "step": 1692 }, { "epoch": 0.34, "learning_rate": 8.363771462778e-06, "loss": 0.1081, "step": 1693 }, { "epoch": 0.34, "learning_rate": 8.361188023797581e-06, "loss": 0.1138, "step": 1694 }, { "epoch": 0.34, "learning_rate": 8.358602946614952e-06, "loss": 0.1033, "step": 1695 }, { "epoch": 0.34, "learning_rate": 8.356016232490047e-06, "loss": 0.1097, "step": 1696 }, { "epoch": 0.34, "learning_rate": 8.353427882683601e-06, "loss": 0.1133, "step": 1697 }, { "epoch": 0.34, "learning_rate": 8.350837898457142e-06, "loss": 0.0997, "step": 1698 }, { "epoch": 0.34, "learning_rate": 8.348246281072998e-06, "loss": 0.1091, "step": 1699 }, { "epoch": 0.34, "learning_rate": 8.345653031794292e-06, "loss": 0.1137, "step": 1700 }, { "epoch": 0.34, "learning_rate": 8.343058151884942e-06, "loss": 0.1008, "step": 1701 }, { "epoch": 0.34, "learning_rate": 8.34046164260966e-06, "loss": 0.1082, "step": 1702 }, { "epoch": 0.34, "learning_rate": 8.337863505233954e-06, "loss": 0.1072, "step": 1703 }, { "epoch": 0.34, "learning_rate": 8.335263741024123e-06, "loss": 0.1041, "step": 1704 }, { "epoch": 0.34, "learning_rate": 8.332662351247262e-06, "loss": 0.1049, "step": 1705 }, { "epoch": 0.34, "learning_rate": 8.33005933717126e-06, "loss": 0.1024, "step": 1706 }, { "epoch": 0.34, "learning_rate": 8.327454700064788e-06, "loss": 0.1017, "step": 1707 }, { "epoch": 0.34, "learning_rate": 8.324848441197317e-06, "loss": 0.0979, "step": 1708 }, { "epoch": 0.34, "learning_rate": 8.32224056183911e-06, "loss": 0.1153, "step": 1709 }, { "epoch": 0.34, "learning_rate": 8.319631063261209e-06, "loss": 0.1078, "step": 1710 }, { "epoch": 0.34, "learning_rate": 8.317019946735456e-06, "loss": 0.0985, "step": 1711 }, { "epoch": 0.34, "learning_rate": 8.314407213534477e-06, "loss": 0.1067, "step": 1712 }, { "epoch": 0.34, "learning_rate": 8.311792864931686e-06, "loss": 0.1082, "step": 1713 }, { "epoch": 0.34, "learning_rate": 8.309176902201283e-06, "loss": 0.1121, "step": 1714 }, { "epoch": 0.34, "learning_rate": 8.30655932661826e-06, "loss": 0.109, "step": 1715 }, { "epoch": 0.34, "learning_rate": 8.303940139458389e-06, "loss": 0.0992, "step": 1716 }, { "epoch": 0.34, "learning_rate": 8.301319341998231e-06, "loss": 0.1125, "step": 1717 }, { "epoch": 0.34, "learning_rate": 8.298696935515132e-06, "loss": 0.1079, "step": 1718 }, { "epoch": 0.34, "learning_rate": 8.296072921287217e-06, "loss": 0.0987, "step": 1719 }, { "epoch": 0.34, "learning_rate": 8.293447300593402e-06, "loss": 0.1021, "step": 1720 }, { "epoch": 0.34, "learning_rate": 8.290820074713383e-06, "loss": 0.1017, "step": 1721 }, { "epoch": 0.34, "learning_rate": 8.288191244927637e-06, "loss": 0.1094, "step": 1722 }, { "epoch": 0.34, "learning_rate": 8.285560812517423e-06, "loss": 0.1074, "step": 1723 }, { "epoch": 0.34, "learning_rate": 8.282928778764783e-06, "loss": 0.1142, "step": 1724 }, { "epoch": 0.34, "learning_rate": 8.280295144952537e-06, "loss": 0.1151, "step": 1725 }, { "epoch": 0.35, "learning_rate": 8.277659912364288e-06, "loss": 0.1007, "step": 1726 }, { "epoch": 0.35, "learning_rate": 8.275023082284413e-06, "loss": 0.105, "step": 1727 }, { "epoch": 0.35, "learning_rate": 8.272384655998075e-06, "loss": 0.0976, "step": 1728 }, { "epoch": 0.35, "learning_rate": 8.269744634791207e-06, "loss": 0.1007, "step": 1729 }, { "epoch": 0.35, "learning_rate": 8.267103019950529e-06, "loss": 0.1041, "step": 1730 }, { "epoch": 0.35, "learning_rate": 8.264459812763525e-06, "loss": 0.1034, "step": 1731 }, { "epoch": 0.35, "learning_rate": 8.261815014518465e-06, "loss": 0.0994, "step": 1732 }, { "epoch": 0.35, "learning_rate": 8.259168626504395e-06, "loss": 0.1056, "step": 1733 }, { "epoch": 0.35, "learning_rate": 8.256520650011126e-06, "loss": 0.2918, "step": 1734 }, { "epoch": 0.35, "learning_rate": 8.253871086329255e-06, "loss": 0.2105, "step": 1735 }, { "epoch": 0.35, "learning_rate": 8.251219936750145e-06, "loss": 0.0999, "step": 1736 }, { "epoch": 0.35, "learning_rate": 8.248567202565934e-06, "loss": 0.1055, "step": 1737 }, { "epoch": 0.35, "learning_rate": 8.24591288506953e-06, "loss": 0.1125, "step": 1738 }, { "epoch": 0.35, "learning_rate": 8.243256985554622e-06, "loss": 0.1347, "step": 1739 }, { "epoch": 0.35, "learning_rate": 8.240599505315656e-06, "loss": 0.1023, "step": 1740 }, { "epoch": 0.35, "learning_rate": 8.237940445647858e-06, "loss": 0.1102, "step": 1741 }, { "epoch": 0.35, "learning_rate": 8.235279807847223e-06, "loss": 0.1105, "step": 1742 }, { "epoch": 0.35, "learning_rate": 8.232617593210512e-06, "loss": 0.0898, "step": 1743 }, { "epoch": 0.35, "learning_rate": 8.229953803035256e-06, "loss": 0.0977, "step": 1744 }, { "epoch": 0.35, "learning_rate": 8.227288438619754e-06, "loss": 0.1016, "step": 1745 }, { "epoch": 0.35, "learning_rate": 8.224621501263073e-06, "loss": 0.1006, "step": 1746 }, { "epoch": 0.35, "learning_rate": 8.221952992265046e-06, "loss": 0.105, "step": 1747 }, { "epoch": 0.35, "learning_rate": 8.21928291292627e-06, "loss": 0.1004, "step": 1748 }, { "epoch": 0.35, "learning_rate": 8.21661126454811e-06, "loss": 0.104, "step": 1749 }, { "epoch": 0.35, "learning_rate": 8.213938048432697e-06, "loss": 0.1103, "step": 1750 }, { "epoch": 0.35, "learning_rate": 8.211263265882923e-06, "loss": 0.1016, "step": 1751 }, { "epoch": 0.35, "learning_rate": 8.208586918202444e-06, "loss": 0.1036, "step": 1752 }, { "epoch": 0.35, "learning_rate": 8.205909006695679e-06, "loss": 0.1042, "step": 1753 }, { "epoch": 0.35, "learning_rate": 8.203229532667808e-06, "loss": 0.1017, "step": 1754 }, { "epoch": 0.35, "learning_rate": 8.200548497424779e-06, "loss": 0.1075, "step": 1755 }, { "epoch": 0.35, "learning_rate": 8.197865902273291e-06, "loss": 0.1071, "step": 1756 }, { "epoch": 0.35, "learning_rate": 8.19518174852081e-06, "loss": 0.1128, "step": 1757 }, { "epoch": 0.35, "learning_rate": 8.192496037475562e-06, "loss": 0.1101, "step": 1758 }, { "epoch": 0.35, "learning_rate": 8.189808770446528e-06, "loss": 0.1086, "step": 1759 }, { "epoch": 0.35, "learning_rate": 8.18711994874345e-06, "loss": 0.1066, "step": 1760 }, { "epoch": 0.35, "learning_rate": 8.184429573676825e-06, "loss": 0.1003, "step": 1761 }, { "epoch": 0.35, "learning_rate": 8.181737646557912e-06, "loss": 0.106, "step": 1762 }, { "epoch": 0.35, "learning_rate": 8.179044168698722e-06, "loss": 0.1209, "step": 1763 }, { "epoch": 0.35, "learning_rate": 8.176349141412022e-06, "loss": 0.1031, "step": 1764 }, { "epoch": 0.35, "learning_rate": 8.173652566011339e-06, "loss": 0.1015, "step": 1765 }, { "epoch": 0.35, "learning_rate": 8.170954443810947e-06, "loss": 0.0991, "step": 1766 }, { "epoch": 0.35, "learning_rate": 8.168254776125883e-06, "loss": 0.1017, "step": 1767 }, { "epoch": 0.35, "learning_rate": 8.165553564271928e-06, "loss": 0.0989, "step": 1768 }, { "epoch": 0.35, "learning_rate": 8.162850809565623e-06, "loss": 0.1022, "step": 1769 }, { "epoch": 0.35, "learning_rate": 8.160146513324256e-06, "loss": 0.1011, "step": 1770 }, { "epoch": 0.35, "learning_rate": 8.157440676865866e-06, "loss": 0.1058, "step": 1771 }, { "epoch": 0.35, "learning_rate": 8.154733301509249e-06, "loss": 0.0986, "step": 1772 }, { "epoch": 0.35, "learning_rate": 8.152024388573945e-06, "loss": 0.1177, "step": 1773 }, { "epoch": 0.35, "learning_rate": 8.149313939380244e-06, "loss": 0.108, "step": 1774 }, { "epoch": 0.35, "learning_rate": 8.146601955249187e-06, "loss": 0.0943, "step": 1775 }, { "epoch": 0.36, "learning_rate": 8.143888437502565e-06, "loss": 0.0989, "step": 1776 }, { "epoch": 0.36, "learning_rate": 8.141173387462908e-06, "loss": 0.1065, "step": 1777 }, { "epoch": 0.36, "learning_rate": 8.138456806453503e-06, "loss": 0.1027, "step": 1778 }, { "epoch": 0.36, "learning_rate": 8.135738695798377e-06, "loss": 0.1076, "step": 1779 }, { "epoch": 0.36, "learning_rate": 8.133019056822303e-06, "loss": 0.1025, "step": 1780 }, { "epoch": 0.36, "learning_rate": 8.130297890850803e-06, "loss": 0.1102, "step": 1781 }, { "epoch": 0.36, "learning_rate": 8.127575199210136e-06, "loss": 0.1147, "step": 1782 }, { "epoch": 0.36, "learning_rate": 8.124850983227313e-06, "loss": 0.0982, "step": 1783 }, { "epoch": 0.36, "learning_rate": 8.12212524423008e-06, "loss": 0.1047, "step": 1784 }, { "epoch": 0.36, "learning_rate": 8.119397983546932e-06, "loss": 0.1023, "step": 1785 }, { "epoch": 0.36, "learning_rate": 8.116669202507102e-06, "loss": 0.1047, "step": 1786 }, { "epoch": 0.36, "learning_rate": 8.113938902440563e-06, "loss": 0.109, "step": 1787 }, { "epoch": 0.36, "learning_rate": 8.111207084678033e-06, "loss": 0.1081, "step": 1788 }, { "epoch": 0.36, "learning_rate": 8.108473750550965e-06, "loss": 0.1002, "step": 1789 }, { "epoch": 0.36, "learning_rate": 8.105738901391553e-06, "loss": 0.1067, "step": 1790 }, { "epoch": 0.36, "learning_rate": 8.103002538532729e-06, "loss": 0.0948, "step": 1791 }, { "epoch": 0.36, "learning_rate": 8.100264663308165e-06, "loss": 0.1047, "step": 1792 }, { "epoch": 0.36, "learning_rate": 8.097525277052265e-06, "loss": 0.1022, "step": 1793 }, { "epoch": 0.36, "learning_rate": 8.094784381100174e-06, "loss": 0.0982, "step": 1794 }, { "epoch": 0.36, "learning_rate": 8.092041976787772e-06, "loss": 0.1021, "step": 1795 }, { "epoch": 0.36, "learning_rate": 8.089298065451673e-06, "loss": 0.1068, "step": 1796 }, { "epoch": 0.36, "learning_rate": 8.086552648429225e-06, "loss": 0.1109, "step": 1797 }, { "epoch": 0.36, "learning_rate": 8.083805727058514e-06, "loss": 0.1081, "step": 1798 }, { "epoch": 0.36, "learning_rate": 8.081057302678352e-06, "loss": 0.1048, "step": 1799 }, { "epoch": 0.36, "learning_rate": 8.078307376628292e-06, "loss": 0.1077, "step": 1800 }, { "epoch": 0.36, "learning_rate": 8.075555950248613e-06, "loss": 0.1031, "step": 1801 }, { "epoch": 0.36, "learning_rate": 8.072803024880322e-06, "loss": 0.1038, "step": 1802 }, { "epoch": 0.36, "learning_rate": 8.07004860186517e-06, "loss": 0.0998, "step": 1803 }, { "epoch": 0.36, "learning_rate": 8.067292682545622e-06, "loss": 0.107, "step": 1804 }, { "epoch": 0.36, "learning_rate": 8.064535268264883e-06, "loss": 0.1038, "step": 1805 }, { "epoch": 0.36, "learning_rate": 8.061776360366883e-06, "loss": 0.1044, "step": 1806 }, { "epoch": 0.36, "learning_rate": 8.05901596019628e-06, "loss": 0.1098, "step": 1807 }, { "epoch": 0.36, "learning_rate": 8.05625406909846e-06, "loss": 0.0973, "step": 1808 }, { "epoch": 0.36, "learning_rate": 8.053490688419532e-06, "loss": 0.1103, "step": 1809 }, { "epoch": 0.36, "learning_rate": 8.05072581950634e-06, "loss": 0.1044, "step": 1810 }, { "epoch": 0.36, "learning_rate": 8.047959463706441e-06, "loss": 0.1103, "step": 1811 }, { "epoch": 0.36, "learning_rate": 8.045191622368128e-06, "loss": 0.1017, "step": 1812 }, { "epoch": 0.36, "learning_rate": 8.04242229684041e-06, "loss": 0.1009, "step": 1813 }, { "epoch": 0.36, "learning_rate": 8.039651488473028e-06, "loss": 0.1034, "step": 1814 }, { "epoch": 0.36, "learning_rate": 8.036879198616434e-06, "loss": 0.1046, "step": 1815 }, { "epoch": 0.36, "learning_rate": 8.034105428621812e-06, "loss": 0.1016, "step": 1816 }, { "epoch": 0.36, "learning_rate": 8.031330179841062e-06, "loss": 0.1033, "step": 1817 }, { "epoch": 0.36, "learning_rate": 8.028553453626809e-06, "loss": 0.1027, "step": 1818 }, { "epoch": 0.36, "learning_rate": 8.02577525133239e-06, "loss": 0.1076, "step": 1819 }, { "epoch": 0.36, "learning_rate": 8.022995574311876e-06, "loss": 0.1059, "step": 1820 }, { "epoch": 0.36, "learning_rate": 8.020214423920039e-06, "loss": 0.0993, "step": 1821 }, { "epoch": 0.36, "learning_rate": 8.017431801512384e-06, "loss": 0.1196, "step": 1822 }, { "epoch": 0.36, "learning_rate": 8.014647708445124e-06, "loss": 0.1027, "step": 1823 }, { "epoch": 0.36, "learning_rate": 8.011862146075194e-06, "loss": 0.0968, "step": 1824 }, { "epoch": 0.36, "learning_rate": 8.009075115760243e-06, "loss": 0.1324, "step": 1825 }, { "epoch": 0.37, "learning_rate": 8.006286618858634e-06, "loss": 0.1004, "step": 1826 }, { "epoch": 0.37, "learning_rate": 8.003496656729448e-06, "loss": 0.1078, "step": 1827 }, { "epoch": 0.37, "learning_rate": 8.000705230732478e-06, "loss": 0.0986, "step": 1828 }, { "epoch": 0.37, "learning_rate": 7.997912342228232e-06, "loss": 0.1009, "step": 1829 }, { "epoch": 0.37, "learning_rate": 7.99511799257793e-06, "loss": 0.1122, "step": 1830 }, { "epoch": 0.37, "learning_rate": 7.992322183143504e-06, "loss": 0.0933, "step": 1831 }, { "epoch": 0.37, "learning_rate": 7.989524915287595e-06, "loss": 0.0852, "step": 1832 }, { "epoch": 0.37, "learning_rate": 7.986726190373562e-06, "loss": 0.1029, "step": 1833 }, { "epoch": 0.37, "learning_rate": 7.983926009765464e-06, "loss": 0.104, "step": 1834 }, { "epoch": 0.37, "learning_rate": 7.981124374828079e-06, "loss": 0.2474, "step": 1835 }, { "epoch": 0.37, "learning_rate": 7.978321286926892e-06, "loss": 0.1007, "step": 1836 }, { "epoch": 0.37, "learning_rate": 7.975516747428087e-06, "loss": 0.0961, "step": 1837 }, { "epoch": 0.37, "learning_rate": 7.972710757698567e-06, "loss": 0.0973, "step": 1838 }, { "epoch": 0.37, "learning_rate": 7.969903319105935e-06, "loss": 0.1033, "step": 1839 }, { "epoch": 0.37, "learning_rate": 7.967094433018508e-06, "loss": 0.2057, "step": 1840 }, { "epoch": 0.37, "learning_rate": 7.964284100805297e-06, "loss": 0.1424, "step": 1841 }, { "epoch": 0.37, "learning_rate": 7.961472323836025e-06, "loss": 0.111, "step": 1842 }, { "epoch": 0.37, "learning_rate": 7.95865910348112e-06, "loss": 0.1072, "step": 1843 }, { "epoch": 0.37, "learning_rate": 7.95584444111171e-06, "loss": 0.0952, "step": 1844 }, { "epoch": 0.37, "learning_rate": 7.953028338099628e-06, "loss": 0.1159, "step": 1845 }, { "epoch": 0.37, "learning_rate": 7.950210795817406e-06, "loss": 0.0967, "step": 1846 }, { "epoch": 0.37, "learning_rate": 7.947391815638284e-06, "loss": 0.1056, "step": 1847 }, { "epoch": 0.37, "learning_rate": 7.944571398936193e-06, "loss": 0.1008, "step": 1848 }, { "epoch": 0.37, "learning_rate": 7.941749547085778e-06, "loss": 0.1098, "step": 1849 }, { "epoch": 0.37, "learning_rate": 7.938926261462366e-06, "loss": 0.1093, "step": 1850 }, { "epoch": 0.37, "learning_rate": 7.936101543441998e-06, "loss": 0.105, "step": 1851 }, { "epoch": 0.37, "learning_rate": 7.933275394401407e-06, "loss": 0.0997, "step": 1852 }, { "epoch": 0.37, "learning_rate": 7.930447815718022e-06, "loss": 0.1019, "step": 1853 }, { "epoch": 0.37, "learning_rate": 7.927618808769971e-06, "loss": 0.1036, "step": 1854 }, { "epoch": 0.37, "learning_rate": 7.92478837493608e-06, "loss": 0.1045, "step": 1855 }, { "epoch": 0.37, "learning_rate": 7.921956515595861e-06, "loss": 0.0994, "step": 1856 }, { "epoch": 0.37, "learning_rate": 7.919123232129535e-06, "loss": 0.1123, "step": 1857 }, { "epoch": 0.37, "learning_rate": 7.916288525918008e-06, "loss": 0.148, "step": 1858 }, { "epoch": 0.37, "learning_rate": 7.913452398342882e-06, "loss": 0.1001, "step": 1859 }, { "epoch": 0.37, "learning_rate": 7.910614850786448e-06, "loss": 0.1102, "step": 1860 }, { "epoch": 0.37, "learning_rate": 7.907775884631694e-06, "loss": 0.1041, "step": 1861 }, { "epoch": 0.37, "learning_rate": 7.904935501262301e-06, "loss": 0.0992, "step": 1862 }, { "epoch": 0.37, "learning_rate": 7.90209370206263e-06, "loss": 0.1072, "step": 1863 }, { "epoch": 0.37, "learning_rate": 7.899250488417746e-06, "loss": 0.1083, "step": 1864 }, { "epoch": 0.37, "learning_rate": 7.896405861713393e-06, "loss": 0.0935, "step": 1865 }, { "epoch": 0.37, "learning_rate": 7.893559823336013e-06, "loss": 0.0964, "step": 1866 }, { "epoch": 0.37, "learning_rate": 7.890712374672724e-06, "loss": 0.1038, "step": 1867 }, { "epoch": 0.37, "learning_rate": 7.887863517111337e-06, "loss": 0.1025, "step": 1868 }, { "epoch": 0.37, "learning_rate": 7.88501325204036e-06, "loss": 0.1051, "step": 1869 }, { "epoch": 0.37, "learning_rate": 7.882161580848966e-06, "loss": 0.1196, "step": 1870 }, { "epoch": 0.37, "learning_rate": 7.879308504927034e-06, "loss": 0.1292, "step": 1871 }, { "epoch": 0.37, "learning_rate": 7.876454025665114e-06, "loss": 0.104, "step": 1872 }, { "epoch": 0.37, "learning_rate": 7.873598144454444e-06, "loss": 0.0945, "step": 1873 }, { "epoch": 0.37, "learning_rate": 7.87074086268695e-06, "loss": 0.0987, "step": 1874 }, { "epoch": 0.38, "learning_rate": 7.86788218175523e-06, "loss": 0.0992, "step": 1875 }, { "epoch": 0.38, "learning_rate": 7.865022103052578e-06, "loss": 0.1123, "step": 1876 }, { "epoch": 0.38, "learning_rate": 7.862160627972956e-06, "loss": 0.1043, "step": 1877 }, { "epoch": 0.38, "learning_rate": 7.859297757911013e-06, "loss": 0.1152, "step": 1878 }, { "epoch": 0.38, "learning_rate": 7.856433494262078e-06, "loss": 0.1047, "step": 1879 }, { "epoch": 0.38, "learning_rate": 7.85356783842216e-06, "loss": 0.1004, "step": 1880 }, { "epoch": 0.38, "learning_rate": 7.850700791787941e-06, "loss": 0.1089, "step": 1881 }, { "epoch": 0.38, "learning_rate": 7.847832355756788e-06, "loss": 0.0965, "step": 1882 }, { "epoch": 0.38, "learning_rate": 7.844962531726742e-06, "loss": 0.107, "step": 1883 }, { "epoch": 0.38, "learning_rate": 7.842091321096515e-06, "loss": 0.1022, "step": 1884 }, { "epoch": 0.38, "learning_rate": 7.839218725265507e-06, "loss": 0.1008, "step": 1885 }, { "epoch": 0.38, "learning_rate": 7.836344745633785e-06, "loss": 0.1067, "step": 1886 }, { "epoch": 0.38, "learning_rate": 7.833469383602086e-06, "loss": 0.1093, "step": 1887 }, { "epoch": 0.38, "learning_rate": 7.830592640571833e-06, "loss": 0.1099, "step": 1888 }, { "epoch": 0.38, "learning_rate": 7.827714517945116e-06, "loss": 0.1064, "step": 1889 }, { "epoch": 0.38, "learning_rate": 7.82483501712469e-06, "loss": 0.0986, "step": 1890 }, { "epoch": 0.38, "learning_rate": 7.821954139513997e-06, "loss": 0.1008, "step": 1891 }, { "epoch": 0.38, "learning_rate": 7.819071886517134e-06, "loss": 0.1, "step": 1892 }, { "epoch": 0.38, "learning_rate": 7.816188259538885e-06, "loss": 0.1018, "step": 1893 }, { "epoch": 0.38, "learning_rate": 7.813303259984685e-06, "loss": 0.1109, "step": 1894 }, { "epoch": 0.38, "learning_rate": 7.810416889260653e-06, "loss": 0.093, "step": 1895 }, { "epoch": 0.38, "learning_rate": 7.807529148773572e-06, "loss": 0.0976, "step": 1896 }, { "epoch": 0.38, "learning_rate": 7.80464003993089e-06, "loss": 0.1189, "step": 1897 }, { "epoch": 0.38, "learning_rate": 7.801749564140724e-06, "loss": 0.1088, "step": 1898 }, { "epoch": 0.38, "learning_rate": 7.798857722811857e-06, "loss": 0.1034, "step": 1899 }, { "epoch": 0.38, "learning_rate": 7.795964517353734e-06, "loss": 0.1059, "step": 1900 }, { "epoch": 0.38, "learning_rate": 7.793069949176474e-06, "loss": 0.1019, "step": 1901 }, { "epoch": 0.38, "learning_rate": 7.79017401969085e-06, "loss": 0.1089, "step": 1902 }, { "epoch": 0.38, "learning_rate": 7.787276730308304e-06, "loss": 0.1011, "step": 1903 }, { "epoch": 0.38, "learning_rate": 7.78437808244094e-06, "loss": 0.1014, "step": 1904 }, { "epoch": 0.38, "learning_rate": 7.781478077501526e-06, "loss": 0.1094, "step": 1905 }, { "epoch": 0.38, "learning_rate": 7.778576716903484e-06, "loss": 0.1121, "step": 1906 }, { "epoch": 0.38, "learning_rate": 7.775674002060905e-06, "loss": 0.1003, "step": 1907 }, { "epoch": 0.38, "learning_rate": 7.772769934388537e-06, "loss": 0.1019, "step": 1908 }, { "epoch": 0.38, "learning_rate": 7.769864515301787e-06, "loss": 0.1042, "step": 1909 }, { "epoch": 0.38, "learning_rate": 7.76695774621672e-06, "loss": 0.103, "step": 1910 }, { "epoch": 0.38, "learning_rate": 7.764049628550063e-06, "loss": 0.1146, "step": 1911 }, { "epoch": 0.38, "learning_rate": 7.761140163719194e-06, "loss": 0.1014, "step": 1912 }, { "epoch": 0.38, "learning_rate": 7.758229353142153e-06, "loss": 0.1082, "step": 1913 }, { "epoch": 0.38, "learning_rate": 7.755317198237631e-06, "loss": 0.1012, "step": 1914 }, { "epoch": 0.38, "learning_rate": 7.752403700424978e-06, "loss": 0.108, "step": 1915 }, { "epoch": 0.38, "learning_rate": 7.7494888611242e-06, "loss": 0.1068, "step": 1916 }, { "epoch": 0.38, "learning_rate": 7.74657268175595e-06, "loss": 0.0978, "step": 1917 }, { "epoch": 0.38, "learning_rate": 7.743655163741544e-06, "loss": 0.1039, "step": 1918 }, { "epoch": 0.38, "learning_rate": 7.740736308502939e-06, "loss": 0.1033, "step": 1919 }, { "epoch": 0.38, "learning_rate": 7.737816117462752e-06, "loss": 0.1032, "step": 1920 }, { "epoch": 0.38, "learning_rate": 7.734894592044249e-06, "loss": 0.1145, "step": 1921 }, { "epoch": 0.38, "learning_rate": 7.731971733671347e-06, "loss": 0.1, "step": 1922 }, { "epoch": 0.38, "learning_rate": 7.729047543768608e-06, "loss": 0.1036, "step": 1923 }, { "epoch": 0.38, "learning_rate": 7.726122023761252e-06, "loss": 0.1059, "step": 1924 }, { "epoch": 0.39, "learning_rate": 7.723195175075136e-06, "loss": 0.1112, "step": 1925 }, { "epoch": 0.39, "learning_rate": 7.720266999136774e-06, "loss": 0.1094, "step": 1926 }, { "epoch": 0.39, "learning_rate": 7.717337497373324e-06, "loss": 0.0998, "step": 1927 }, { "epoch": 0.39, "learning_rate": 7.714406671212589e-06, "loss": 0.1098, "step": 1928 }, { "epoch": 0.39, "learning_rate": 7.711474522083015e-06, "loss": 0.1096, "step": 1929 }, { "epoch": 0.39, "learning_rate": 7.7085410514137e-06, "loss": 0.1055, "step": 1930 }, { "epoch": 0.39, "learning_rate": 7.70560626063438e-06, "loss": 0.1021, "step": 1931 }, { "epoch": 0.39, "learning_rate": 7.702670151175435e-06, "loss": 0.096, "step": 1932 }, { "epoch": 0.39, "learning_rate": 7.699732724467894e-06, "loss": 0.1071, "step": 1933 }, { "epoch": 0.39, "learning_rate": 7.696793981943418e-06, "loss": 0.1009, "step": 1934 }, { "epoch": 0.39, "learning_rate": 7.693853925034316e-06, "loss": 0.1024, "step": 1935 }, { "epoch": 0.39, "learning_rate": 7.690912555173536e-06, "loss": 0.0995, "step": 1936 }, { "epoch": 0.39, "learning_rate": 7.687969873794667e-06, "loss": 0.1007, "step": 1937 }, { "epoch": 0.39, "learning_rate": 7.685025882331936e-06, "loss": 0.1075, "step": 1938 }, { "epoch": 0.39, "learning_rate": 7.682080582220206e-06, "loss": 0.1022, "step": 1939 }, { "epoch": 0.39, "learning_rate": 7.679133974894984e-06, "loss": 0.1056, "step": 1940 }, { "epoch": 0.39, "learning_rate": 7.676186061792408e-06, "loss": 0.1116, "step": 1941 }, { "epoch": 0.39, "learning_rate": 7.673236844349257e-06, "loss": 0.1073, "step": 1942 }, { "epoch": 0.39, "learning_rate": 7.670286324002943e-06, "loss": 0.1026, "step": 1943 }, { "epoch": 0.39, "learning_rate": 7.667334502191514e-06, "loss": 0.0963, "step": 1944 }, { "epoch": 0.39, "learning_rate": 7.66438138035365e-06, "loss": 0.0953, "step": 1945 }, { "epoch": 0.39, "learning_rate": 7.66142695992867e-06, "loss": 0.1026, "step": 1946 }, { "epoch": 0.39, "learning_rate": 7.658471242356521e-06, "loss": 0.1267, "step": 1947 }, { "epoch": 0.39, "learning_rate": 7.655514229077784e-06, "loss": 0.0995, "step": 1948 }, { "epoch": 0.39, "learning_rate": 7.652555921533671e-06, "loss": 0.0992, "step": 1949 }, { "epoch": 0.39, "learning_rate": 7.649596321166024e-06, "loss": 0.1016, "step": 1950 }, { "epoch": 0.39, "learning_rate": 7.646635429417322e-06, "loss": 0.1028, "step": 1951 }, { "epoch": 0.39, "learning_rate": 7.64367324773066e-06, "loss": 0.1172, "step": 1952 }, { "epoch": 0.39, "learning_rate": 7.640709777549773e-06, "loss": 0.1016, "step": 1953 }, { "epoch": 0.39, "learning_rate": 7.637745020319019e-06, "loss": 0.099, "step": 1954 }, { "epoch": 0.39, "learning_rate": 7.634778977483389e-06, "loss": 0.1024, "step": 1955 }, { "epoch": 0.39, "learning_rate": 7.63181165048849e-06, "loss": 0.1031, "step": 1956 }, { "epoch": 0.39, "learning_rate": 7.628843040780567e-06, "loss": 0.1025, "step": 1957 }, { "epoch": 0.39, "learning_rate": 7.6258731498064796e-06, "loss": 0.1142, "step": 1958 }, { "epoch": 0.39, "learning_rate": 7.622901979013717e-06, "loss": 0.1005, "step": 1959 }, { "epoch": 0.39, "learning_rate": 7.619929529850397e-06, "loss": 0.1024, "step": 1960 }, { "epoch": 0.39, "learning_rate": 7.616955803765249e-06, "loss": 0.1068, "step": 1961 }, { "epoch": 0.39, "learning_rate": 7.613980802207633e-06, "loss": 0.1156, "step": 1962 }, { "epoch": 0.39, "learning_rate": 7.6110045266275305e-06, "loss": 0.1023, "step": 1963 }, { "epoch": 0.39, "learning_rate": 7.6080269784755405e-06, "loss": 0.1072, "step": 1964 }, { "epoch": 0.39, "learning_rate": 7.605048159202884e-06, "loss": 0.105, "step": 1965 }, { "epoch": 0.39, "learning_rate": 7.6020680702613995e-06, "loss": 0.1086, "step": 1966 }, { "epoch": 0.39, "learning_rate": 7.5990867131035474e-06, "loss": 0.1075, "step": 1967 }, { "epoch": 0.39, "learning_rate": 7.596104089182408e-06, "loss": 0.0975, "step": 1968 }, { "epoch": 0.39, "learning_rate": 7.5931201999516715e-06, "loss": 0.1079, "step": 1969 }, { "epoch": 0.39, "learning_rate": 7.590135046865652e-06, "loss": 0.1116, "step": 1970 }, { "epoch": 0.39, "learning_rate": 7.587148631379276e-06, "loss": 0.1094, "step": 1971 }, { "epoch": 0.39, "learning_rate": 7.5841609549480854e-06, "loss": 0.1014, "step": 1972 }, { "epoch": 0.39, "learning_rate": 7.581172019028238e-06, "loss": 0.1119, "step": 1973 }, { "epoch": 0.39, "learning_rate": 7.578181825076506e-06, "loss": 0.1014, "step": 1974 }, { "epoch": 0.4, "learning_rate": 7.575190374550272e-06, "loss": 0.0996, "step": 1975 }, { "epoch": 0.4, "learning_rate": 7.572197668907533e-06, "loss": 0.1073, "step": 1976 }, { "epoch": 0.4, "learning_rate": 7.569203709606898e-06, "loss": 0.1002, "step": 1977 }, { "epoch": 0.4, "learning_rate": 7.566208498107586e-06, "loss": 0.0966, "step": 1978 }, { "epoch": 0.4, "learning_rate": 7.563212035869426e-06, "loss": 0.0976, "step": 1979 }, { "epoch": 0.4, "learning_rate": 7.560214324352858e-06, "loss": 0.0956, "step": 1980 }, { "epoch": 0.4, "learning_rate": 7.55721536501893e-06, "loss": 0.102, "step": 1981 }, { "epoch": 0.4, "learning_rate": 7.5542151593293e-06, "loss": 0.1004, "step": 1982 }, { "epoch": 0.4, "learning_rate": 7.55121370874623e-06, "loss": 0.0937, "step": 1983 }, { "epoch": 0.4, "learning_rate": 7.548211014732589e-06, "loss": 0.1013, "step": 1984 }, { "epoch": 0.4, "learning_rate": 7.545207078751858e-06, "loss": 0.1064, "step": 1985 }, { "epoch": 0.4, "learning_rate": 7.542201902268115e-06, "loss": 0.1144, "step": 1986 }, { "epoch": 0.4, "learning_rate": 7.539195486746047e-06, "loss": 0.1046, "step": 1987 }, { "epoch": 0.4, "learning_rate": 7.536187833650947e-06, "loss": 0.0967, "step": 1988 }, { "epoch": 0.4, "learning_rate": 7.533178944448705e-06, "loss": 0.1043, "step": 1989 }, { "epoch": 0.4, "learning_rate": 7.530168820605819e-06, "loss": 0.2216, "step": 1990 }, { "epoch": 0.4, "learning_rate": 7.527157463589389e-06, "loss": 0.0971, "step": 1991 }, { "epoch": 0.4, "learning_rate": 7.52414487486711e-06, "loss": 0.1112, "step": 1992 }, { "epoch": 0.4, "learning_rate": 7.521131055907283e-06, "loss": 0.1115, "step": 1993 }, { "epoch": 0.4, "learning_rate": 7.518116008178805e-06, "loss": 0.0978, "step": 1994 }, { "epoch": 0.4, "learning_rate": 7.515099733151177e-06, "loss": 0.0997, "step": 1995 }, { "epoch": 0.4, "learning_rate": 7.512082232294491e-06, "loss": 0.102, "step": 1996 }, { "epoch": 0.4, "learning_rate": 7.509063507079443e-06, "loss": 0.1029, "step": 1997 }, { "epoch": 0.4, "learning_rate": 7.5060435589773215e-06, "loss": 0.0995, "step": 1998 }, { "epoch": 0.4, "learning_rate": 7.503022389460014e-06, "loss": 0.1057, "step": 1999 }, { "epoch": 0.4, "learning_rate": 7.500000000000001e-06, "loss": 0.099, "step": 2000 }, { "epoch": 0.4, "learning_rate": 7.496976392070358e-06, "loss": 0.0978, "step": 2001 }, { "epoch": 0.4, "learning_rate": 7.493951567144755e-06, "loss": 0.1224, "step": 2002 }, { "epoch": 0.4, "learning_rate": 7.490925526697455e-06, "loss": 0.1175, "step": 2003 }, { "epoch": 0.4, "learning_rate": 7.487898272203314e-06, "loss": 0.1113, "step": 2004 }, { "epoch": 0.4, "learning_rate": 7.484869805137778e-06, "loss": 0.1022, "step": 2005 }, { "epoch": 0.4, "learning_rate": 7.481840126976885e-06, "loss": 0.094, "step": 2006 }, { "epoch": 0.4, "learning_rate": 7.478809239197264e-06, "loss": 0.1012, "step": 2007 }, { "epoch": 0.4, "learning_rate": 7.475777143276133e-06, "loss": 0.1123, "step": 2008 }, { "epoch": 0.4, "learning_rate": 7.4727438406912986e-06, "loss": 0.0947, "step": 2009 }, { "epoch": 0.4, "learning_rate": 7.469709332921155e-06, "loss": 0.1008, "step": 2010 }, { "epoch": 0.4, "learning_rate": 7.4666736214446855e-06, "loss": 0.1002, "step": 2011 }, { "epoch": 0.4, "learning_rate": 7.463636707741458e-06, "loss": 0.1025, "step": 2012 }, { "epoch": 0.4, "learning_rate": 7.460598593291628e-06, "loss": 0.0969, "step": 2013 }, { "epoch": 0.4, "learning_rate": 7.4575592795759356e-06, "loss": 0.1176, "step": 2014 }, { "epoch": 0.4, "learning_rate": 7.454518768075705e-06, "loss": 0.1021, "step": 2015 }, { "epoch": 0.4, "learning_rate": 7.451477060272844e-06, "loss": 0.0955, "step": 2016 }, { "epoch": 0.4, "learning_rate": 7.448434157649846e-06, "loss": 0.0988, "step": 2017 }, { "epoch": 0.4, "learning_rate": 7.445390061689782e-06, "loss": 0.0975, "step": 2018 }, { "epoch": 0.4, "learning_rate": 7.44234477387631e-06, "loss": 0.1353, "step": 2019 }, { "epoch": 0.4, "learning_rate": 7.4392982956936644e-06, "loss": 0.1037, "step": 2020 }, { "epoch": 0.4, "learning_rate": 7.436250628626662e-06, "loss": 0.1033, "step": 2021 }, { "epoch": 0.4, "learning_rate": 7.433201774160701e-06, "loss": 0.0954, "step": 2022 }, { "epoch": 0.4, "learning_rate": 7.430151733781752e-06, "loss": 0.0979, "step": 2023 }, { "epoch": 0.4, "learning_rate": 7.42710050897637e-06, "loss": 0.0981, "step": 2024 }, { "epoch": 0.41, "learning_rate": 7.424048101231687e-06, "loss": 0.0931, "step": 2025 }, { "epoch": 0.41, "learning_rate": 7.4209945120354045e-06, "loss": 0.0997, "step": 2026 }, { "epoch": 0.41, "learning_rate": 7.4179397428758085e-06, "loss": 0.0941, "step": 2027 }, { "epoch": 0.41, "learning_rate": 7.414883795241754e-06, "loss": 0.1088, "step": 2028 }, { "epoch": 0.41, "learning_rate": 7.411826670622676e-06, "loss": 0.0928, "step": 2029 }, { "epoch": 0.41, "learning_rate": 7.408768370508577e-06, "loss": 0.0974, "step": 2030 }, { "epoch": 0.41, "learning_rate": 7.405708896390037e-06, "loss": 0.0967, "step": 2031 }, { "epoch": 0.41, "learning_rate": 7.402648249758204e-06, "loss": 0.0998, "step": 2032 }, { "epoch": 0.41, "learning_rate": 7.3995864321048036e-06, "loss": 0.1265, "step": 2033 }, { "epoch": 0.41, "learning_rate": 7.396523444922126e-06, "loss": 0.1005, "step": 2034 }, { "epoch": 0.41, "learning_rate": 7.393459289703035e-06, "loss": 0.0959, "step": 2035 }, { "epoch": 0.41, "learning_rate": 7.390393967940962e-06, "loss": 0.0976, "step": 2036 }, { "epoch": 0.41, "learning_rate": 7.3873274811299065e-06, "loss": 0.1057, "step": 2037 }, { "epoch": 0.41, "learning_rate": 7.3842598307644396e-06, "loss": 0.0954, "step": 2038 }, { "epoch": 0.41, "learning_rate": 7.381191018339697e-06, "loss": 0.0994, "step": 2039 }, { "epoch": 0.41, "learning_rate": 7.378121045351378e-06, "loss": 0.1042, "step": 2040 }, { "epoch": 0.41, "learning_rate": 7.37504991329575e-06, "loss": 0.0959, "step": 2041 }, { "epoch": 0.41, "learning_rate": 7.371977623669646e-06, "loss": 0.0991, "step": 2042 }, { "epoch": 0.41, "learning_rate": 7.368904177970466e-06, "loss": 0.0958, "step": 2043 }, { "epoch": 0.41, "learning_rate": 7.365829577696166e-06, "loss": 0.1003, "step": 2044 }, { "epoch": 0.41, "learning_rate": 7.362753824345271e-06, "loss": 0.0958, "step": 2045 }, { "epoch": 0.41, "learning_rate": 7.3596769194168646e-06, "loss": 0.1067, "step": 2046 }, { "epoch": 0.41, "learning_rate": 7.3565988644105926e-06, "loss": 0.1029, "step": 2047 }, { "epoch": 0.41, "learning_rate": 7.353519660826665e-06, "loss": 0.1005, "step": 2048 }, { "epoch": 0.41, "learning_rate": 7.350439310165842e-06, "loss": 0.0998, "step": 2049 }, { "epoch": 0.41, "learning_rate": 7.347357813929455e-06, "loss": 0.0975, "step": 2050 }, { "epoch": 0.41, "learning_rate": 7.344275173619385e-06, "loss": 0.1053, "step": 2051 }, { "epoch": 0.41, "learning_rate": 7.341191390738073e-06, "loss": 0.0947, "step": 2052 }, { "epoch": 0.41, "learning_rate": 7.33810646678852e-06, "loss": 0.0978, "step": 2053 }, { "epoch": 0.41, "learning_rate": 7.335020403274277e-06, "loss": 0.1017, "step": 2054 }, { "epoch": 0.41, "learning_rate": 7.3319332016994575e-06, "loss": 0.1008, "step": 2055 }, { "epoch": 0.41, "learning_rate": 7.3288448635687215e-06, "loss": 0.0977, "step": 2056 }, { "epoch": 0.41, "learning_rate": 7.325755390387293e-06, "loss": 0.0941, "step": 2057 }, { "epoch": 0.41, "learning_rate": 7.32266478366094e-06, "loss": 0.0996, "step": 2058 }, { "epoch": 0.41, "learning_rate": 7.319573044895986e-06, "loss": 0.0996, "step": 2059 }, { "epoch": 0.41, "learning_rate": 7.31648017559931e-06, "loss": 0.0989, "step": 2060 }, { "epoch": 0.41, "learning_rate": 7.313386177278335e-06, "loss": 0.0943, "step": 2061 }, { "epoch": 0.41, "learning_rate": 7.310291051441044e-06, "loss": 0.0989, "step": 2062 }, { "epoch": 0.41, "learning_rate": 7.307194799595958e-06, "loss": 0.0964, "step": 2063 }, { "epoch": 0.41, "learning_rate": 7.3040974232521555e-06, "loss": 0.1018, "step": 2064 }, { "epoch": 0.41, "learning_rate": 7.300998923919259e-06, "loss": 0.096, "step": 2065 }, { "epoch": 0.41, "learning_rate": 7.297899303107441e-06, "loss": 0.0984, "step": 2066 }, { "epoch": 0.41, "learning_rate": 7.294798562327417e-06, "loss": 0.0935, "step": 2067 }, { "epoch": 0.41, "learning_rate": 7.291696703090449e-06, "loss": 0.0983, "step": 2068 }, { "epoch": 0.41, "learning_rate": 7.288593726908351e-06, "loss": 0.0986, "step": 2069 }, { "epoch": 0.41, "learning_rate": 7.285489635293472e-06, "loss": 0.1073, "step": 2070 }, { "epoch": 0.41, "learning_rate": 7.282384429758709e-06, "loss": 0.0987, "step": 2071 }, { "epoch": 0.41, "learning_rate": 7.279278111817502e-06, "loss": 0.102, "step": 2072 }, { "epoch": 0.41, "learning_rate": 7.27617068298383e-06, "loss": 0.0977, "step": 2073 }, { "epoch": 0.41, "learning_rate": 7.27306214477222e-06, "loss": 0.1027, "step": 2074 }, { "epoch": 0.41, "learning_rate": 7.269952498697734e-06, "loss": 0.0954, "step": 2075 }, { "epoch": 0.42, "learning_rate": 7.266841746275977e-06, "loss": 0.1092, "step": 2076 }, { "epoch": 0.42, "learning_rate": 7.26372988902309e-06, "loss": 0.1035, "step": 2077 }, { "epoch": 0.42, "learning_rate": 7.260616928455754e-06, "loss": 0.1034, "step": 2078 }, { "epoch": 0.42, "learning_rate": 7.257502866091192e-06, "loss": 0.1361, "step": 2079 }, { "epoch": 0.42, "learning_rate": 7.254387703447154e-06, "loss": 0.1009, "step": 2080 }, { "epoch": 0.42, "learning_rate": 7.251271442041938e-06, "loss": 0.099, "step": 2081 }, { "epoch": 0.42, "learning_rate": 7.24815408339437e-06, "loss": 0.0945, "step": 2082 }, { "epoch": 0.42, "learning_rate": 7.245035629023812e-06, "loss": 0.0978, "step": 2083 }, { "epoch": 0.42, "learning_rate": 7.241916080450163e-06, "loss": 0.0992, "step": 2084 }, { "epoch": 0.42, "learning_rate": 7.238795439193849e-06, "loss": 0.0955, "step": 2085 }, { "epoch": 0.42, "learning_rate": 7.235673706775837e-06, "loss": 0.0982, "step": 2086 }, { "epoch": 0.42, "learning_rate": 7.2325508847176175e-06, "loss": 0.1127, "step": 2087 }, { "epoch": 0.42, "learning_rate": 7.2294269745412214e-06, "loss": 0.1058, "step": 2088 }, { "epoch": 0.42, "learning_rate": 7.226301977769199e-06, "loss": 0.101, "step": 2089 }, { "epoch": 0.42, "learning_rate": 7.223175895924638e-06, "loss": 0.0982, "step": 2090 }, { "epoch": 0.42, "learning_rate": 7.220048730531154e-06, "loss": 0.1024, "step": 2091 }, { "epoch": 0.42, "learning_rate": 7.216920483112886e-06, "loss": 0.0959, "step": 2092 }, { "epoch": 0.42, "learning_rate": 7.21379115519451e-06, "loss": 0.1048, "step": 2093 }, { "epoch": 0.42, "learning_rate": 7.210660748301214e-06, "loss": 0.1008, "step": 2094 }, { "epoch": 0.42, "learning_rate": 7.207529263958727e-06, "loss": 0.0963, "step": 2095 }, { "epoch": 0.42, "learning_rate": 7.2043967036932935e-06, "loss": 0.0959, "step": 2096 }, { "epoch": 0.42, "learning_rate": 7.201263069031686e-06, "loss": 0.1014, "step": 2097 }, { "epoch": 0.42, "learning_rate": 7.1981283615012e-06, "loss": 0.0994, "step": 2098 }, { "epoch": 0.42, "learning_rate": 7.194992582629654e-06, "loss": 0.0992, "step": 2099 }, { "epoch": 0.42, "learning_rate": 7.191855733945388e-06, "loss": 0.1022, "step": 2100 }, { "epoch": 0.42, "learning_rate": 7.188717816977264e-06, "loss": 0.0938, "step": 2101 }, { "epoch": 0.42, "learning_rate": 7.185578833254665e-06, "loss": 0.1169, "step": 2102 }, { "epoch": 0.42, "learning_rate": 7.182438784307495e-06, "loss": 0.0966, "step": 2103 }, { "epoch": 0.42, "learning_rate": 7.179297671666171e-06, "loss": 0.0866, "step": 2104 }, { "epoch": 0.42, "learning_rate": 7.176155496861639e-06, "loss": 0.1574, "step": 2105 }, { "epoch": 0.42, "learning_rate": 7.173012261425352e-06, "loss": 0.1038, "step": 2106 }, { "epoch": 0.42, "learning_rate": 7.169867966889288e-06, "loss": 0.0972, "step": 2107 }, { "epoch": 0.42, "learning_rate": 7.166722614785937e-06, "loss": 0.1064, "step": 2108 }, { "epoch": 0.42, "learning_rate": 7.1635762066483035e-06, "loss": 0.1117, "step": 2109 }, { "epoch": 0.42, "learning_rate": 7.160428744009913e-06, "loss": 0.1093, "step": 2110 }, { "epoch": 0.42, "learning_rate": 7.157280228404796e-06, "loss": 0.095, "step": 2111 }, { "epoch": 0.42, "learning_rate": 7.154130661367503e-06, "loss": 0.0981, "step": 2112 }, { "epoch": 0.42, "learning_rate": 7.150980044433094e-06, "loss": 0.0967, "step": 2113 }, { "epoch": 0.42, "learning_rate": 7.1478283791371415e-06, "loss": 0.0996, "step": 2114 }, { "epoch": 0.42, "learning_rate": 7.1446756670157306e-06, "loss": 0.0982, "step": 2115 }, { "epoch": 0.42, "learning_rate": 7.141521909605452e-06, "loss": 0.0972, "step": 2116 }, { "epoch": 0.42, "learning_rate": 7.138367108443411e-06, "loss": 0.0968, "step": 2117 }, { "epoch": 0.42, "learning_rate": 7.135211265067217e-06, "loss": 0.0972, "step": 2118 }, { "epoch": 0.42, "learning_rate": 7.1320543810149945e-06, "loss": 0.1009, "step": 2119 }, { "epoch": 0.42, "learning_rate": 7.128896457825364e-06, "loss": 0.0929, "step": 2120 }, { "epoch": 0.42, "learning_rate": 7.125737497037464e-06, "loss": 0.0971, "step": 2121 }, { "epoch": 0.42, "learning_rate": 7.12257750019093e-06, "loss": 0.093, "step": 2122 }, { "epoch": 0.42, "learning_rate": 7.119416468825908e-06, "loss": 0.0975, "step": 2123 }, { "epoch": 0.42, "learning_rate": 7.116254404483049e-06, "loss": 0.1049, "step": 2124 }, { "epoch": 0.42, "learning_rate": 7.113091308703498e-06, "loss": 0.1168, "step": 2125 }, { "epoch": 0.43, "learning_rate": 7.1099271830289155e-06, "loss": 0.1041, "step": 2126 }, { "epoch": 0.43, "learning_rate": 7.106762029001455e-06, "loss": 0.1076, "step": 2127 }, { "epoch": 0.43, "learning_rate": 7.103595848163775e-06, "loss": 0.0923, "step": 2128 }, { "epoch": 0.43, "learning_rate": 7.100428642059033e-06, "loss": 0.1101, "step": 2129 }, { "epoch": 0.43, "learning_rate": 7.0972604122308865e-06, "loss": 0.0943, "step": 2130 }, { "epoch": 0.43, "learning_rate": 7.094091160223493e-06, "loss": 0.0979, "step": 2131 }, { "epoch": 0.43, "learning_rate": 7.090920887581507e-06, "loss": 0.1085, "step": 2132 }, { "epoch": 0.43, "learning_rate": 7.087749595850084e-06, "loss": 0.1023, "step": 2133 }, { "epoch": 0.43, "learning_rate": 7.0845772865748684e-06, "loss": 0.0939, "step": 2134 }, { "epoch": 0.43, "learning_rate": 7.081403961302007e-06, "loss": 0.0944, "step": 2135 }, { "epoch": 0.43, "learning_rate": 7.07822962157814e-06, "loss": 0.0993, "step": 2136 }, { "epoch": 0.43, "learning_rate": 7.075054268950402e-06, "loss": 0.1032, "step": 2137 }, { "epoch": 0.43, "learning_rate": 7.071877904966422e-06, "loss": 0.102, "step": 2138 }, { "epoch": 0.43, "learning_rate": 7.0687005311743195e-06, "loss": 0.1007, "step": 2139 }, { "epoch": 0.43, "learning_rate": 7.06552214912271e-06, "loss": 0.0946, "step": 2140 }, { "epoch": 0.43, "learning_rate": 7.0623427603606965e-06, "loss": 0.0912, "step": 2141 }, { "epoch": 0.43, "learning_rate": 7.059162366437875e-06, "loss": 0.0969, "step": 2142 }, { "epoch": 0.43, "learning_rate": 7.0559809689043325e-06, "loss": 0.0892, "step": 2143 }, { "epoch": 0.43, "learning_rate": 7.052798569310641e-06, "loss": 0.0992, "step": 2144 }, { "epoch": 0.43, "learning_rate": 7.049615169207864e-06, "loss": 0.0994, "step": 2145 }, { "epoch": 0.43, "learning_rate": 7.0464307701475544e-06, "loss": 0.1006, "step": 2146 }, { "epoch": 0.43, "learning_rate": 7.043245373681746e-06, "loss": 0.0951, "step": 2147 }, { "epoch": 0.43, "learning_rate": 7.0400589813629645e-06, "loss": 0.0925, "step": 2148 }, { "epoch": 0.43, "learning_rate": 7.036871594744218e-06, "loss": 0.1006, "step": 2149 }, { "epoch": 0.43, "learning_rate": 7.033683215379002e-06, "loss": 0.1, "step": 2150 }, { "epoch": 0.43, "learning_rate": 7.030493844821291e-06, "loss": 0.0946, "step": 2151 }, { "epoch": 0.43, "learning_rate": 7.027303484625547e-06, "loss": 0.0916, "step": 2152 }, { "epoch": 0.43, "learning_rate": 7.024112136346713e-06, "loss": 0.0964, "step": 2153 }, { "epoch": 0.43, "learning_rate": 7.0209198015402115e-06, "loss": 0.1249, "step": 2154 }, { "epoch": 0.43, "learning_rate": 7.0177264817619514e-06, "loss": 0.0982, "step": 2155 }, { "epoch": 0.43, "learning_rate": 7.014532178568314e-06, "loss": 0.0952, "step": 2156 }, { "epoch": 0.43, "learning_rate": 7.011336893516167e-06, "loss": 0.1029, "step": 2157 }, { "epoch": 0.43, "learning_rate": 7.008140628162851e-06, "loss": 0.1187, "step": 2158 }, { "epoch": 0.43, "learning_rate": 7.0049433840661875e-06, "loss": 0.1077, "step": 2159 }, { "epoch": 0.43, "learning_rate": 7.0017451627844765e-06, "loss": 0.1007, "step": 2160 }, { "epoch": 0.43, "learning_rate": 6.998545965876489e-06, "loss": 0.1061, "step": 2161 }, { "epoch": 0.43, "learning_rate": 6.995345794901477e-06, "loss": 0.0956, "step": 2162 }, { "epoch": 0.43, "learning_rate": 6.992144651419163e-06, "loss": 0.1019, "step": 2163 }, { "epoch": 0.43, "learning_rate": 6.98894253698975e-06, "loss": 0.1341, "step": 2164 }, { "epoch": 0.43, "learning_rate": 6.985739453173903e-06, "loss": 0.1433, "step": 2165 }, { "epoch": 0.43, "learning_rate": 6.9825354015327715e-06, "loss": 0.0986, "step": 2166 }, { "epoch": 0.43, "learning_rate": 6.979330383627969e-06, "loss": 0.0956, "step": 2167 }, { "epoch": 0.43, "learning_rate": 6.976124401021583e-06, "loss": 0.113, "step": 2168 }, { "epoch": 0.43, "learning_rate": 6.97291745527617e-06, "loss": 0.1015, "step": 2169 }, { "epoch": 0.43, "learning_rate": 6.9697095479547564e-06, "loss": 0.0973, "step": 2170 }, { "epoch": 0.43, "learning_rate": 6.966500680620837e-06, "loss": 0.0926, "step": 2171 }, { "epoch": 0.43, "learning_rate": 6.963290854838376e-06, "loss": 0.0907, "step": 2172 }, { "epoch": 0.43, "learning_rate": 6.960080072171802e-06, "loss": 0.0988, "step": 2173 }, { "epoch": 0.43, "learning_rate": 6.9568683341860135e-06, "loss": 0.0958, "step": 2174 }, { "epoch": 0.43, "learning_rate": 6.953655642446368e-06, "loss": 0.0982, "step": 2175 }, { "epoch": 0.44, "learning_rate": 6.950441998518699e-06, "loss": 0.0989, "step": 2176 }, { "epoch": 0.44, "learning_rate": 6.947227403969293e-06, "loss": 0.0898, "step": 2177 }, { "epoch": 0.44, "learning_rate": 6.944011860364905e-06, "loss": 0.0922, "step": 2178 }, { "epoch": 0.44, "learning_rate": 6.940795369272754e-06, "loss": 0.0934, "step": 2179 }, { "epoch": 0.44, "learning_rate": 6.9375779322605154e-06, "loss": 0.1012, "step": 2180 }, { "epoch": 0.44, "learning_rate": 6.934359550896332e-06, "loss": 0.0986, "step": 2181 }, { "epoch": 0.44, "learning_rate": 6.9311402267488004e-06, "loss": 0.0936, "step": 2182 }, { "epoch": 0.44, "learning_rate": 6.927919961386984e-06, "loss": 0.1021, "step": 2183 }, { "epoch": 0.44, "learning_rate": 6.924698756380398e-06, "loss": 0.0946, "step": 2184 }, { "epoch": 0.44, "learning_rate": 6.921476613299018e-06, "loss": 0.098, "step": 2185 }, { "epoch": 0.44, "learning_rate": 6.9182535337132824e-06, "loss": 0.0954, "step": 2186 }, { "epoch": 0.44, "learning_rate": 6.915029519194076e-06, "loss": 0.0955, "step": 2187 }, { "epoch": 0.44, "learning_rate": 6.911804571312746e-06, "loss": 0.092, "step": 2188 }, { "epoch": 0.44, "learning_rate": 6.908578691641092e-06, "loss": 0.0995, "step": 2189 }, { "epoch": 0.44, "learning_rate": 6.905351881751372e-06, "loss": 0.0927, "step": 2190 }, { "epoch": 0.44, "learning_rate": 6.9021241432162886e-06, "loss": 0.0947, "step": 2191 }, { "epoch": 0.44, "learning_rate": 6.898895477609007e-06, "loss": 0.0989, "step": 2192 }, { "epoch": 0.44, "learning_rate": 6.895665886503136e-06, "loss": 0.1005, "step": 2193 }, { "epoch": 0.44, "learning_rate": 6.892435371472741e-06, "loss": 0.1031, "step": 2194 }, { "epoch": 0.44, "learning_rate": 6.889203934092337e-06, "loss": 0.1015, "step": 2195 }, { "epoch": 0.44, "learning_rate": 6.885971575936884e-06, "loss": 0.1018, "step": 2196 }, { "epoch": 0.44, "learning_rate": 6.882738298581797e-06, "loss": 0.0962, "step": 2197 }, { "epoch": 0.44, "learning_rate": 6.879504103602934e-06, "loss": 0.0957, "step": 2198 }, { "epoch": 0.44, "learning_rate": 6.876268992576605e-06, "loss": 0.0951, "step": 2199 }, { "epoch": 0.44, "learning_rate": 6.873032967079562e-06, "loss": 0.0722, "step": 2200 }, { "epoch": 0.44, "learning_rate": 6.869796028689002e-06, "loss": 0.095, "step": 2201 }, { "epoch": 0.44, "learning_rate": 6.866558178982575e-06, "loss": 0.0926, "step": 2202 }, { "epoch": 0.44, "learning_rate": 6.863319419538366e-06, "loss": 0.0948, "step": 2203 }, { "epoch": 0.44, "learning_rate": 6.860079751934908e-06, "loss": 0.0964, "step": 2204 }, { "epoch": 0.44, "learning_rate": 6.856839177751175e-06, "loss": 0.1008, "step": 2205 }, { "epoch": 0.44, "learning_rate": 6.853597698566583e-06, "loss": 0.1205, "step": 2206 }, { "epoch": 0.44, "learning_rate": 6.850355315960992e-06, "loss": 0.103, "step": 2207 }, { "epoch": 0.44, "learning_rate": 6.847112031514698e-06, "loss": 0.1101, "step": 2208 }, { "epoch": 0.44, "learning_rate": 6.843867846808438e-06, "loss": 0.0917, "step": 2209 }, { "epoch": 0.44, "learning_rate": 6.840622763423391e-06, "loss": 0.1077, "step": 2210 }, { "epoch": 0.44, "learning_rate": 6.837376782941168e-06, "loss": 0.1, "step": 2211 }, { "epoch": 0.44, "learning_rate": 6.834129906943822e-06, "loss": 0.1111, "step": 2212 }, { "epoch": 0.44, "learning_rate": 6.830882137013839e-06, "loss": 0.1051, "step": 2213 }, { "epoch": 0.44, "learning_rate": 6.827633474734145e-06, "loss": 0.0952, "step": 2214 }, { "epoch": 0.44, "learning_rate": 6.824383921688098e-06, "loss": 0.0999, "step": 2215 }, { "epoch": 0.44, "learning_rate": 6.821133479459492e-06, "loss": 0.0948, "step": 2216 }, { "epoch": 0.44, "learning_rate": 6.81788214963255e-06, "loss": 0.0971, "step": 2217 }, { "epoch": 0.44, "learning_rate": 6.814629933791932e-06, "loss": 0.0924, "step": 2218 }, { "epoch": 0.44, "learning_rate": 6.811376833522729e-06, "loss": 0.1143, "step": 2219 }, { "epoch": 0.44, "learning_rate": 6.808122850410461e-06, "loss": 0.0953, "step": 2220 }, { "epoch": 0.44, "learning_rate": 6.804867986041084e-06, "loss": 0.0999, "step": 2221 }, { "epoch": 0.44, "learning_rate": 6.8016122420009745e-06, "loss": 0.1013, "step": 2222 }, { "epoch": 0.44, "learning_rate": 6.798355619876944e-06, "loss": 0.0953, "step": 2223 }, { "epoch": 0.44, "learning_rate": 6.7950981212562315e-06, "loss": 0.0951, "step": 2224 }, { "epoch": 0.45, "learning_rate": 6.7918397477265e-06, "loss": 0.092, "step": 2225 }, { "epoch": 0.45, "learning_rate": 6.788580500875848e-06, "loss": 0.0975, "step": 2226 }, { "epoch": 0.45, "learning_rate": 6.785320382292783e-06, "loss": 0.1051, "step": 2227 }, { "epoch": 0.45, "learning_rate": 6.782059393566254e-06, "loss": 0.0947, "step": 2228 }, { "epoch": 0.45, "learning_rate": 6.778797536285625e-06, "loss": 0.0921, "step": 2229 }, { "epoch": 0.45, "learning_rate": 6.775534812040686e-06, "loss": 0.0969, "step": 2230 }, { "epoch": 0.45, "learning_rate": 6.772271222421649e-06, "loss": 0.0924, "step": 2231 }, { "epoch": 0.45, "learning_rate": 6.769006769019147e-06, "loss": 0.1088, "step": 2232 }, { "epoch": 0.45, "learning_rate": 6.765741453424237e-06, "loss": 0.1005, "step": 2233 }, { "epoch": 0.45, "learning_rate": 6.762475277228393e-06, "loss": 0.1017, "step": 2234 }, { "epoch": 0.45, "learning_rate": 6.759208242023509e-06, "loss": 0.0829, "step": 2235 }, { "epoch": 0.45, "learning_rate": 6.755940349401901e-06, "loss": 0.0923, "step": 2236 }, { "epoch": 0.45, "learning_rate": 6.752671600956295e-06, "loss": 0.1087, "step": 2237 }, { "epoch": 0.45, "learning_rate": 6.749401998279845e-06, "loss": 0.1004, "step": 2238 }, { "epoch": 0.45, "learning_rate": 6.746131542966112e-06, "loss": 0.1147, "step": 2239 }, { "epoch": 0.45, "learning_rate": 6.7428602366090764e-06, "loss": 0.0909, "step": 2240 }, { "epoch": 0.45, "learning_rate": 6.739588080803134e-06, "loss": 0.0951, "step": 2241 }, { "epoch": 0.45, "learning_rate": 6.736315077143095e-06, "loss": 0.1165, "step": 2242 }, { "epoch": 0.45, "learning_rate": 6.733041227224182e-06, "loss": 0.0941, "step": 2243 }, { "epoch": 0.45, "learning_rate": 6.729766532642024e-06, "loss": 0.1094, "step": 2244 }, { "epoch": 0.45, "learning_rate": 6.7264909949926735e-06, "loss": 0.1104, "step": 2245 }, { "epoch": 0.45, "learning_rate": 6.723214615872585e-06, "loss": 0.0924, "step": 2246 }, { "epoch": 0.45, "learning_rate": 6.719937396878628e-06, "loss": 0.0974, "step": 2247 }, { "epoch": 0.45, "learning_rate": 6.716659339608077e-06, "loss": 0.121, "step": 2248 }, { "epoch": 0.45, "learning_rate": 6.713380445658618e-06, "loss": 0.0913, "step": 2249 }, { "epoch": 0.45, "learning_rate": 6.710100716628345e-06, "loss": 0.0965, "step": 2250 }, { "epoch": 0.45, "learning_rate": 6.7068201541157555e-06, "loss": 0.099, "step": 2251 }, { "epoch": 0.45, "learning_rate": 6.70353875971976e-06, "loss": 0.1005, "step": 2252 }, { "epoch": 0.45, "learning_rate": 6.700256535039665e-06, "loss": 0.1054, "step": 2253 }, { "epoch": 0.45, "learning_rate": 6.6969734816751906e-06, "loss": 0.0993, "step": 2254 }, { "epoch": 0.45, "learning_rate": 6.693689601226458e-06, "loss": 0.0984, "step": 2255 }, { "epoch": 0.45, "learning_rate": 6.690404895293987e-06, "loss": 0.1069, "step": 2256 }, { "epoch": 0.45, "learning_rate": 6.687119365478707e-06, "loss": 0.0856, "step": 2257 }, { "epoch": 0.45, "learning_rate": 6.683833013381942e-06, "loss": 0.0961, "step": 2258 }, { "epoch": 0.45, "learning_rate": 6.680545840605423e-06, "loss": 0.1098, "step": 2259 }, { "epoch": 0.45, "learning_rate": 6.677257848751276e-06, "loss": 0.0918, "step": 2260 }, { "epoch": 0.45, "learning_rate": 6.673969039422029e-06, "loss": 0.1078, "step": 2261 }, { "epoch": 0.45, "learning_rate": 6.6706794142206085e-06, "loss": 0.0892, "step": 2262 }, { "epoch": 0.45, "learning_rate": 6.6673889747503364e-06, "loss": 0.0996, "step": 2263 }, { "epoch": 0.45, "learning_rate": 6.664097722614934e-06, "loss": 0.1031, "step": 2264 }, { "epoch": 0.45, "learning_rate": 6.6608056594185166e-06, "loss": 0.1107, "step": 2265 }, { "epoch": 0.45, "learning_rate": 6.657512786765599e-06, "loss": 0.101, "step": 2266 }, { "epoch": 0.45, "learning_rate": 6.654219106261082e-06, "loss": 0.1116, "step": 2267 }, { "epoch": 0.45, "learning_rate": 6.6509246195102685e-06, "loss": 0.109, "step": 2268 }, { "epoch": 0.45, "learning_rate": 6.647629328118852e-06, "loss": 0.1067, "step": 2269 }, { "epoch": 0.45, "learning_rate": 6.644333233692917e-06, "loss": 0.0944, "step": 2270 }, { "epoch": 0.45, "learning_rate": 6.64103633783894e-06, "loss": 0.0986, "step": 2271 }, { "epoch": 0.45, "learning_rate": 6.637738642163785e-06, "loss": 0.1224, "step": 2272 }, { "epoch": 0.45, "learning_rate": 6.634440148274712e-06, "loss": 0.0946, "step": 2273 }, { "epoch": 0.45, "learning_rate": 6.631140857779368e-06, "loss": 0.1097, "step": 2274 }, { "epoch": 0.46, "learning_rate": 6.627840772285784e-06, "loss": 0.098, "step": 2275 }, { "epoch": 0.46, "learning_rate": 6.624539893402383e-06, "loss": 0.0983, "step": 2276 }, { "epoch": 0.46, "learning_rate": 6.6212382227379726e-06, "loss": 0.0938, "step": 2277 }, { "epoch": 0.46, "learning_rate": 6.617935761901748e-06, "loss": 0.0992, "step": 2278 }, { "epoch": 0.46, "learning_rate": 6.614632512503289e-06, "loss": 0.1051, "step": 2279 }, { "epoch": 0.46, "learning_rate": 6.611328476152557e-06, "loss": 0.09, "step": 2280 }, { "epoch": 0.46, "learning_rate": 6.6080236544599e-06, "loss": 0.098, "step": 2281 }, { "epoch": 0.46, "learning_rate": 6.604718049036047e-06, "loss": 0.0952, "step": 2282 }, { "epoch": 0.46, "learning_rate": 6.601411661492114e-06, "loss": 0.0965, "step": 2283 }, { "epoch": 0.46, "learning_rate": 6.59810449343959e-06, "loss": 0.0927, "step": 2284 }, { "epoch": 0.46, "learning_rate": 6.594796546490351e-06, "loss": 0.1114, "step": 2285 }, { "epoch": 0.46, "learning_rate": 6.591487822256648e-06, "loss": 0.0919, "step": 2286 }, { "epoch": 0.46, "learning_rate": 6.588178322351113e-06, "loss": 0.0937, "step": 2287 }, { "epoch": 0.46, "learning_rate": 6.58486804838676e-06, "loss": 0.0953, "step": 2288 }, { "epoch": 0.46, "learning_rate": 6.58155700197697e-06, "loss": 0.0943, "step": 2289 }, { "epoch": 0.46, "learning_rate": 6.578245184735513e-06, "loss": 0.0967, "step": 2290 }, { "epoch": 0.46, "learning_rate": 6.574932598276524e-06, "loss": 0.0918, "step": 2291 }, { "epoch": 0.46, "learning_rate": 6.571619244214521e-06, "loss": 0.0946, "step": 2292 }, { "epoch": 0.46, "learning_rate": 6.5683051241643894e-06, "loss": 0.0979, "step": 2293 }, { "epoch": 0.46, "learning_rate": 6.5649902397413915e-06, "loss": 0.1173, "step": 2294 }, { "epoch": 0.46, "learning_rate": 6.561674592561164e-06, "loss": 0.101, "step": 2295 }, { "epoch": 0.46, "learning_rate": 6.558358184239709e-06, "loss": 0.103, "step": 2296 }, { "epoch": 0.46, "learning_rate": 6.55504101639341e-06, "loss": 0.1014, "step": 2297 }, { "epoch": 0.46, "learning_rate": 6.551723090639008e-06, "loss": 0.1038, "step": 2298 }, { "epoch": 0.46, "learning_rate": 6.548404408593622e-06, "loss": 0.0931, "step": 2299 }, { "epoch": 0.46, "learning_rate": 6.545084971874738e-06, "loss": 0.0949, "step": 2300 }, { "epoch": 0.46, "learning_rate": 6.541764782100208e-06, "loss": 0.099, "step": 2301 }, { "epoch": 0.46, "learning_rate": 6.538443840888254e-06, "loss": 0.0903, "step": 2302 }, { "epoch": 0.46, "learning_rate": 6.53512214985746e-06, "loss": 0.0934, "step": 2303 }, { "epoch": 0.46, "learning_rate": 6.53179971062678e-06, "loss": 0.0975, "step": 2304 }, { "epoch": 0.46, "learning_rate": 6.5284765248155295e-06, "loss": 0.1056, "step": 2305 }, { "epoch": 0.46, "learning_rate": 6.525152594043389e-06, "loss": 0.0946, "step": 2306 }, { "epoch": 0.46, "learning_rate": 6.5218279199304014e-06, "loss": 0.0931, "step": 2307 }, { "epoch": 0.46, "learning_rate": 6.518502504096972e-06, "loss": 0.1243, "step": 2308 }, { "epoch": 0.46, "learning_rate": 6.5151763481638705e-06, "loss": 0.093, "step": 2309 }, { "epoch": 0.46, "learning_rate": 6.5118494537522235e-06, "loss": 0.0958, "step": 2310 }, { "epoch": 0.46, "learning_rate": 6.508521822483518e-06, "loss": 0.0965, "step": 2311 }, { "epoch": 0.46, "learning_rate": 6.505193455979603e-06, "loss": 0.0967, "step": 2312 }, { "epoch": 0.46, "learning_rate": 6.501864355862682e-06, "loss": 0.0942, "step": 2313 }, { "epoch": 0.46, "learning_rate": 6.49853452375532e-06, "loss": 0.1038, "step": 2314 }, { "epoch": 0.46, "learning_rate": 6.495203961280434e-06, "loss": 0.1038, "step": 2315 }, { "epoch": 0.46, "learning_rate": 6.491872670061302e-06, "loss": 0.0942, "step": 2316 }, { "epoch": 0.46, "learning_rate": 6.4885406517215535e-06, "loss": 0.1013, "step": 2317 }, { "epoch": 0.46, "learning_rate": 6.485207907885175e-06, "loss": 0.1, "step": 2318 }, { "epoch": 0.46, "learning_rate": 6.481874440176506e-06, "loss": 0.0925, "step": 2319 }, { "epoch": 0.46, "learning_rate": 6.4785402502202345e-06, "loss": 0.0972, "step": 2320 }, { "epoch": 0.46, "learning_rate": 6.4752053396414075e-06, "loss": 0.0968, "step": 2321 }, { "epoch": 0.46, "learning_rate": 6.471869710065418e-06, "loss": 0.1055, "step": 2322 }, { "epoch": 0.46, "learning_rate": 6.4685333631180145e-06, "loss": 0.1021, "step": 2323 }, { "epoch": 0.46, "learning_rate": 6.465196300425287e-06, "loss": 0.0953, "step": 2324 }, { "epoch": 0.47, "learning_rate": 6.461858523613684e-06, "loss": 0.1036, "step": 2325 }, { "epoch": 0.47, "learning_rate": 6.458520034309995e-06, "loss": 0.092, "step": 2326 }, { "epoch": 0.47, "learning_rate": 6.455180834141359e-06, "loss": 0.1043, "step": 2327 }, { "epoch": 0.47, "learning_rate": 6.451840924735264e-06, "loss": 0.0939, "step": 2328 }, { "epoch": 0.47, "learning_rate": 6.448500307719537e-06, "loss": 0.0835, "step": 2329 }, { "epoch": 0.47, "learning_rate": 6.445158984722358e-06, "loss": 0.0994, "step": 2330 }, { "epoch": 0.47, "learning_rate": 6.441816957372247e-06, "loss": 0.1025, "step": 2331 }, { "epoch": 0.47, "learning_rate": 6.438474227298065e-06, "loss": 0.0946, "step": 2332 }, { "epoch": 0.47, "learning_rate": 6.435130796129019e-06, "loss": 0.1008, "step": 2333 }, { "epoch": 0.47, "learning_rate": 6.431786665494657e-06, "loss": 0.1018, "step": 2334 }, { "epoch": 0.47, "learning_rate": 6.428441837024868e-06, "loss": 0.0978, "step": 2335 }, { "epoch": 0.47, "learning_rate": 6.425096312349881e-06, "loss": 0.0937, "step": 2336 }, { "epoch": 0.47, "learning_rate": 6.421750093100264e-06, "loss": 0.1113, "step": 2337 }, { "epoch": 0.47, "learning_rate": 6.418403180906923e-06, "loss": 0.0949, "step": 2338 }, { "epoch": 0.47, "learning_rate": 6.415055577401101e-06, "loss": 0.1029, "step": 2339 }, { "epoch": 0.47, "learning_rate": 6.411707284214384e-06, "loss": 0.0942, "step": 2340 }, { "epoch": 0.47, "learning_rate": 6.408358302978683e-06, "loss": 0.0918, "step": 2341 }, { "epoch": 0.47, "learning_rate": 6.4050086353262565e-06, "loss": 0.096, "step": 2342 }, { "epoch": 0.47, "learning_rate": 6.401658282889689e-06, "loss": 0.0943, "step": 2343 }, { "epoch": 0.47, "learning_rate": 6.3983072473019e-06, "loss": 0.0927, "step": 2344 }, { "epoch": 0.47, "learning_rate": 6.3949555301961474e-06, "loss": 0.1024, "step": 2345 }, { "epoch": 0.47, "learning_rate": 6.391603133206015e-06, "loss": 0.1299, "step": 2346 }, { "epoch": 0.47, "learning_rate": 6.388250057965421e-06, "loss": 0.0969, "step": 2347 }, { "epoch": 0.47, "learning_rate": 6.384896306108612e-06, "loss": 0.1253, "step": 2348 }, { "epoch": 0.47, "learning_rate": 6.3815418792701686e-06, "loss": 0.1104, "step": 2349 }, { "epoch": 0.47, "learning_rate": 6.378186779084996e-06, "loss": 0.135, "step": 2350 }, { "epoch": 0.47, "learning_rate": 6.374831007188331e-06, "loss": 0.0886, "step": 2351 }, { "epoch": 0.47, "learning_rate": 6.371474565215734e-06, "loss": 0.0957, "step": 2352 }, { "epoch": 0.47, "learning_rate": 6.368117454803093e-06, "loss": 0.0974, "step": 2353 }, { "epoch": 0.47, "learning_rate": 6.364759677586627e-06, "loss": 0.0786, "step": 2354 }, { "epoch": 0.47, "learning_rate": 6.361401235202872e-06, "loss": 0.0959, "step": 2355 }, { "epoch": 0.47, "learning_rate": 6.358042129288694e-06, "loss": 0.0938, "step": 2356 }, { "epoch": 0.47, "learning_rate": 6.35468236148128e-06, "loss": 0.0988, "step": 2357 }, { "epoch": 0.47, "learning_rate": 6.35132193341814e-06, "loss": 0.0957, "step": 2358 }, { "epoch": 0.47, "learning_rate": 6.3479608467371055e-06, "loss": 0.1025, "step": 2359 }, { "epoch": 0.47, "learning_rate": 6.344599103076329e-06, "loss": 0.1434, "step": 2360 }, { "epoch": 0.47, "learning_rate": 6.341236704074285e-06, "loss": 0.0923, "step": 2361 }, { "epoch": 0.47, "learning_rate": 6.337873651369764e-06, "loss": 0.0901, "step": 2362 }, { "epoch": 0.47, "learning_rate": 6.334509946601879e-06, "loss": 0.0947, "step": 2363 }, { "epoch": 0.47, "learning_rate": 6.331145591410057e-06, "loss": 0.0964, "step": 2364 }, { "epoch": 0.47, "learning_rate": 6.327780587434045e-06, "loss": 0.0941, "step": 2365 }, { "epoch": 0.47, "learning_rate": 6.324414936313904e-06, "loss": 0.1036, "step": 2366 }, { "epoch": 0.47, "learning_rate": 6.321048639690013e-06, "loss": 0.0932, "step": 2367 }, { "epoch": 0.47, "learning_rate": 6.317681699203065e-06, "loss": 0.0973, "step": 2368 }, { "epoch": 0.47, "learning_rate": 6.314314116494061e-06, "loss": 0.1048, "step": 2369 }, { "epoch": 0.47, "learning_rate": 6.310945893204324e-06, "loss": 0.0932, "step": 2370 }, { "epoch": 0.47, "learning_rate": 6.307577030975485e-06, "loss": 0.0924, "step": 2371 }, { "epoch": 0.47, "learning_rate": 6.304207531449486e-06, "loss": 0.1088, "step": 2372 }, { "epoch": 0.47, "learning_rate": 6.3008373962685785e-06, "loss": 0.0894, "step": 2373 }, { "epoch": 0.47, "learning_rate": 6.297466627075327e-06, "loss": 0.0982, "step": 2374 }, { "epoch": 0.47, "learning_rate": 6.294095225512604e-06, "loss": 0.0937, "step": 2375 }, { "epoch": 0.48, "learning_rate": 6.290723193223589e-06, "loss": 0.1, "step": 2376 }, { "epoch": 0.48, "learning_rate": 6.28735053185177e-06, "loss": 0.094, "step": 2377 }, { "epoch": 0.48, "learning_rate": 6.28397724304094e-06, "loss": 0.0926, "step": 2378 }, { "epoch": 0.48, "learning_rate": 6.280603328435199e-06, "loss": 0.1036, "step": 2379 }, { "epoch": 0.48, "learning_rate": 6.277228789678953e-06, "loss": 0.0963, "step": 2380 }, { "epoch": 0.48, "learning_rate": 6.273853628416911e-06, "loss": 0.0956, "step": 2381 }, { "epoch": 0.48, "learning_rate": 6.270477846294086e-06, "loss": 0.1017, "step": 2382 }, { "epoch": 0.48, "learning_rate": 6.267101444955792e-06, "loss": 0.097, "step": 2383 }, { "epoch": 0.48, "learning_rate": 6.2637244260476474e-06, "loss": 0.0956, "step": 2384 }, { "epoch": 0.48, "learning_rate": 6.26034679121557e-06, "loss": 0.1003, "step": 2385 }, { "epoch": 0.48, "learning_rate": 6.256968542105775e-06, "loss": 0.0996, "step": 2386 }, { "epoch": 0.48, "learning_rate": 6.2535896803647845e-06, "loss": 0.1085, "step": 2387 }, { "epoch": 0.48, "learning_rate": 6.250210207639411e-06, "loss": 0.0907, "step": 2388 }, { "epoch": 0.48, "learning_rate": 6.24683012557677e-06, "loss": 0.0998, "step": 2389 }, { "epoch": 0.48, "learning_rate": 6.243449435824276e-06, "loss": 0.0956, "step": 2390 }, { "epoch": 0.48, "learning_rate": 6.240068140029628e-06, "loss": 0.0995, "step": 2391 }, { "epoch": 0.48, "learning_rate": 6.236686239840836e-06, "loss": 0.0913, "step": 2392 }, { "epoch": 0.48, "learning_rate": 6.233303736906193e-06, "loss": 0.0937, "step": 2393 }, { "epoch": 0.48, "learning_rate": 6.229920632874291e-06, "loss": 0.1039, "step": 2394 }, { "epoch": 0.48, "learning_rate": 6.2265369293940135e-06, "loss": 0.0921, "step": 2395 }, { "epoch": 0.48, "learning_rate": 6.223152628114537e-06, "loss": 0.0948, "step": 2396 }, { "epoch": 0.48, "learning_rate": 6.219767730685329e-06, "loss": 0.0935, "step": 2397 }, { "epoch": 0.48, "learning_rate": 6.216382238756147e-06, "loss": 0.0898, "step": 2398 }, { "epoch": 0.48, "learning_rate": 6.212996153977038e-06, "loss": 0.0966, "step": 2399 }, { "epoch": 0.48, "learning_rate": 6.209609477998339e-06, "loss": 0.1039, "step": 2400 }, { "epoch": 0.48, "learning_rate": 6.206222212470675e-06, "loss": 0.1146, "step": 2401 }, { "epoch": 0.48, "learning_rate": 6.202834359044959e-06, "loss": 0.0961, "step": 2402 }, { "epoch": 0.48, "learning_rate": 6.199445919372388e-06, "loss": 0.1021, "step": 2403 }, { "epoch": 0.48, "learning_rate": 6.1960568951044475e-06, "loss": 0.0932, "step": 2404 }, { "epoch": 0.48, "learning_rate": 6.192667287892905e-06, "loss": 0.0892, "step": 2405 }, { "epoch": 0.48, "learning_rate": 6.189277099389816e-06, "loss": 0.0911, "step": 2406 }, { "epoch": 0.48, "learning_rate": 6.185886331247516e-06, "loss": 0.0965, "step": 2407 }, { "epoch": 0.48, "learning_rate": 6.182494985118625e-06, "loss": 0.0921, "step": 2408 }, { "epoch": 0.48, "learning_rate": 6.179103062656042e-06, "loss": 0.0959, "step": 2409 }, { "epoch": 0.48, "learning_rate": 6.17571056551295e-06, "loss": 0.0925, "step": 2410 }, { "epoch": 0.48, "learning_rate": 6.172317495342812e-06, "loss": 0.0933, "step": 2411 }, { "epoch": 0.48, "learning_rate": 6.168923853799369e-06, "loss": 0.0895, "step": 2412 }, { "epoch": 0.48, "learning_rate": 6.16552964253664e-06, "loss": 0.1331, "step": 2413 }, { "epoch": 0.48, "learning_rate": 6.1621348632089205e-06, "loss": 0.0992, "step": 2414 }, { "epoch": 0.48, "learning_rate": 6.158739517470786e-06, "loss": 0.0931, "step": 2415 }, { "epoch": 0.48, "learning_rate": 6.155343606977091e-06, "loss": 0.093, "step": 2416 }, { "epoch": 0.48, "learning_rate": 6.151947133382954e-06, "loss": 0.0973, "step": 2417 }, { "epoch": 0.48, "learning_rate": 6.148550098343778e-06, "loss": 0.0954, "step": 2418 }, { "epoch": 0.48, "learning_rate": 6.145152503515239e-06, "loss": 0.0983, "step": 2419 }, { "epoch": 0.48, "learning_rate": 6.141754350553279e-06, "loss": 0.0887, "step": 2420 }, { "epoch": 0.48, "learning_rate": 6.138355641114121e-06, "loss": 0.0955, "step": 2421 }, { "epoch": 0.48, "learning_rate": 6.134956376854251e-06, "loss": 0.0927, "step": 2422 }, { "epoch": 0.48, "learning_rate": 6.13155655943043e-06, "loss": 0.1028, "step": 2423 }, { "epoch": 0.48, "learning_rate": 6.128156190499688e-06, "loss": 0.1018, "step": 2424 }, { "epoch": 0.48, "learning_rate": 6.124755271719326e-06, "loss": 0.098, "step": 2425 }, { "epoch": 0.49, "learning_rate": 6.121353804746907e-06, "loss": 0.0871, "step": 2426 }, { "epoch": 0.49, "learning_rate": 6.117951791240265e-06, "loss": 0.0857, "step": 2427 }, { "epoch": 0.49, "learning_rate": 6.114549232857503e-06, "loss": 0.0886, "step": 2428 }, { "epoch": 0.49, "learning_rate": 6.111146131256983e-06, "loss": 0.0955, "step": 2429 }, { "epoch": 0.49, "learning_rate": 6.107742488097338e-06, "loss": 0.1017, "step": 2430 }, { "epoch": 0.49, "learning_rate": 6.10433830503746e-06, "loss": 0.0909, "step": 2431 }, { "epoch": 0.49, "learning_rate": 6.100933583736508e-06, "loss": 0.0898, "step": 2432 }, { "epoch": 0.49, "learning_rate": 6.097528325853903e-06, "loss": 0.1066, "step": 2433 }, { "epoch": 0.49, "learning_rate": 6.094122533049324e-06, "loss": 0.1015, "step": 2434 }, { "epoch": 0.49, "learning_rate": 6.090716206982714e-06, "loss": 0.1049, "step": 2435 }, { "epoch": 0.49, "learning_rate": 6.087309349314275e-06, "loss": 0.0956, "step": 2436 }, { "epoch": 0.49, "learning_rate": 6.083901961704467e-06, "loss": 0.0971, "step": 2437 }, { "epoch": 0.49, "learning_rate": 6.080494045814011e-06, "loss": 0.0927, "step": 2438 }, { "epoch": 0.49, "learning_rate": 6.077085603303883e-06, "loss": 0.095, "step": 2439 }, { "epoch": 0.49, "learning_rate": 6.073676635835317e-06, "loss": 0.1053, "step": 2440 }, { "epoch": 0.49, "learning_rate": 6.0702671450698005e-06, "loss": 0.1412, "step": 2441 }, { "epoch": 0.49, "learning_rate": 6.066857132669081e-06, "loss": 0.0897, "step": 2442 }, { "epoch": 0.49, "learning_rate": 6.0634466002951545e-06, "loss": 0.1159, "step": 2443 }, { "epoch": 0.49, "learning_rate": 6.060035549610275e-06, "loss": 0.0961, "step": 2444 }, { "epoch": 0.49, "learning_rate": 6.056623982276945e-06, "loss": 0.0887, "step": 2445 }, { "epoch": 0.49, "learning_rate": 6.0532118999579206e-06, "loss": 0.0976, "step": 2446 }, { "epoch": 0.49, "learning_rate": 6.049799304316214e-06, "loss": 0.0957, "step": 2447 }, { "epoch": 0.49, "learning_rate": 6.046386197015076e-06, "loss": 0.0966, "step": 2448 }, { "epoch": 0.49, "learning_rate": 6.04297257971802e-06, "loss": 0.102, "step": 2449 }, { "epoch": 0.49, "learning_rate": 6.039558454088796e-06, "loss": 0.1017, "step": 2450 }, { "epoch": 0.49, "learning_rate": 6.036143821791413e-06, "loss": 0.0912, "step": 2451 }, { "epoch": 0.49, "learning_rate": 6.032728684490118e-06, "loss": 0.0954, "step": 2452 }, { "epoch": 0.49, "learning_rate": 6.029313043849407e-06, "loss": 0.0969, "step": 2453 }, { "epoch": 0.49, "learning_rate": 6.025896901534023e-06, "loss": 0.0948, "step": 2454 }, { "epoch": 0.49, "learning_rate": 6.022480259208951e-06, "loss": 0.0993, "step": 2455 }, { "epoch": 0.49, "learning_rate": 6.019063118539425e-06, "loss": 0.0977, "step": 2456 }, { "epoch": 0.49, "learning_rate": 6.015645481190912e-06, "loss": 0.0921, "step": 2457 }, { "epoch": 0.49, "learning_rate": 6.01222734882913e-06, "loss": 0.0912, "step": 2458 }, { "epoch": 0.49, "learning_rate": 6.008808723120035e-06, "loss": 0.1001, "step": 2459 }, { "epoch": 0.49, "learning_rate": 6.005389605729824e-06, "loss": 0.0938, "step": 2460 }, { "epoch": 0.49, "learning_rate": 6.001969998324932e-06, "loss": 0.0899, "step": 2461 }, { "epoch": 0.49, "learning_rate": 5.9985499025720354e-06, "loss": 0.096, "step": 2462 }, { "epoch": 0.49, "learning_rate": 5.995129320138047e-06, "loss": 0.0942, "step": 2463 }, { "epoch": 0.49, "learning_rate": 5.991708252690117e-06, "loss": 0.0929, "step": 2464 }, { "epoch": 0.49, "learning_rate": 5.988286701895631e-06, "loss": 0.091, "step": 2465 }, { "epoch": 0.49, "learning_rate": 5.984864669422214e-06, "loss": 0.0914, "step": 2466 }, { "epoch": 0.49, "learning_rate": 5.98144215693772e-06, "loss": 0.1006, "step": 2467 }, { "epoch": 0.49, "learning_rate": 5.978019166110242e-06, "loss": 0.0976, "step": 2468 }, { "epoch": 0.49, "learning_rate": 5.974595698608103e-06, "loss": 0.0904, "step": 2469 }, { "epoch": 0.49, "learning_rate": 5.97117175609986e-06, "loss": 0.1386, "step": 2470 }, { "epoch": 0.49, "learning_rate": 5.967747340254303e-06, "loss": 0.0973, "step": 2471 }, { "epoch": 0.49, "learning_rate": 5.964322452740445e-06, "loss": 0.0973, "step": 2472 }, { "epoch": 0.49, "learning_rate": 5.960897095227541e-06, "loss": 0.0955, "step": 2473 }, { "epoch": 0.49, "learning_rate": 5.957471269385065e-06, "loss": 0.0897, "step": 2474 }, { "epoch": 0.49, "learning_rate": 5.954044976882725e-06, "loss": 0.0967, "step": 2475 }, { "epoch": 0.5, "learning_rate": 5.950618219390451e-06, "loss": 0.0948, "step": 2476 }, { "epoch": 0.5, "learning_rate": 5.947190998578407e-06, "loss": 0.1037, "step": 2477 }, { "epoch": 0.5, "learning_rate": 5.943763316116977e-06, "loss": 0.1018, "step": 2478 }, { "epoch": 0.5, "learning_rate": 5.94033517367677e-06, "loss": 0.086, "step": 2479 }, { "epoch": 0.5, "learning_rate": 5.936906572928625e-06, "loss": 0.0916, "step": 2480 }, { "epoch": 0.5, "learning_rate": 5.933477515543595e-06, "loss": 0.097, "step": 2481 }, { "epoch": 0.5, "learning_rate": 5.930048003192965e-06, "loss": 0.0944, "step": 2482 }, { "epoch": 0.5, "learning_rate": 5.926618037548237e-06, "loss": 0.1, "step": 2483 }, { "epoch": 0.5, "learning_rate": 5.923187620281135e-06, "loss": 0.0904, "step": 2484 }, { "epoch": 0.5, "learning_rate": 5.919756753063601e-06, "loss": 0.0972, "step": 2485 }, { "epoch": 0.5, "learning_rate": 5.9163254375677995e-06, "loss": 0.0915, "step": 2486 }, { "epoch": 0.5, "learning_rate": 5.912893675466112e-06, "loss": 0.0943, "step": 2487 }, { "epoch": 0.5, "learning_rate": 5.909461468431135e-06, "loss": 0.1061, "step": 2488 }, { "epoch": 0.5, "learning_rate": 5.906028818135687e-06, "loss": 0.09, "step": 2489 }, { "epoch": 0.5, "learning_rate": 5.902595726252801e-06, "loss": 0.0948, "step": 2490 }, { "epoch": 0.5, "learning_rate": 5.8991621944557224e-06, "loss": 0.0945, "step": 2491 }, { "epoch": 0.5, "learning_rate": 5.8957282244179125e-06, "loss": 0.1137, "step": 2492 }, { "epoch": 0.5, "learning_rate": 5.892293817813048e-06, "loss": 0.1043, "step": 2493 }, { "epoch": 0.5, "learning_rate": 5.8888589763150165e-06, "loss": 0.0961, "step": 2494 }, { "epoch": 0.5, "learning_rate": 5.885423701597918e-06, "loss": 0.0968, "step": 2495 }, { "epoch": 0.5, "learning_rate": 5.881987995336062e-06, "loss": 0.1028, "step": 2496 }, { "epoch": 0.5, "learning_rate": 5.878551859203974e-06, "loss": 0.1033, "step": 2497 }, { "epoch": 0.5, "learning_rate": 5.8751152948763815e-06, "loss": 0.0942, "step": 2498 }, { "epoch": 0.5, "learning_rate": 5.871678304028224e-06, "loss": 0.0994, "step": 2499 }, { "epoch": 0.5, "learning_rate": 5.8682408883346535e-06, "loss": 0.0928, "step": 2500 }, { "epoch": 0.5, "learning_rate": 5.8648030494710195e-06, "loss": 0.0967, "step": 2501 }, { "epoch": 0.5, "learning_rate": 5.8613647891128845e-06, "loss": 0.1103, "step": 2502 }, { "epoch": 0.5, "learning_rate": 5.857926108936015e-06, "loss": 0.1006, "step": 2503 }, { "epoch": 0.5, "learning_rate": 5.854487010616384e-06, "loss": 0.095, "step": 2504 }, { "epoch": 0.5, "learning_rate": 5.851047495830163e-06, "loss": 0.0893, "step": 2505 }, { "epoch": 0.5, "learning_rate": 5.847607566253732e-06, "loss": 0.0928, "step": 2506 }, { "epoch": 0.5, "learning_rate": 5.844167223563669e-06, "loss": 0.0915, "step": 2507 }, { "epoch": 0.5, "learning_rate": 5.840726469436758e-06, "loss": 0.095, "step": 2508 }, { "epoch": 0.5, "learning_rate": 5.837285305549978e-06, "loss": 0.0979, "step": 2509 }, { "epoch": 0.5, "learning_rate": 5.8338437335805124e-06, "loss": 0.1, "step": 2510 }, { "epoch": 0.5, "learning_rate": 5.83040175520574e-06, "loss": 0.0921, "step": 2511 }, { "epoch": 0.5, "learning_rate": 5.826959372103239e-06, "loss": 0.0954, "step": 2512 }, { "epoch": 0.5, "learning_rate": 5.823516585950787e-06, "loss": 0.1018, "step": 2513 }, { "epoch": 0.5, "learning_rate": 5.8200733984263556e-06, "loss": 0.0901, "step": 2514 }, { "epoch": 0.5, "learning_rate": 5.816629811208112e-06, "loss": 0.0903, "step": 2515 }, { "epoch": 0.5, "learning_rate": 5.813185825974419e-06, "loss": 0.5963, "step": 2516 }, { "epoch": 0.5, "learning_rate": 5.809741444403831e-06, "loss": 0.1171, "step": 2517 }, { "epoch": 0.5, "learning_rate": 5.8062966681751046e-06, "loss": 0.1026, "step": 2518 }, { "epoch": 0.5, "learning_rate": 5.802851498967173e-06, "loss": 0.0969, "step": 2519 }, { "epoch": 0.5, "learning_rate": 5.799405938459175e-06, "loss": 0.096, "step": 2520 }, { "epoch": 0.5, "learning_rate": 5.795959988330434e-06, "loss": 0.1123, "step": 2521 }, { "epoch": 0.5, "learning_rate": 5.792513650260465e-06, "loss": 0.0997, "step": 2522 }, { "epoch": 0.5, "learning_rate": 5.78906692592897e-06, "loss": 0.0952, "step": 2523 }, { "epoch": 0.5, "learning_rate": 5.78561981701584e-06, "loss": 0.0959, "step": 2524 }, { "epoch": 0.51, "learning_rate": 5.782172325201155e-06, "loss": 0.0925, "step": 2525 }, { "epoch": 0.51, "learning_rate": 5.778724452165181e-06, "loss": 0.1055, "step": 2526 }, { "epoch": 0.51, "learning_rate": 5.77527619958837e-06, "loss": 0.0998, "step": 2527 }, { "epoch": 0.51, "learning_rate": 5.771827569151357e-06, "loss": 0.0908, "step": 2528 }, { "epoch": 0.51, "learning_rate": 5.768378562534962e-06, "loss": 0.1039, "step": 2529 }, { "epoch": 0.51, "learning_rate": 5.764929181420191e-06, "loss": 0.0955, "step": 2530 }, { "epoch": 0.51, "learning_rate": 5.761479427488229e-06, "loss": 0.101, "step": 2531 }, { "epoch": 0.51, "learning_rate": 5.7580293024204455e-06, "loss": 0.0879, "step": 2532 }, { "epoch": 0.51, "learning_rate": 5.7545788078983875e-06, "loss": 0.1172, "step": 2533 }, { "epoch": 0.51, "learning_rate": 5.751127945603786e-06, "loss": 0.0986, "step": 2534 }, { "epoch": 0.51, "learning_rate": 5.747676717218549e-06, "loss": 0.0908, "step": 2535 }, { "epoch": 0.51, "learning_rate": 5.744225124424762e-06, "loss": 0.0997, "step": 2536 }, { "epoch": 0.51, "learning_rate": 5.740773168904691e-06, "loss": 0.0975, "step": 2537 }, { "epoch": 0.51, "learning_rate": 5.737320852340776e-06, "loss": 0.0965, "step": 2538 }, { "epoch": 0.51, "learning_rate": 5.733868176415633e-06, "loss": 0.0953, "step": 2539 }, { "epoch": 0.51, "learning_rate": 5.730415142812059e-06, "loss": 0.1, "step": 2540 }, { "epoch": 0.51, "learning_rate": 5.726961753213016e-06, "loss": 0.0926, "step": 2541 }, { "epoch": 0.51, "learning_rate": 5.723508009301646e-06, "loss": 0.0956, "step": 2542 }, { "epoch": 0.51, "learning_rate": 5.720053912761261e-06, "loss": 0.1093, "step": 2543 }, { "epoch": 0.51, "learning_rate": 5.716599465275347e-06, "loss": 0.1023, "step": 2544 }, { "epoch": 0.51, "learning_rate": 5.7131446685275595e-06, "loss": 0.1031, "step": 2545 }, { "epoch": 0.51, "learning_rate": 5.709689524201723e-06, "loss": 0.0964, "step": 2546 }, { "epoch": 0.51, "learning_rate": 5.706234033981835e-06, "loss": 0.0897, "step": 2547 }, { "epoch": 0.51, "learning_rate": 5.702778199552055e-06, "loss": 0.0947, "step": 2548 }, { "epoch": 0.51, "learning_rate": 5.6993220225967214e-06, "loss": 0.1046, "step": 2549 }, { "epoch": 0.51, "learning_rate": 5.695865504800328e-06, "loss": 0.0949, "step": 2550 }, { "epoch": 0.51, "learning_rate": 5.692408647847542e-06, "loss": 0.095, "step": 2551 }, { "epoch": 0.51, "learning_rate": 5.68895145342319e-06, "loss": 0.0997, "step": 2552 }, { "epoch": 0.51, "learning_rate": 5.685493923212273e-06, "loss": 0.1033, "step": 2553 }, { "epoch": 0.51, "learning_rate": 5.682036058899942e-06, "loss": 0.0905, "step": 2554 }, { "epoch": 0.51, "learning_rate": 5.678577862171523e-06, "loss": 0.1373, "step": 2555 }, { "epoch": 0.51, "learning_rate": 5.675119334712496e-06, "loss": 0.0945, "step": 2556 }, { "epoch": 0.51, "learning_rate": 5.671660478208508e-06, "loss": 0.1052, "step": 2557 }, { "epoch": 0.51, "learning_rate": 5.668201294345363e-06, "loss": 0.0924, "step": 2558 }, { "epoch": 0.51, "learning_rate": 5.6647417848090225e-06, "loss": 0.0916, "step": 2559 }, { "epoch": 0.51, "learning_rate": 5.661281951285613e-06, "loss": 0.0905, "step": 2560 }, { "epoch": 0.51, "learning_rate": 5.657821795461413e-06, "loss": 0.0933, "step": 2561 }, { "epoch": 0.51, "learning_rate": 5.654361319022862e-06, "loss": 0.095, "step": 2562 }, { "epoch": 0.51, "learning_rate": 5.650900523656553e-06, "loss": 0.0887, "step": 2563 }, { "epoch": 0.51, "learning_rate": 5.647439411049235e-06, "loss": 0.0973, "step": 2564 }, { "epoch": 0.51, "learning_rate": 5.643977982887815e-06, "loss": 0.1152, "step": 2565 }, { "epoch": 0.51, "learning_rate": 5.640516240859348e-06, "loss": 0.1112, "step": 2566 }, { "epoch": 0.51, "learning_rate": 5.6370541866510476e-06, "loss": 0.1043, "step": 2567 }, { "epoch": 0.51, "learning_rate": 5.633591821950274e-06, "loss": 0.0865, "step": 2568 }, { "epoch": 0.51, "learning_rate": 5.630129148444543e-06, "loss": 0.0888, "step": 2569 }, { "epoch": 0.51, "learning_rate": 5.626666167821522e-06, "loss": 0.0926, "step": 2570 }, { "epoch": 0.51, "learning_rate": 5.623202881769023e-06, "loss": 0.0922, "step": 2571 }, { "epoch": 0.51, "learning_rate": 5.6197392919750095e-06, "loss": 0.1045, "step": 2572 }, { "epoch": 0.51, "learning_rate": 5.616275400127594e-06, "loss": 0.0948, "step": 2573 }, { "epoch": 0.51, "learning_rate": 5.612811207915034e-06, "loss": 0.1121, "step": 2574 }, { "epoch": 0.52, "learning_rate": 5.609346717025738e-06, "loss": 0.0973, "step": 2575 }, { "epoch": 0.52, "learning_rate": 5.605881929148254e-06, "loss": 0.0892, "step": 2576 }, { "epoch": 0.52, "learning_rate": 5.6024168459712765e-06, "loss": 0.0888, "step": 2577 }, { "epoch": 0.52, "learning_rate": 5.598951469183649e-06, "loss": 0.092, "step": 2578 }, { "epoch": 0.52, "learning_rate": 5.59548580047435e-06, "loss": 0.1048, "step": 2579 }, { "epoch": 0.52, "learning_rate": 5.592019841532507e-06, "loss": 0.101, "step": 2580 }, { "epoch": 0.52, "learning_rate": 5.588553594047382e-06, "loss": 0.0935, "step": 2581 }, { "epoch": 0.52, "learning_rate": 5.585087059708389e-06, "loss": 0.0958, "step": 2582 }, { "epoch": 0.52, "learning_rate": 5.581620240205068e-06, "loss": 0.0861, "step": 2583 }, { "epoch": 0.52, "learning_rate": 5.578153137227109e-06, "loss": 0.089, "step": 2584 }, { "epoch": 0.52, "learning_rate": 5.5746857524643335e-06, "loss": 0.0876, "step": 2585 }, { "epoch": 0.52, "learning_rate": 5.5712180876067045e-06, "loss": 0.1173, "step": 2586 }, { "epoch": 0.52, "learning_rate": 5.567750144344318e-06, "loss": 0.1032, "step": 2587 }, { "epoch": 0.52, "learning_rate": 5.5642819243674085e-06, "loss": 0.0895, "step": 2588 }, { "epoch": 0.52, "learning_rate": 5.560813429366345e-06, "loss": 0.1046, "step": 2589 }, { "epoch": 0.52, "learning_rate": 5.557344661031628e-06, "loss": 0.0931, "step": 2590 }, { "epoch": 0.52, "learning_rate": 5.553875621053893e-06, "loss": 0.094, "step": 2591 }, { "epoch": 0.52, "learning_rate": 5.5504063111239116e-06, "loss": 0.0958, "step": 2592 }, { "epoch": 0.52, "learning_rate": 5.546936732932578e-06, "loss": 0.0951, "step": 2593 }, { "epoch": 0.52, "learning_rate": 5.543466888170927e-06, "loss": 0.0905, "step": 2594 }, { "epoch": 0.52, "learning_rate": 5.539996778530114e-06, "loss": 0.0918, "step": 2595 }, { "epoch": 0.52, "learning_rate": 5.536526405701433e-06, "loss": 0.0958, "step": 2596 }, { "epoch": 0.52, "learning_rate": 5.5330557713763e-06, "loss": 0.0906, "step": 2597 }, { "epoch": 0.52, "learning_rate": 5.52958487724626e-06, "loss": 0.0966, "step": 2598 }, { "epoch": 0.52, "learning_rate": 5.526113725002984e-06, "loss": 0.0875, "step": 2599 }, { "epoch": 0.52, "learning_rate": 5.522642316338268e-06, "loss": 0.0906, "step": 2600 }, { "epoch": 0.52, "learning_rate": 5.519170652944037e-06, "loss": 0.0862, "step": 2601 }, { "epoch": 0.52, "learning_rate": 5.515698736512337e-06, "loss": 0.0922, "step": 2602 }, { "epoch": 0.52, "learning_rate": 5.512226568735338e-06, "loss": 0.0931, "step": 2603 }, { "epoch": 0.52, "learning_rate": 5.508754151305332e-06, "loss": 0.0861, "step": 2604 }, { "epoch": 0.52, "learning_rate": 5.505281485914732e-06, "loss": 0.0929, "step": 2605 }, { "epoch": 0.52, "learning_rate": 5.5018085742560745e-06, "loss": 0.0905, "step": 2606 }, { "epoch": 0.52, "learning_rate": 5.498335418022015e-06, "loss": 0.0908, "step": 2607 }, { "epoch": 0.52, "learning_rate": 5.4948620189053255e-06, "loss": 0.0969, "step": 2608 }, { "epoch": 0.52, "learning_rate": 5.491388378598899e-06, "loss": 0.0967, "step": 2609 }, { "epoch": 0.52, "learning_rate": 5.487914498795748e-06, "loss": 0.0887, "step": 2610 }, { "epoch": 0.52, "learning_rate": 5.484440381188997e-06, "loss": 0.1037, "step": 2611 }, { "epoch": 0.52, "learning_rate": 5.480966027471889e-06, "loss": 0.0967, "step": 2612 }, { "epoch": 0.52, "learning_rate": 5.477491439337782e-06, "loss": 0.0971, "step": 2613 }, { "epoch": 0.52, "learning_rate": 5.474016618480147e-06, "loss": 0.0906, "step": 2614 }, { "epoch": 0.52, "learning_rate": 5.470541566592573e-06, "loss": 0.0941, "step": 2615 }, { "epoch": 0.52, "learning_rate": 5.467066285368754e-06, "loss": 0.0912, "step": 2616 }, { "epoch": 0.52, "learning_rate": 5.463590776502501e-06, "loss": 0.0939, "step": 2617 }, { "epoch": 0.52, "learning_rate": 5.460115041687737e-06, "loss": 0.0953, "step": 2618 }, { "epoch": 0.52, "learning_rate": 5.456639082618489e-06, "loss": 0.0922, "step": 2619 }, { "epoch": 0.52, "learning_rate": 5.453162900988902e-06, "loss": 0.0978, "step": 2620 }, { "epoch": 0.52, "learning_rate": 5.449686498493219e-06, "loss": 0.0939, "step": 2621 }, { "epoch": 0.52, "learning_rate": 5.446209876825803e-06, "loss": 0.0949, "step": 2622 }, { "epoch": 0.52, "learning_rate": 5.442733037681112e-06, "loss": 0.114, "step": 2623 }, { "epoch": 0.52, "learning_rate": 5.439255982753717e-06, "loss": 0.0876, "step": 2624 }, { "epoch": 0.53, "learning_rate": 5.435778713738292e-06, "loss": 0.0964, "step": 2625 }, { "epoch": 0.53, "learning_rate": 5.432301232329615e-06, "loss": 0.0951, "step": 2626 }, { "epoch": 0.53, "learning_rate": 5.42882354022257e-06, "loss": 0.1108, "step": 2627 }, { "epoch": 0.53, "learning_rate": 5.425345639112141e-06, "loss": 0.1074, "step": 2628 }, { "epoch": 0.53, "learning_rate": 5.4218675306934145e-06, "loss": 0.1, "step": 2629 }, { "epoch": 0.53, "learning_rate": 5.41838921666158e-06, "loss": 0.0954, "step": 2630 }, { "epoch": 0.53, "learning_rate": 5.41491069871192e-06, "loss": 0.095, "step": 2631 }, { "epoch": 0.53, "learning_rate": 5.411431978539829e-06, "loss": 0.0894, "step": 2632 }, { "epoch": 0.53, "learning_rate": 5.4079530578407895e-06, "loss": 0.0926, "step": 2633 }, { "epoch": 0.53, "learning_rate": 5.404473938310384e-06, "loss": 0.0892, "step": 2634 }, { "epoch": 0.53, "learning_rate": 5.400994621644294e-06, "loss": 0.0971, "step": 2635 }, { "epoch": 0.53, "learning_rate": 5.3975151095383e-06, "loss": 0.0912, "step": 2636 }, { "epoch": 0.53, "learning_rate": 5.394035403688268e-06, "loss": 0.0922, "step": 2637 }, { "epoch": 0.53, "learning_rate": 5.390555505790168e-06, "loss": 0.1004, "step": 2638 }, { "epoch": 0.53, "learning_rate": 5.3870754175400595e-06, "loss": 0.0883, "step": 2639 }, { "epoch": 0.53, "learning_rate": 5.383595140634093e-06, "loss": 0.0877, "step": 2640 }, { "epoch": 0.53, "learning_rate": 5.380114676768516e-06, "loss": 0.0934, "step": 2641 }, { "epoch": 0.53, "learning_rate": 5.376634027639664e-06, "loss": 0.0914, "step": 2642 }, { "epoch": 0.53, "learning_rate": 5.373153194943962e-06, "loss": 0.0932, "step": 2643 }, { "epoch": 0.53, "learning_rate": 5.3696721803779265e-06, "loss": 0.0997, "step": 2644 }, { "epoch": 0.53, "learning_rate": 5.366190985638159e-06, "loss": 0.0908, "step": 2645 }, { "epoch": 0.53, "learning_rate": 5.362709612421355e-06, "loss": 0.0951, "step": 2646 }, { "epoch": 0.53, "learning_rate": 5.359228062424292e-06, "loss": 0.0969, "step": 2647 }, { "epoch": 0.53, "learning_rate": 5.355746337343835e-06, "loss": 0.0971, "step": 2648 }, { "epoch": 0.53, "learning_rate": 5.352264438876935e-06, "loss": 0.0964, "step": 2649 }, { "epoch": 0.53, "learning_rate": 5.348782368720627e-06, "loss": 0.1053, "step": 2650 }, { "epoch": 0.53, "learning_rate": 5.345300128572031e-06, "loss": 0.1037, "step": 2651 }, { "epoch": 0.53, "learning_rate": 5.341817720128344e-06, "loss": 0.0926, "step": 2652 }, { "epoch": 0.53, "learning_rate": 5.338335145086855e-06, "loss": 0.1033, "step": 2653 }, { "epoch": 0.53, "learning_rate": 5.334852405144926e-06, "loss": 0.094, "step": 2654 }, { "epoch": 0.53, "learning_rate": 5.3313695020000026e-06, "loss": 0.0884, "step": 2655 }, { "epoch": 0.53, "learning_rate": 5.327886437349609e-06, "loss": 0.0895, "step": 2656 }, { "epoch": 0.53, "learning_rate": 5.3244032128913485e-06, "loss": 0.0949, "step": 2657 }, { "epoch": 0.53, "learning_rate": 5.320919830322903e-06, "loss": 0.09, "step": 2658 }, { "epoch": 0.53, "learning_rate": 5.317436291342031e-06, "loss": 0.0985, "step": 2659 }, { "epoch": 0.53, "learning_rate": 5.3139525976465675e-06, "loss": 0.1015, "step": 2660 }, { "epoch": 0.53, "learning_rate": 5.310468750934421e-06, "loss": 0.1162, "step": 2661 }, { "epoch": 0.53, "learning_rate": 5.306984752903578e-06, "loss": 0.103, "step": 2662 }, { "epoch": 0.53, "learning_rate": 5.303500605252095e-06, "loss": 0.095, "step": 2663 }, { "epoch": 0.53, "learning_rate": 5.300016309678104e-06, "loss": 0.0979, "step": 2664 }, { "epoch": 0.53, "learning_rate": 5.296531867879809e-06, "loss": 0.0891, "step": 2665 }, { "epoch": 0.53, "learning_rate": 5.293047281555482e-06, "loss": 0.0938, "step": 2666 }, { "epoch": 0.53, "learning_rate": 5.289562552403472e-06, "loss": 0.0881, "step": 2667 }, { "epoch": 0.53, "learning_rate": 5.2860776821221915e-06, "loss": 0.1031, "step": 2668 }, { "epoch": 0.53, "learning_rate": 5.282592672410124e-06, "loss": 0.1054, "step": 2669 }, { "epoch": 0.53, "learning_rate": 5.27910752496582e-06, "loss": 0.1038, "step": 2670 }, { "epoch": 0.53, "learning_rate": 5.275622241487899e-06, "loss": 0.1019, "step": 2671 }, { "epoch": 0.53, "learning_rate": 5.272136823675046e-06, "loss": 0.0936, "step": 2672 }, { "epoch": 0.53, "learning_rate": 5.268651273226011e-06, "loss": 0.0876, "step": 2673 }, { "epoch": 0.53, "learning_rate": 5.26516559183961e-06, "loss": 0.0968, "step": 2674 }, { "epoch": 0.54, "learning_rate": 5.2616797812147205e-06, "loss": 0.0934, "step": 2675 }, { "epoch": 0.54, "learning_rate": 5.258193843050283e-06, "loss": 0.1006, "step": 2676 }, { "epoch": 0.54, "learning_rate": 5.254707779045305e-06, "loss": 0.1008, "step": 2677 }, { "epoch": 0.54, "learning_rate": 5.251221590898848e-06, "loss": 0.093, "step": 2678 }, { "epoch": 0.54, "learning_rate": 5.247735280310041e-06, "loss": 0.0859, "step": 2679 }, { "epoch": 0.54, "learning_rate": 5.244248848978067e-06, "loss": 0.0884, "step": 2680 }, { "epoch": 0.54, "learning_rate": 5.240762298602171e-06, "loss": 0.0852, "step": 2681 }, { "epoch": 0.54, "learning_rate": 5.237275630881658e-06, "loss": 0.0909, "step": 2682 }, { "epoch": 0.54, "learning_rate": 5.233788847515882e-06, "loss": 0.0923, "step": 2683 }, { "epoch": 0.54, "learning_rate": 5.230301950204261e-06, "loss": 0.0906, "step": 2684 }, { "epoch": 0.54, "learning_rate": 5.226814940646268e-06, "loss": 0.0905, "step": 2685 }, { "epoch": 0.54, "learning_rate": 5.223327820541432e-06, "loss": 0.0861, "step": 2686 }, { "epoch": 0.54, "learning_rate": 5.219840591589325e-06, "loss": 0.0912, "step": 2687 }, { "epoch": 0.54, "learning_rate": 5.216353255489586e-06, "loss": 0.1518, "step": 2688 }, { "epoch": 0.54, "learning_rate": 5.212865813941899e-06, "loss": 0.0937, "step": 2689 }, { "epoch": 0.54, "learning_rate": 5.209378268645998e-06, "loss": 0.0942, "step": 2690 }, { "epoch": 0.54, "learning_rate": 5.205890621301676e-06, "loss": 0.0905, "step": 2691 }, { "epoch": 0.54, "learning_rate": 5.202402873608763e-06, "loss": 0.0976, "step": 2692 }, { "epoch": 0.54, "learning_rate": 5.19891502726715e-06, "loss": 0.0922, "step": 2693 }, { "epoch": 0.54, "learning_rate": 5.195427083976768e-06, "loss": 0.0925, "step": 2694 }, { "epoch": 0.54, "learning_rate": 5.1919390454376e-06, "loss": 0.0956, "step": 2695 }, { "epoch": 0.54, "learning_rate": 5.188450913349674e-06, "loss": 0.0983, "step": 2696 }, { "epoch": 0.54, "learning_rate": 5.18496268941306e-06, "loss": 0.1122, "step": 2697 }, { "epoch": 0.54, "learning_rate": 5.18147437532788e-06, "loss": 0.088, "step": 2698 }, { "epoch": 0.54, "learning_rate": 5.177985972794293e-06, "loss": 0.1296, "step": 2699 }, { "epoch": 0.54, "learning_rate": 5.174497483512506e-06, "loss": 0.1019, "step": 2700 }, { "epoch": 0.54, "learning_rate": 5.171008909182765e-06, "loss": 0.0969, "step": 2701 }, { "epoch": 0.54, "learning_rate": 5.167520251505358e-06, "loss": 0.0954, "step": 2702 }, { "epoch": 0.54, "learning_rate": 5.164031512180616e-06, "loss": 0.0931, "step": 2703 }, { "epoch": 0.54, "learning_rate": 5.160542692908909e-06, "loss": 0.1047, "step": 2704 }, { "epoch": 0.54, "learning_rate": 5.157053795390642e-06, "loss": 0.0932, "step": 2705 }, { "epoch": 0.54, "learning_rate": 5.153564821326265e-06, "loss": 0.0983, "step": 2706 }, { "epoch": 0.54, "learning_rate": 5.150075772416256e-06, "loss": 0.0915, "step": 2707 }, { "epoch": 0.54, "learning_rate": 5.146586650361143e-06, "loss": 0.0923, "step": 2708 }, { "epoch": 0.54, "learning_rate": 5.143097456861474e-06, "loss": 0.0905, "step": 2709 }, { "epoch": 0.54, "learning_rate": 5.139608193617846e-06, "loss": 0.0928, "step": 2710 }, { "epoch": 0.54, "learning_rate": 5.136118862330876e-06, "loss": 0.0998, "step": 2711 }, { "epoch": 0.54, "learning_rate": 5.13262946470123e-06, "loss": 0.1025, "step": 2712 }, { "epoch": 0.54, "learning_rate": 5.1291400024295946e-06, "loss": 0.0908, "step": 2713 }, { "epoch": 0.54, "learning_rate": 5.1256504772166885e-06, "loss": 0.0887, "step": 2714 }, { "epoch": 0.54, "learning_rate": 5.1221608907632665e-06, "loss": 0.106, "step": 2715 }, { "epoch": 0.54, "learning_rate": 5.118671244770111e-06, "loss": 0.09, "step": 2716 }, { "epoch": 0.54, "learning_rate": 5.115181540938032e-06, "loss": 0.0883, "step": 2717 }, { "epoch": 0.54, "learning_rate": 5.111691780967869e-06, "loss": 0.0952, "step": 2718 }, { "epoch": 0.54, "learning_rate": 5.1082019665604895e-06, "loss": 0.0894, "step": 2719 }, { "epoch": 0.54, "learning_rate": 5.1047120994167855e-06, "loss": 0.09, "step": 2720 }, { "epoch": 0.54, "learning_rate": 5.101222181237676e-06, "loss": 0.0889, "step": 2721 }, { "epoch": 0.54, "learning_rate": 5.097732213724107e-06, "loss": 0.0944, "step": 2722 }, { "epoch": 0.54, "learning_rate": 5.0942421985770415e-06, "loss": 0.0876, "step": 2723 }, { "epoch": 0.54, "learning_rate": 5.090752137497474e-06, "loss": 0.0973, "step": 2724 }, { "epoch": 0.55, "learning_rate": 5.087262032186418e-06, "loss": 0.0923, "step": 2725 }, { "epoch": 0.55, "learning_rate": 5.083771884344908e-06, "loss": 0.0835, "step": 2726 }, { "epoch": 0.55, "learning_rate": 5.080281695673999e-06, "loss": 0.0854, "step": 2727 }, { "epoch": 0.55, "learning_rate": 5.0767914678747655e-06, "loss": 0.0937, "step": 2728 }, { "epoch": 0.55, "learning_rate": 5.073301202648304e-06, "loss": 0.0853, "step": 2729 }, { "epoch": 0.55, "learning_rate": 5.069810901695727e-06, "loss": 0.0969, "step": 2730 }, { "epoch": 0.55, "learning_rate": 5.066320566718165e-06, "loss": 0.0886, "step": 2731 }, { "epoch": 0.55, "learning_rate": 5.062830199416764e-06, "loss": 0.0894, "step": 2732 }, { "epoch": 0.55, "learning_rate": 5.059339801492687e-06, "loss": 0.0885, "step": 2733 }, { "epoch": 0.55, "learning_rate": 5.055849374647112e-06, "loss": 0.0902, "step": 2734 }, { "epoch": 0.55, "learning_rate": 5.05235892058123e-06, "loss": 0.0934, "step": 2735 }, { "epoch": 0.55, "learning_rate": 5.048868440996246e-06, "loss": 0.085, "step": 2736 }, { "epoch": 0.55, "learning_rate": 5.045377937593376e-06, "loss": 0.0896, "step": 2737 }, { "epoch": 0.55, "learning_rate": 5.041887412073853e-06, "loss": 0.094, "step": 2738 }, { "epoch": 0.55, "learning_rate": 5.038396866138915e-06, "loss": 0.0896, "step": 2739 }, { "epoch": 0.55, "learning_rate": 5.034906301489808e-06, "loss": 0.091, "step": 2740 }, { "epoch": 0.55, "learning_rate": 5.031415719827796e-06, "loss": 0.094, "step": 2741 }, { "epoch": 0.55, "learning_rate": 5.027925122854141e-06, "loss": 0.0887, "step": 2742 }, { "epoch": 0.55, "learning_rate": 5.024434512270123e-06, "loss": 0.0933, "step": 2743 }, { "epoch": 0.55, "learning_rate": 5.0209438897770205e-06, "loss": 0.0997, "step": 2744 }, { "epoch": 0.55, "learning_rate": 5.0174532570761194e-06, "loss": 0.0915, "step": 2745 }, { "epoch": 0.55, "learning_rate": 5.013962615868714e-06, "loss": 0.0986, "step": 2746 }, { "epoch": 0.55, "learning_rate": 5.010471967856096e-06, "loss": 0.0914, "step": 2747 }, { "epoch": 0.55, "learning_rate": 5.006981314739573e-06, "loss": 0.0877, "step": 2748 }, { "epoch": 0.55, "learning_rate": 5.003490658220438e-06, "loss": 0.0922, "step": 2749 }, { "epoch": 0.55, "learning_rate": 5e-06, "loss": 0.0912, "step": 2750 }, { "epoch": 0.55, "learning_rate": 4.996509341779563e-06, "loss": 0.0923, "step": 2751 }, { "epoch": 0.55, "learning_rate": 4.993018685260428e-06, "loss": 0.0853, "step": 2752 }, { "epoch": 0.55, "learning_rate": 4.9895280321439036e-06, "loss": 0.0888, "step": 2753 }, { "epoch": 0.55, "learning_rate": 4.986037384131288e-06, "loss": 0.1033, "step": 2754 }, { "epoch": 0.55, "learning_rate": 4.982546742923883e-06, "loss": 0.0881, "step": 2755 }, { "epoch": 0.55, "learning_rate": 4.979056110222982e-06, "loss": 0.0871, "step": 2756 }, { "epoch": 0.55, "learning_rate": 4.975565487729879e-06, "loss": 0.0893, "step": 2757 }, { "epoch": 0.55, "learning_rate": 4.9720748771458595e-06, "loss": 0.0914, "step": 2758 }, { "epoch": 0.55, "learning_rate": 4.968584280172206e-06, "loss": 0.0881, "step": 2759 }, { "epoch": 0.55, "learning_rate": 4.965093698510192e-06, "loss": 0.1116, "step": 2760 }, { "epoch": 0.55, "learning_rate": 4.961603133861086e-06, "loss": 0.0917, "step": 2761 }, { "epoch": 0.55, "learning_rate": 4.9581125879261476e-06, "loss": 0.0927, "step": 2762 }, { "epoch": 0.55, "learning_rate": 4.954622062406623e-06, "loss": 0.0973, "step": 2763 }, { "epoch": 0.55, "learning_rate": 4.951131559003756e-06, "loss": 0.0906, "step": 2764 }, { "epoch": 0.55, "learning_rate": 4.9476410794187726e-06, "loss": 0.0919, "step": 2765 }, { "epoch": 0.55, "learning_rate": 4.94415062535289e-06, "loss": 0.1012, "step": 2766 }, { "epoch": 0.55, "learning_rate": 4.940660198507315e-06, "loss": 0.095, "step": 2767 }, { "epoch": 0.55, "learning_rate": 4.937169800583237e-06, "loss": 0.1176, "step": 2768 }, { "epoch": 0.55, "learning_rate": 4.933679433281837e-06, "loss": 0.1481, "step": 2769 }, { "epoch": 0.55, "learning_rate": 4.9301890983042744e-06, "loss": 0.0917, "step": 2770 }, { "epoch": 0.55, "learning_rate": 4.926698797351697e-06, "loss": 0.0956, "step": 2771 }, { "epoch": 0.55, "learning_rate": 4.923208532125235e-06, "loss": 0.0873, "step": 2772 }, { "epoch": 0.55, "learning_rate": 4.9197183043260035e-06, "loss": 0.1079, "step": 2773 }, { "epoch": 0.55, "learning_rate": 4.9162281156550945e-06, "loss": 0.0848, "step": 2774 }, { "epoch": 0.56, "learning_rate": 4.9127379678135825e-06, "loss": 0.087, "step": 2775 }, { "epoch": 0.56, "learning_rate": 4.9092478625025266e-06, "loss": 0.0823, "step": 2776 }, { "epoch": 0.56, "learning_rate": 4.90575780142296e-06, "loss": 0.0937, "step": 2777 }, { "epoch": 0.56, "learning_rate": 4.902267786275895e-06, "loss": 0.0822, "step": 2778 }, { "epoch": 0.56, "learning_rate": 4.898777818762325e-06, "loss": 0.1023, "step": 2779 }, { "epoch": 0.56, "learning_rate": 4.895287900583216e-06, "loss": 0.0902, "step": 2780 }, { "epoch": 0.56, "learning_rate": 4.891798033439511e-06, "loss": 0.0995, "step": 2781 }, { "epoch": 0.56, "learning_rate": 4.888308219032133e-06, "loss": 0.0905, "step": 2782 }, { "epoch": 0.56, "learning_rate": 4.88481845906197e-06, "loss": 0.0939, "step": 2783 }, { "epoch": 0.56, "learning_rate": 4.881328755229892e-06, "loss": 0.0923, "step": 2784 }, { "epoch": 0.56, "learning_rate": 4.877839109236735e-06, "loss": 0.0899, "step": 2785 }, { "epoch": 0.56, "learning_rate": 4.874349522783313e-06, "loss": 0.0871, "step": 2786 }, { "epoch": 0.56, "learning_rate": 4.870859997570407e-06, "loss": 0.0934, "step": 2787 }, { "epoch": 0.56, "learning_rate": 4.86737053529877e-06, "loss": 0.0884, "step": 2788 }, { "epoch": 0.56, "learning_rate": 4.863881137669123e-06, "loss": 0.0927, "step": 2789 }, { "epoch": 0.56, "learning_rate": 4.860391806382157e-06, "loss": 0.0892, "step": 2790 }, { "epoch": 0.56, "learning_rate": 4.856902543138528e-06, "loss": 0.0894, "step": 2791 }, { "epoch": 0.56, "learning_rate": 4.853413349638859e-06, "loss": 0.0876, "step": 2792 }, { "epoch": 0.56, "learning_rate": 4.8499242275837444e-06, "loss": 0.0879, "step": 2793 }, { "epoch": 0.56, "learning_rate": 4.846435178673737e-06, "loss": 0.0944, "step": 2794 }, { "epoch": 0.56, "learning_rate": 4.842946204609359e-06, "loss": 0.0897, "step": 2795 }, { "epoch": 0.56, "learning_rate": 4.839457307091093e-06, "loss": 0.1156, "step": 2796 }, { "epoch": 0.56, "learning_rate": 4.835968487819384e-06, "loss": 0.0969, "step": 2797 }, { "epoch": 0.56, "learning_rate": 4.832479748494643e-06, "loss": 0.0926, "step": 2798 }, { "epoch": 0.56, "learning_rate": 4.828991090817238e-06, "loss": 0.1005, "step": 2799 }, { "epoch": 0.56, "learning_rate": 4.825502516487497e-06, "loss": 0.1018, "step": 2800 }, { "epoch": 0.56, "learning_rate": 4.822014027205708e-06, "loss": 0.0951, "step": 2801 }, { "epoch": 0.56, "learning_rate": 4.818525624672122e-06, "loss": 0.0883, "step": 2802 }, { "epoch": 0.56, "learning_rate": 4.815037310586941e-06, "loss": 0.0903, "step": 2803 }, { "epoch": 0.56, "learning_rate": 4.811549086650327e-06, "loss": 0.0934, "step": 2804 }, { "epoch": 0.56, "learning_rate": 4.8080609545624004e-06, "loss": 0.0906, "step": 2805 }, { "epoch": 0.56, "learning_rate": 4.8045729160232326e-06, "loss": 0.088, "step": 2806 }, { "epoch": 0.56, "learning_rate": 4.801084972732851e-06, "loss": 0.0917, "step": 2807 }, { "epoch": 0.56, "learning_rate": 4.797597126391238e-06, "loss": 0.0889, "step": 2808 }, { "epoch": 0.56, "learning_rate": 4.794109378698327e-06, "loss": 0.096, "step": 2809 }, { "epoch": 0.56, "learning_rate": 4.7906217313540035e-06, "loss": 0.0942, "step": 2810 }, { "epoch": 0.56, "learning_rate": 4.787134186058103e-06, "loss": 0.0917, "step": 2811 }, { "epoch": 0.56, "learning_rate": 4.783646744510416e-06, "loss": 0.0958, "step": 2812 }, { "epoch": 0.56, "learning_rate": 4.780159408410677e-06, "loss": 0.0905, "step": 2813 }, { "epoch": 0.56, "learning_rate": 4.7766721794585704e-06, "loss": 0.0988, "step": 2814 }, { "epoch": 0.56, "learning_rate": 4.7731850593537316e-06, "loss": 0.0956, "step": 2815 }, { "epoch": 0.56, "learning_rate": 4.769698049795739e-06, "loss": 0.0879, "step": 2816 }, { "epoch": 0.56, "learning_rate": 4.766211152484122e-06, "loss": 0.0948, "step": 2817 }, { "epoch": 0.56, "learning_rate": 4.762724369118346e-06, "loss": 0.0929, "step": 2818 }, { "epoch": 0.56, "learning_rate": 4.759237701397831e-06, "loss": 0.1038, "step": 2819 }, { "epoch": 0.56, "learning_rate": 4.755751151021934e-06, "loss": 0.1091, "step": 2820 }, { "epoch": 0.56, "learning_rate": 4.752264719689961e-06, "loss": 0.0926, "step": 2821 }, { "epoch": 0.56, "learning_rate": 4.748778409101153e-06, "loss": 0.0889, "step": 2822 }, { "epoch": 0.56, "learning_rate": 4.745292220954696e-06, "loss": 0.1461, "step": 2823 }, { "epoch": 0.56, "learning_rate": 4.741806156949718e-06, "loss": 0.2161, "step": 2824 }, { "epoch": 0.56, "learning_rate": 4.738320218785281e-06, "loss": 0.3013, "step": 2825 }, { "epoch": 0.57, "learning_rate": 4.734834408160393e-06, "loss": 0.0879, "step": 2826 }, { "epoch": 0.57, "learning_rate": 4.73134872677399e-06, "loss": 0.0909, "step": 2827 }, { "epoch": 0.57, "learning_rate": 4.727863176324955e-06, "loss": 0.0902, "step": 2828 }, { "epoch": 0.57, "learning_rate": 4.7243777585121034e-06, "loss": 0.0976, "step": 2829 }, { "epoch": 0.57, "learning_rate": 4.720892475034181e-06, "loss": 0.0919, "step": 2830 }, { "epoch": 0.57, "learning_rate": 4.717407327589878e-06, "loss": 0.0891, "step": 2831 }, { "epoch": 0.57, "learning_rate": 4.71392231787781e-06, "loss": 0.0936, "step": 2832 }, { "epoch": 0.57, "learning_rate": 4.710437447596528e-06, "loss": 0.0897, "step": 2833 }, { "epoch": 0.57, "learning_rate": 4.706952718444518e-06, "loss": 0.0905, "step": 2834 }, { "epoch": 0.57, "learning_rate": 4.703468132120193e-06, "loss": 0.0906, "step": 2835 }, { "epoch": 0.57, "learning_rate": 4.699983690321898e-06, "loss": 0.0932, "step": 2836 }, { "epoch": 0.57, "learning_rate": 4.696499394747906e-06, "loss": 0.0901, "step": 2837 }, { "epoch": 0.57, "learning_rate": 4.693015247096423e-06, "loss": 0.0892, "step": 2838 }, { "epoch": 0.57, "learning_rate": 4.689531249065581e-06, "loss": 0.0887, "step": 2839 }, { "epoch": 0.57, "learning_rate": 4.686047402353433e-06, "loss": 0.0999, "step": 2840 }, { "epoch": 0.57, "learning_rate": 4.68256370865797e-06, "loss": 0.0971, "step": 2841 }, { "epoch": 0.57, "learning_rate": 4.679080169677097e-06, "loss": 0.0963, "step": 2842 }, { "epoch": 0.57, "learning_rate": 4.675596787108652e-06, "loss": 0.0868, "step": 2843 }, { "epoch": 0.57, "learning_rate": 4.672113562650394e-06, "loss": 0.0857, "step": 2844 }, { "epoch": 0.57, "learning_rate": 4.668630498000001e-06, "loss": 0.099, "step": 2845 }, { "epoch": 0.57, "learning_rate": 4.6651475948550765e-06, "loss": 0.0951, "step": 2846 }, { "epoch": 0.57, "learning_rate": 4.661664854913147e-06, "loss": 0.0964, "step": 2847 }, { "epoch": 0.57, "learning_rate": 4.658182279871657e-06, "loss": 0.0917, "step": 2848 }, { "epoch": 0.57, "learning_rate": 4.654699871427972e-06, "loss": 0.0873, "step": 2849 }, { "epoch": 0.57, "learning_rate": 4.651217631279374e-06, "loss": 0.0854, "step": 2850 }, { "epoch": 0.57, "learning_rate": 4.6477355611230655e-06, "loss": 0.0878, "step": 2851 }, { "epoch": 0.57, "learning_rate": 4.644253662656167e-06, "loss": 0.0891, "step": 2852 }, { "epoch": 0.57, "learning_rate": 4.6407719375757095e-06, "loss": 0.093, "step": 2853 }, { "epoch": 0.57, "learning_rate": 4.637290387578647e-06, "loss": 0.0942, "step": 2854 }, { "epoch": 0.57, "learning_rate": 4.6338090143618435e-06, "loss": 0.0963, "step": 2855 }, { "epoch": 0.57, "learning_rate": 4.630327819622076e-06, "loss": 0.0922, "step": 2856 }, { "epoch": 0.57, "learning_rate": 4.6268468050560394e-06, "loss": 0.092, "step": 2857 }, { "epoch": 0.57, "learning_rate": 4.6233659723603374e-06, "loss": 0.0951, "step": 2858 }, { "epoch": 0.57, "learning_rate": 4.619885323231484e-06, "loss": 0.1045, "step": 2859 }, { "epoch": 0.57, "learning_rate": 4.6164048593659076e-06, "loss": 0.0914, "step": 2860 }, { "epoch": 0.57, "learning_rate": 4.612924582459943e-06, "loss": 0.0887, "step": 2861 }, { "epoch": 0.57, "learning_rate": 4.609444494209834e-06, "loss": 0.0798, "step": 2862 }, { "epoch": 0.57, "learning_rate": 4.605964596311733e-06, "loss": 0.0915, "step": 2863 }, { "epoch": 0.57, "learning_rate": 4.602484890461702e-06, "loss": 0.0912, "step": 2864 }, { "epoch": 0.57, "learning_rate": 4.5990053783557066e-06, "loss": 0.0927, "step": 2865 }, { "epoch": 0.57, "learning_rate": 4.595526061689617e-06, "loss": 0.0996, "step": 2866 }, { "epoch": 0.57, "learning_rate": 4.592046942159213e-06, "loss": 0.1611, "step": 2867 }, { "epoch": 0.57, "learning_rate": 4.588568021460172e-06, "loss": 0.1022, "step": 2868 }, { "epoch": 0.57, "learning_rate": 4.5850893012880806e-06, "loss": 0.0948, "step": 2869 }, { "epoch": 0.57, "learning_rate": 4.581610783338424e-06, "loss": 0.0972, "step": 2870 }, { "epoch": 0.57, "learning_rate": 4.578132469306588e-06, "loss": 0.0912, "step": 2871 }, { "epoch": 0.57, "learning_rate": 4.57465436088786e-06, "loss": 0.0885, "step": 2872 }, { "epoch": 0.57, "learning_rate": 4.571176459777431e-06, "loss": 0.0961, "step": 2873 }, { "epoch": 0.57, "learning_rate": 4.5676987676703865e-06, "loss": 0.094, "step": 2874 }, { "epoch": 0.57, "learning_rate": 4.564221286261709e-06, "loss": 0.1037, "step": 2875 }, { "epoch": 0.58, "learning_rate": 4.560744017246284e-06, "loss": 0.0827, "step": 2876 }, { "epoch": 0.58, "learning_rate": 4.557266962318889e-06, "loss": 0.0916, "step": 2877 }, { "epoch": 0.58, "learning_rate": 4.553790123174198e-06, "loss": 0.0967, "step": 2878 }, { "epoch": 0.58, "learning_rate": 4.5503135015067815e-06, "loss": 0.1196, "step": 2879 }, { "epoch": 0.58, "learning_rate": 4.546837099011101e-06, "loss": 0.0901, "step": 2880 }, { "epoch": 0.58, "learning_rate": 4.543360917381512e-06, "loss": 0.0888, "step": 2881 }, { "epoch": 0.58, "learning_rate": 4.539884958312265e-06, "loss": 0.0816, "step": 2882 }, { "epoch": 0.58, "learning_rate": 4.5364092234975e-06, "loss": 0.0849, "step": 2883 }, { "epoch": 0.58, "learning_rate": 4.532933714631248e-06, "loss": 0.093, "step": 2884 }, { "epoch": 0.58, "learning_rate": 4.529458433407429e-06, "loss": 0.0855, "step": 2885 }, { "epoch": 0.58, "learning_rate": 4.525983381519853e-06, "loss": 0.0916, "step": 2886 }, { "epoch": 0.58, "learning_rate": 4.522508560662219e-06, "loss": 0.092, "step": 2887 }, { "epoch": 0.58, "learning_rate": 4.519033972528114e-06, "loss": 0.0913, "step": 2888 }, { "epoch": 0.58, "learning_rate": 4.5155596188110055e-06, "loss": 0.0865, "step": 2889 }, { "epoch": 0.58, "learning_rate": 4.512085501204254e-06, "loss": 0.0937, "step": 2890 }, { "epoch": 0.58, "learning_rate": 4.508611621401102e-06, "loss": 0.0898, "step": 2891 }, { "epoch": 0.58, "learning_rate": 4.505137981094675e-06, "loss": 0.0955, "step": 2892 }, { "epoch": 0.58, "learning_rate": 4.5016645819779865e-06, "loss": 0.0785, "step": 2893 }, { "epoch": 0.58, "learning_rate": 4.4981914257439254e-06, "loss": 0.1503, "step": 2894 }, { "epoch": 0.58, "learning_rate": 4.494718514085269e-06, "loss": 0.095, "step": 2895 }, { "epoch": 0.58, "learning_rate": 4.491245848694669e-06, "loss": 0.0922, "step": 2896 }, { "epoch": 0.58, "learning_rate": 4.487773431264664e-06, "loss": 0.0935, "step": 2897 }, { "epoch": 0.58, "learning_rate": 4.484301263487664e-06, "loss": 0.0883, "step": 2898 }, { "epoch": 0.58, "learning_rate": 4.4808293470559645e-06, "loss": 0.0877, "step": 2899 }, { "epoch": 0.58, "learning_rate": 4.477357683661734e-06, "loss": 0.099, "step": 2900 }, { "epoch": 0.58, "learning_rate": 4.473886274997018e-06, "loss": 0.0898, "step": 2901 }, { "epoch": 0.58, "learning_rate": 4.470415122753742e-06, "loss": 0.0928, "step": 2902 }, { "epoch": 0.58, "learning_rate": 4.466944228623701e-06, "loss": 0.0999, "step": 2903 }, { "epoch": 0.58, "learning_rate": 4.463473594298567e-06, "loss": 0.087, "step": 2904 }, { "epoch": 0.58, "learning_rate": 4.460003221469886e-06, "loss": 0.0935, "step": 2905 }, { "epoch": 0.58, "learning_rate": 4.456533111829076e-06, "loss": 0.0906, "step": 2906 }, { "epoch": 0.58, "learning_rate": 4.453063267067424e-06, "loss": 0.0925, "step": 2907 }, { "epoch": 0.58, "learning_rate": 4.44959368887609e-06, "loss": 0.0884, "step": 2908 }, { "epoch": 0.58, "learning_rate": 4.446124378946108e-06, "loss": 0.0831, "step": 2909 }, { "epoch": 0.58, "learning_rate": 4.442655338968373e-06, "loss": 0.1486, "step": 2910 }, { "epoch": 0.58, "learning_rate": 4.439186570633656e-06, "loss": 0.1157, "step": 2911 }, { "epoch": 0.58, "learning_rate": 4.4357180756325915e-06, "loss": 0.0967, "step": 2912 }, { "epoch": 0.58, "learning_rate": 4.432249855655681e-06, "loss": 0.0898, "step": 2913 }, { "epoch": 0.58, "learning_rate": 4.428781912393299e-06, "loss": 0.101, "step": 2914 }, { "epoch": 0.58, "learning_rate": 4.425314247535668e-06, "loss": 0.0892, "step": 2915 }, { "epoch": 0.58, "learning_rate": 4.4218468627728935e-06, "loss": 0.0931, "step": 2916 }, { "epoch": 0.58, "learning_rate": 4.418379759794934e-06, "loss": 0.094, "step": 2917 }, { "epoch": 0.58, "learning_rate": 4.414912940291614e-06, "loss": 0.0889, "step": 2918 }, { "epoch": 0.58, "learning_rate": 4.4114464059526185e-06, "loss": 0.0927, "step": 2919 }, { "epoch": 0.58, "learning_rate": 4.4079801584674955e-06, "loss": 0.0957, "step": 2920 }, { "epoch": 0.58, "learning_rate": 4.404514199525651e-06, "loss": 0.089, "step": 2921 }, { "epoch": 0.58, "learning_rate": 4.401048530816353e-06, "loss": 0.089, "step": 2922 }, { "epoch": 0.58, "learning_rate": 4.397583154028725e-06, "loss": 0.0917, "step": 2923 }, { "epoch": 0.58, "learning_rate": 4.394118070851749e-06, "loss": 0.0938, "step": 2924 }, { "epoch": 0.58, "learning_rate": 4.390653282974264e-06, "loss": 0.084, "step": 2925 }, { "epoch": 0.59, "learning_rate": 4.387188792084967e-06, "loss": 0.0853, "step": 2926 }, { "epoch": 0.59, "learning_rate": 4.383724599872407e-06, "loss": 0.0891, "step": 2927 }, { "epoch": 0.59, "learning_rate": 4.380260708024991e-06, "loss": 0.091, "step": 2928 }, { "epoch": 0.59, "learning_rate": 4.376797118230978e-06, "loss": 0.0882, "step": 2929 }, { "epoch": 0.59, "learning_rate": 4.373333832178478e-06, "loss": 0.0861, "step": 2930 }, { "epoch": 0.59, "learning_rate": 4.369870851555457e-06, "loss": 0.0865, "step": 2931 }, { "epoch": 0.59, "learning_rate": 4.366408178049728e-06, "loss": 0.0901, "step": 2932 }, { "epoch": 0.59, "learning_rate": 4.362945813348956e-06, "loss": 0.0916, "step": 2933 }, { "epoch": 0.59, "learning_rate": 4.359483759140654e-06, "loss": 0.0851, "step": 2934 }, { "epoch": 0.59, "learning_rate": 4.356022017112187e-06, "loss": 0.0844, "step": 2935 }, { "epoch": 0.59, "learning_rate": 4.352560588950766e-06, "loss": 0.0973, "step": 2936 }, { "epoch": 0.59, "learning_rate": 4.349099476343448e-06, "loss": 0.0902, "step": 2937 }, { "epoch": 0.59, "learning_rate": 4.34563868097714e-06, "loss": 0.0897, "step": 2938 }, { "epoch": 0.59, "learning_rate": 4.342178204538588e-06, "loss": 0.0849, "step": 2939 }, { "epoch": 0.59, "learning_rate": 4.3387180487143875e-06, "loss": 0.0893, "step": 2940 }, { "epoch": 0.59, "learning_rate": 4.335258215190979e-06, "loss": 0.0982, "step": 2941 }, { "epoch": 0.59, "learning_rate": 4.331798705654639e-06, "loss": 0.0911, "step": 2942 }, { "epoch": 0.59, "learning_rate": 4.328339521791493e-06, "loss": 0.0872, "step": 2943 }, { "epoch": 0.59, "learning_rate": 4.3248806652875045e-06, "loss": 0.0887, "step": 2944 }, { "epoch": 0.59, "learning_rate": 4.321422137828479e-06, "loss": 0.0998, "step": 2945 }, { "epoch": 0.59, "learning_rate": 4.317963941100059e-06, "loss": 0.0847, "step": 2946 }, { "epoch": 0.59, "learning_rate": 4.314506076787729e-06, "loss": 0.0767, "step": 2947 }, { "epoch": 0.59, "learning_rate": 4.31104854657681e-06, "loss": 0.0834, "step": 2948 }, { "epoch": 0.59, "learning_rate": 4.307591352152459e-06, "loss": 0.0888, "step": 2949 }, { "epoch": 0.59, "learning_rate": 4.304134495199675e-06, "loss": 0.0893, "step": 2950 }, { "epoch": 0.59, "learning_rate": 4.300677977403281e-06, "loss": 0.1879, "step": 2951 }, { "epoch": 0.59, "learning_rate": 4.297221800447946e-06, "loss": 0.0939, "step": 2952 }, { "epoch": 0.59, "learning_rate": 4.293765966018167e-06, "loss": 0.0899, "step": 2953 }, { "epoch": 0.59, "learning_rate": 4.290310475798278e-06, "loss": 0.106, "step": 2954 }, { "epoch": 0.59, "learning_rate": 4.286855331472442e-06, "loss": 0.0972, "step": 2955 }, { "epoch": 0.59, "learning_rate": 4.283400534724654e-06, "loss": 0.0944, "step": 2956 }, { "epoch": 0.59, "learning_rate": 4.279946087238739e-06, "loss": 0.0912, "step": 2957 }, { "epoch": 0.59, "learning_rate": 4.2764919906983545e-06, "loss": 0.0922, "step": 2958 }, { "epoch": 0.59, "learning_rate": 4.273038246786986e-06, "loss": 0.1133, "step": 2959 }, { "epoch": 0.59, "learning_rate": 4.269584857187942e-06, "loss": 0.089, "step": 2960 }, { "epoch": 0.59, "learning_rate": 4.266131823584368e-06, "loss": 0.098, "step": 2961 }, { "epoch": 0.59, "learning_rate": 4.262679147659227e-06, "loss": 0.1134, "step": 2962 }, { "epoch": 0.59, "learning_rate": 4.259226831095311e-06, "loss": 0.1025, "step": 2963 }, { "epoch": 0.59, "learning_rate": 4.255774875575239e-06, "loss": 0.0851, "step": 2964 }, { "epoch": 0.59, "learning_rate": 4.2523232827814534e-06, "loss": 0.0875, "step": 2965 }, { "epoch": 0.59, "learning_rate": 4.248872054396215e-06, "loss": 0.0855, "step": 2966 }, { "epoch": 0.59, "learning_rate": 4.245421192101613e-06, "loss": 0.0935, "step": 2967 }, { "epoch": 0.59, "learning_rate": 4.241970697579557e-06, "loss": 0.096, "step": 2968 }, { "epoch": 0.59, "learning_rate": 4.238520572511773e-06, "loss": 0.093, "step": 2969 }, { "epoch": 0.59, "learning_rate": 4.23507081857981e-06, "loss": 0.0898, "step": 2970 }, { "epoch": 0.59, "learning_rate": 4.23162143746504e-06, "loss": 0.092, "step": 2971 }, { "epoch": 0.59, "learning_rate": 4.228172430848645e-06, "loss": 0.0935, "step": 2972 }, { "epoch": 0.59, "learning_rate": 4.224723800411631e-06, "loss": 0.091, "step": 2973 }, { "epoch": 0.59, "learning_rate": 4.22127554783482e-06, "loss": 0.0825, "step": 2974 }, { "epoch": 0.59, "learning_rate": 4.217827674798845e-06, "loss": 0.0854, "step": 2975 }, { "epoch": 0.6, "learning_rate": 4.2143801829841635e-06, "loss": 0.1086, "step": 2976 }, { "epoch": 0.6, "learning_rate": 4.210933074071033e-06, "loss": 0.0885, "step": 2977 }, { "epoch": 0.6, "learning_rate": 4.207486349739538e-06, "loss": 0.097, "step": 2978 }, { "epoch": 0.6, "learning_rate": 4.204040011669567e-06, "loss": 0.0915, "step": 2979 }, { "epoch": 0.6, "learning_rate": 4.200594061540827e-06, "loss": 0.0929, "step": 2980 }, { "epoch": 0.6, "learning_rate": 4.197148501032829e-06, "loss": 0.0902, "step": 2981 }, { "epoch": 0.6, "learning_rate": 4.193703331824898e-06, "loss": 0.09, "step": 2982 }, { "epoch": 0.6, "learning_rate": 4.190258555596168e-06, "loss": 0.0936, "step": 2983 }, { "epoch": 0.6, "learning_rate": 4.186814174025582e-06, "loss": 0.084, "step": 2984 }, { "epoch": 0.6, "learning_rate": 4.183370188791891e-06, "loss": 0.0994, "step": 2985 }, { "epoch": 0.6, "learning_rate": 4.179926601573645e-06, "loss": 0.0878, "step": 2986 }, { "epoch": 0.6, "learning_rate": 4.176483414049214e-06, "loss": 0.087, "step": 2987 }, { "epoch": 0.6, "learning_rate": 4.173040627896762e-06, "loss": 0.1037, "step": 2988 }, { "epoch": 0.6, "learning_rate": 4.169598244794261e-06, "loss": 0.0779, "step": 2989 }, { "epoch": 0.6, "learning_rate": 4.166156266419489e-06, "loss": 0.086, "step": 2990 }, { "epoch": 0.6, "learning_rate": 4.162714694450023e-06, "loss": 0.0859, "step": 2991 }, { "epoch": 0.6, "learning_rate": 4.159273530563243e-06, "loss": 0.0862, "step": 2992 }, { "epoch": 0.6, "learning_rate": 4.155832776436331e-06, "loss": 0.0852, "step": 2993 }, { "epoch": 0.6, "learning_rate": 4.15239243374627e-06, "loss": 0.0875, "step": 2994 }, { "epoch": 0.6, "learning_rate": 4.148952504169839e-06, "loss": 0.0925, "step": 2995 }, { "epoch": 0.6, "learning_rate": 4.145512989383618e-06, "loss": 0.0844, "step": 2996 }, { "epoch": 0.6, "learning_rate": 4.142073891063986e-06, "loss": 0.0839, "step": 2997 }, { "epoch": 0.6, "learning_rate": 4.138635210887117e-06, "loss": 0.0937, "step": 2998 }, { "epoch": 0.6, "learning_rate": 4.135196950528982e-06, "loss": 0.0832, "step": 2999 }, { "epoch": 0.6, "learning_rate": 4.131759111665349e-06, "loss": 0.0847, "step": 3000 }, { "epoch": 0.6, "learning_rate": 4.128321695971775e-06, "loss": 0.0887, "step": 3001 }, { "epoch": 0.6, "learning_rate": 4.124884705123619e-06, "loss": 0.0833, "step": 3002 }, { "epoch": 0.6, "learning_rate": 4.121448140796029e-06, "loss": 0.0854, "step": 3003 }, { "epoch": 0.6, "learning_rate": 4.118012004663939e-06, "loss": 0.0932, "step": 3004 }, { "epoch": 0.6, "learning_rate": 4.114576298402085e-06, "loss": 0.0954, "step": 3005 }, { "epoch": 0.6, "learning_rate": 4.111141023684986e-06, "loss": 0.0911, "step": 3006 }, { "epoch": 0.6, "learning_rate": 4.107706182186954e-06, "loss": 0.0903, "step": 3007 }, { "epoch": 0.6, "learning_rate": 4.104271775582089e-06, "loss": 0.0923, "step": 3008 }, { "epoch": 0.6, "learning_rate": 4.100837805544279e-06, "loss": 0.0875, "step": 3009 }, { "epoch": 0.6, "learning_rate": 4.0974042737472005e-06, "loss": 0.0901, "step": 3010 }, { "epoch": 0.6, "learning_rate": 4.093971181864313e-06, "loss": 0.0864, "step": 3011 }, { "epoch": 0.6, "learning_rate": 4.090538531568867e-06, "loss": 0.0889, "step": 3012 }, { "epoch": 0.6, "learning_rate": 4.087106324533891e-06, "loss": 0.0802, "step": 3013 }, { "epoch": 0.6, "learning_rate": 4.083674562432203e-06, "loss": 0.0901, "step": 3014 }, { "epoch": 0.6, "learning_rate": 4.0802432469364e-06, "loss": 0.0941, "step": 3015 }, { "epoch": 0.6, "learning_rate": 4.0768123797188665e-06, "loss": 0.0632, "step": 3016 }, { "epoch": 0.6, "learning_rate": 4.073381962451764e-06, "loss": 0.0904, "step": 3017 }, { "epoch": 0.6, "learning_rate": 4.069951996807034e-06, "loss": 0.0815, "step": 3018 }, { "epoch": 0.6, "learning_rate": 4.066522484456406e-06, "loss": 0.0851, "step": 3019 }, { "epoch": 0.6, "learning_rate": 4.063093427071376e-06, "loss": 0.1242, "step": 3020 }, { "epoch": 0.6, "learning_rate": 4.0596648263232315e-06, "loss": 0.0955, "step": 3021 }, { "epoch": 0.6, "learning_rate": 4.0562366838830255e-06, "loss": 0.0916, "step": 3022 }, { "epoch": 0.6, "learning_rate": 4.052809001421595e-06, "loss": 0.09, "step": 3023 }, { "epoch": 0.6, "learning_rate": 4.0493817806095504e-06, "loss": 0.0853, "step": 3024 }, { "epoch": 0.6, "learning_rate": 4.045955023117276e-06, "loss": 0.0878, "step": 3025 }, { "epoch": 0.61, "learning_rate": 4.042528730614935e-06, "loss": 0.0943, "step": 3026 }, { "epoch": 0.61, "learning_rate": 4.039102904772459e-06, "loss": 0.0914, "step": 3027 }, { "epoch": 0.61, "learning_rate": 4.035677547259555e-06, "loss": 0.0862, "step": 3028 }, { "epoch": 0.61, "learning_rate": 4.032252659745699e-06, "loss": 0.0976, "step": 3029 }, { "epoch": 0.61, "learning_rate": 4.028828243900141e-06, "loss": 0.0923, "step": 3030 }, { "epoch": 0.61, "learning_rate": 4.025404301391898e-06, "loss": 0.0981, "step": 3031 }, { "epoch": 0.61, "learning_rate": 4.02198083388976e-06, "loss": 0.0913, "step": 3032 }, { "epoch": 0.61, "learning_rate": 4.018557843062282e-06, "loss": 0.0901, "step": 3033 }, { "epoch": 0.61, "learning_rate": 4.015135330577787e-06, "loss": 0.0852, "step": 3034 }, { "epoch": 0.61, "learning_rate": 4.0117132981043695e-06, "loss": 0.0994, "step": 3035 }, { "epoch": 0.61, "learning_rate": 4.0082917473098845e-06, "loss": 0.0883, "step": 3036 }, { "epoch": 0.61, "learning_rate": 4.004870679861953e-06, "loss": 0.0728, "step": 3037 }, { "epoch": 0.61, "learning_rate": 4.001450097427965e-06, "loss": 0.093, "step": 3038 }, { "epoch": 0.61, "learning_rate": 3.9980300016750696e-06, "loss": 0.0866, "step": 3039 }, { "epoch": 0.61, "learning_rate": 3.994610394270178e-06, "loss": 0.0832, "step": 3040 }, { "epoch": 0.61, "learning_rate": 3.991191276879966e-06, "loss": 0.0901, "step": 3041 }, { "epoch": 0.61, "learning_rate": 3.987772651170871e-06, "loss": 0.0858, "step": 3042 }, { "epoch": 0.61, "learning_rate": 3.98435451880909e-06, "loss": 0.0869, "step": 3043 }, { "epoch": 0.61, "learning_rate": 3.980936881460576e-06, "loss": 0.0943, "step": 3044 }, { "epoch": 0.61, "learning_rate": 3.977519740791049e-06, "loss": 0.084, "step": 3045 }, { "epoch": 0.61, "learning_rate": 3.974103098465976e-06, "loss": 0.0849, "step": 3046 }, { "epoch": 0.61, "learning_rate": 3.970686956150595e-06, "loss": 0.0895, "step": 3047 }, { "epoch": 0.61, "learning_rate": 3.967271315509884e-06, "loss": 0.0925, "step": 3048 }, { "epoch": 0.61, "learning_rate": 3.963856178208588e-06, "loss": 0.097, "step": 3049 }, { "epoch": 0.61, "learning_rate": 3.960441545911205e-06, "loss": 0.1132, "step": 3050 }, { "epoch": 0.61, "learning_rate": 3.957027420281981e-06, "loss": 0.0909, "step": 3051 }, { "epoch": 0.61, "learning_rate": 3.9536138029849244e-06, "loss": 0.1035, "step": 3052 }, { "epoch": 0.61, "learning_rate": 3.950200695683788e-06, "loss": 0.0913, "step": 3053 }, { "epoch": 0.61, "learning_rate": 3.94678810004208e-06, "loss": 0.0919, "step": 3054 }, { "epoch": 0.61, "learning_rate": 3.943376017723058e-06, "loss": 0.0856, "step": 3055 }, { "epoch": 0.61, "learning_rate": 3.939964450389728e-06, "loss": 0.0899, "step": 3056 }, { "epoch": 0.61, "learning_rate": 3.936553399704848e-06, "loss": 0.0907, "step": 3057 }, { "epoch": 0.61, "learning_rate": 3.933142867330921e-06, "loss": 0.0902, "step": 3058 }, { "epoch": 0.61, "learning_rate": 3.9297328549302e-06, "loss": 0.0914, "step": 3059 }, { "epoch": 0.61, "learning_rate": 3.926323364164684e-06, "loss": 0.0939, "step": 3060 }, { "epoch": 0.61, "learning_rate": 3.922914396696118e-06, "loss": 0.1069, "step": 3061 }, { "epoch": 0.61, "learning_rate": 3.91950595418599e-06, "loss": 0.0871, "step": 3062 }, { "epoch": 0.61, "learning_rate": 3.9160980382955336e-06, "loss": 0.0883, "step": 3063 }, { "epoch": 0.61, "learning_rate": 3.912690650685726e-06, "loss": 0.0859, "step": 3064 }, { "epoch": 0.61, "learning_rate": 3.909283793017289e-06, "loss": 0.0929, "step": 3065 }, { "epoch": 0.61, "learning_rate": 3.905877466950679e-06, "loss": 0.0835, "step": 3066 }, { "epoch": 0.61, "learning_rate": 3.902471674146099e-06, "loss": 0.0848, "step": 3067 }, { "epoch": 0.61, "learning_rate": 3.899066416263493e-06, "loss": 0.084, "step": 3068 }, { "epoch": 0.61, "learning_rate": 3.895661694962542e-06, "loss": 0.0815, "step": 3069 }, { "epoch": 0.61, "learning_rate": 3.892257511902664e-06, "loss": 0.0899, "step": 3070 }, { "epoch": 0.61, "learning_rate": 3.888853868743018e-06, "loss": 0.0905, "step": 3071 }, { "epoch": 0.61, "learning_rate": 3.885450767142498e-06, "loss": 0.0925, "step": 3072 }, { "epoch": 0.61, "learning_rate": 3.882048208759735e-06, "loss": 0.0372, "step": 3073 }, { "epoch": 0.61, "learning_rate": 3.8786461952530955e-06, "loss": 0.0621, "step": 3074 }, { "epoch": 0.61, "learning_rate": 3.875244728280676e-06, "loss": 0.087, "step": 3075 }, { "epoch": 0.62, "learning_rate": 3.871843809500313e-06, "loss": 0.0901, "step": 3076 }, { "epoch": 0.62, "learning_rate": 3.868443440569571e-06, "loss": 0.0959, "step": 3077 }, { "epoch": 0.62, "learning_rate": 3.865043623145751e-06, "loss": 0.089, "step": 3078 }, { "epoch": 0.62, "learning_rate": 3.86164435888588e-06, "loss": 0.0872, "step": 3079 }, { "epoch": 0.62, "learning_rate": 3.8582456494467214e-06, "loss": 0.0885, "step": 3080 }, { "epoch": 0.62, "learning_rate": 3.854847496484762e-06, "loss": 0.0845, "step": 3081 }, { "epoch": 0.62, "learning_rate": 3.8514499016562216e-06, "loss": 0.0923, "step": 3082 }, { "epoch": 0.62, "learning_rate": 3.8480528666170495e-06, "loss": 0.1037, "step": 3083 }, { "epoch": 0.62, "learning_rate": 3.844656393022912e-06, "loss": 0.091, "step": 3084 }, { "epoch": 0.62, "learning_rate": 3.841260482529215e-06, "loss": 0.0898, "step": 3085 }, { "epoch": 0.62, "learning_rate": 3.83786513679108e-06, "loss": 0.0867, "step": 3086 }, { "epoch": 0.62, "learning_rate": 3.834470357463362e-06, "loss": 0.0874, "step": 3087 }, { "epoch": 0.62, "learning_rate": 3.831076146200633e-06, "loss": 0.0869, "step": 3088 }, { "epoch": 0.62, "learning_rate": 3.827682504657187e-06, "loss": 0.0886, "step": 3089 }, { "epoch": 0.62, "learning_rate": 3.82428943448705e-06, "loss": 0.0876, "step": 3090 }, { "epoch": 0.62, "learning_rate": 3.820896937343959e-06, "loss": 0.0899, "step": 3091 }, { "epoch": 0.62, "learning_rate": 3.817505014881378e-06, "loss": 0.0895, "step": 3092 }, { "epoch": 0.62, "learning_rate": 3.814113668752486e-06, "loss": 0.0961, "step": 3093 }, { "epoch": 0.62, "learning_rate": 3.810722900610186e-06, "loss": 0.0862, "step": 3094 }, { "epoch": 0.62, "learning_rate": 3.8073327121070968e-06, "loss": 0.0884, "step": 3095 }, { "epoch": 0.62, "learning_rate": 3.8039431048955537e-06, "loss": 0.0918, "step": 3096 }, { "epoch": 0.62, "learning_rate": 3.8005540806276132e-06, "loss": 0.0969, "step": 3097 }, { "epoch": 0.62, "learning_rate": 3.797165640955041e-06, "loss": 0.0871, "step": 3098 }, { "epoch": 0.62, "learning_rate": 3.793777787529325e-06, "loss": 0.0804, "step": 3099 }, { "epoch": 0.62, "learning_rate": 3.790390522001662e-06, "loss": 0.0837, "step": 3100 }, { "epoch": 0.62, "learning_rate": 3.787003846022964e-06, "loss": 0.0912, "step": 3101 }, { "epoch": 0.62, "learning_rate": 3.7836177612438557e-06, "loss": 0.0799, "step": 3102 }, { "epoch": 0.62, "learning_rate": 3.7802322693146726e-06, "loss": 0.0919, "step": 3103 }, { "epoch": 0.62, "learning_rate": 3.776847371885464e-06, "loss": 0.0914, "step": 3104 }, { "epoch": 0.62, "learning_rate": 3.7734630706059873e-06, "loss": 0.1152, "step": 3105 }, { "epoch": 0.62, "learning_rate": 3.77007936712571e-06, "loss": 0.0871, "step": 3106 }, { "epoch": 0.62, "learning_rate": 3.7666962630938084e-06, "loss": 0.0998, "step": 3107 }, { "epoch": 0.62, "learning_rate": 3.7633137601591647e-06, "loss": 0.0894, "step": 3108 }, { "epoch": 0.62, "learning_rate": 3.759931859970374e-06, "loss": 0.0911, "step": 3109 }, { "epoch": 0.62, "learning_rate": 3.756550564175727e-06, "loss": 0.0853, "step": 3110 }, { "epoch": 0.62, "learning_rate": 3.7531698744232307e-06, "loss": 0.0929, "step": 3111 }, { "epoch": 0.62, "learning_rate": 3.74978979236059e-06, "loss": 0.0981, "step": 3112 }, { "epoch": 0.62, "learning_rate": 3.7464103196352176e-06, "loss": 0.0873, "step": 3113 }, { "epoch": 0.62, "learning_rate": 3.7430314578942263e-06, "loss": 0.0913, "step": 3114 }, { "epoch": 0.62, "learning_rate": 3.7396532087844318e-06, "loss": 0.0913, "step": 3115 }, { "epoch": 0.62, "learning_rate": 3.736275573952354e-06, "loss": 0.0957, "step": 3116 }, { "epoch": 0.62, "learning_rate": 3.7328985550442086e-06, "loss": 0.0907, "step": 3117 }, { "epoch": 0.62, "learning_rate": 3.7295221537059162e-06, "loss": 0.0897, "step": 3118 }, { "epoch": 0.62, "learning_rate": 3.7261463715830902e-06, "loss": 0.089, "step": 3119 }, { "epoch": 0.62, "learning_rate": 3.7227712103210485e-06, "loss": 0.0947, "step": 3120 }, { "epoch": 0.62, "learning_rate": 3.7193966715648026e-06, "loss": 0.0925, "step": 3121 }, { "epoch": 0.62, "learning_rate": 3.716022756959061e-06, "loss": 0.0834, "step": 3122 }, { "epoch": 0.62, "learning_rate": 3.7126494681482317e-06, "loss": 0.0931, "step": 3123 }, { "epoch": 0.62, "learning_rate": 3.709276806776412e-06, "loss": 0.0869, "step": 3124 }, { "epoch": 0.62, "learning_rate": 3.705904774487396e-06, "loss": 0.0879, "step": 3125 }, { "epoch": 0.63, "learning_rate": 3.7025333729246733e-06, "loss": 0.1013, "step": 3126 }, { "epoch": 0.63, "learning_rate": 3.699162603731423e-06, "loss": 0.0908, "step": 3127 }, { "epoch": 0.63, "learning_rate": 3.695792468550517e-06, "loss": 0.0895, "step": 3128 }, { "epoch": 0.63, "learning_rate": 3.6924229690245163e-06, "loss": 0.0863, "step": 3129 }, { "epoch": 0.63, "learning_rate": 3.6890541067956775e-06, "loss": 0.0843, "step": 3130 }, { "epoch": 0.63, "learning_rate": 3.68568588350594e-06, "loss": 0.0893, "step": 3131 }, { "epoch": 0.63, "learning_rate": 3.6823183007969375e-06, "loss": 0.0902, "step": 3132 }, { "epoch": 0.63, "learning_rate": 3.678951360309988e-06, "loss": 0.0809, "step": 3133 }, { "epoch": 0.63, "learning_rate": 3.6755850636860956e-06, "loss": 0.0831, "step": 3134 }, { "epoch": 0.63, "learning_rate": 3.672219412565956e-06, "loss": 0.0886, "step": 3135 }, { "epoch": 0.63, "learning_rate": 3.668854408589945e-06, "loss": 0.0949, "step": 3136 }, { "epoch": 0.63, "learning_rate": 3.6654900533981234e-06, "loss": 0.0887, "step": 3137 }, { "epoch": 0.63, "learning_rate": 3.6621263486302373e-06, "loss": 0.0848, "step": 3138 }, { "epoch": 0.63, "learning_rate": 3.6587632959257168e-06, "loss": 0.0851, "step": 3139 }, { "epoch": 0.63, "learning_rate": 3.655400896923672e-06, "loss": 0.0893, "step": 3140 }, { "epoch": 0.63, "learning_rate": 3.6520391532628953e-06, "loss": 0.1096, "step": 3141 }, { "epoch": 0.63, "learning_rate": 3.648678066581861e-06, "loss": 0.0885, "step": 3142 }, { "epoch": 0.63, "learning_rate": 3.645317638518721e-06, "loss": 0.1048, "step": 3143 }, { "epoch": 0.63, "learning_rate": 3.6419578707113055e-06, "loss": 0.0982, "step": 3144 }, { "epoch": 0.63, "learning_rate": 3.6385987647971287e-06, "loss": 0.0947, "step": 3145 }, { "epoch": 0.63, "learning_rate": 3.635240322413375e-06, "loss": 0.0848, "step": 3146 }, { "epoch": 0.63, "learning_rate": 3.6318825451969085e-06, "loss": 0.0833, "step": 3147 }, { "epoch": 0.63, "learning_rate": 3.628525434784268e-06, "loss": 0.0904, "step": 3148 }, { "epoch": 0.63, "learning_rate": 3.625168992811671e-06, "loss": 0.0893, "step": 3149 }, { "epoch": 0.63, "learning_rate": 3.6218132209150047e-06, "loss": 0.0861, "step": 3150 }, { "epoch": 0.63, "learning_rate": 3.618458120729832e-06, "loss": 0.0871, "step": 3151 }, { "epoch": 0.63, "learning_rate": 3.6151036938913887e-06, "loss": 0.0924, "step": 3152 }, { "epoch": 0.63, "learning_rate": 3.61174994203458e-06, "loss": 0.0934, "step": 3153 }, { "epoch": 0.63, "learning_rate": 3.608396866793988e-06, "loss": 0.0877, "step": 3154 }, { "epoch": 0.63, "learning_rate": 3.6050444698038547e-06, "loss": 0.0793, "step": 3155 }, { "epoch": 0.63, "learning_rate": 3.6016927526981014e-06, "loss": 0.0574, "step": 3156 }, { "epoch": 0.63, "learning_rate": 3.598341717110313e-06, "loss": 0.0981, "step": 3157 }, { "epoch": 0.63, "learning_rate": 3.5949913646737456e-06, "loss": 0.1027, "step": 3158 }, { "epoch": 0.63, "learning_rate": 3.5916416970213173e-06, "loss": 0.0868, "step": 3159 }, { "epoch": 0.63, "learning_rate": 3.5882927157856175e-06, "loss": 0.0832, "step": 3160 }, { "epoch": 0.63, "learning_rate": 3.584944422598899e-06, "loss": 0.0795, "step": 3161 }, { "epoch": 0.63, "learning_rate": 3.5815968190930793e-06, "loss": 0.0841, "step": 3162 }, { "epoch": 0.63, "learning_rate": 3.5782499068997386e-06, "loss": 0.0881, "step": 3163 }, { "epoch": 0.63, "learning_rate": 3.5749036876501196e-06, "loss": 0.0899, "step": 3164 }, { "epoch": 0.63, "learning_rate": 3.571558162975133e-06, "loss": 0.0851, "step": 3165 }, { "epoch": 0.63, "learning_rate": 3.568213334505345e-06, "loss": 0.0913, "step": 3166 }, { "epoch": 0.63, "learning_rate": 3.564869203870982e-06, "loss": 0.0879, "step": 3167 }, { "epoch": 0.63, "learning_rate": 3.561525772701937e-06, "loss": 0.0964, "step": 3168 }, { "epoch": 0.63, "learning_rate": 3.5581830426277554e-06, "loss": 0.0847, "step": 3169 }, { "epoch": 0.63, "learning_rate": 3.5548410152776414e-06, "loss": 0.0931, "step": 3170 }, { "epoch": 0.63, "learning_rate": 3.5514996922804636e-06, "loss": 0.0852, "step": 3171 }, { "epoch": 0.63, "learning_rate": 3.548159075264738e-06, "loss": 0.0897, "step": 3172 }, { "epoch": 0.63, "learning_rate": 3.5448191658586423e-06, "loss": 0.0829, "step": 3173 }, { "epoch": 0.63, "learning_rate": 3.5414799656900057e-06, "loss": 0.0914, "step": 3174 }, { "epoch": 0.64, "learning_rate": 3.538141476386317e-06, "loss": 0.0833, "step": 3175 }, { "epoch": 0.64, "learning_rate": 3.5348036995747135e-06, "loss": 0.0955, "step": 3176 }, { "epoch": 0.64, "learning_rate": 3.531466636881987e-06, "loss": 0.084, "step": 3177 }, { "epoch": 0.64, "learning_rate": 3.5281302899345825e-06, "loss": 0.0847, "step": 3178 }, { "epoch": 0.64, "learning_rate": 3.524794660358593e-06, "loss": 0.0852, "step": 3179 }, { "epoch": 0.64, "learning_rate": 3.521459749779769e-06, "loss": 0.0869, "step": 3180 }, { "epoch": 0.64, "learning_rate": 3.5181255598234963e-06, "loss": 0.094, "step": 3181 }, { "epoch": 0.64, "learning_rate": 3.5147920921148267e-06, "loss": 0.0868, "step": 3182 }, { "epoch": 0.64, "learning_rate": 3.511459348278448e-06, "loss": 0.0872, "step": 3183 }, { "epoch": 0.64, "learning_rate": 3.508127329938699e-06, "loss": 0.0922, "step": 3184 }, { "epoch": 0.64, "learning_rate": 3.5047960387195673e-06, "loss": 0.097, "step": 3185 }, { "epoch": 0.64, "learning_rate": 3.501465476244681e-06, "loss": 0.1024, "step": 3186 }, { "epoch": 0.64, "learning_rate": 3.498135644137318e-06, "loss": 0.098, "step": 3187 }, { "epoch": 0.64, "learning_rate": 3.4948065440203982e-06, "loss": 0.0939, "step": 3188 }, { "epoch": 0.64, "learning_rate": 3.491478177516484e-06, "loss": 0.0953, "step": 3189 }, { "epoch": 0.64, "learning_rate": 3.488150546247778e-06, "loss": 0.0886, "step": 3190 }, { "epoch": 0.64, "learning_rate": 3.484823651836131e-06, "loss": 0.0905, "step": 3191 }, { "epoch": 0.64, "learning_rate": 3.4814974959030294e-06, "loss": 0.0835, "step": 3192 }, { "epoch": 0.64, "learning_rate": 3.4781720800696006e-06, "loss": 0.091, "step": 3193 }, { "epoch": 0.64, "learning_rate": 3.474847405956613e-06, "loss": 0.0908, "step": 3194 }, { "epoch": 0.64, "learning_rate": 3.471523475184472e-06, "loss": 0.0869, "step": 3195 }, { "epoch": 0.64, "learning_rate": 3.4682002893732203e-06, "loss": 0.1452, "step": 3196 }, { "epoch": 0.64, "learning_rate": 3.464877850142541e-06, "loss": 0.0956, "step": 3197 }, { "epoch": 0.64, "learning_rate": 3.4615561591117486e-06, "loss": 0.091, "step": 3198 }, { "epoch": 0.64, "learning_rate": 3.4582352178997937e-06, "loss": 0.102, "step": 3199 }, { "epoch": 0.64, "learning_rate": 3.4549150281252635e-06, "loss": 0.0975, "step": 3200 }, { "epoch": 0.64, "learning_rate": 3.4515955914063796e-06, "loss": 0.0941, "step": 3201 }, { "epoch": 0.64, "learning_rate": 3.4482769093609945e-06, "loss": 0.0867, "step": 3202 }, { "epoch": 0.64, "learning_rate": 3.444958983606592e-06, "loss": 0.0898, "step": 3203 }, { "epoch": 0.64, "learning_rate": 3.441641815760291e-06, "loss": 0.1051, "step": 3204 }, { "epoch": 0.64, "learning_rate": 3.4383254074388373e-06, "loss": 0.0852, "step": 3205 }, { "epoch": 0.64, "learning_rate": 3.4350097602586085e-06, "loss": 0.0906, "step": 3206 }, { "epoch": 0.64, "learning_rate": 3.4316948758356127e-06, "loss": 0.0922, "step": 3207 }, { "epoch": 0.64, "learning_rate": 3.4283807557854814e-06, "loss": 0.0922, "step": 3208 }, { "epoch": 0.64, "learning_rate": 3.4250674017234774e-06, "loss": 0.085, "step": 3209 }, { "epoch": 0.64, "learning_rate": 3.4217548152644887e-06, "loss": 0.0843, "step": 3210 }, { "epoch": 0.64, "learning_rate": 3.4184429980230305e-06, "loss": 0.0919, "step": 3211 }, { "epoch": 0.64, "learning_rate": 3.4151319516132414e-06, "loss": 0.0903, "step": 3212 }, { "epoch": 0.64, "learning_rate": 3.411821677648887e-06, "loss": 0.0847, "step": 3213 }, { "epoch": 0.64, "learning_rate": 3.4085121777433532e-06, "loss": 0.08, "step": 3214 }, { "epoch": 0.64, "learning_rate": 3.40520345350965e-06, "loss": 0.0886, "step": 3215 }, { "epoch": 0.64, "learning_rate": 3.401895506560411e-06, "loss": 0.0898, "step": 3216 }, { "epoch": 0.64, "learning_rate": 3.3985883385078875e-06, "loss": 0.0893, "step": 3217 }, { "epoch": 0.64, "learning_rate": 3.3952819509639534e-06, "loss": 0.0873, "step": 3218 }, { "epoch": 0.64, "learning_rate": 3.3919763455401016e-06, "loss": 0.092, "step": 3219 }, { "epoch": 0.64, "learning_rate": 3.3886715238474454e-06, "loss": 0.0951, "step": 3220 }, { "epoch": 0.64, "learning_rate": 3.3853674874967134e-06, "loss": 0.093, "step": 3221 }, { "epoch": 0.64, "learning_rate": 3.3820642380982527e-06, "loss": 0.0895, "step": 3222 }, { "epoch": 0.64, "learning_rate": 3.378761777262028e-06, "loss": 0.0934, "step": 3223 }, { "epoch": 0.64, "learning_rate": 3.375460106597619e-06, "loss": 0.0927, "step": 3224 }, { "epoch": 0.65, "learning_rate": 3.372159227714218e-06, "loss": 0.0958, "step": 3225 }, { "epoch": 0.65, "learning_rate": 3.3688591422206333e-06, "loss": 0.0923, "step": 3226 }, { "epoch": 0.65, "learning_rate": 3.3655598517252886e-06, "loss": 0.0872, "step": 3227 }, { "epoch": 0.65, "learning_rate": 3.3622613578362162e-06, "loss": 0.089, "step": 3228 }, { "epoch": 0.65, "learning_rate": 3.358963662161062e-06, "loss": 0.0861, "step": 3229 }, { "epoch": 0.65, "learning_rate": 3.355666766307084e-06, "loss": 0.0938, "step": 3230 }, { "epoch": 0.65, "learning_rate": 3.352370671881148e-06, "loss": 0.1005, "step": 3231 }, { "epoch": 0.65, "learning_rate": 3.3490753804897315e-06, "loss": 0.0854, "step": 3232 }, { "epoch": 0.65, "learning_rate": 3.34578089373892e-06, "loss": 0.0901, "step": 3233 }, { "epoch": 0.65, "learning_rate": 3.3424872132344044e-06, "loss": 0.0898, "step": 3234 }, { "epoch": 0.65, "learning_rate": 3.339194340581485e-06, "loss": 0.0963, "step": 3235 }, { "epoch": 0.65, "learning_rate": 3.3359022773850673e-06, "loss": 0.0912, "step": 3236 }, { "epoch": 0.65, "learning_rate": 3.3326110252496652e-06, "loss": 0.0833, "step": 3237 }, { "epoch": 0.65, "learning_rate": 3.3293205857793924e-06, "loss": 0.0844, "step": 3238 }, { "epoch": 0.65, "learning_rate": 3.3260309605779717e-06, "loss": 0.0862, "step": 3239 }, { "epoch": 0.65, "learning_rate": 3.322742151248726e-06, "loss": 0.0797, "step": 3240 }, { "epoch": 0.65, "learning_rate": 3.319454159394578e-06, "loss": 0.0841, "step": 3241 }, { "epoch": 0.65, "learning_rate": 3.31616698661806e-06, "loss": 0.0832, "step": 3242 }, { "epoch": 0.65, "learning_rate": 3.312880634521295e-06, "loss": 0.0861, "step": 3243 }, { "epoch": 0.65, "learning_rate": 3.3095951047060147e-06, "loss": 0.085, "step": 3244 }, { "epoch": 0.65, "learning_rate": 3.3063103987735433e-06, "loss": 0.0856, "step": 3245 }, { "epoch": 0.65, "learning_rate": 3.30302651832481e-06, "loss": 0.0825, "step": 3246 }, { "epoch": 0.65, "learning_rate": 3.2997434649603368e-06, "loss": 0.0873, "step": 3247 }, { "epoch": 0.65, "learning_rate": 3.2964612402802422e-06, "loss": 0.0905, "step": 3248 }, { "epoch": 0.65, "learning_rate": 3.293179845884245e-06, "loss": 0.0931, "step": 3249 }, { "epoch": 0.65, "learning_rate": 3.289899283371657e-06, "loss": 0.0868, "step": 3250 }, { "epoch": 0.65, "learning_rate": 3.2866195543413843e-06, "loss": 0.0919, "step": 3251 }, { "epoch": 0.65, "learning_rate": 3.2833406603919243e-06, "loss": 0.0909, "step": 3252 }, { "epoch": 0.65, "learning_rate": 3.280062603121373e-06, "loss": 0.0881, "step": 3253 }, { "epoch": 0.65, "learning_rate": 3.2767853841274154e-06, "loss": 0.0887, "step": 3254 }, { "epoch": 0.65, "learning_rate": 3.273509005007327e-06, "loss": 0.0863, "step": 3255 }, { "epoch": 0.65, "learning_rate": 3.2702334673579765e-06, "loss": 0.0895, "step": 3256 }, { "epoch": 0.65, "learning_rate": 3.26695877277582e-06, "loss": 0.0851, "step": 3257 }, { "epoch": 0.65, "learning_rate": 3.263684922856905e-06, "loss": 0.0867, "step": 3258 }, { "epoch": 0.65, "learning_rate": 3.260411919196866e-06, "loss": 0.0826, "step": 3259 }, { "epoch": 0.65, "learning_rate": 3.2571397633909252e-06, "loss": 0.0839, "step": 3260 }, { "epoch": 0.65, "learning_rate": 3.2538684570338908e-06, "loss": 0.0866, "step": 3261 }, { "epoch": 0.65, "learning_rate": 3.2505980017201564e-06, "loss": 0.091, "step": 3262 }, { "epoch": 0.65, "learning_rate": 3.247328399043706e-06, "loss": 0.0926, "step": 3263 }, { "epoch": 0.65, "learning_rate": 3.2440596505981005e-06, "loss": 0.088, "step": 3264 }, { "epoch": 0.65, "learning_rate": 3.2407917579764914e-06, "loss": 0.0854, "step": 3265 }, { "epoch": 0.65, "learning_rate": 3.2375247227716077e-06, "loss": 0.1041, "step": 3266 }, { "epoch": 0.65, "learning_rate": 3.2342585465757625e-06, "loss": 0.0872, "step": 3267 }, { "epoch": 0.65, "learning_rate": 3.230993230980853e-06, "loss": 0.0989, "step": 3268 }, { "epoch": 0.65, "learning_rate": 3.227728777578353e-06, "loss": 0.1116, "step": 3269 }, { "epoch": 0.65, "learning_rate": 3.224465187959316e-06, "loss": 0.0875, "step": 3270 }, { "epoch": 0.65, "learning_rate": 3.2212024637143756e-06, "loss": 0.0885, "step": 3271 }, { "epoch": 0.65, "learning_rate": 3.217940606433747e-06, "loss": 0.097, "step": 3272 }, { "epoch": 0.65, "learning_rate": 3.2146796177072183e-06, "loss": 0.1085, "step": 3273 }, { "epoch": 0.65, "learning_rate": 3.211419499124154e-06, "loss": 0.0942, "step": 3274 }, { "epoch": 0.66, "learning_rate": 3.2081602522734987e-06, "loss": 0.0908, "step": 3275 }, { "epoch": 0.66, "learning_rate": 3.2049018787437693e-06, "loss": 0.0909, "step": 3276 }, { "epoch": 0.66, "learning_rate": 3.201644380123056e-06, "loss": 0.0829, "step": 3277 }, { "epoch": 0.66, "learning_rate": 3.1983877579990276e-06, "loss": 0.098, "step": 3278 }, { "epoch": 0.66, "learning_rate": 3.195132013958918e-06, "loss": 0.0887, "step": 3279 }, { "epoch": 0.66, "learning_rate": 3.1918771495895395e-06, "loss": 0.086, "step": 3280 }, { "epoch": 0.66, "learning_rate": 3.188623166477272e-06, "loss": 0.0851, "step": 3281 }, { "epoch": 0.66, "learning_rate": 3.185370066208069e-06, "loss": 0.0915, "step": 3282 }, { "epoch": 0.66, "learning_rate": 3.1821178503674515e-06, "loss": 0.1017, "step": 3283 }, { "epoch": 0.66, "learning_rate": 3.178866520540509e-06, "loss": 0.0832, "step": 3284 }, { "epoch": 0.66, "learning_rate": 3.1756160783119015e-06, "loss": 0.0918, "step": 3285 }, { "epoch": 0.66, "learning_rate": 3.1723665252658564e-06, "loss": 0.1077, "step": 3286 }, { "epoch": 0.66, "learning_rate": 3.169117862986163e-06, "loss": 0.0856, "step": 3287 }, { "epoch": 0.66, "learning_rate": 3.16587009305618e-06, "loss": 0.0848, "step": 3288 }, { "epoch": 0.66, "learning_rate": 3.1626232170588343e-06, "loss": 0.0789, "step": 3289 }, { "epoch": 0.66, "learning_rate": 3.1593772365766107e-06, "loss": 0.0969, "step": 3290 }, { "epoch": 0.66, "learning_rate": 3.1561321531915622e-06, "loss": 0.0837, "step": 3291 }, { "epoch": 0.66, "learning_rate": 3.152887968485303e-06, "loss": 0.087, "step": 3292 }, { "epoch": 0.66, "learning_rate": 3.149644684039008e-06, "loss": 0.1164, "step": 3293 }, { "epoch": 0.66, "learning_rate": 3.1464023014334164e-06, "loss": 0.0839, "step": 3294 }, { "epoch": 0.66, "learning_rate": 3.1431608222488276e-06, "loss": 0.0776, "step": 3295 }, { "epoch": 0.66, "learning_rate": 3.139920248065095e-06, "loss": 0.0878, "step": 3296 }, { "epoch": 0.66, "learning_rate": 3.1366805804616353e-06, "loss": 0.0964, "step": 3297 }, { "epoch": 0.66, "learning_rate": 3.1334418210174268e-06, "loss": 0.0865, "step": 3298 }, { "epoch": 0.66, "learning_rate": 3.130203971310999e-06, "loss": 0.0923, "step": 3299 }, { "epoch": 0.66, "learning_rate": 3.12696703292044e-06, "loss": 0.0881, "step": 3300 }, { "epoch": 0.66, "learning_rate": 3.1237310074233964e-06, "loss": 0.0831, "step": 3301 }, { "epoch": 0.66, "learning_rate": 3.1204958963970666e-06, "loss": 0.1053, "step": 3302 }, { "epoch": 0.66, "learning_rate": 3.117261701418204e-06, "loss": 0.1024, "step": 3303 }, { "epoch": 0.66, "learning_rate": 3.114028424063118e-06, "loss": 0.0924, "step": 3304 }, { "epoch": 0.66, "learning_rate": 3.110796065907665e-06, "loss": 0.0827, "step": 3305 }, { "epoch": 0.66, "learning_rate": 3.1075646285272608e-06, "loss": 0.0917, "step": 3306 }, { "epoch": 0.66, "learning_rate": 3.1043341134968653e-06, "loss": 0.0873, "step": 3307 }, { "epoch": 0.66, "learning_rate": 3.1011045223909954e-06, "loss": 0.0804, "step": 3308 }, { "epoch": 0.66, "learning_rate": 3.097875856783713e-06, "loss": 0.081, "step": 3309 }, { "epoch": 0.66, "learning_rate": 3.09464811824863e-06, "loss": 0.0821, "step": 3310 }, { "epoch": 0.66, "learning_rate": 3.0914213083589086e-06, "loss": 0.0856, "step": 3311 }, { "epoch": 0.66, "learning_rate": 3.088195428687254e-06, "loss": 0.0846, "step": 3312 }, { "epoch": 0.66, "learning_rate": 3.0849704808059266e-06, "loss": 0.0871, "step": 3313 }, { "epoch": 0.66, "learning_rate": 3.0817464662867192e-06, "loss": 0.0862, "step": 3314 }, { "epoch": 0.66, "learning_rate": 3.078523386700982e-06, "loss": 0.0867, "step": 3315 }, { "epoch": 0.66, "learning_rate": 3.0753012436196033e-06, "loss": 0.0833, "step": 3316 }, { "epoch": 0.66, "learning_rate": 3.0720800386130176e-06, "loss": 0.0865, "step": 3317 }, { "epoch": 0.66, "learning_rate": 3.0688597732512004e-06, "loss": 0.0865, "step": 3318 }, { "epoch": 0.66, "learning_rate": 3.0656404491036696e-06, "loss": 0.1181, "step": 3319 }, { "epoch": 0.66, "learning_rate": 3.0624220677394854e-06, "loss": 0.0974, "step": 3320 }, { "epoch": 0.66, "learning_rate": 3.059204630727247e-06, "loss": 0.0842, "step": 3321 }, { "epoch": 0.66, "learning_rate": 3.0559881396350967e-06, "loss": 0.0829, "step": 3322 }, { "epoch": 0.66, "learning_rate": 3.0527725960307083e-06, "loss": 0.0951, "step": 3323 }, { "epoch": 0.66, "learning_rate": 3.049558001481302e-06, "loss": 0.0959, "step": 3324 }, { "epoch": 0.67, "learning_rate": 3.0463443575536324e-06, "loss": 0.0859, "step": 3325 }, { "epoch": 0.67, "learning_rate": 3.043131665813988e-06, "loss": 0.0913, "step": 3326 }, { "epoch": 0.67, "learning_rate": 3.0399199278281986e-06, "loss": 0.0914, "step": 3327 }, { "epoch": 0.67, "learning_rate": 3.0367091451616254e-06, "loss": 0.0864, "step": 3328 }, { "epoch": 0.67, "learning_rate": 3.033499319379163e-06, "loss": 0.0978, "step": 3329 }, { "epoch": 0.67, "learning_rate": 3.030290452045245e-06, "loss": 0.0841, "step": 3330 }, { "epoch": 0.67, "learning_rate": 3.0270825447238316e-06, "loss": 0.0824, "step": 3331 }, { "epoch": 0.67, "learning_rate": 3.023875598978419e-06, "loss": 0.0846, "step": 3332 }, { "epoch": 0.67, "learning_rate": 3.0206696163720317e-06, "loss": 0.0872, "step": 3333 }, { "epoch": 0.67, "learning_rate": 3.0174645984672298e-06, "loss": 0.0867, "step": 3334 }, { "epoch": 0.67, "learning_rate": 3.0142605468260976e-06, "loss": 0.1, "step": 3335 }, { "epoch": 0.67, "learning_rate": 3.011057463010252e-06, "loss": 0.0854, "step": 3336 }, { "epoch": 0.67, "learning_rate": 3.007855348580837e-06, "loss": 0.0789, "step": 3337 }, { "epoch": 0.67, "learning_rate": 3.004654205098524e-06, "loss": 0.085, "step": 3338 }, { "epoch": 0.67, "learning_rate": 3.001454034123512e-06, "loss": 0.0869, "step": 3339 }, { "epoch": 0.67, "learning_rate": 2.9982548372155264e-06, "loss": 0.0917, "step": 3340 }, { "epoch": 0.67, "learning_rate": 2.9950566159338146e-06, "loss": 0.0929, "step": 3341 }, { "epoch": 0.67, "learning_rate": 2.991859371837151e-06, "loss": 0.0748, "step": 3342 }, { "epoch": 0.67, "learning_rate": 2.9886631064838355e-06, "loss": 0.0929, "step": 3343 }, { "epoch": 0.67, "learning_rate": 2.9854678214316875e-06, "loss": 0.0807, "step": 3344 }, { "epoch": 0.67, "learning_rate": 2.98227351823805e-06, "loss": 0.091, "step": 3345 }, { "epoch": 0.67, "learning_rate": 2.9790801984597885e-06, "loss": 0.0822, "step": 3346 }, { "epoch": 0.67, "learning_rate": 2.9758878636532884e-06, "loss": 0.0879, "step": 3347 }, { "epoch": 0.67, "learning_rate": 2.972696515374455e-06, "loss": 0.088, "step": 3348 }, { "epoch": 0.67, "learning_rate": 2.969506155178711e-06, "loss": 0.0873, "step": 3349 }, { "epoch": 0.67, "learning_rate": 2.966316784621e-06, "loss": 0.0879, "step": 3350 }, { "epoch": 0.67, "learning_rate": 2.963128405255783e-06, "loss": 0.091, "step": 3351 }, { "epoch": 0.67, "learning_rate": 2.9599410186370363e-06, "loss": 0.0916, "step": 3352 }, { "epoch": 0.67, "learning_rate": 2.9567546263182554e-06, "loss": 0.0821, "step": 3353 }, { "epoch": 0.67, "learning_rate": 2.9535692298524477e-06, "loss": 0.081, "step": 3354 }, { "epoch": 0.67, "learning_rate": 2.9503848307921363e-06, "loss": 0.082, "step": 3355 }, { "epoch": 0.67, "learning_rate": 2.9472014306893605e-06, "loss": 0.0845, "step": 3356 }, { "epoch": 0.67, "learning_rate": 2.94401903109567e-06, "loss": 0.0968, "step": 3357 }, { "epoch": 0.67, "learning_rate": 2.940837633562127e-06, "loss": 0.1066, "step": 3358 }, { "epoch": 0.67, "learning_rate": 2.9376572396393047e-06, "loss": 0.0817, "step": 3359 }, { "epoch": 0.67, "learning_rate": 2.934477850877292e-06, "loss": 0.0927, "step": 3360 }, { "epoch": 0.67, "learning_rate": 2.931299468825682e-06, "loss": 0.0845, "step": 3361 }, { "epoch": 0.67, "learning_rate": 2.92812209503358e-06, "loss": 0.1074, "step": 3362 }, { "epoch": 0.67, "learning_rate": 2.9249457310495994e-06, "loss": 0.0896, "step": 3363 }, { "epoch": 0.67, "learning_rate": 2.921770378421861e-06, "loss": 0.0846, "step": 3364 }, { "epoch": 0.67, "learning_rate": 2.918596038697995e-06, "loss": 0.0892, "step": 3365 }, { "epoch": 0.67, "learning_rate": 2.915422713425134e-06, "loss": 0.0853, "step": 3366 }, { "epoch": 0.67, "learning_rate": 2.912250404149918e-06, "loss": 0.0907, "step": 3367 }, { "epoch": 0.67, "learning_rate": 2.9090791124184934e-06, "loss": 0.1032, "step": 3368 }, { "epoch": 0.67, "learning_rate": 2.905908839776509e-06, "loss": 0.0862, "step": 3369 }, { "epoch": 0.67, "learning_rate": 2.9027395877691143e-06, "loss": 0.0826, "step": 3370 }, { "epoch": 0.67, "learning_rate": 2.899571357940969e-06, "loss": 0.0988, "step": 3371 }, { "epoch": 0.67, "learning_rate": 2.896404151836227e-06, "loss": 0.0924, "step": 3372 }, { "epoch": 0.67, "learning_rate": 2.893237970998547e-06, "loss": 0.0856, "step": 3373 }, { "epoch": 0.67, "learning_rate": 2.8900728169710866e-06, "loss": 0.0917, "step": 3374 }, { "epoch": 0.68, "learning_rate": 2.886908691296504e-06, "loss": 0.0867, "step": 3375 }, { "epoch": 0.68, "learning_rate": 2.8837455955169547e-06, "loss": 0.0969, "step": 3376 }, { "epoch": 0.68, "learning_rate": 2.8805835311740933e-06, "loss": 0.0892, "step": 3377 }, { "epoch": 0.68, "learning_rate": 2.877422499809072e-06, "loss": 0.0871, "step": 3378 }, { "epoch": 0.68, "learning_rate": 2.874262502962537e-06, "loss": 0.0868, "step": 3379 }, { "epoch": 0.68, "learning_rate": 2.871103542174637e-06, "loss": 0.086, "step": 3380 }, { "epoch": 0.68, "learning_rate": 2.8679456189850076e-06, "loss": 0.0882, "step": 3381 }, { "epoch": 0.68, "learning_rate": 2.864788734932783e-06, "loss": 0.0884, "step": 3382 }, { "epoch": 0.68, "learning_rate": 2.8616328915565907e-06, "loss": 0.0893, "step": 3383 }, { "epoch": 0.68, "learning_rate": 2.858478090394549e-06, "loss": 0.0823, "step": 3384 }, { "epoch": 0.68, "learning_rate": 2.8553243329842715e-06, "loss": 0.0908, "step": 3385 }, { "epoch": 0.68, "learning_rate": 2.8521716208628597e-06, "loss": 0.0932, "step": 3386 }, { "epoch": 0.68, "learning_rate": 2.849019955566908e-06, "loss": 0.0931, "step": 3387 }, { "epoch": 0.68, "learning_rate": 2.8458693386325e-06, "loss": 0.0936, "step": 3388 }, { "epoch": 0.68, "learning_rate": 2.8427197715952047e-06, "loss": 0.0835, "step": 3389 }, { "epoch": 0.68, "learning_rate": 2.839571255990088e-06, "loss": 0.0842, "step": 3390 }, { "epoch": 0.68, "learning_rate": 2.8364237933516964e-06, "loss": 0.082, "step": 3391 }, { "epoch": 0.68, "learning_rate": 2.8332773852140644e-06, "loss": 0.0837, "step": 3392 }, { "epoch": 0.68, "learning_rate": 2.830132033110713e-06, "loss": 0.0867, "step": 3393 }, { "epoch": 0.68, "learning_rate": 2.826987738574649e-06, "loss": 0.0874, "step": 3394 }, { "epoch": 0.68, "learning_rate": 2.8238445031383634e-06, "loss": 0.0832, "step": 3395 }, { "epoch": 0.68, "learning_rate": 2.8207023283338304e-06, "loss": 0.0814, "step": 3396 }, { "epoch": 0.68, "learning_rate": 2.8175612156925082e-06, "loss": 0.0883, "step": 3397 }, { "epoch": 0.68, "learning_rate": 2.814421166745337e-06, "loss": 0.1134, "step": 3398 }, { "epoch": 0.68, "learning_rate": 2.811282183022736e-06, "loss": 0.0976, "step": 3399 }, { "epoch": 0.68, "learning_rate": 2.8081442660546126e-06, "loss": 0.0829, "step": 3400 }, { "epoch": 0.68, "learning_rate": 2.805007417370347e-06, "loss": 0.0905, "step": 3401 }, { "epoch": 0.68, "learning_rate": 2.8018716384988034e-06, "loss": 0.0875, "step": 3402 }, { "epoch": 0.68, "learning_rate": 2.798736930968315e-06, "loss": 0.0868, "step": 3403 }, { "epoch": 0.68, "learning_rate": 2.795603296306708e-06, "loss": 0.0848, "step": 3404 }, { "epoch": 0.68, "learning_rate": 2.7924707360412743e-06, "loss": 0.0904, "step": 3405 }, { "epoch": 0.68, "learning_rate": 2.7893392516987873e-06, "loss": 0.0832, "step": 3406 }, { "epoch": 0.68, "learning_rate": 2.7862088448054936e-06, "loss": 0.0964, "step": 3407 }, { "epoch": 0.68, "learning_rate": 2.7830795168871127e-06, "loss": 0.0891, "step": 3408 }, { "epoch": 0.68, "learning_rate": 2.779951269468847e-06, "loss": 0.0978, "step": 3409 }, { "epoch": 0.68, "learning_rate": 2.776824104075364e-06, "loss": 0.096, "step": 3410 }, { "epoch": 0.68, "learning_rate": 2.7736980222308042e-06, "loss": 0.0848, "step": 3411 }, { "epoch": 0.68, "learning_rate": 2.7705730254587802e-06, "loss": 0.1071, "step": 3412 }, { "epoch": 0.68, "learning_rate": 2.7674491152823825e-06, "loss": 0.09, "step": 3413 }, { "epoch": 0.68, "learning_rate": 2.7643262932241642e-06, "loss": 0.0956, "step": 3414 }, { "epoch": 0.68, "learning_rate": 2.761204560806152e-06, "loss": 0.0917, "step": 3415 }, { "epoch": 0.68, "learning_rate": 2.7580839195498397e-06, "loss": 0.0854, "step": 3416 }, { "epoch": 0.68, "learning_rate": 2.75496437097619e-06, "loss": 0.0847, "step": 3417 }, { "epoch": 0.68, "learning_rate": 2.75184591660563e-06, "loss": 0.0866, "step": 3418 }, { "epoch": 0.68, "learning_rate": 2.7487285579580635e-06, "loss": 0.082, "step": 3419 }, { "epoch": 0.68, "learning_rate": 2.7456122965528475e-06, "loss": 0.0851, "step": 3420 }, { "epoch": 0.68, "learning_rate": 2.742497133908812e-06, "loss": 0.0807, "step": 3421 }, { "epoch": 0.68, "learning_rate": 2.739383071544246e-06, "loss": 0.088, "step": 3422 }, { "epoch": 0.68, "learning_rate": 2.736270110976912e-06, "loss": 0.0844, "step": 3423 }, { "epoch": 0.68, "learning_rate": 2.7331582537240243e-06, "loss": 0.085, "step": 3424 }, { "epoch": 0.69, "learning_rate": 2.7300475013022666e-06, "loss": 0.0815, "step": 3425 }, { "epoch": 0.69, "learning_rate": 2.726937855227781e-06, "loss": 0.0864, "step": 3426 }, { "epoch": 0.69, "learning_rate": 2.723829317016169e-06, "loss": 0.0869, "step": 3427 }, { "epoch": 0.69, "learning_rate": 2.7207218881825016e-06, "loss": 0.0827, "step": 3428 }, { "epoch": 0.69, "learning_rate": 2.717615570241294e-06, "loss": 0.0936, "step": 3429 }, { "epoch": 0.69, "learning_rate": 2.714510364706531e-06, "loss": 0.1032, "step": 3430 }, { "epoch": 0.69, "learning_rate": 2.7114062730916513e-06, "loss": 0.0781, "step": 3431 }, { "epoch": 0.69, "learning_rate": 2.708303296909551e-06, "loss": 0.0803, "step": 3432 }, { "epoch": 0.69, "learning_rate": 2.705201437672585e-06, "loss": 0.0809, "step": 3433 }, { "epoch": 0.69, "learning_rate": 2.7021006968925613e-06, "loss": 0.0845, "step": 3434 }, { "epoch": 0.69, "learning_rate": 2.699001076080742e-06, "loss": 0.097, "step": 3435 }, { "epoch": 0.69, "learning_rate": 2.6959025767478466e-06, "loss": 0.0822, "step": 3436 }, { "epoch": 0.69, "learning_rate": 2.692805200404044e-06, "loss": 0.0841, "step": 3437 }, { "epoch": 0.69, "learning_rate": 2.6897089485589584e-06, "loss": 0.0842, "step": 3438 }, { "epoch": 0.69, "learning_rate": 2.686613822721666e-06, "loss": 0.0914, "step": 3439 }, { "epoch": 0.69, "learning_rate": 2.683519824400693e-06, "loss": 0.0884, "step": 3440 }, { "epoch": 0.69, "learning_rate": 2.680426955104014e-06, "loss": 0.0828, "step": 3441 }, { "epoch": 0.69, "learning_rate": 2.677335216339062e-06, "loss": 0.0998, "step": 3442 }, { "epoch": 0.69, "learning_rate": 2.6742446096127086e-06, "loss": 0.0834, "step": 3443 }, { "epoch": 0.69, "learning_rate": 2.671155136431279e-06, "loss": 0.0901, "step": 3444 }, { "epoch": 0.69, "learning_rate": 2.6680667983005446e-06, "loss": 0.1339, "step": 3445 }, { "epoch": 0.69, "learning_rate": 2.6649795967257243e-06, "loss": 0.1032, "step": 3446 }, { "epoch": 0.69, "learning_rate": 2.661893533211482e-06, "loss": 0.0969, "step": 3447 }, { "epoch": 0.69, "learning_rate": 2.658808609261928e-06, "loss": 0.0917, "step": 3448 }, { "epoch": 0.69, "learning_rate": 2.6557248263806175e-06, "loss": 0.0904, "step": 3449 }, { "epoch": 0.69, "learning_rate": 2.6526421860705474e-06, "loss": 0.0807, "step": 3450 }, { "epoch": 0.69, "learning_rate": 2.649560689834158e-06, "loss": 0.0851, "step": 3451 }, { "epoch": 0.69, "learning_rate": 2.646480339173337e-06, "loss": 0.096, "step": 3452 }, { "epoch": 0.69, "learning_rate": 2.6434011355894074e-06, "loss": 0.0936, "step": 3453 }, { "epoch": 0.69, "learning_rate": 2.640323080583137e-06, "loss": 0.0929, "step": 3454 }, { "epoch": 0.69, "learning_rate": 2.637246175654731e-06, "loss": 0.0857, "step": 3455 }, { "epoch": 0.69, "learning_rate": 2.634170422303835e-06, "loss": 0.0884, "step": 3456 }, { "epoch": 0.69, "learning_rate": 2.6310958220295356e-06, "loss": 0.0818, "step": 3457 }, { "epoch": 0.69, "learning_rate": 2.6280223763303546e-06, "loss": 0.0897, "step": 3458 }, { "epoch": 0.69, "learning_rate": 2.6249500867042523e-06, "loss": 0.0836, "step": 3459 }, { "epoch": 0.69, "learning_rate": 2.6218789546486235e-06, "loss": 0.0981, "step": 3460 }, { "epoch": 0.69, "learning_rate": 2.618808981660304e-06, "loss": 0.0853, "step": 3461 }, { "epoch": 0.69, "learning_rate": 2.61574016923556e-06, "loss": 0.0924, "step": 3462 }, { "epoch": 0.69, "learning_rate": 2.612672518870093e-06, "loss": 0.082, "step": 3463 }, { "epoch": 0.69, "learning_rate": 2.6096060320590393e-06, "loss": 0.084, "step": 3464 }, { "epoch": 0.69, "learning_rate": 2.6065407102969664e-06, "loss": 0.0832, "step": 3465 }, { "epoch": 0.69, "learning_rate": 2.6034765550778753e-06, "loss": 0.0839, "step": 3466 }, { "epoch": 0.69, "learning_rate": 2.600413567895198e-06, "loss": 0.0933, "step": 3467 }, { "epoch": 0.69, "learning_rate": 2.5973517502417966e-06, "loss": 0.097, "step": 3468 }, { "epoch": 0.69, "learning_rate": 2.5942911036099657e-06, "loss": 0.0989, "step": 3469 }, { "epoch": 0.69, "learning_rate": 2.5912316294914232e-06, "loss": 0.1018, "step": 3470 }, { "epoch": 0.69, "learning_rate": 2.588173329377324e-06, "loss": 0.0847, "step": 3471 }, { "epoch": 0.69, "learning_rate": 2.5851162047582477e-06, "loss": 0.0812, "step": 3472 }, { "epoch": 0.69, "learning_rate": 2.582060257124195e-06, "loss": 0.0816, "step": 3473 }, { "epoch": 0.69, "learning_rate": 2.5790054879645964e-06, "loss": 0.0975, "step": 3474 }, { "epoch": 0.69, "learning_rate": 2.5759518987683154e-06, "loss": 0.0857, "step": 3475 }, { "epoch": 0.7, "learning_rate": 2.5728994910236304e-06, "loss": 0.0856, "step": 3476 }, { "epoch": 0.7, "learning_rate": 2.5698482662182494e-06, "loss": 0.1139, "step": 3477 }, { "epoch": 0.7, "learning_rate": 2.5667982258393016e-06, "loss": 0.1027, "step": 3478 }, { "epoch": 0.7, "learning_rate": 2.5637493713733376e-06, "loss": 0.0849, "step": 3479 }, { "epoch": 0.7, "learning_rate": 2.560701704306336e-06, "loss": 0.0858, "step": 3480 }, { "epoch": 0.7, "learning_rate": 2.557655226123693e-06, "loss": 0.0806, "step": 3481 }, { "epoch": 0.7, "learning_rate": 2.5546099383102206e-06, "loss": 0.0951, "step": 3482 }, { "epoch": 0.7, "learning_rate": 2.5515658423501573e-06, "loss": 0.0822, "step": 3483 }, { "epoch": 0.7, "learning_rate": 2.5485229397271567e-06, "loss": 0.084, "step": 3484 }, { "epoch": 0.7, "learning_rate": 2.545481231924296e-06, "loss": 0.0882, "step": 3485 }, { "epoch": 0.7, "learning_rate": 2.5424407204240653e-06, "loss": 0.0951, "step": 3486 }, { "epoch": 0.7, "learning_rate": 2.539401406708373e-06, "loss": 0.0792, "step": 3487 }, { "epoch": 0.7, "learning_rate": 2.536363292258543e-06, "loss": 0.0833, "step": 3488 }, { "epoch": 0.7, "learning_rate": 2.533326378555314e-06, "loss": 0.0816, "step": 3489 }, { "epoch": 0.7, "learning_rate": 2.5302906670788463e-06, "loss": 0.0853, "step": 3490 }, { "epoch": 0.7, "learning_rate": 2.527256159308703e-06, "loss": 0.0797, "step": 3491 }, { "epoch": 0.7, "learning_rate": 2.524222856723869e-06, "loss": 0.0943, "step": 3492 }, { "epoch": 0.7, "learning_rate": 2.5211907608027366e-06, "loss": 0.0952, "step": 3493 }, { "epoch": 0.7, "learning_rate": 2.518159873023116e-06, "loss": 0.0898, "step": 3494 }, { "epoch": 0.7, "learning_rate": 2.5151301948622235e-06, "loss": 0.0849, "step": 3495 }, { "epoch": 0.7, "learning_rate": 2.5121017277966875e-06, "loss": 0.0843, "step": 3496 }, { "epoch": 0.7, "learning_rate": 2.509074473302546e-06, "loss": 0.0839, "step": 3497 }, { "epoch": 0.7, "learning_rate": 2.506048432855247e-06, "loss": 0.0885, "step": 3498 }, { "epoch": 0.7, "learning_rate": 2.5030236079296443e-06, "loss": 0.076, "step": 3499 }, { "epoch": 0.7, "learning_rate": 2.5000000000000015e-06, "loss": 0.0786, "step": 3500 }, { "epoch": 0.7, "learning_rate": 2.496977610539988e-06, "loss": 0.0873, "step": 3501 }, { "epoch": 0.7, "learning_rate": 2.49395644102268e-06, "loss": 0.0911, "step": 3502 }, { "epoch": 0.7, "learning_rate": 2.4909364929205575e-06, "loss": 0.0858, "step": 3503 }, { "epoch": 0.7, "learning_rate": 2.48791776770551e-06, "loss": 0.0855, "step": 3504 }, { "epoch": 0.7, "learning_rate": 2.484900266848825e-06, "loss": 0.0819, "step": 3505 }, { "epoch": 0.7, "learning_rate": 2.4818839918211963e-06, "loss": 0.0864, "step": 3506 }, { "epoch": 0.7, "learning_rate": 2.4788689440927193e-06, "loss": 0.0811, "step": 3507 }, { "epoch": 0.7, "learning_rate": 2.4758551251328923e-06, "loss": 0.1008, "step": 3508 }, { "epoch": 0.7, "learning_rate": 2.4728425364106136e-06, "loss": 0.0872, "step": 3509 }, { "epoch": 0.7, "learning_rate": 2.469831179394182e-06, "loss": 0.0864, "step": 3510 }, { "epoch": 0.7, "learning_rate": 2.4668210555512974e-06, "loss": 0.0888, "step": 3511 }, { "epoch": 0.7, "learning_rate": 2.4638121663490546e-06, "loss": 0.0846, "step": 3512 }, { "epoch": 0.7, "learning_rate": 2.4608045132539536e-06, "loss": 0.0855, "step": 3513 }, { "epoch": 0.7, "learning_rate": 2.4577980977318866e-06, "loss": 0.0857, "step": 3514 }, { "epoch": 0.7, "learning_rate": 2.4547929212481436e-06, "loss": 0.0832, "step": 3515 }, { "epoch": 0.7, "learning_rate": 2.4517889852674114e-06, "loss": 0.0929, "step": 3516 }, { "epoch": 0.7, "learning_rate": 2.448786291253772e-06, "loss": 0.0868, "step": 3517 }, { "epoch": 0.7, "learning_rate": 2.4457848406707014e-06, "loss": 0.0802, "step": 3518 }, { "epoch": 0.7, "learning_rate": 2.442784634981071e-06, "loss": 0.0916, "step": 3519 }, { "epoch": 0.7, "learning_rate": 2.4397856756471435e-06, "loss": 0.0863, "step": 3520 }, { "epoch": 0.7, "learning_rate": 2.4367879641305757e-06, "loss": 0.1067, "step": 3521 }, { "epoch": 0.7, "learning_rate": 2.4337915018924147e-06, "loss": 0.0872, "step": 3522 }, { "epoch": 0.7, "learning_rate": 2.4307962903931025e-06, "loss": 0.0983, "step": 3523 }, { "epoch": 0.7, "learning_rate": 2.4278023310924676e-06, "loss": 0.0913, "step": 3524 }, { "epoch": 0.7, "learning_rate": 2.424809625449729e-06, "loss": 0.0829, "step": 3525 }, { "epoch": 0.71, "learning_rate": 2.4218181749234954e-06, "loss": 0.0841, "step": 3526 }, { "epoch": 0.71, "learning_rate": 2.418827980971763e-06, "loss": 0.0853, "step": 3527 }, { "epoch": 0.71, "learning_rate": 2.415839045051916e-06, "loss": 0.0879, "step": 3528 }, { "epoch": 0.71, "learning_rate": 2.412851368620726e-06, "loss": 0.0893, "step": 3529 }, { "epoch": 0.71, "learning_rate": 2.40986495313435e-06, "loss": 0.09, "step": 3530 }, { "epoch": 0.71, "learning_rate": 2.4068798000483306e-06, "loss": 0.0843, "step": 3531 }, { "epoch": 0.71, "learning_rate": 2.403895910817593e-06, "loss": 0.0899, "step": 3532 }, { "epoch": 0.71, "learning_rate": 2.4009132868964525e-06, "loss": 0.0833, "step": 3533 }, { "epoch": 0.71, "learning_rate": 2.3979319297386035e-06, "loss": 0.0874, "step": 3534 }, { "epoch": 0.71, "learning_rate": 2.39495184079712e-06, "loss": 0.0835, "step": 3535 }, { "epoch": 0.71, "learning_rate": 2.391973021524461e-06, "loss": 0.0845, "step": 3536 }, { "epoch": 0.71, "learning_rate": 2.3889954733724708e-06, "loss": 0.087, "step": 3537 }, { "epoch": 0.71, "learning_rate": 2.3860191977923673e-06, "loss": 0.0921, "step": 3538 }, { "epoch": 0.71, "learning_rate": 2.3830441962347528e-06, "loss": 0.0838, "step": 3539 }, { "epoch": 0.71, "learning_rate": 2.380070470149605e-06, "loss": 0.0826, "step": 3540 }, { "epoch": 0.71, "learning_rate": 2.3770980209862814e-06, "loss": 0.0842, "step": 3541 }, { "epoch": 0.71, "learning_rate": 2.3741268501935212e-06, "loss": 0.0899, "step": 3542 }, { "epoch": 0.71, "learning_rate": 2.3711569592194363e-06, "loss": 0.0918, "step": 3543 }, { "epoch": 0.71, "learning_rate": 2.3681883495115114e-06, "loss": 0.0858, "step": 3544 }, { "epoch": 0.71, "learning_rate": 2.3652210225166122e-06, "loss": 0.0814, "step": 3545 }, { "epoch": 0.71, "learning_rate": 2.3622549796809807e-06, "loss": 0.0946, "step": 3546 }, { "epoch": 0.71, "learning_rate": 2.3592902224502284e-06, "loss": 0.0819, "step": 3547 }, { "epoch": 0.71, "learning_rate": 2.356326752269342e-06, "loss": 0.0856, "step": 3548 }, { "epoch": 0.71, "learning_rate": 2.353364570582681e-06, "loss": 0.1097, "step": 3549 }, { "epoch": 0.71, "learning_rate": 2.3504036788339763e-06, "loss": 0.1421, "step": 3550 }, { "epoch": 0.71, "learning_rate": 2.3474440784663287e-06, "loss": 0.0846, "step": 3551 }, { "epoch": 0.71, "learning_rate": 2.344485770922218e-06, "loss": 0.0894, "step": 3552 }, { "epoch": 0.71, "learning_rate": 2.3415287576434807e-06, "loss": 0.0811, "step": 3553 }, { "epoch": 0.71, "learning_rate": 2.338573040071332e-06, "loss": 0.0823, "step": 3554 }, { "epoch": 0.71, "learning_rate": 2.3356186196463497e-06, "loss": 0.0888, "step": 3555 }, { "epoch": 0.71, "learning_rate": 2.3326654978084872e-06, "loss": 0.0848, "step": 3556 }, { "epoch": 0.71, "learning_rate": 2.329713675997058e-06, "loss": 0.0799, "step": 3557 }, { "epoch": 0.71, "learning_rate": 2.3267631556507443e-06, "loss": 0.0863, "step": 3558 }, { "epoch": 0.71, "learning_rate": 2.323813938207593e-06, "loss": 0.0869, "step": 3559 }, { "epoch": 0.71, "learning_rate": 2.320866025105016e-06, "loss": 0.0883, "step": 3560 }, { "epoch": 0.71, "learning_rate": 2.3179194177797954e-06, "loss": 0.0894, "step": 3561 }, { "epoch": 0.71, "learning_rate": 2.3149741176680666e-06, "loss": 0.0848, "step": 3562 }, { "epoch": 0.71, "learning_rate": 2.312030126205335e-06, "loss": 0.0905, "step": 3563 }, { "epoch": 0.71, "learning_rate": 2.309087444826464e-06, "loss": 0.0908, "step": 3564 }, { "epoch": 0.71, "learning_rate": 2.3061460749656844e-06, "loss": 0.0878, "step": 3565 }, { "epoch": 0.71, "learning_rate": 2.303206018056583e-06, "loss": 0.0852, "step": 3566 }, { "epoch": 0.71, "learning_rate": 2.3002672755321076e-06, "loss": 0.085, "step": 3567 }, { "epoch": 0.71, "learning_rate": 2.297329848824565e-06, "loss": 0.0863, "step": 3568 }, { "epoch": 0.71, "learning_rate": 2.294393739365621e-06, "loss": 0.0894, "step": 3569 }, { "epoch": 0.71, "learning_rate": 2.2914589485863015e-06, "loss": 0.086, "step": 3570 }, { "epoch": 0.71, "learning_rate": 2.288525477916986e-06, "loss": 0.089, "step": 3571 }, { "epoch": 0.71, "learning_rate": 2.285593328787414e-06, "loss": 0.0893, "step": 3572 }, { "epoch": 0.71, "learning_rate": 2.282662502626678e-06, "loss": 0.0863, "step": 3573 }, { "epoch": 0.71, "learning_rate": 2.2797330008632255e-06, "loss": 0.0838, "step": 3574 }, { "epoch": 0.71, "learning_rate": 2.2768048249248648e-06, "loss": 0.0842, "step": 3575 }, { "epoch": 0.72, "learning_rate": 2.27387797623875e-06, "loss": 0.0942, "step": 3576 }, { "epoch": 0.72, "learning_rate": 2.2709524562313923e-06, "loss": 0.0827, "step": 3577 }, { "epoch": 0.72, "learning_rate": 2.268028266328655e-06, "loss": 0.0875, "step": 3578 }, { "epoch": 0.72, "learning_rate": 2.265105407955752e-06, "loss": 0.076, "step": 3579 }, { "epoch": 0.72, "learning_rate": 2.2621838825372496e-06, "loss": 0.0894, "step": 3580 }, { "epoch": 0.72, "learning_rate": 2.2592636914970633e-06, "loss": 0.0825, "step": 3581 }, { "epoch": 0.72, "learning_rate": 2.256344836258459e-06, "loss": 0.0895, "step": 3582 }, { "epoch": 0.72, "learning_rate": 2.2534273182440515e-06, "loss": 0.1013, "step": 3583 }, { "epoch": 0.72, "learning_rate": 2.250511138875801e-06, "loss": 0.0998, "step": 3584 }, { "epoch": 0.72, "learning_rate": 2.2475962995750224e-06, "loss": 0.0866, "step": 3585 }, { "epoch": 0.72, "learning_rate": 2.24468280176237e-06, "loss": 0.0864, "step": 3586 }, { "epoch": 0.72, "learning_rate": 2.2417706468578495e-06, "loss": 0.0848, "step": 3587 }, { "epoch": 0.72, "learning_rate": 2.2388598362808074e-06, "loss": 0.0869, "step": 3588 }, { "epoch": 0.72, "learning_rate": 2.235950371449938e-06, "loss": 0.0851, "step": 3589 }, { "epoch": 0.72, "learning_rate": 2.23304225378328e-06, "loss": 0.0834, "step": 3590 }, { "epoch": 0.72, "learning_rate": 2.2301354846982148e-06, "loss": 0.0865, "step": 3591 }, { "epoch": 0.72, "learning_rate": 2.2272300656114648e-06, "loss": 0.0826, "step": 3592 }, { "epoch": 0.72, "learning_rate": 2.224325997939095e-06, "loss": 0.0918, "step": 3593 }, { "epoch": 0.72, "learning_rate": 2.221423283096517e-06, "loss": 0.0886, "step": 3594 }, { "epoch": 0.72, "learning_rate": 2.218521922498476e-06, "loss": 0.0807, "step": 3595 }, { "epoch": 0.72, "learning_rate": 2.2156219175590623e-06, "loss": 0.0851, "step": 3596 }, { "epoch": 0.72, "learning_rate": 2.212723269691697e-06, "loss": 0.0989, "step": 3597 }, { "epoch": 0.72, "learning_rate": 2.209825980309151e-06, "loss": 0.0866, "step": 3598 }, { "epoch": 0.72, "learning_rate": 2.2069300508235273e-06, "loss": 0.0888, "step": 3599 }, { "epoch": 0.72, "learning_rate": 2.204035482646267e-06, "loss": 0.0848, "step": 3600 }, { "epoch": 0.72, "learning_rate": 2.201142277188146e-06, "loss": 0.1137, "step": 3601 }, { "epoch": 0.72, "learning_rate": 2.1982504358592777e-06, "loss": 0.0852, "step": 3602 }, { "epoch": 0.72, "learning_rate": 2.19535996006911e-06, "loss": 0.0817, "step": 3603 }, { "epoch": 0.72, "learning_rate": 2.192470851226428e-06, "loss": 0.0892, "step": 3604 }, { "epoch": 0.72, "learning_rate": 2.1895831107393485e-06, "loss": 0.0835, "step": 3605 }, { "epoch": 0.72, "learning_rate": 2.1866967400153184e-06, "loss": 0.1055, "step": 3606 }, { "epoch": 0.72, "learning_rate": 2.183811740461118e-06, "loss": 0.0834, "step": 3607 }, { "epoch": 0.72, "learning_rate": 2.1809281134828663e-06, "loss": 0.0879, "step": 3608 }, { "epoch": 0.72, "learning_rate": 2.1780458604860056e-06, "loss": 0.0854, "step": 3609 }, { "epoch": 0.72, "learning_rate": 2.175164982875311e-06, "loss": 0.0809, "step": 3610 }, { "epoch": 0.72, "learning_rate": 2.1722854820548873e-06, "loss": 0.0895, "step": 3611 }, { "epoch": 0.72, "learning_rate": 2.1694073594281663e-06, "loss": 0.0949, "step": 3612 }, { "epoch": 0.72, "learning_rate": 2.1665306163979132e-06, "loss": 0.0892, "step": 3613 }, { "epoch": 0.72, "learning_rate": 2.1636552543662187e-06, "loss": 0.0864, "step": 3614 }, { "epoch": 0.72, "learning_rate": 2.1607812747344955e-06, "loss": 0.0844, "step": 3615 }, { "epoch": 0.72, "learning_rate": 2.157908678903487e-06, "loss": 0.0853, "step": 3616 }, { "epoch": 0.72, "learning_rate": 2.1550374682732605e-06, "loss": 0.1418, "step": 3617 }, { "epoch": 0.72, "learning_rate": 2.152167644243213e-06, "loss": 0.0849, "step": 3618 }, { "epoch": 0.72, "learning_rate": 2.14929920821206e-06, "loss": 0.0866, "step": 3619 }, { "epoch": 0.72, "learning_rate": 2.146432161577842e-06, "loss": 0.119, "step": 3620 }, { "epoch": 0.72, "learning_rate": 2.1435665057379233e-06, "loss": 0.0852, "step": 3621 }, { "epoch": 0.72, "learning_rate": 2.140702242088987e-06, "loss": 0.0823, "step": 3622 }, { "epoch": 0.72, "learning_rate": 2.137839372027047e-06, "loss": 0.0836, "step": 3623 }, { "epoch": 0.72, "learning_rate": 2.134977896947425e-06, "loss": 0.0846, "step": 3624 }, { "epoch": 0.72, "learning_rate": 2.132117818244771e-06, "loss": 0.0859, "step": 3625 }, { "epoch": 0.73, "learning_rate": 2.1292591373130515e-06, "loss": 0.0837, "step": 3626 }, { "epoch": 0.73, "learning_rate": 2.1264018555455563e-06, "loss": 0.0954, "step": 3627 }, { "epoch": 0.73, "learning_rate": 2.1235459743348874e-06, "loss": 0.0793, "step": 3628 }, { "epoch": 0.73, "learning_rate": 2.1206914950729673e-06, "loss": 0.0779, "step": 3629 }, { "epoch": 0.73, "learning_rate": 2.1178384191510344e-06, "loss": 0.0823, "step": 3630 }, { "epoch": 0.73, "learning_rate": 2.114986747959643e-06, "loss": 0.0868, "step": 3631 }, { "epoch": 0.73, "learning_rate": 2.112136482888663e-06, "loss": 0.0895, "step": 3632 }, { "epoch": 0.73, "learning_rate": 2.1092876253272793e-06, "loss": 0.0847, "step": 3633 }, { "epoch": 0.73, "learning_rate": 2.10644017666399e-06, "loss": 0.0885, "step": 3634 }, { "epoch": 0.73, "learning_rate": 2.103594138286607e-06, "loss": 0.0857, "step": 3635 }, { "epoch": 0.73, "learning_rate": 2.100749511582254e-06, "loss": 0.0867, "step": 3636 }, { "epoch": 0.73, "learning_rate": 2.09790629793737e-06, "loss": 0.0787, "step": 3637 }, { "epoch": 0.73, "learning_rate": 2.095064498737701e-06, "loss": 0.0853, "step": 3638 }, { "epoch": 0.73, "learning_rate": 2.0922241153683064e-06, "loss": 0.0869, "step": 3639 }, { "epoch": 0.73, "learning_rate": 2.0893851492135536e-06, "loss": 0.0836, "step": 3640 }, { "epoch": 0.73, "learning_rate": 2.0865476016571206e-06, "loss": 0.0841, "step": 3641 }, { "epoch": 0.73, "learning_rate": 2.083711474081993e-06, "loss": 0.0806, "step": 3642 }, { "epoch": 0.73, "learning_rate": 2.080876767870466e-06, "loss": 0.0863, "step": 3643 }, { "epoch": 0.73, "learning_rate": 2.07804348440414e-06, "loss": 0.0832, "step": 3644 }, { "epoch": 0.73, "learning_rate": 2.075211625063923e-06, "loss": 0.0872, "step": 3645 }, { "epoch": 0.73, "learning_rate": 2.0723811912300295e-06, "loss": 0.09, "step": 3646 }, { "epoch": 0.73, "learning_rate": 2.0695521842819788e-06, "loss": 0.0886, "step": 3647 }, { "epoch": 0.73, "learning_rate": 2.066724605598594e-06, "loss": 0.0898, "step": 3648 }, { "epoch": 0.73, "learning_rate": 2.063898456558002e-06, "loss": 0.0846, "step": 3649 }, { "epoch": 0.73, "learning_rate": 2.061073738537635e-06, "loss": 0.0886, "step": 3650 }, { "epoch": 0.73, "learning_rate": 2.0582504529142248e-06, "loss": 0.085, "step": 3651 }, { "epoch": 0.73, "learning_rate": 2.0554286010638076e-06, "loss": 0.0837, "step": 3652 }, { "epoch": 0.73, "learning_rate": 2.0526081843617183e-06, "loss": 0.0841, "step": 3653 }, { "epoch": 0.73, "learning_rate": 2.049789204182596e-06, "loss": 0.0805, "step": 3654 }, { "epoch": 0.73, "learning_rate": 2.046971661900373e-06, "loss": 0.0959, "step": 3655 }, { "epoch": 0.73, "learning_rate": 2.04415555888829e-06, "loss": 0.0847, "step": 3656 }, { "epoch": 0.73, "learning_rate": 2.04134089651888e-06, "loss": 0.0919, "step": 3657 }, { "epoch": 0.73, "learning_rate": 2.0385276761639768e-06, "loss": 0.0795, "step": 3658 }, { "epoch": 0.73, "learning_rate": 2.035715899194704e-06, "loss": 0.1024, "step": 3659 }, { "epoch": 0.73, "learning_rate": 2.0329055669814936e-06, "loss": 0.0815, "step": 3660 }, { "epoch": 0.73, "learning_rate": 2.030096680894065e-06, "loss": 0.0877, "step": 3661 }, { "epoch": 0.73, "learning_rate": 2.027289242301435e-06, "loss": 0.0837, "step": 3662 }, { "epoch": 0.73, "learning_rate": 2.0244832525719155e-06, "loss": 0.1362, "step": 3663 }, { "epoch": 0.73, "learning_rate": 2.02167871307311e-06, "loss": 0.0844, "step": 3664 }, { "epoch": 0.73, "learning_rate": 2.0188756251719204e-06, "loss": 0.0878, "step": 3665 }, { "epoch": 0.73, "learning_rate": 2.016073990234536e-06, "loss": 0.0841, "step": 3666 }, { "epoch": 0.73, "learning_rate": 2.0132738096264415e-06, "loss": 0.0882, "step": 3667 }, { "epoch": 0.73, "learning_rate": 2.0104750847124075e-06, "loss": 0.0814, "step": 3668 }, { "epoch": 0.73, "learning_rate": 2.007677816856498e-06, "loss": 0.0851, "step": 3669 }, { "epoch": 0.73, "learning_rate": 2.0048820074220716e-06, "loss": 0.0801, "step": 3670 }, { "epoch": 0.73, "learning_rate": 2.002087657771769e-06, "loss": 0.086, "step": 3671 }, { "epoch": 0.73, "learning_rate": 1.999294769267523e-06, "loss": 0.0959, "step": 3672 }, { "epoch": 0.73, "learning_rate": 1.996503343270554e-06, "loss": 0.0823, "step": 3673 }, { "epoch": 0.73, "learning_rate": 1.9937133811413666e-06, "loss": 0.0933, "step": 3674 }, { "epoch": 0.73, "learning_rate": 1.990924884239758e-06, "loss": 0.0846, "step": 3675 }, { "epoch": 0.74, "learning_rate": 1.988137853924808e-06, "loss": 0.0838, "step": 3676 }, { "epoch": 0.74, "learning_rate": 1.9853522915548777e-06, "loss": 0.0885, "step": 3677 }, { "epoch": 0.74, "learning_rate": 1.9825681984876173e-06, "loss": 0.0875, "step": 3678 }, { "epoch": 0.74, "learning_rate": 1.979785576079961e-06, "loss": 0.0874, "step": 3679 }, { "epoch": 0.74, "learning_rate": 1.977004425688126e-06, "loss": 0.0853, "step": 3680 }, { "epoch": 0.74, "learning_rate": 1.97422474866761e-06, "loss": 0.0895, "step": 3681 }, { "epoch": 0.74, "learning_rate": 1.9714465463731934e-06, "loss": 0.0815, "step": 3682 }, { "epoch": 0.74, "learning_rate": 1.9686698201589395e-06, "loss": 0.0903, "step": 3683 }, { "epoch": 0.74, "learning_rate": 1.9658945713781883e-06, "loss": 0.0937, "step": 3684 }, { "epoch": 0.74, "learning_rate": 1.9631208013835677e-06, "loss": 0.085, "step": 3685 }, { "epoch": 0.74, "learning_rate": 1.9603485115269743e-06, "loss": 0.0946, "step": 3686 }, { "epoch": 0.74, "learning_rate": 1.9575777031595906e-06, "loss": 0.0782, "step": 3687 }, { "epoch": 0.74, "learning_rate": 1.9548083776318727e-06, "loss": 0.0914, "step": 3688 }, { "epoch": 0.74, "learning_rate": 1.95204053629356e-06, "loss": 0.0917, "step": 3689 }, { "epoch": 0.74, "learning_rate": 1.9492741804936623e-06, "loss": 0.079, "step": 3690 }, { "epoch": 0.74, "learning_rate": 1.946509311580469e-06, "loss": 0.082, "step": 3691 }, { "epoch": 0.74, "learning_rate": 1.9437459309015426e-06, "loss": 0.0914, "step": 3692 }, { "epoch": 0.74, "learning_rate": 1.94098403980372e-06, "loss": 0.0993, "step": 3693 }, { "epoch": 0.74, "learning_rate": 1.938223639633119e-06, "loss": 0.0852, "step": 3694 }, { "epoch": 0.74, "learning_rate": 1.9354647317351187e-06, "loss": 0.0856, "step": 3695 }, { "epoch": 0.74, "learning_rate": 1.93270731745438e-06, "loss": 0.0841, "step": 3696 }, { "epoch": 0.74, "learning_rate": 1.929951398134832e-06, "loss": 0.0842, "step": 3697 }, { "epoch": 0.74, "learning_rate": 1.927196975119678e-06, "loss": 0.0855, "step": 3698 }, { "epoch": 0.74, "learning_rate": 1.9244440497513895e-06, "loss": 0.0884, "step": 3699 }, { "epoch": 0.74, "learning_rate": 1.9216926233717087e-06, "loss": 0.0885, "step": 3700 }, { "epoch": 0.74, "learning_rate": 1.9189426973216478e-06, "loss": 0.085, "step": 3701 }, { "epoch": 0.74, "learning_rate": 1.9161942729414876e-06, "loss": 0.0885, "step": 3702 }, { "epoch": 0.74, "learning_rate": 1.913447351570776e-06, "loss": 0.0879, "step": 3703 }, { "epoch": 0.74, "learning_rate": 1.910701934548329e-06, "loss": 0.0829, "step": 3704 }, { "epoch": 0.74, "learning_rate": 1.90795802321223e-06, "loss": 0.0874, "step": 3705 }, { "epoch": 0.74, "learning_rate": 1.9052156188998284e-06, "loss": 0.1054, "step": 3706 }, { "epoch": 0.74, "learning_rate": 1.9024747229477365e-06, "loss": 0.0911, "step": 3707 }, { "epoch": 0.74, "learning_rate": 1.8997353366918369e-06, "loss": 0.0979, "step": 3708 }, { "epoch": 0.74, "learning_rate": 1.896997461467272e-06, "loss": 0.0805, "step": 3709 }, { "epoch": 0.74, "learning_rate": 1.8942610986084487e-06, "loss": 0.0877, "step": 3710 }, { "epoch": 0.74, "learning_rate": 1.8915262494490366e-06, "loss": 0.0872, "step": 3711 }, { "epoch": 0.74, "learning_rate": 1.8887929153219687e-06, "loss": 0.0886, "step": 3712 }, { "epoch": 0.74, "learning_rate": 1.8860610975594384e-06, "loss": 0.0846, "step": 3713 }, { "epoch": 0.74, "learning_rate": 1.8833307974929006e-06, "loss": 0.082, "step": 3714 }, { "epoch": 0.74, "learning_rate": 1.8806020164530702e-06, "loss": 0.0847, "step": 3715 }, { "epoch": 0.74, "learning_rate": 1.8778747557699223e-06, "loss": 0.0911, "step": 3716 }, { "epoch": 0.74, "learning_rate": 1.8751490167726888e-06, "loss": 0.0811, "step": 3717 }, { "epoch": 0.74, "learning_rate": 1.8724248007898648e-06, "loss": 0.1059, "step": 3718 }, { "epoch": 0.74, "learning_rate": 1.8697021091491991e-06, "loss": 0.0852, "step": 3719 }, { "epoch": 0.74, "learning_rate": 1.8669809431776991e-06, "loss": 0.0846, "step": 3720 }, { "epoch": 0.74, "learning_rate": 1.8642613042016245e-06, "loss": 0.0879, "step": 3721 }, { "epoch": 0.74, "learning_rate": 1.8615431935464984e-06, "loss": 0.0899, "step": 3722 }, { "epoch": 0.74, "learning_rate": 1.8588266125370929e-06, "loss": 0.1042, "step": 3723 }, { "epoch": 0.74, "learning_rate": 1.8561115624974374e-06, "loss": 0.0963, "step": 3724 }, { "epoch": 0.74, "learning_rate": 1.8533980447508138e-06, "loss": 0.092, "step": 3725 }, { "epoch": 0.75, "learning_rate": 1.8506860606197564e-06, "loss": 0.0837, "step": 3726 }, { "epoch": 0.75, "learning_rate": 1.8479756114260562e-06, "loss": 0.0799, "step": 3727 }, { "epoch": 0.75, "learning_rate": 1.8452666984907519e-06, "loss": 0.0882, "step": 3728 }, { "epoch": 0.75, "learning_rate": 1.842559323134136e-06, "loss": 0.0876, "step": 3729 }, { "epoch": 0.75, "learning_rate": 1.8398534866757455e-06, "loss": 0.0934, "step": 3730 }, { "epoch": 0.75, "learning_rate": 1.837149190434378e-06, "loss": 0.088, "step": 3731 }, { "epoch": 0.75, "learning_rate": 1.8344464357280722e-06, "loss": 0.0863, "step": 3732 }, { "epoch": 0.75, "learning_rate": 1.831745223874118e-06, "loss": 0.0868, "step": 3733 }, { "epoch": 0.75, "learning_rate": 1.829045556189053e-06, "loss": 0.0839, "step": 3734 }, { "epoch": 0.75, "learning_rate": 1.8263474339886628e-06, "loss": 0.0796, "step": 3735 }, { "epoch": 0.75, "learning_rate": 1.8236508585879781e-06, "loss": 0.0899, "step": 3736 }, { "epoch": 0.75, "learning_rate": 1.8209558313012792e-06, "loss": 0.0959, "step": 3737 }, { "epoch": 0.75, "learning_rate": 1.8182623534420906e-06, "loss": 0.0905, "step": 3738 }, { "epoch": 0.75, "learning_rate": 1.8155704263231777e-06, "loss": 0.0818, "step": 3739 }, { "epoch": 0.75, "learning_rate": 1.8128800512565514e-06, "loss": 0.0818, "step": 3740 }, { "epoch": 0.75, "learning_rate": 1.810191229553473e-06, "loss": 0.0835, "step": 3741 }, { "epoch": 0.75, "learning_rate": 1.807503962524439e-06, "loss": 0.0899, "step": 3742 }, { "epoch": 0.75, "learning_rate": 1.8048182514791901e-06, "loss": 0.0871, "step": 3743 }, { "epoch": 0.75, "learning_rate": 1.8021340977267104e-06, "loss": 0.087, "step": 3744 }, { "epoch": 0.75, "learning_rate": 1.799451502575222e-06, "loss": 0.0926, "step": 3745 }, { "epoch": 0.75, "learning_rate": 1.7967704673321917e-06, "loss": 0.0891, "step": 3746 }, { "epoch": 0.75, "learning_rate": 1.7940909933043243e-06, "loss": 0.0898, "step": 3747 }, { "epoch": 0.75, "learning_rate": 1.7914130817975595e-06, "loss": 0.0826, "step": 3748 }, { "epoch": 0.75, "learning_rate": 1.7887367341170781e-06, "loss": 0.0879, "step": 3749 }, { "epoch": 0.75, "learning_rate": 1.7860619515673034e-06, "loss": 0.0848, "step": 3750 }, { "epoch": 0.75, "learning_rate": 1.7833887354518902e-06, "loss": 0.093, "step": 3751 }, { "epoch": 0.75, "learning_rate": 1.7807170870737317e-06, "loss": 0.0943, "step": 3752 }, { "epoch": 0.75, "learning_rate": 1.7780470077349566e-06, "loss": 0.0898, "step": 3753 }, { "epoch": 0.75, "learning_rate": 1.7753784987369287e-06, "loss": 0.0913, "step": 3754 }, { "epoch": 0.75, "learning_rate": 1.7727115613802465e-06, "loss": 0.0866, "step": 3755 }, { "epoch": 0.75, "learning_rate": 1.770046196964747e-06, "loss": 0.0861, "step": 3756 }, { "epoch": 0.75, "learning_rate": 1.7673824067894912e-06, "loss": 0.0874, "step": 3757 }, { "epoch": 0.75, "learning_rate": 1.7647201921527802e-06, "loss": 0.0891, "step": 3758 }, { "epoch": 0.75, "learning_rate": 1.762059554352143e-06, "loss": 0.0822, "step": 3759 }, { "epoch": 0.75, "learning_rate": 1.7594004946843458e-06, "loss": 0.0885, "step": 3760 }, { "epoch": 0.75, "learning_rate": 1.7567430144453801e-06, "loss": 0.0827, "step": 3761 }, { "epoch": 0.75, "learning_rate": 1.75408711493047e-06, "loss": 0.1134, "step": 3762 }, { "epoch": 0.75, "learning_rate": 1.751432797434068e-06, "loss": 0.1065, "step": 3763 }, { "epoch": 0.75, "learning_rate": 1.7487800632498547e-06, "loss": 0.083, "step": 3764 }, { "epoch": 0.75, "learning_rate": 1.746128913670746e-06, "loss": 0.0833, "step": 3765 }, { "epoch": 0.75, "learning_rate": 1.7434793499888746e-06, "loss": 0.0866, "step": 3766 }, { "epoch": 0.75, "learning_rate": 1.7408313734956074e-06, "loss": 0.1011, "step": 3767 }, { "epoch": 0.75, "learning_rate": 1.738184985481536e-06, "loss": 0.0811, "step": 3768 }, { "epoch": 0.75, "learning_rate": 1.7355401872364759e-06, "loss": 0.0894, "step": 3769 }, { "epoch": 0.75, "learning_rate": 1.7328969800494727e-06, "loss": 0.088, "step": 3770 }, { "epoch": 0.75, "learning_rate": 1.7302553652087927e-06, "loss": 0.084, "step": 3771 }, { "epoch": 0.75, "learning_rate": 1.727615344001926e-06, "loss": 0.1402, "step": 3772 }, { "epoch": 0.75, "learning_rate": 1.7249769177155879e-06, "loss": 0.0809, "step": 3773 }, { "epoch": 0.75, "learning_rate": 1.7223400876357144e-06, "loss": 0.0851, "step": 3774 }, { "epoch": 0.76, "learning_rate": 1.7197048550474643e-06, "loss": 0.0863, "step": 3775 }, { "epoch": 0.76, "learning_rate": 1.7170712212352187e-06, "loss": 0.0897, "step": 3776 }, { "epoch": 0.76, "learning_rate": 1.7144391874825784e-06, "loss": 0.0904, "step": 3777 }, { "epoch": 0.76, "learning_rate": 1.7118087550723633e-06, "loss": 0.0853, "step": 3778 }, { "epoch": 0.76, "learning_rate": 1.709179925286617e-06, "loss": 0.0797, "step": 3779 }, { "epoch": 0.76, "learning_rate": 1.7065526994065973e-06, "loss": 0.0817, "step": 3780 }, { "epoch": 0.76, "learning_rate": 1.7039270787127832e-06, "loss": 0.0811, "step": 3781 }, { "epoch": 0.76, "learning_rate": 1.7013030644848698e-06, "loss": 0.0792, "step": 3782 }, { "epoch": 0.76, "learning_rate": 1.6986806580017695e-06, "loss": 0.0902, "step": 3783 }, { "epoch": 0.76, "learning_rate": 1.6960598605416117e-06, "loss": 0.0782, "step": 3784 }, { "epoch": 0.76, "learning_rate": 1.6934406733817417e-06, "loss": 0.083, "step": 3785 }, { "epoch": 0.76, "learning_rate": 1.6908230977987184e-06, "loss": 0.0977, "step": 3786 }, { "epoch": 0.76, "learning_rate": 1.6882071350683165e-06, "loss": 0.1105, "step": 3787 }, { "epoch": 0.76, "learning_rate": 1.6855927864655241e-06, "loss": 0.1175, "step": 3788 }, { "epoch": 0.76, "learning_rate": 1.6829800532645447e-06, "loss": 0.0787, "step": 3789 }, { "epoch": 0.76, "learning_rate": 1.680368936738792e-06, "loss": 0.0854, "step": 3790 }, { "epoch": 0.76, "learning_rate": 1.6777594381608936e-06, "loss": 0.0863, "step": 3791 }, { "epoch": 0.76, "learning_rate": 1.6751515588026828e-06, "loss": 0.0881, "step": 3792 }, { "epoch": 0.76, "learning_rate": 1.6725452999352137e-06, "loss": 0.0838, "step": 3793 }, { "epoch": 0.76, "learning_rate": 1.6699406628287423e-06, "loss": 0.0858, "step": 3794 }, { "epoch": 0.76, "learning_rate": 1.6673376487527382e-06, "loss": 0.0827, "step": 3795 }, { "epoch": 0.76, "learning_rate": 1.6647362589758787e-06, "loss": 0.0921, "step": 3796 }, { "epoch": 0.76, "learning_rate": 1.6621364947660472e-06, "loss": 0.0972, "step": 3797 }, { "epoch": 0.76, "learning_rate": 1.6595383573903412e-06, "loss": 0.0945, "step": 3798 }, { "epoch": 0.76, "learning_rate": 1.6569418481150596e-06, "loss": 0.0791, "step": 3799 }, { "epoch": 0.76, "learning_rate": 1.6543469682057105e-06, "loss": 0.0845, "step": 3800 }, { "epoch": 0.76, "learning_rate": 1.6517537189270043e-06, "loss": 0.0846, "step": 3801 }, { "epoch": 0.76, "learning_rate": 1.6491621015428588e-06, "loss": 0.0843, "step": 3802 }, { "epoch": 0.76, "learning_rate": 1.6465721173164e-06, "loss": 0.0959, "step": 3803 }, { "epoch": 0.76, "learning_rate": 1.643983767509954e-06, "loss": 0.0905, "step": 3804 }, { "epoch": 0.76, "learning_rate": 1.6413970533850498e-06, "loss": 0.0884, "step": 3805 }, { "epoch": 0.76, "learning_rate": 1.6388119762024213e-06, "loss": 0.0822, "step": 3806 }, { "epoch": 0.76, "learning_rate": 1.6362285372220016e-06, "loss": 0.0801, "step": 3807 }, { "epoch": 0.76, "learning_rate": 1.6336467377029308e-06, "loss": 0.0845, "step": 3808 }, { "epoch": 0.76, "learning_rate": 1.6310665789035468e-06, "loss": 0.0868, "step": 3809 }, { "epoch": 0.76, "learning_rate": 1.6284880620813847e-06, "loss": 0.0899, "step": 3810 }, { "epoch": 0.76, "learning_rate": 1.6259111884931817e-06, "loss": 0.0891, "step": 3811 }, { "epoch": 0.76, "learning_rate": 1.6233359593948777e-06, "loss": 0.0805, "step": 3812 }, { "epoch": 0.76, "learning_rate": 1.6207623760416074e-06, "loss": 0.0846, "step": 3813 }, { "epoch": 0.76, "learning_rate": 1.6181904396877041e-06, "loss": 0.0918, "step": 3814 }, { "epoch": 0.76, "learning_rate": 1.6156201515866971e-06, "loss": 0.0826, "step": 3815 }, { "epoch": 0.76, "learning_rate": 1.6130515129913144e-06, "loss": 0.0807, "step": 3816 }, { "epoch": 0.76, "learning_rate": 1.6104845251534772e-06, "loss": 0.0805, "step": 3817 }, { "epoch": 0.76, "learning_rate": 1.6079191893243102e-06, "loss": 0.083, "step": 3818 }, { "epoch": 0.76, "learning_rate": 1.605355506754121e-06, "loss": 0.0926, "step": 3819 }, { "epoch": 0.76, "learning_rate": 1.6027934786924187e-06, "loss": 0.0844, "step": 3820 }, { "epoch": 0.76, "learning_rate": 1.600233106387904e-06, "loss": 0.0844, "step": 3821 }, { "epoch": 0.76, "learning_rate": 1.597674391088474e-06, "loss": 0.0888, "step": 3822 }, { "epoch": 0.76, "learning_rate": 1.5951173340412134e-06, "loss": 0.1016, "step": 3823 }, { "epoch": 0.76, "learning_rate": 1.5925619364924016e-06, "loss": 0.0847, "step": 3824 }, { "epoch": 0.77, "learning_rate": 1.5900081996875083e-06, "loss": 0.0816, "step": 3825 }, { "epoch": 0.77, "learning_rate": 1.587456124871191e-06, "loss": 0.0821, "step": 3826 }, { "epoch": 0.77, "learning_rate": 1.5849057132873063e-06, "loss": 0.0882, "step": 3827 }, { "epoch": 0.77, "learning_rate": 1.582356966178888e-06, "loss": 0.0862, "step": 3828 }, { "epoch": 0.77, "learning_rate": 1.5798098847881664e-06, "loss": 0.0877, "step": 3829 }, { "epoch": 0.77, "learning_rate": 1.5772644703565564e-06, "loss": 0.081, "step": 3830 }, { "epoch": 0.77, "learning_rate": 1.5747207241246654e-06, "loss": 0.0825, "step": 3831 }, { "epoch": 0.77, "learning_rate": 1.5721786473322825e-06, "loss": 0.1132, "step": 3832 }, { "epoch": 0.77, "learning_rate": 1.5696382412183853e-06, "loss": 0.0832, "step": 3833 }, { "epoch": 0.77, "learning_rate": 1.567099507021137e-06, "loss": 0.0894, "step": 3834 }, { "epoch": 0.77, "learning_rate": 1.5645624459778858e-06, "loss": 0.0829, "step": 3835 }, { "epoch": 0.77, "learning_rate": 1.5620270593251635e-06, "loss": 0.0765, "step": 3836 }, { "epoch": 0.77, "learning_rate": 1.5594933482986885e-06, "loss": 0.0911, "step": 3837 }, { "epoch": 0.77, "learning_rate": 1.556961314133359e-06, "loss": 0.0756, "step": 3838 }, { "epoch": 0.77, "learning_rate": 1.554430958063259e-06, "loss": 0.0903, "step": 3839 }, { "epoch": 0.77, "learning_rate": 1.551902281321651e-06, "loss": 0.0808, "step": 3840 }, { "epoch": 0.77, "learning_rate": 1.5493752851409844e-06, "loss": 0.0833, "step": 3841 }, { "epoch": 0.77, "learning_rate": 1.5468499707528856e-06, "loss": 0.0854, "step": 3842 }, { "epoch": 0.77, "learning_rate": 1.5443263393881619e-06, "loss": 0.091, "step": 3843 }, { "epoch": 0.77, "learning_rate": 1.5418043922768e-06, "loss": 0.0897, "step": 3844 }, { "epoch": 0.77, "learning_rate": 1.5392841306479667e-06, "loss": 0.087, "step": 3845 }, { "epoch": 0.77, "learning_rate": 1.5367655557300066e-06, "loss": 0.0865, "step": 3846 }, { "epoch": 0.77, "learning_rate": 1.5342486687504432e-06, "loss": 0.0895, "step": 3847 }, { "epoch": 0.77, "learning_rate": 1.531733470935976e-06, "loss": 0.097, "step": 3848 }, { "epoch": 0.77, "learning_rate": 1.529219963512481e-06, "loss": 0.0868, "step": 3849 }, { "epoch": 0.77, "learning_rate": 1.5267081477050132e-06, "loss": 0.1028, "step": 3850 }, { "epoch": 0.77, "learning_rate": 1.5241980247378008e-06, "loss": 0.0882, "step": 3851 }, { "epoch": 0.77, "learning_rate": 1.521689595834246e-06, "loss": 0.0879, "step": 3852 }, { "epoch": 0.77, "learning_rate": 1.519182862216929e-06, "loss": 0.0907, "step": 3853 }, { "epoch": 0.77, "learning_rate": 1.5166778251075964e-06, "loss": 0.0795, "step": 3854 }, { "epoch": 0.77, "learning_rate": 1.514174485727178e-06, "loss": 0.0842, "step": 3855 }, { "epoch": 0.77, "learning_rate": 1.5116728452957686e-06, "loss": 0.0851, "step": 3856 }, { "epoch": 0.77, "learning_rate": 1.5091729050326376e-06, "loss": 0.0891, "step": 3857 }, { "epoch": 0.77, "learning_rate": 1.5066746661562254e-06, "loss": 0.0859, "step": 3858 }, { "epoch": 0.77, "learning_rate": 1.5041781298841424e-06, "loss": 0.0849, "step": 3859 }, { "epoch": 0.77, "learning_rate": 1.5016832974331725e-06, "loss": 0.0831, "step": 3860 }, { "epoch": 0.77, "learning_rate": 1.4991901700192657e-06, "loss": 0.0928, "step": 3861 }, { "epoch": 0.77, "learning_rate": 1.496698748857543e-06, "loss": 0.0786, "step": 3862 }, { "epoch": 0.77, "learning_rate": 1.4942090351622884e-06, "loss": 0.0882, "step": 3863 }, { "epoch": 0.77, "learning_rate": 1.491721030146963e-06, "loss": 0.0879, "step": 3864 }, { "epoch": 0.77, "learning_rate": 1.489234735024188e-06, "loss": 0.0996, "step": 3865 }, { "epoch": 0.77, "learning_rate": 1.4867501510057548e-06, "loss": 0.0972, "step": 3866 }, { "epoch": 0.77, "learning_rate": 1.484267279302618e-06, "loss": 0.0767, "step": 3867 }, { "epoch": 0.77, "learning_rate": 1.4817861211248996e-06, "loss": 0.0844, "step": 3868 }, { "epoch": 0.77, "learning_rate": 1.4793066776818843e-06, "loss": 0.0856, "step": 3869 }, { "epoch": 0.77, "learning_rate": 1.4768289501820265e-06, "loss": 0.0977, "step": 3870 }, { "epoch": 0.77, "learning_rate": 1.4743529398329393e-06, "loss": 0.0858, "step": 3871 }, { "epoch": 0.77, "learning_rate": 1.4718786478413983e-06, "loss": 0.0838, "step": 3872 }, { "epoch": 0.77, "learning_rate": 1.469406075413342e-06, "loss": 0.0861, "step": 3873 }, { "epoch": 0.77, "learning_rate": 1.4669352237538763e-06, "loss": 0.0847, "step": 3874 }, { "epoch": 0.78, "learning_rate": 1.4644660940672628e-06, "loss": 0.0827, "step": 3875 }, { "epoch": 0.78, "learning_rate": 1.4619986875569247e-06, "loss": 0.0801, "step": 3876 }, { "epoch": 0.78, "learning_rate": 1.459533005425446e-06, "loss": 0.0829, "step": 3877 }, { "epoch": 0.78, "learning_rate": 1.4570690488745687e-06, "loss": 0.0922, "step": 3878 }, { "epoch": 0.78, "learning_rate": 1.4546068191051988e-06, "loss": 0.0738, "step": 3879 }, { "epoch": 0.78, "learning_rate": 1.4521463173173966e-06, "loss": 0.0896, "step": 3880 }, { "epoch": 0.78, "learning_rate": 1.4496875447103781e-06, "loss": 0.0806, "step": 3881 }, { "epoch": 0.78, "learning_rate": 1.4472305024825189e-06, "loss": 0.0843, "step": 3882 }, { "epoch": 0.78, "learning_rate": 1.4447751918313552e-06, "loss": 0.1008, "step": 3883 }, { "epoch": 0.78, "learning_rate": 1.4423216139535735e-06, "loss": 0.0845, "step": 3884 }, { "epoch": 0.78, "learning_rate": 1.4398697700450181e-06, "loss": 0.0822, "step": 3885 }, { "epoch": 0.78, "learning_rate": 1.4374196613006874e-06, "loss": 0.0958, "step": 3886 }, { "epoch": 0.78, "learning_rate": 1.4349712889147355e-06, "loss": 0.0832, "step": 3887 }, { "epoch": 0.78, "learning_rate": 1.4325246540804672e-06, "loss": 0.0828, "step": 3888 }, { "epoch": 0.78, "learning_rate": 1.4300797579903476e-06, "loss": 0.0888, "step": 3889 }, { "epoch": 0.78, "learning_rate": 1.4276366018359845e-06, "loss": 0.0889, "step": 3890 }, { "epoch": 0.78, "learning_rate": 1.4251951868081438e-06, "loss": 0.0866, "step": 3891 }, { "epoch": 0.78, "learning_rate": 1.4227555140967402e-06, "loss": 0.0925, "step": 3892 }, { "epoch": 0.78, "learning_rate": 1.420317584890844e-06, "loss": 0.0917, "step": 3893 }, { "epoch": 0.78, "learning_rate": 1.4178814003786706e-06, "loss": 0.0837, "step": 3894 }, { "epoch": 0.78, "learning_rate": 1.4154469617475864e-06, "loss": 0.086, "step": 3895 }, { "epoch": 0.78, "learning_rate": 1.4130142701841076e-06, "loss": 0.0828, "step": 3896 }, { "epoch": 0.78, "learning_rate": 1.4105833268738966e-06, "loss": 0.1065, "step": 3897 }, { "epoch": 0.78, "learning_rate": 1.4081541330017706e-06, "loss": 0.0804, "step": 3898 }, { "epoch": 0.78, "learning_rate": 1.4057266897516842e-06, "loss": 0.0877, "step": 3899 }, { "epoch": 0.78, "learning_rate": 1.4033009983067454e-06, "loss": 0.0889, "step": 3900 }, { "epoch": 0.78, "learning_rate": 1.4008770598492072e-06, "loss": 0.0984, "step": 3901 }, { "epoch": 0.78, "learning_rate": 1.3984548755604655e-06, "loss": 0.0843, "step": 3902 }, { "epoch": 0.78, "learning_rate": 1.3960344466210669e-06, "loss": 0.0748, "step": 3903 }, { "epoch": 0.78, "learning_rate": 1.3936157742106977e-06, "loss": 0.0941, "step": 3904 }, { "epoch": 0.78, "learning_rate": 1.3911988595081894e-06, "loss": 0.0829, "step": 3905 }, { "epoch": 0.78, "learning_rate": 1.3887837036915169e-06, "loss": 0.0883, "step": 3906 }, { "epoch": 0.78, "learning_rate": 1.3863703079377971e-06, "loss": 0.0892, "step": 3907 }, { "epoch": 0.78, "learning_rate": 1.3839586734232907e-06, "loss": 0.0794, "step": 3908 }, { "epoch": 0.78, "learning_rate": 1.3815488013233986e-06, "loss": 0.0821, "step": 3909 }, { "epoch": 0.78, "learning_rate": 1.3791406928126638e-06, "loss": 0.0914, "step": 3910 }, { "epoch": 0.78, "learning_rate": 1.3767343490647668e-06, "loss": 0.0868, "step": 3911 }, { "epoch": 0.78, "learning_rate": 1.3743297712525334e-06, "loss": 0.0857, "step": 3912 }, { "epoch": 0.78, "learning_rate": 1.3719269605479241e-06, "loss": 0.0889, "step": 3913 }, { "epoch": 0.78, "learning_rate": 1.3695259181220405e-06, "loss": 0.0901, "step": 3914 }, { "epoch": 0.78, "learning_rate": 1.3671266451451209e-06, "loss": 0.0868, "step": 3915 }, { "epoch": 0.78, "learning_rate": 1.3647291427865417e-06, "loss": 0.0843, "step": 3916 }, { "epoch": 0.78, "learning_rate": 1.3623334122148164e-06, "loss": 0.0861, "step": 3917 }, { "epoch": 0.78, "learning_rate": 1.3599394545975952e-06, "loss": 0.0845, "step": 3918 }, { "epoch": 0.78, "learning_rate": 1.3575472711016634e-06, "loss": 0.0803, "step": 3919 }, { "epoch": 0.78, "learning_rate": 1.3551568628929434e-06, "loss": 0.0886, "step": 3920 }, { "epoch": 0.78, "learning_rate": 1.3527682311364886e-06, "loss": 0.0832, "step": 3921 }, { "epoch": 0.78, "learning_rate": 1.3503813769964923e-06, "loss": 0.0885, "step": 3922 }, { "epoch": 0.78, "learning_rate": 1.3479963016362768e-06, "loss": 0.079, "step": 3923 }, { "epoch": 0.78, "learning_rate": 1.3456130062183003e-06, "loss": 0.0867, "step": 3924 }, { "epoch": 0.79, "learning_rate": 1.3432314919041478e-06, "loss": 0.0894, "step": 3925 }, { "epoch": 0.79, "learning_rate": 1.3408517598545446e-06, "loss": 0.0904, "step": 3926 }, { "epoch": 0.79, "learning_rate": 1.3384738112293415e-06, "loss": 0.0782, "step": 3927 }, { "epoch": 0.79, "learning_rate": 1.3360976471875226e-06, "loss": 0.1684, "step": 3928 }, { "epoch": 0.79, "learning_rate": 1.333723268887201e-06, "loss": 0.0858, "step": 3929 }, { "epoch": 0.79, "learning_rate": 1.3313506774856177e-06, "loss": 0.085, "step": 3930 }, { "epoch": 0.79, "learning_rate": 1.3289798741391486e-06, "loss": 0.087, "step": 3931 }, { "epoch": 0.79, "learning_rate": 1.3266108600032928e-06, "loss": 0.0892, "step": 3932 }, { "epoch": 0.79, "learning_rate": 1.3242436362326804e-06, "loss": 0.0781, "step": 3933 }, { "epoch": 0.79, "learning_rate": 1.3218782039810634e-06, "loss": 0.0835, "step": 3934 }, { "epoch": 0.79, "learning_rate": 1.3195145644013286e-06, "loss": 0.0865, "step": 3935 }, { "epoch": 0.79, "learning_rate": 1.317152718645484e-06, "loss": 0.09, "step": 3936 }, { "epoch": 0.79, "learning_rate": 1.314792667864665e-06, "loss": 0.0825, "step": 3937 }, { "epoch": 0.79, "learning_rate": 1.312434413209131e-06, "loss": 0.0818, "step": 3938 }, { "epoch": 0.79, "learning_rate": 1.3100779558282673e-06, "loss": 0.0827, "step": 3939 }, { "epoch": 0.79, "learning_rate": 1.3077232968705805e-06, "loss": 0.0848, "step": 3940 }, { "epoch": 0.79, "learning_rate": 1.3053704374837063e-06, "loss": 0.0859, "step": 3941 }, { "epoch": 0.79, "learning_rate": 1.3030193788143991e-06, "loss": 0.082, "step": 3942 }, { "epoch": 0.79, "learning_rate": 1.3006701220085338e-06, "loss": 0.0846, "step": 3943 }, { "epoch": 0.79, "learning_rate": 1.2983226682111094e-06, "loss": 0.0734, "step": 3944 }, { "epoch": 0.79, "learning_rate": 1.2959770185662502e-06, "loss": 0.094, "step": 3945 }, { "epoch": 0.79, "learning_rate": 1.2936331742171943e-06, "loss": 0.0844, "step": 3946 }, { "epoch": 0.79, "learning_rate": 1.2912911363063048e-06, "loss": 0.1212, "step": 3947 }, { "epoch": 0.79, "learning_rate": 1.2889509059750605e-06, "loss": 0.1114, "step": 3948 }, { "epoch": 0.79, "learning_rate": 1.2866124843640614e-06, "loss": 0.0846, "step": 3949 }, { "epoch": 0.79, "learning_rate": 1.2842758726130283e-06, "loss": 0.0841, "step": 3950 }, { "epoch": 0.79, "learning_rate": 1.2819410718607972e-06, "loss": 0.0829, "step": 3951 }, { "epoch": 0.79, "learning_rate": 1.2796080832453183e-06, "loss": 0.0764, "step": 3952 }, { "epoch": 0.79, "learning_rate": 1.2772769079036639e-06, "loss": 0.0825, "step": 3953 }, { "epoch": 0.79, "learning_rate": 1.2749475469720196e-06, "loss": 0.0775, "step": 3954 }, { "epoch": 0.79, "learning_rate": 1.2726200015856893e-06, "loss": 0.0845, "step": 3955 }, { "epoch": 0.79, "learning_rate": 1.2702942728790897e-06, "loss": 0.0821, "step": 3956 }, { "epoch": 0.79, "learning_rate": 1.2679703619857525e-06, "loss": 0.0838, "step": 3957 }, { "epoch": 0.79, "learning_rate": 1.2656482700383238e-06, "loss": 0.0905, "step": 3958 }, { "epoch": 0.79, "learning_rate": 1.2633279981685608e-06, "loss": 0.0837, "step": 3959 }, { "epoch": 0.79, "learning_rate": 1.2610095475073415e-06, "loss": 0.1063, "step": 3960 }, { "epoch": 0.79, "learning_rate": 1.2586929191846453e-06, "loss": 0.0889, "step": 3961 }, { "epoch": 0.79, "learning_rate": 1.2563781143295705e-06, "loss": 0.1699, "step": 3962 }, { "epoch": 0.79, "learning_rate": 1.2540651340703231e-06, "loss": 0.0923, "step": 3963 }, { "epoch": 0.79, "learning_rate": 1.2517539795342248e-06, "loss": 0.0891, "step": 3964 }, { "epoch": 0.79, "learning_rate": 1.2494446518477022e-06, "loss": 0.0808, "step": 3965 }, { "epoch": 0.79, "learning_rate": 1.2471371521362946e-06, "loss": 0.0899, "step": 3966 }, { "epoch": 0.79, "learning_rate": 1.2448314815246487e-06, "loss": 0.0957, "step": 3967 }, { "epoch": 0.79, "learning_rate": 1.24252764113652e-06, "loss": 0.0935, "step": 3968 }, { "epoch": 0.79, "learning_rate": 1.240225632094773e-06, "loss": 0.089, "step": 3969 }, { "epoch": 0.79, "learning_rate": 1.2379254555213788e-06, "loss": 0.0788, "step": 3970 }, { "epoch": 0.79, "learning_rate": 1.2356271125374153e-06, "loss": 0.0875, "step": 3971 }, { "epoch": 0.79, "learning_rate": 1.2333306042630672e-06, "loss": 0.0834, "step": 3972 }, { "epoch": 0.79, "learning_rate": 1.2310359318176229e-06, "loss": 0.0853, "step": 3973 }, { "epoch": 0.79, "learning_rate": 1.2287430963194807e-06, "loss": 0.0864, "step": 3974 }, { "epoch": 0.8, "learning_rate": 1.22645209888614e-06, "loss": 0.0836, "step": 3975 }, { "epoch": 0.8, "learning_rate": 1.2241629406342048e-06, "loss": 0.0838, "step": 3976 }, { "epoch": 0.8, "learning_rate": 1.2218756226793827e-06, "loss": 0.0825, "step": 3977 }, { "epoch": 0.8, "learning_rate": 1.2195901461364851e-06, "loss": 0.0877, "step": 3978 }, { "epoch": 0.8, "learning_rate": 1.217306512119425e-06, "loss": 0.0903, "step": 3979 }, { "epoch": 0.8, "learning_rate": 1.2150247217412186e-06, "loss": 0.0909, "step": 3980 }, { "epoch": 0.8, "learning_rate": 1.2127447761139821e-06, "loss": 0.0841, "step": 3981 }, { "epoch": 0.8, "learning_rate": 1.2104666763489326e-06, "loss": 0.0877, "step": 3982 }, { "epoch": 0.8, "learning_rate": 1.2081904235563908e-06, "loss": 0.0837, "step": 3983 }, { "epoch": 0.8, "learning_rate": 1.2059160188457724e-06, "loss": 0.0818, "step": 3984 }, { "epoch": 0.8, "learning_rate": 1.203643463325596e-06, "loss": 0.0888, "step": 3985 }, { "epoch": 0.8, "learning_rate": 1.2013727581034783e-06, "loss": 0.083, "step": 3986 }, { "epoch": 0.8, "learning_rate": 1.199103904286129e-06, "loss": 0.0884, "step": 3987 }, { "epoch": 0.8, "learning_rate": 1.1968369029793642e-06, "loss": 0.0849, "step": 3988 }, { "epoch": 0.8, "learning_rate": 1.1945717552880919e-06, "loss": 0.083, "step": 3989 }, { "epoch": 0.8, "learning_rate": 1.1923084623163172e-06, "loss": 0.0897, "step": 3990 }, { "epoch": 0.8, "learning_rate": 1.1900470251671415e-06, "loss": 0.0918, "step": 3991 }, { "epoch": 0.8, "learning_rate": 1.18778744494276e-06, "loss": 0.0779, "step": 3992 }, { "epoch": 0.8, "learning_rate": 1.185529722744469e-06, "loss": 0.0823, "step": 3993 }, { "epoch": 0.8, "learning_rate": 1.1832738596726518e-06, "loss": 0.0868, "step": 3994 }, { "epoch": 0.8, "learning_rate": 1.1810198568267906e-06, "loss": 0.0794, "step": 3995 }, { "epoch": 0.8, "learning_rate": 1.178767715305455e-06, "loss": 0.0794, "step": 3996 }, { "epoch": 0.8, "learning_rate": 1.1765174362063152e-06, "loss": 0.0818, "step": 3997 }, { "epoch": 0.8, "learning_rate": 1.1742690206261293e-06, "loss": 0.0813, "step": 3998 }, { "epoch": 0.8, "learning_rate": 1.1720224696607474e-06, "loss": 0.0822, "step": 3999 }, { "epoch": 0.8, "learning_rate": 1.1697777844051105e-06, "loss": 0.0862, "step": 4000 }, { "epoch": 0.8, "learning_rate": 1.1675349659532514e-06, "loss": 0.0784, "step": 4001 }, { "epoch": 0.8, "learning_rate": 1.1652940153982917e-06, "loss": 0.0835, "step": 4002 }, { "epoch": 0.8, "learning_rate": 1.1630549338324454e-06, "loss": 0.0965, "step": 4003 }, { "epoch": 0.8, "learning_rate": 1.160817722347014e-06, "loss": 0.0825, "step": 4004 }, { "epoch": 0.8, "learning_rate": 1.1585823820323845e-06, "loss": 0.0794, "step": 4005 }, { "epoch": 0.8, "learning_rate": 1.1563489139780344e-06, "loss": 0.0803, "step": 4006 }, { "epoch": 0.8, "learning_rate": 1.154117319272532e-06, "loss": 0.0921, "step": 4007 }, { "epoch": 0.8, "learning_rate": 1.1518875990035278e-06, "loss": 0.0902, "step": 4008 }, { "epoch": 0.8, "learning_rate": 1.1496597542577603e-06, "loss": 0.0895, "step": 4009 }, { "epoch": 0.8, "learning_rate": 1.1474337861210543e-06, "loss": 0.0825, "step": 4010 }, { "epoch": 0.8, "learning_rate": 1.1452096956783181e-06, "loss": 0.0964, "step": 4011 }, { "epoch": 0.8, "learning_rate": 1.1429874840135492e-06, "loss": 0.085, "step": 4012 }, { "epoch": 0.8, "learning_rate": 1.1407671522098262e-06, "loss": 0.09, "step": 4013 }, { "epoch": 0.8, "learning_rate": 1.1385487013493095e-06, "loss": 0.0858, "step": 4014 }, { "epoch": 0.8, "learning_rate": 1.136332132513245e-06, "loss": 0.1054, "step": 4015 }, { "epoch": 0.8, "learning_rate": 1.1341174467819637e-06, "loss": 0.081, "step": 4016 }, { "epoch": 0.8, "learning_rate": 1.1319046452348758e-06, "loss": 0.0785, "step": 4017 }, { "epoch": 0.8, "learning_rate": 1.129693728950474e-06, "loss": 0.0825, "step": 4018 }, { "epoch": 0.8, "learning_rate": 1.1274846990063314e-06, "loss": 0.0791, "step": 4019 }, { "epoch": 0.8, "learning_rate": 1.1252775564791023e-06, "loss": 0.0845, "step": 4020 }, { "epoch": 0.8, "learning_rate": 1.1230723024445212e-06, "loss": 0.0758, "step": 4021 }, { "epoch": 0.8, "learning_rate": 1.120868937977404e-06, "loss": 0.0838, "step": 4022 }, { "epoch": 0.8, "learning_rate": 1.1186674641516415e-06, "loss": 0.08, "step": 4023 }, { "epoch": 0.8, "learning_rate": 1.1164678820402059e-06, "loss": 0.0774, "step": 4024 }, { "epoch": 0.81, "learning_rate": 1.1142701927151456e-06, "loss": 0.1139, "step": 4025 }, { "epoch": 0.81, "learning_rate": 1.11207439724759e-06, "loss": 0.1172, "step": 4026 }, { "epoch": 0.81, "learning_rate": 1.1098804967077425e-06, "loss": 0.0944, "step": 4027 }, { "epoch": 0.81, "learning_rate": 1.1076884921648834e-06, "loss": 0.0854, "step": 4028 }, { "epoch": 0.81, "learning_rate": 1.1054983846873684e-06, "loss": 0.0853, "step": 4029 }, { "epoch": 0.81, "learning_rate": 1.1033101753426285e-06, "loss": 0.0867, "step": 4030 }, { "epoch": 0.81, "learning_rate": 1.1011238651971744e-06, "loss": 0.0876, "step": 4031 }, { "epoch": 0.81, "learning_rate": 1.0989394553165833e-06, "loss": 0.0903, "step": 4032 }, { "epoch": 0.81, "learning_rate": 1.0967569467655104e-06, "loss": 0.0842, "step": 4033 }, { "epoch": 0.81, "learning_rate": 1.0945763406076837e-06, "loss": 0.0925, "step": 4034 }, { "epoch": 0.81, "learning_rate": 1.0923976379059059e-06, "loss": 0.0836, "step": 4035 }, { "epoch": 0.81, "learning_rate": 1.09022083972205e-06, "loss": 0.0924, "step": 4036 }, { "epoch": 0.81, "learning_rate": 1.0880459471170597e-06, "loss": 0.0901, "step": 4037 }, { "epoch": 0.81, "learning_rate": 1.0858729611509516e-06, "loss": 0.0938, "step": 4038 }, { "epoch": 0.81, "learning_rate": 1.0837018828828133e-06, "loss": 0.0851, "step": 4039 }, { "epoch": 0.81, "learning_rate": 1.0815327133708015e-06, "loss": 0.0804, "step": 4040 }, { "epoch": 0.81, "learning_rate": 1.0793654536721432e-06, "loss": 0.0829, "step": 4041 }, { "epoch": 0.81, "learning_rate": 1.077200104843134e-06, "loss": 0.0793, "step": 4042 }, { "epoch": 0.81, "learning_rate": 1.0750366679391393e-06, "loss": 0.0826, "step": 4043 }, { "epoch": 0.81, "learning_rate": 1.0728751440145907e-06, "loss": 0.0804, "step": 4044 }, { "epoch": 0.81, "learning_rate": 1.0707155341229902e-06, "loss": 0.0893, "step": 4045 }, { "epoch": 0.81, "learning_rate": 1.0685578393169054e-06, "loss": 0.0826, "step": 4046 }, { "epoch": 0.81, "learning_rate": 1.0664020606479702e-06, "loss": 0.0863, "step": 4047 }, { "epoch": 0.81, "learning_rate": 1.064248199166884e-06, "loss": 0.093, "step": 4048 }, { "epoch": 0.81, "learning_rate": 1.0620962559234144e-06, "loss": 0.079, "step": 4049 }, { "epoch": 0.81, "learning_rate": 1.0599462319663906e-06, "loss": 0.0893, "step": 4050 }, { "epoch": 0.81, "learning_rate": 1.0577981283437095e-06, "loss": 0.0793, "step": 4051 }, { "epoch": 0.81, "learning_rate": 1.0556519461023301e-06, "loss": 0.08, "step": 4052 }, { "epoch": 0.81, "learning_rate": 1.053507686288276e-06, "loss": 0.0927, "step": 4053 }, { "epoch": 0.81, "learning_rate": 1.0513653499466315e-06, "loss": 0.0872, "step": 4054 }, { "epoch": 0.81, "learning_rate": 1.049224938121548e-06, "loss": 0.0866, "step": 4055 }, { "epoch": 0.81, "learning_rate": 1.047086451856235e-06, "loss": 0.0965, "step": 4056 }, { "epoch": 0.81, "learning_rate": 1.0449498921929669e-06, "loss": 0.0865, "step": 4057 }, { "epoch": 0.81, "learning_rate": 1.0428152601730718e-06, "loss": 0.0836, "step": 4058 }, { "epoch": 0.81, "learning_rate": 1.0406825568369478e-06, "loss": 0.0829, "step": 4059 }, { "epoch": 0.81, "learning_rate": 1.0385517832240472e-06, "loss": 0.077, "step": 4060 }, { "epoch": 0.81, "learning_rate": 1.036422940372883e-06, "loss": 0.0873, "step": 4061 }, { "epoch": 0.81, "learning_rate": 1.0342960293210281e-06, "loss": 0.0825, "step": 4062 }, { "epoch": 0.81, "learning_rate": 1.0321710511051108e-06, "loss": 0.0878, "step": 4063 }, { "epoch": 0.81, "learning_rate": 1.0300480067608232e-06, "loss": 0.0898, "step": 4064 }, { "epoch": 0.81, "learning_rate": 1.0279268973229089e-06, "loss": 0.0935, "step": 4065 }, { "epoch": 0.81, "learning_rate": 1.0258077238251735e-06, "loss": 0.0892, "step": 4066 }, { "epoch": 0.81, "learning_rate": 1.0236904873004722e-06, "loss": 0.0877, "step": 4067 }, { "epoch": 0.81, "learning_rate": 1.0215751887807228e-06, "loss": 0.1035, "step": 4068 }, { "epoch": 0.81, "learning_rate": 1.0194618292968972e-06, "loss": 0.0974, "step": 4069 }, { "epoch": 0.81, "learning_rate": 1.0173504098790188e-06, "loss": 0.0855, "step": 4070 }, { "epoch": 0.81, "learning_rate": 1.0152409315561696e-06, "loss": 0.0842, "step": 4071 }, { "epoch": 0.81, "learning_rate": 1.0131333953564825e-06, "loss": 0.0934, "step": 4072 }, { "epoch": 0.81, "learning_rate": 1.0110278023071445e-06, "loss": 0.0864, "step": 4073 }, { "epoch": 0.81, "learning_rate": 1.0089241534343986e-06, "loss": 0.0859, "step": 4074 }, { "epoch": 0.81, "learning_rate": 1.006822449763537e-06, "loss": 0.0891, "step": 4075 }, { "epoch": 0.82, "learning_rate": 1.0047226923189024e-06, "loss": 0.0848, "step": 4076 }, { "epoch": 0.82, "learning_rate": 1.0026248821238915e-06, "loss": 0.0816, "step": 4077 }, { "epoch": 0.82, "learning_rate": 1.0005290202009533e-06, "loss": 0.0847, "step": 4078 }, { "epoch": 0.82, "learning_rate": 9.984351075715848e-07, "loss": 0.0932, "step": 4079 }, { "epoch": 0.82, "learning_rate": 9.963431452563331e-07, "loss": 0.0933, "step": 4080 }, { "epoch": 0.82, "learning_rate": 9.942531342747953e-07, "loss": 0.0775, "step": 4081 }, { "epoch": 0.82, "learning_rate": 9.921650756456164e-07, "loss": 0.0923, "step": 4082 }, { "epoch": 0.82, "learning_rate": 9.900789703864933e-07, "loss": 0.0818, "step": 4083 }, { "epoch": 0.82, "learning_rate": 9.879948195141681e-07, "loss": 0.0857, "step": 4084 }, { "epoch": 0.82, "learning_rate": 9.859126240444284e-07, "loss": 0.0828, "step": 4085 }, { "epoch": 0.82, "learning_rate": 9.838323849921123e-07, "loss": 0.0805, "step": 4086 }, { "epoch": 0.82, "learning_rate": 9.81754103371101e-07, "loss": 0.0772, "step": 4087 }, { "epoch": 0.82, "learning_rate": 9.79677780194327e-07, "loss": 0.0856, "step": 4088 }, { "epoch": 0.82, "learning_rate": 9.77603416473763e-07, "loss": 0.0885, "step": 4089 }, { "epoch": 0.82, "learning_rate": 9.7553101322043e-07, "loss": 0.0764, "step": 4090 }, { "epoch": 0.82, "learning_rate": 9.734605714443906e-07, "loss": 0.0864, "step": 4091 }, { "epoch": 0.82, "learning_rate": 9.713920921547532e-07, "loss": 0.086, "step": 4092 }, { "epoch": 0.82, "learning_rate": 9.69325576359672e-07, "loss": 0.0914, "step": 4093 }, { "epoch": 0.82, "learning_rate": 9.67261025066339e-07, "loss": 0.0941, "step": 4094 }, { "epoch": 0.82, "learning_rate": 9.651984392809916e-07, "loss": 0.0767, "step": 4095 }, { "epoch": 0.82, "learning_rate": 9.631378200089082e-07, "loss": 0.08, "step": 4096 }, { "epoch": 0.82, "learning_rate": 9.610791682544123e-07, "loss": 0.1037, "step": 4097 }, { "epoch": 0.82, "learning_rate": 9.590224850208645e-07, "loss": 0.1328, "step": 4098 }, { "epoch": 0.82, "learning_rate": 9.569677713106673e-07, "loss": 0.0867, "step": 4099 }, { "epoch": 0.82, "learning_rate": 9.549150281252633e-07, "loss": 0.0825, "step": 4100 }, { "epoch": 0.82, "learning_rate": 9.528642564651341e-07, "loss": 0.081, "step": 4101 }, { "epoch": 0.82, "learning_rate": 9.508154573298012e-07, "loss": 0.0932, "step": 4102 }, { "epoch": 0.82, "learning_rate": 9.487686317178241e-07, "loss": 0.086, "step": 4103 }, { "epoch": 0.82, "learning_rate": 9.467237806268009e-07, "loss": 0.0814, "step": 4104 }, { "epoch": 0.82, "learning_rate": 9.446809050533679e-07, "loss": 0.0811, "step": 4105 }, { "epoch": 0.82, "learning_rate": 9.426400059931956e-07, "loss": 0.0876, "step": 4106 }, { "epoch": 0.82, "learning_rate": 9.406010844409957e-07, "loss": 0.0826, "step": 4107 }, { "epoch": 0.82, "learning_rate": 9.385641413905139e-07, "loss": 0.079, "step": 4108 }, { "epoch": 0.82, "learning_rate": 9.365291778345303e-07, "loss": 0.0795, "step": 4109 }, { "epoch": 0.82, "learning_rate": 9.344961947648624e-07, "loss": 0.0824, "step": 4110 }, { "epoch": 0.82, "learning_rate": 9.3246519317236e-07, "loss": 0.0827, "step": 4111 }, { "epoch": 0.82, "learning_rate": 9.304361740469103e-07, "loss": 0.0914, "step": 4112 }, { "epoch": 0.82, "learning_rate": 9.284091383774313e-07, "loss": 0.0836, "step": 4113 }, { "epoch": 0.82, "learning_rate": 9.263840871518759e-07, "loss": 0.0795, "step": 4114 }, { "epoch": 0.82, "learning_rate": 9.243610213572285e-07, "loss": 0.0975, "step": 4115 }, { "epoch": 0.82, "learning_rate": 9.223399419795093e-07, "loss": 0.0863, "step": 4116 }, { "epoch": 0.82, "learning_rate": 9.203208500037664e-07, "loss": 0.0838, "step": 4117 }, { "epoch": 0.82, "learning_rate": 9.183037464140804e-07, "loss": 0.0781, "step": 4118 }, { "epoch": 0.82, "learning_rate": 9.162886321935632e-07, "loss": 0.078, "step": 4119 }, { "epoch": 0.82, "learning_rate": 9.142755083243577e-07, "loss": 0.0894, "step": 4120 }, { "epoch": 0.82, "learning_rate": 9.122643757876354e-07, "loss": 0.095, "step": 4121 }, { "epoch": 0.82, "learning_rate": 9.10255235563598e-07, "loss": 0.0838, "step": 4122 }, { "epoch": 0.82, "learning_rate": 9.08248088631476e-07, "loss": 0.0799, "step": 4123 }, { "epoch": 0.82, "learning_rate": 9.06242935969528e-07, "loss": 0.0943, "step": 4124 }, { "epoch": 0.82, "learning_rate": 9.042397785550405e-07, "loss": 0.1036, "step": 4125 }, { "epoch": 0.83, "learning_rate": 9.022386173643305e-07, "loss": 0.0762, "step": 4126 }, { "epoch": 0.83, "learning_rate": 9.002394533727382e-07, "loss": 0.0929, "step": 4127 }, { "epoch": 0.83, "learning_rate": 8.982422875546332e-07, "loss": 0.0827, "step": 4128 }, { "epoch": 0.83, "learning_rate": 8.962471208834056e-07, "loss": 0.0792, "step": 4129 }, { "epoch": 0.83, "learning_rate": 8.942539543314799e-07, "loss": 0.0884, "step": 4130 }, { "epoch": 0.83, "learning_rate": 8.922627888703e-07, "loss": 0.0823, "step": 4131 }, { "epoch": 0.83, "learning_rate": 8.902736254703347e-07, "loss": 0.0836, "step": 4132 }, { "epoch": 0.83, "learning_rate": 8.882864651010798e-07, "loss": 0.0861, "step": 4133 }, { "epoch": 0.83, "learning_rate": 8.863013087310502e-07, "loss": 0.0814, "step": 4134 }, { "epoch": 0.83, "learning_rate": 8.843181573277904e-07, "loss": 0.0808, "step": 4135 }, { "epoch": 0.83, "learning_rate": 8.823370118578628e-07, "loss": 0.0872, "step": 4136 }, { "epoch": 0.83, "learning_rate": 8.803578732868545e-07, "loss": 0.0778, "step": 4137 }, { "epoch": 0.83, "learning_rate": 8.783807425793722e-07, "loss": 0.0868, "step": 4138 }, { "epoch": 0.83, "learning_rate": 8.764056206990446e-07, "loss": 0.0791, "step": 4139 }, { "epoch": 0.83, "learning_rate": 8.744325086085248e-07, "loss": 0.0824, "step": 4140 }, { "epoch": 0.83, "learning_rate": 8.72461407269482e-07, "loss": 0.0966, "step": 4141 }, { "epoch": 0.83, "learning_rate": 8.704923176426072e-07, "loss": 0.1215, "step": 4142 }, { "epoch": 0.83, "learning_rate": 8.685252406876116e-07, "loss": 0.085, "step": 4143 }, { "epoch": 0.83, "learning_rate": 8.665601773632226e-07, "loss": 0.0844, "step": 4144 }, { "epoch": 0.83, "learning_rate": 8.645971286271903e-07, "loss": 0.096, "step": 4145 }, { "epoch": 0.83, "learning_rate": 8.626360954362817e-07, "loss": 0.0838, "step": 4146 }, { "epoch": 0.83, "learning_rate": 8.606770787462776e-07, "loss": 0.0815, "step": 4147 }, { "epoch": 0.83, "learning_rate": 8.587200795119793e-07, "loss": 0.0861, "step": 4148 }, { "epoch": 0.83, "learning_rate": 8.567650986872061e-07, "loss": 0.0833, "step": 4149 }, { "epoch": 0.83, "learning_rate": 8.54812137224792e-07, "loss": 0.0829, "step": 4150 }, { "epoch": 0.83, "learning_rate": 8.528611960765853e-07, "loss": 0.0972, "step": 4151 }, { "epoch": 0.83, "learning_rate": 8.509122761934519e-07, "loss": 0.0847, "step": 4152 }, { "epoch": 0.83, "learning_rate": 8.489653785252711e-07, "loss": 0.0889, "step": 4153 }, { "epoch": 0.83, "learning_rate": 8.470205040209362e-07, "loss": 0.0858, "step": 4154 }, { "epoch": 0.83, "learning_rate": 8.450776536283594e-07, "loss": 0.0799, "step": 4155 }, { "epoch": 0.83, "learning_rate": 8.431368282944585e-07, "loss": 0.0863, "step": 4156 }, { "epoch": 0.83, "learning_rate": 8.411980289651689e-07, "loss": 0.0834, "step": 4157 }, { "epoch": 0.83, "learning_rate": 8.392612565854374e-07, "loss": 0.0853, "step": 4158 }, { "epoch": 0.83, "learning_rate": 8.373265120992252e-07, "loss": 0.0949, "step": 4159 }, { "epoch": 0.83, "learning_rate": 8.353937964495029e-07, "loss": 0.1299, "step": 4160 }, { "epoch": 0.83, "learning_rate": 8.334631105782515e-07, "loss": 0.0921, "step": 4161 }, { "epoch": 0.83, "learning_rate": 8.315344554264643e-07, "loss": 0.0868, "step": 4162 }, { "epoch": 0.83, "learning_rate": 8.296078319341444e-07, "loss": 0.0833, "step": 4163 }, { "epoch": 0.83, "learning_rate": 8.276832410403051e-07, "loss": 0.0897, "step": 4164 }, { "epoch": 0.83, "learning_rate": 8.25760683682968e-07, "loss": 0.0922, "step": 4165 }, { "epoch": 0.83, "learning_rate": 8.238401607991647e-07, "loss": 0.0889, "step": 4166 }, { "epoch": 0.83, "learning_rate": 8.21921673324933e-07, "loss": 0.0798, "step": 4167 }, { "epoch": 0.83, "learning_rate": 8.200052221953231e-07, "loss": 0.0842, "step": 4168 }, { "epoch": 0.83, "learning_rate": 8.180908083443884e-07, "loss": 0.0854, "step": 4169 }, { "epoch": 0.83, "learning_rate": 8.161784327051919e-07, "loss": 0.0869, "step": 4170 }, { "epoch": 0.83, "learning_rate": 8.142680962098016e-07, "loss": 0.0909, "step": 4171 }, { "epoch": 0.83, "learning_rate": 8.123597997892918e-07, "loss": 0.0791, "step": 4172 }, { "epoch": 0.83, "learning_rate": 8.104535443737438e-07, "loss": 0.0804, "step": 4173 }, { "epoch": 0.83, "learning_rate": 8.085493308922432e-07, "loss": 0.0855, "step": 4174 }, { "epoch": 0.83, "learning_rate": 8.066471602728804e-07, "loss": 0.0809, "step": 4175 }, { "epoch": 0.84, "learning_rate": 8.047470334427504e-07, "loss": 0.0908, "step": 4176 }, { "epoch": 0.84, "learning_rate": 8.028489513279503e-07, "loss": 0.0853, "step": 4177 }, { "epoch": 0.84, "learning_rate": 8.009529148535855e-07, "loss": 0.0897, "step": 4178 }, { "epoch": 0.84, "learning_rate": 7.990589249437591e-07, "loss": 0.0811, "step": 4179 }, { "epoch": 0.84, "learning_rate": 7.971669825215789e-07, "loss": 0.0797, "step": 4180 }, { "epoch": 0.84, "learning_rate": 7.952770885091548e-07, "loss": 0.0872, "step": 4181 }, { "epoch": 0.84, "learning_rate": 7.933892438275987e-07, "loss": 0.0904, "step": 4182 }, { "epoch": 0.84, "learning_rate": 7.91503449397022e-07, "loss": 0.0878, "step": 4183 }, { "epoch": 0.84, "learning_rate": 7.89619706136539e-07, "loss": 0.0911, "step": 4184 }, { "epoch": 0.84, "learning_rate": 7.877380149642628e-07, "loss": 0.0927, "step": 4185 }, { "epoch": 0.84, "learning_rate": 7.858583767973071e-07, "loss": 0.0896, "step": 4186 }, { "epoch": 0.84, "learning_rate": 7.839807925517834e-07, "loss": 0.081, "step": 4187 }, { "epoch": 0.84, "learning_rate": 7.821052631428061e-07, "loss": 0.11, "step": 4188 }, { "epoch": 0.84, "learning_rate": 7.802317894844835e-07, "loss": 0.085, "step": 4189 }, { "epoch": 0.84, "learning_rate": 7.783603724899258e-07, "loss": 0.0796, "step": 4190 }, { "epoch": 0.84, "learning_rate": 7.76491013071235e-07, "loss": 0.0999, "step": 4191 }, { "epoch": 0.84, "learning_rate": 7.746237121395184e-07, "loss": 0.1064, "step": 4192 }, { "epoch": 0.84, "learning_rate": 7.727584706048735e-07, "loss": 0.0899, "step": 4193 }, { "epoch": 0.84, "learning_rate": 7.708952893763972e-07, "loss": 0.0861, "step": 4194 }, { "epoch": 0.84, "learning_rate": 7.690341693621805e-07, "loss": 0.0794, "step": 4195 }, { "epoch": 0.84, "learning_rate": 7.671751114693104e-07, "loss": 0.0907, "step": 4196 }, { "epoch": 0.84, "learning_rate": 7.653181166038715e-07, "loss": 0.0919, "step": 4197 }, { "epoch": 0.84, "learning_rate": 7.63463185670939e-07, "loss": 0.0811, "step": 4198 }, { "epoch": 0.84, "learning_rate": 7.61610319574585e-07, "loss": 0.0883, "step": 4199 }, { "epoch": 0.84, "learning_rate": 7.597595192178702e-07, "loss": 0.0885, "step": 4200 }, { "epoch": 0.84, "learning_rate": 7.579107855028562e-07, "loss": 0.0839, "step": 4201 }, { "epoch": 0.84, "learning_rate": 7.560641193305912e-07, "loss": 0.0911, "step": 4202 }, { "epoch": 0.84, "learning_rate": 7.542195216011188e-07, "loss": 0.0856, "step": 4203 }, { "epoch": 0.84, "learning_rate": 7.523769932134739e-07, "loss": 0.0798, "step": 4204 }, { "epoch": 0.84, "learning_rate": 7.505365350656813e-07, "loss": 0.0844, "step": 4205 }, { "epoch": 0.84, "learning_rate": 7.486981480547567e-07, "loss": 0.0866, "step": 4206 }, { "epoch": 0.84, "learning_rate": 7.468618330767114e-07, "loss": 0.0901, "step": 4207 }, { "epoch": 0.84, "learning_rate": 7.450275910265415e-07, "loss": 0.0882, "step": 4208 }, { "epoch": 0.84, "learning_rate": 7.43195422798233e-07, "loss": 0.0871, "step": 4209 }, { "epoch": 0.84, "learning_rate": 7.413653292847617e-07, "loss": 0.0862, "step": 4210 }, { "epoch": 0.84, "learning_rate": 7.395373113780962e-07, "loss": 0.0801, "step": 4211 }, { "epoch": 0.84, "learning_rate": 7.377113699691879e-07, "loss": 0.0894, "step": 4212 }, { "epoch": 0.84, "learning_rate": 7.358875059479792e-07, "loss": 0.0865, "step": 4213 }, { "epoch": 0.84, "learning_rate": 7.34065720203399e-07, "loss": 0.0825, "step": 4214 }, { "epoch": 0.84, "learning_rate": 7.322460136233622e-07, "loss": 0.0835, "step": 4215 }, { "epoch": 0.84, "learning_rate": 7.304283870947748e-07, "loss": 0.0813, "step": 4216 }, { "epoch": 0.84, "learning_rate": 7.286128415035249e-07, "loss": 0.1346, "step": 4217 }, { "epoch": 0.84, "learning_rate": 7.267993777344856e-07, "loss": 0.0833, "step": 4218 }, { "epoch": 0.84, "learning_rate": 7.249879966715174e-07, "loss": 0.0873, "step": 4219 }, { "epoch": 0.84, "learning_rate": 7.23178699197467e-07, "loss": 0.0851, "step": 4220 }, { "epoch": 0.84, "learning_rate": 7.213714861941628e-07, "loss": 0.0887, "step": 4221 }, { "epoch": 0.84, "learning_rate": 7.195663585424195e-07, "loss": 0.0941, "step": 4222 }, { "epoch": 0.84, "learning_rate": 7.177633171220339e-07, "loss": 0.0877, "step": 4223 }, { "epoch": 0.84, "learning_rate": 7.159623628117856e-07, "loss": 0.0877, "step": 4224 }, { "epoch": 0.84, "learning_rate": 7.141634964894389e-07, "loss": 0.0838, "step": 4225 }, { "epoch": 0.85, "learning_rate": 7.123667190317396e-07, "loss": 0.0851, "step": 4226 }, { "epoch": 0.85, "learning_rate": 7.105720313144143e-07, "loss": 0.0854, "step": 4227 }, { "epoch": 0.85, "learning_rate": 7.087794342121724e-07, "loss": 0.0861, "step": 4228 }, { "epoch": 0.85, "learning_rate": 7.069889285987025e-07, "loss": 0.0817, "step": 4229 }, { "epoch": 0.85, "learning_rate": 7.052005153466779e-07, "loss": 0.0857, "step": 4230 }, { "epoch": 0.85, "learning_rate": 7.034141953277484e-07, "loss": 0.0919, "step": 4231 }, { "epoch": 0.85, "learning_rate": 7.01629969412545e-07, "loss": 0.0825, "step": 4232 }, { "epoch": 0.85, "learning_rate": 6.99847838470677e-07, "loss": 0.0828, "step": 4233 }, { "epoch": 0.85, "learning_rate": 6.980678033707333e-07, "loss": 0.0815, "step": 4234 }, { "epoch": 0.85, "learning_rate": 6.962898649802824e-07, "loss": 0.0887, "step": 4235 }, { "epoch": 0.85, "learning_rate": 6.945140241658688e-07, "loss": 0.0788, "step": 4236 }, { "epoch": 0.85, "learning_rate": 6.927402817930168e-07, "loss": 0.0983, "step": 4237 }, { "epoch": 0.85, "learning_rate": 6.909686387262255e-07, "loss": 0.0848, "step": 4238 }, { "epoch": 0.85, "learning_rate": 6.891990958289724e-07, "loss": 0.0851, "step": 4239 }, { "epoch": 0.85, "learning_rate": 6.874316539637127e-07, "loss": 0.1056, "step": 4240 }, { "epoch": 0.85, "learning_rate": 6.856663139918751e-07, "loss": 0.0946, "step": 4241 }, { "epoch": 0.85, "learning_rate": 6.839030767738653e-07, "loss": 0.0805, "step": 4242 }, { "epoch": 0.85, "learning_rate": 6.821419431690629e-07, "loss": 0.0839, "step": 4243 }, { "epoch": 0.85, "learning_rate": 6.803829140358237e-07, "loss": 0.0821, "step": 4244 }, { "epoch": 0.85, "learning_rate": 6.786259902314768e-07, "loss": 0.0868, "step": 4245 }, { "epoch": 0.85, "learning_rate": 6.768711726123261e-07, "loss": 0.0882, "step": 4246 }, { "epoch": 0.85, "learning_rate": 6.751184620336471e-07, "loss": 0.0808, "step": 4247 }, { "epoch": 0.85, "learning_rate": 6.733678593496901e-07, "loss": 0.0808, "step": 4248 }, { "epoch": 0.85, "learning_rate": 6.716193654136788e-07, "loss": 0.0855, "step": 4249 }, { "epoch": 0.85, "learning_rate": 6.698729810778065e-07, "loss": 0.0839, "step": 4250 }, { "epoch": 0.85, "learning_rate": 6.681287071932408e-07, "loss": 0.0861, "step": 4251 }, { "epoch": 0.85, "learning_rate": 6.663865446101192e-07, "loss": 0.0841, "step": 4252 }, { "epoch": 0.85, "learning_rate": 6.646464941775499e-07, "loss": 0.078, "step": 4253 }, { "epoch": 0.85, "learning_rate": 6.629085567436133e-07, "loss": 0.0827, "step": 4254 }, { "epoch": 0.85, "learning_rate": 6.611727331553585e-07, "loss": 0.0863, "step": 4255 }, { "epoch": 0.85, "learning_rate": 6.594390242588044e-07, "loss": 0.0818, "step": 4256 }, { "epoch": 0.85, "learning_rate": 6.577074308989406e-07, "loss": 0.0837, "step": 4257 }, { "epoch": 0.85, "learning_rate": 6.559779539197231e-07, "loss": 0.0769, "step": 4258 }, { "epoch": 0.85, "learning_rate": 6.542505941640803e-07, "loss": 0.0811, "step": 4259 }, { "epoch": 0.85, "learning_rate": 6.52525352473905e-07, "loss": 0.0883, "step": 4260 }, { "epoch": 0.85, "learning_rate": 6.508022296900601e-07, "loss": 0.0879, "step": 4261 }, { "epoch": 0.85, "learning_rate": 6.490812266523716e-07, "loss": 0.0799, "step": 4262 }, { "epoch": 0.85, "learning_rate": 6.47362344199639e-07, "loss": 0.0864, "step": 4263 }, { "epoch": 0.85, "learning_rate": 6.456455831696234e-07, "loss": 0.0819, "step": 4264 }, { "epoch": 0.85, "learning_rate": 6.439309443990532e-07, "loss": 0.0894, "step": 4265 }, { "epoch": 0.85, "learning_rate": 6.422184287236227e-07, "loss": 0.0834, "step": 4266 }, { "epoch": 0.85, "learning_rate": 6.405080369779898e-07, "loss": 0.0824, "step": 4267 }, { "epoch": 0.85, "learning_rate": 6.387997699957815e-07, "loss": 0.0833, "step": 4268 }, { "epoch": 0.85, "learning_rate": 6.370936286095842e-07, "loss": 0.0921, "step": 4269 }, { "epoch": 0.85, "learning_rate": 6.353896136509524e-07, "loss": 0.0791, "step": 4270 }, { "epoch": 0.85, "learning_rate": 6.336877259504004e-07, "loss": 0.0951, "step": 4271 }, { "epoch": 0.85, "learning_rate": 6.319879663374068e-07, "loss": 0.0944, "step": 4272 }, { "epoch": 0.85, "learning_rate": 6.302903356404161e-07, "loss": 0.1021, "step": 4273 }, { "epoch": 0.85, "learning_rate": 6.28594834686832e-07, "loss": 0.0793, "step": 4274 }, { "epoch": 0.85, "learning_rate": 6.269014643030214e-07, "loss": 0.0849, "step": 4275 }, { "epoch": 0.86, "learning_rate": 6.252102253143122e-07, "loss": 0.0861, "step": 4276 }, { "epoch": 0.86, "learning_rate": 6.235211185449919e-07, "loss": 0.082, "step": 4277 }, { "epoch": 0.86, "learning_rate": 6.218341448183141e-07, "loss": 0.0767, "step": 4278 }, { "epoch": 0.86, "learning_rate": 6.201493049564883e-07, "loss": 0.0818, "step": 4279 }, { "epoch": 0.86, "learning_rate": 6.184665997806832e-07, "loss": 0.0821, "step": 4280 }, { "epoch": 0.86, "learning_rate": 6.167860301110284e-07, "loss": 0.0817, "step": 4281 }, { "epoch": 0.86, "learning_rate": 6.151075967666165e-07, "loss": 0.0867, "step": 4282 }, { "epoch": 0.86, "learning_rate": 6.134313005654929e-07, "loss": 0.0822, "step": 4283 }, { "epoch": 0.86, "learning_rate": 6.117571423246655e-07, "loss": 0.0831, "step": 4284 }, { "epoch": 0.86, "learning_rate": 6.100851228600974e-07, "loss": 0.0987, "step": 4285 }, { "epoch": 0.86, "learning_rate": 6.084152429867113e-07, "loss": 0.0898, "step": 4286 }, { "epoch": 0.86, "learning_rate": 6.067475035183862e-07, "loss": 0.0803, "step": 4287 }, { "epoch": 0.86, "learning_rate": 6.050819052679585e-07, "loss": 0.0817, "step": 4288 }, { "epoch": 0.86, "learning_rate": 6.034184490472195e-07, "loss": 0.082, "step": 4289 }, { "epoch": 0.86, "learning_rate": 6.017571356669183e-07, "loss": 0.0784, "step": 4290 }, { "epoch": 0.86, "learning_rate": 6.000979659367579e-07, "loss": 0.0827, "step": 4291 }, { "epoch": 0.86, "learning_rate": 5.98440940665399e-07, "loss": 0.0865, "step": 4292 }, { "epoch": 0.86, "learning_rate": 5.967860606604553e-07, "loss": 0.0964, "step": 4293 }, { "epoch": 0.86, "learning_rate": 5.951333267284942e-07, "loss": 0.085, "step": 4294 }, { "epoch": 0.86, "learning_rate": 5.934827396750392e-07, "loss": 0.0825, "step": 4295 }, { "epoch": 0.86, "learning_rate": 5.918343003045656e-07, "loss": 0.0859, "step": 4296 }, { "epoch": 0.86, "learning_rate": 5.901880094205037e-07, "loss": 0.0905, "step": 4297 }, { "epoch": 0.86, "learning_rate": 5.885438678252342e-07, "loss": 0.0867, "step": 4298 }, { "epoch": 0.86, "learning_rate": 5.869018763200929e-07, "loss": 0.0799, "step": 4299 }, { "epoch": 0.86, "learning_rate": 5.852620357053651e-07, "loss": 0.0909, "step": 4300 }, { "epoch": 0.86, "learning_rate": 5.836243467802915e-07, "loss": 0.0769, "step": 4301 }, { "epoch": 0.86, "learning_rate": 5.819888103430598e-07, "loss": 0.1078, "step": 4302 }, { "epoch": 0.86, "learning_rate": 5.803554271908124e-07, "loss": 0.0778, "step": 4303 }, { "epoch": 0.86, "learning_rate": 5.787241981196384e-07, "loss": 0.0785, "step": 4304 }, { "epoch": 0.86, "learning_rate": 5.770951239245803e-07, "loss": 0.088, "step": 4305 }, { "epoch": 0.86, "learning_rate": 5.754682053996291e-07, "loss": 0.0804, "step": 4306 }, { "epoch": 0.86, "learning_rate": 5.738434433377244e-07, "loss": 0.082, "step": 4307 }, { "epoch": 0.86, "learning_rate": 5.722208385307559e-07, "loss": 0.0876, "step": 4308 }, { "epoch": 0.86, "learning_rate": 5.706003917695619e-07, "loss": 0.0763, "step": 4309 }, { "epoch": 0.86, "learning_rate": 5.689821038439264e-07, "loss": 0.093, "step": 4310 }, { "epoch": 0.86, "learning_rate": 5.673659755425859e-07, "loss": 0.0829, "step": 4311 }, { "epoch": 0.86, "learning_rate": 5.657520076532208e-07, "loss": 0.0851, "step": 4312 }, { "epoch": 0.86, "learning_rate": 5.641402009624591e-07, "loss": 0.084, "step": 4313 }, { "epoch": 0.86, "learning_rate": 5.625305562558764e-07, "loss": 0.091, "step": 4314 }, { "epoch": 0.86, "learning_rate": 5.609230743179939e-07, "loss": 0.0857, "step": 4315 }, { "epoch": 0.86, "learning_rate": 5.593177559322776e-07, "loss": 0.1445, "step": 4316 }, { "epoch": 0.86, "learning_rate": 5.577146018811419e-07, "loss": 0.1207, "step": 4317 }, { "epoch": 0.86, "learning_rate": 5.561136129459432e-07, "loss": 0.0805, "step": 4318 }, { "epoch": 0.86, "learning_rate": 5.545147899069836e-07, "loss": 0.089, "step": 4319 }, { "epoch": 0.86, "learning_rate": 5.529181335435124e-07, "loss": 0.084, "step": 4320 }, { "epoch": 0.86, "learning_rate": 5.51323644633719e-07, "loss": 0.1032, "step": 4321 }, { "epoch": 0.86, "learning_rate": 5.497313239547374e-07, "loss": 0.1255, "step": 4322 }, { "epoch": 0.86, "learning_rate": 5.48141172282648e-07, "loss": 0.0864, "step": 4323 }, { "epoch": 0.86, "learning_rate": 5.46553190392467e-07, "loss": 0.0907, "step": 4324 }, { "epoch": 0.86, "learning_rate": 5.449673790581611e-07, "loss": 0.0905, "step": 4325 }, { "epoch": 0.87, "learning_rate": 5.433837390526341e-07, "loss": 0.0851, "step": 4326 }, { "epoch": 0.87, "learning_rate": 5.418022711477333e-07, "loss": 0.0815, "step": 4327 }, { "epoch": 0.87, "learning_rate": 5.402229761142464e-07, "loss": 0.0851, "step": 4328 }, { "epoch": 0.87, "learning_rate": 5.386458547219026e-07, "loss": 0.0793, "step": 4329 }, { "epoch": 0.87, "learning_rate": 5.370709077393721e-07, "loss": 0.0883, "step": 4330 }, { "epoch": 0.87, "learning_rate": 5.354981359342659e-07, "loss": 0.0906, "step": 4331 }, { "epoch": 0.87, "learning_rate": 5.339275400731331e-07, "loss": 0.087, "step": 4332 }, { "epoch": 0.87, "learning_rate": 5.323591209214612e-07, "loss": 0.085, "step": 4333 }, { "epoch": 0.87, "learning_rate": 5.307928792436812e-07, "loss": 0.0826, "step": 4334 }, { "epoch": 0.87, "learning_rate": 5.292288158031595e-07, "loss": 0.0805, "step": 4335 }, { "epoch": 0.87, "learning_rate": 5.276669313622013e-07, "loss": 0.0826, "step": 4336 }, { "epoch": 0.87, "learning_rate": 5.2610722668205e-07, "loss": 0.0946, "step": 4337 }, { "epoch": 0.87, "learning_rate": 5.245497025228874e-07, "loss": 0.0941, "step": 4338 }, { "epoch": 0.87, "learning_rate": 5.229943596438297e-07, "loss": 0.0839, "step": 4339 }, { "epoch": 0.87, "learning_rate": 5.214411988029355e-07, "loss": 0.0911, "step": 4340 }, { "epoch": 0.87, "learning_rate": 5.198902207571955e-07, "loss": 0.083, "step": 4341 }, { "epoch": 0.87, "learning_rate": 5.183414262625364e-07, "loss": 0.0834, "step": 4342 }, { "epoch": 0.87, "learning_rate": 5.167948160738206e-07, "loss": 0.0805, "step": 4343 }, { "epoch": 0.87, "learning_rate": 5.152503909448503e-07, "loss": 0.0871, "step": 4344 }, { "epoch": 0.87, "learning_rate": 5.137081516283582e-07, "loss": 0.117, "step": 4345 }, { "epoch": 0.87, "learning_rate": 5.121680988760125e-07, "loss": 0.0874, "step": 4346 }, { "epoch": 0.87, "learning_rate": 5.106302334384172e-07, "loss": 0.0837, "step": 4347 }, { "epoch": 0.87, "learning_rate": 5.090945560651073e-07, "loss": 0.0836, "step": 4348 }, { "epoch": 0.87, "learning_rate": 5.075610675045567e-07, "loss": 0.0891, "step": 4349 }, { "epoch": 0.87, "learning_rate": 5.06029768504166e-07, "loss": 0.1023, "step": 4350 }, { "epoch": 0.87, "learning_rate": 5.045006598102725e-07, "loss": 0.1047, "step": 4351 }, { "epoch": 0.87, "learning_rate": 5.029737421681446e-07, "loss": 0.0868, "step": 4352 }, { "epoch": 0.87, "learning_rate": 5.014490163219854e-07, "loss": 0.0777, "step": 4353 }, { "epoch": 0.87, "learning_rate": 4.99926483014927e-07, "loss": 0.0887, "step": 4354 }, { "epoch": 0.87, "learning_rate": 4.984061429890324e-07, "loss": 0.083, "step": 4355 }, { "epoch": 0.87, "learning_rate": 4.968879969852985e-07, "loss": 0.0923, "step": 4356 }, { "epoch": 0.87, "learning_rate": 4.9537204574365e-07, "loss": 0.0816, "step": 4357 }, { "epoch": 0.87, "learning_rate": 4.938582900029437e-07, "loss": 0.0852, "step": 4358 }, { "epoch": 0.87, "learning_rate": 4.92346730500966e-07, "loss": 0.0865, "step": 4359 }, { "epoch": 0.87, "learning_rate": 4.908373679744316e-07, "loss": 0.0753, "step": 4360 }, { "epoch": 0.87, "learning_rate": 4.893302031589864e-07, "loss": 0.0866, "step": 4361 }, { "epoch": 0.87, "learning_rate": 4.878252367892033e-07, "loss": 0.0822, "step": 4362 }, { "epoch": 0.87, "learning_rate": 4.863224695985858e-07, "loss": 0.0859, "step": 4363 }, { "epoch": 0.87, "learning_rate": 4.848219023195644e-07, "loss": 0.084, "step": 4364 }, { "epoch": 0.87, "learning_rate": 4.833235356834959e-07, "loss": 0.0848, "step": 4365 }, { "epoch": 0.87, "learning_rate": 4.818273704206678e-07, "loss": 0.1133, "step": 4366 }, { "epoch": 0.87, "learning_rate": 4.803334072602917e-07, "loss": 0.0807, "step": 4367 }, { "epoch": 0.87, "learning_rate": 4.788416469305068e-07, "loss": 0.0909, "step": 4368 }, { "epoch": 0.87, "learning_rate": 4.773520901583801e-07, "loss": 0.0808, "step": 4369 }, { "epoch": 0.87, "learning_rate": 4.758647376699033e-07, "loss": 0.0787, "step": 4370 }, { "epoch": 0.87, "learning_rate": 4.743795901899928e-07, "loss": 0.0856, "step": 4371 }, { "epoch": 0.87, "learning_rate": 4.728966484424913e-07, "loss": 0.0824, "step": 4372 }, { "epoch": 0.87, "learning_rate": 4.714159131501689e-07, "loss": 0.0872, "step": 4373 }, { "epoch": 0.87, "learning_rate": 4.699373850347161e-07, "loss": 0.089, "step": 4374 }, { "epoch": 0.88, "learning_rate": 4.6846106481675035e-07, "loss": 0.0827, "step": 4375 }, { "epoch": 0.88, "learning_rate": 4.6698695321581165e-07, "loss": 0.0922, "step": 4376 }, { "epoch": 0.88, "learning_rate": 4.655150509503642e-07, "loss": 0.0876, "step": 4377 }, { "epoch": 0.88, "learning_rate": 4.640453587377958e-07, "loss": 0.0903, "step": 4378 }, { "epoch": 0.88, "learning_rate": 4.625778772944156e-07, "loss": 0.1175, "step": 4379 }, { "epoch": 0.88, "learning_rate": 4.6111260733545714e-07, "loss": 0.0941, "step": 4380 }, { "epoch": 0.88, "learning_rate": 4.5964954957507414e-07, "loss": 0.2558, "step": 4381 }, { "epoch": 0.88, "learning_rate": 4.581887047263445e-07, "loss": 0.0819, "step": 4382 }, { "epoch": 0.88, "learning_rate": 4.567300735012653e-07, "loss": 0.0827, "step": 4383 }, { "epoch": 0.88, "learning_rate": 4.552736566107563e-07, "loss": 0.0854, "step": 4384 }, { "epoch": 0.88, "learning_rate": 4.538194547646574e-07, "loss": 0.0862, "step": 4385 }, { "epoch": 0.88, "learning_rate": 4.523674686717283e-07, "loss": 0.0947, "step": 4386 }, { "epoch": 0.88, "learning_rate": 4.5091769903964965e-07, "loss": 0.0814, "step": 4387 }, { "epoch": 0.88, "learning_rate": 4.494701465750217e-07, "loss": 0.0806, "step": 4388 }, { "epoch": 0.88, "learning_rate": 4.480248119833641e-07, "loss": 0.0763, "step": 4389 }, { "epoch": 0.88, "learning_rate": 4.4658169596911493e-07, "loss": 0.0811, "step": 4390 }, { "epoch": 0.88, "learning_rate": 4.4514079923563103e-07, "loss": 0.084, "step": 4391 }, { "epoch": 0.88, "learning_rate": 4.4370212248518895e-07, "loss": 0.0886, "step": 4392 }, { "epoch": 0.88, "learning_rate": 4.4226566641898173e-07, "loss": 0.0823, "step": 4393 }, { "epoch": 0.88, "learning_rate": 4.4083143173712207e-07, "loss": 0.0951, "step": 4394 }, { "epoch": 0.88, "learning_rate": 4.3939941913863525e-07, "loss": 0.0872, "step": 4395 }, { "epoch": 0.88, "learning_rate": 4.379696293214697e-07, "loss": 0.0811, "step": 4396 }, { "epoch": 0.88, "learning_rate": 4.3654206298248625e-07, "loss": 0.0869, "step": 4397 }, { "epoch": 0.88, "learning_rate": 4.3511672081746393e-07, "loss": 0.0893, "step": 4398 }, { "epoch": 0.88, "learning_rate": 4.33693603521097e-07, "loss": 0.076, "step": 4399 }, { "epoch": 0.88, "learning_rate": 4.322727117869951e-07, "loss": 0.0878, "step": 4400 }, { "epoch": 0.88, "learning_rate": 4.308540463076849e-07, "loss": 0.082, "step": 4401 }, { "epoch": 0.88, "learning_rate": 4.29437607774606e-07, "loss": 0.0862, "step": 4402 }, { "epoch": 0.88, "learning_rate": 4.280233968781139e-07, "loss": 0.0911, "step": 4403 }, { "epoch": 0.88, "learning_rate": 4.266114143074751e-07, "loss": 0.0897, "step": 4404 }, { "epoch": 0.88, "learning_rate": 4.2520166075087635e-07, "loss": 0.0928, "step": 4405 }, { "epoch": 0.88, "learning_rate": 4.237941368954124e-07, "loss": 0.083, "step": 4406 }, { "epoch": 0.88, "learning_rate": 4.2238884342709397e-07, "loss": 0.0878, "step": 4407 }, { "epoch": 0.88, "learning_rate": 4.2098578103084376e-07, "loss": 0.0876, "step": 4408 }, { "epoch": 0.88, "learning_rate": 4.195849503904975e-07, "loss": 0.0855, "step": 4409 }, { "epoch": 0.88, "learning_rate": 4.1818635218880186e-07, "loss": 0.0967, "step": 4410 }, { "epoch": 0.88, "learning_rate": 4.1678998710741936e-07, "loss": 0.0841, "step": 4411 }, { "epoch": 0.88, "learning_rate": 4.153958558269189e-07, "loss": 0.0818, "step": 4412 }, { "epoch": 0.88, "learning_rate": 4.140039590267836e-07, "loss": 0.0823, "step": 4413 }, { "epoch": 0.88, "learning_rate": 4.1261429738540694e-07, "loss": 0.0841, "step": 4414 }, { "epoch": 0.88, "learning_rate": 4.112268715800943e-07, "loss": 0.1177, "step": 4415 }, { "epoch": 0.88, "learning_rate": 4.0984168228705934e-07, "loss": 0.0848, "step": 4416 }, { "epoch": 0.88, "learning_rate": 4.084587301814269e-07, "loss": 0.0833, "step": 4417 }, { "epoch": 0.88, "learning_rate": 4.0707801593723006e-07, "loss": 0.0938, "step": 4418 }, { "epoch": 0.88, "learning_rate": 4.056995402274122e-07, "loss": 0.0835, "step": 4419 }, { "epoch": 0.88, "learning_rate": 4.043233037238281e-07, "loss": 0.0856, "step": 4420 }, { "epoch": 0.88, "learning_rate": 4.029493070972362e-07, "loss": 0.0835, "step": 4421 }, { "epoch": 0.88, "learning_rate": 4.0157755101730645e-07, "loss": 0.0755, "step": 4422 }, { "epoch": 0.88, "learning_rate": 4.002080361526156e-07, "loss": 0.0907, "step": 4423 }, { "epoch": 0.88, "learning_rate": 3.9884076317064813e-07, "loss": 0.0807, "step": 4424 }, { "epoch": 0.89, "learning_rate": 3.9747573273779816e-07, "loss": 0.0921, "step": 4425 }, { "epoch": 0.89, "learning_rate": 3.961129455193641e-07, "loss": 0.0901, "step": 4426 }, { "epoch": 0.89, "learning_rate": 3.947524021795518e-07, "loss": 0.0825, "step": 4427 }, { "epoch": 0.89, "learning_rate": 3.9339410338147363e-07, "loss": 0.0841, "step": 4428 }, { "epoch": 0.89, "learning_rate": 3.920380497871473e-07, "loss": 0.0971, "step": 4429 }, { "epoch": 0.89, "learning_rate": 3.90684242057498e-07, "loss": 0.0835, "step": 4430 }, { "epoch": 0.89, "learning_rate": 3.89332680852354e-07, "loss": 0.0824, "step": 4431 }, { "epoch": 0.89, "learning_rate": 3.879833668304506e-07, "loss": 0.0769, "step": 4432 }, { "epoch": 0.89, "learning_rate": 3.866363006494256e-07, "loss": 0.0782, "step": 4433 }, { "epoch": 0.89, "learning_rate": 3.85291482965825e-07, "loss": 0.0957, "step": 4434 }, { "epoch": 0.89, "learning_rate": 3.8394891443509554e-07, "loss": 0.0866, "step": 4435 }, { "epoch": 0.89, "learning_rate": 3.8260859571158883e-07, "loss": 0.0755, "step": 4436 }, { "epoch": 0.89, "learning_rate": 3.812705274485595e-07, "loss": 0.0819, "step": 4437 }, { "epoch": 0.89, "learning_rate": 3.7993471029816653e-07, "loss": 0.0992, "step": 4438 }, { "epoch": 0.89, "learning_rate": 3.7860114491147017e-07, "loss": 0.0877, "step": 4439 }, { "epoch": 0.89, "learning_rate": 3.772698319384349e-07, "loss": 0.0821, "step": 4440 }, { "epoch": 0.89, "learning_rate": 3.759407720279257e-07, "loss": 0.0791, "step": 4441 }, { "epoch": 0.89, "learning_rate": 3.7461396582771035e-07, "loss": 0.0814, "step": 4442 }, { "epoch": 0.89, "learning_rate": 3.732894139844578e-07, "loss": 0.0788, "step": 4443 }, { "epoch": 0.89, "learning_rate": 3.7196711714373947e-07, "loss": 0.0972, "step": 4444 }, { "epoch": 0.89, "learning_rate": 3.7064707595002636e-07, "loss": 0.0894, "step": 4445 }, { "epoch": 0.89, "learning_rate": 3.693292910466906e-07, "loss": 0.0822, "step": 4446 }, { "epoch": 0.89, "learning_rate": 3.680137630760039e-07, "loss": 0.088, "step": 4447 }, { "epoch": 0.89, "learning_rate": 3.6670049267913954e-07, "loss": 0.0793, "step": 4448 }, { "epoch": 0.89, "learning_rate": 3.6538948049616886e-07, "loss": 0.0796, "step": 4449 }, { "epoch": 0.89, "learning_rate": 3.6408072716606346e-07, "loss": 0.0805, "step": 4450 }, { "epoch": 0.89, "learning_rate": 3.627742333266937e-07, "loss": 0.0814, "step": 4451 }, { "epoch": 0.89, "learning_rate": 3.614699996148285e-07, "loss": 0.0786, "step": 4452 }, { "epoch": 0.89, "learning_rate": 3.601680266661367e-07, "loss": 0.0785, "step": 4453 }, { "epoch": 0.89, "learning_rate": 3.5886831511518336e-07, "loss": 0.0871, "step": 4454 }, { "epoch": 0.89, "learning_rate": 3.575708655954324e-07, "loss": 0.0793, "step": 4455 }, { "epoch": 0.89, "learning_rate": 3.562756787392452e-07, "loss": 0.0957, "step": 4456 }, { "epoch": 0.89, "learning_rate": 3.5498275517787783e-07, "loss": 0.0812, "step": 4457 }, { "epoch": 0.89, "learning_rate": 3.5369209554148854e-07, "loss": 0.076, "step": 4458 }, { "epoch": 0.89, "learning_rate": 3.524037004591274e-07, "loss": 0.0827, "step": 4459 }, { "epoch": 0.89, "learning_rate": 3.511175705587433e-07, "loss": 0.08, "step": 4460 }, { "epoch": 0.89, "learning_rate": 3.498337064671803e-07, "loss": 0.0851, "step": 4461 }, { "epoch": 0.89, "learning_rate": 3.4855210881017675e-07, "loss": 0.0838, "step": 4462 }, { "epoch": 0.89, "learning_rate": 3.472727782123697e-07, "loss": 0.0873, "step": 4463 }, { "epoch": 0.89, "learning_rate": 3.459957152972887e-07, "loss": 0.079, "step": 4464 }, { "epoch": 0.89, "learning_rate": 3.4472092068735917e-07, "loss": 0.0928, "step": 4465 }, { "epoch": 0.89, "learning_rate": 3.434483950038986e-07, "loss": 0.0825, "step": 4466 }, { "epoch": 0.89, "learning_rate": 3.421781388671225e-07, "loss": 0.0849, "step": 4467 }, { "epoch": 0.89, "learning_rate": 3.409101528961378e-07, "loss": 0.0836, "step": 4468 }, { "epoch": 0.89, "learning_rate": 3.396444377089453e-07, "loss": 0.0788, "step": 4469 }, { "epoch": 0.89, "learning_rate": 3.3838099392243915e-07, "loss": 0.0799, "step": 4470 }, { "epoch": 0.89, "learning_rate": 3.371198221524069e-07, "loss": 0.0872, "step": 4471 }, { "epoch": 0.89, "learning_rate": 3.358609230135268e-07, "loss": 0.0828, "step": 4472 }, { "epoch": 0.89, "learning_rate": 3.3460429711937417e-07, "loss": 0.1008, "step": 4473 }, { "epoch": 0.89, "learning_rate": 3.3334994508241013e-07, "loss": 0.0802, "step": 4474 }, { "epoch": 0.9, "learning_rate": 3.320978675139919e-07, "loss": 0.0794, "step": 4475 }, { "epoch": 0.9, "learning_rate": 3.3084806502436617e-07, "loss": 0.0831, "step": 4476 }, { "epoch": 0.9, "learning_rate": 3.2960053822267245e-07, "loss": 0.0917, "step": 4477 }, { "epoch": 0.9, "learning_rate": 3.283552877169399e-07, "loss": 0.0842, "step": 4478 }, { "epoch": 0.9, "learning_rate": 3.271123141140886e-07, "loss": 0.0813, "step": 4479 }, { "epoch": 0.9, "learning_rate": 3.258716180199278e-07, "loss": 0.0837, "step": 4480 }, { "epoch": 0.9, "learning_rate": 3.246332000391583e-07, "loss": 0.1929, "step": 4481 }, { "epoch": 0.9, "learning_rate": 3.233970607753717e-07, "loss": 0.0814, "step": 4482 }, { "epoch": 0.9, "learning_rate": 3.2216320083104434e-07, "loss": 0.0974, "step": 4483 }, { "epoch": 0.9, "learning_rate": 3.2093162080754634e-07, "loss": 0.0845, "step": 4484 }, { "epoch": 0.9, "learning_rate": 3.1970232130513365e-07, "loss": 0.1045, "step": 4485 }, { "epoch": 0.9, "learning_rate": 3.1847530292295313e-07, "loss": 0.0928, "step": 4486 }, { "epoch": 0.9, "learning_rate": 3.172505662590386e-07, "loss": 0.0944, "step": 4487 }, { "epoch": 0.9, "learning_rate": 3.160281119103109e-07, "loss": 0.0897, "step": 4488 }, { "epoch": 0.9, "learning_rate": 3.148079404725801e-07, "loss": 0.0831, "step": 4489 }, { "epoch": 0.9, "learning_rate": 3.135900525405428e-07, "loss": 0.0755, "step": 4490 }, { "epoch": 0.9, "learning_rate": 3.123744487077829e-07, "loss": 0.0837, "step": 4491 }, { "epoch": 0.9, "learning_rate": 3.1116112956677045e-07, "loss": 0.0785, "step": 4492 }, { "epoch": 0.9, "learning_rate": 3.0995009570886305e-07, "loss": 0.0846, "step": 4493 }, { "epoch": 0.9, "learning_rate": 3.0874134772430344e-07, "loss": 0.0888, "step": 4494 }, { "epoch": 0.9, "learning_rate": 3.0753488620222037e-07, "loss": 0.0805, "step": 4495 }, { "epoch": 0.9, "learning_rate": 3.0633071173062966e-07, "loss": 0.0942, "step": 4496 }, { "epoch": 0.9, "learning_rate": 3.051288248964307e-07, "loss": 0.0844, "step": 4497 }, { "epoch": 0.9, "learning_rate": 3.0392922628540875e-07, "loss": 0.083, "step": 4498 }, { "epoch": 0.9, "learning_rate": 3.027319164822329e-07, "loss": 0.0848, "step": 4499 }, { "epoch": 0.9, "learning_rate": 3.015368960704584e-07, "loss": 0.0839, "step": 4500 }, { "epoch": 0.9, "learning_rate": 3.003441656325229e-07, "loss": 0.1023, "step": 4501 }, { "epoch": 0.9, "learning_rate": 2.99153725749749e-07, "loss": 0.0899, "step": 4502 }, { "epoch": 0.9, "learning_rate": 2.9796557700234317e-07, "loss": 0.0924, "step": 4503 }, { "epoch": 0.9, "learning_rate": 2.967797199693928e-07, "loss": 0.0892, "step": 4504 }, { "epoch": 0.9, "learning_rate": 2.9559615522887275e-07, "loss": 0.0822, "step": 4505 }, { "epoch": 0.9, "learning_rate": 2.9441488335763656e-07, "loss": 0.0875, "step": 4506 }, { "epoch": 0.9, "learning_rate": 2.9323590493142206e-07, "loss": 0.0924, "step": 4507 }, { "epoch": 0.9, "learning_rate": 2.920592205248496e-07, "loss": 0.0814, "step": 4508 }, { "epoch": 0.9, "learning_rate": 2.908848307114198e-07, "loss": 0.0847, "step": 4509 }, { "epoch": 0.9, "learning_rate": 2.8971273606351656e-07, "loss": 0.0909, "step": 4510 }, { "epoch": 0.9, "learning_rate": 2.8854293715240455e-07, "loss": 0.0799, "step": 4511 }, { "epoch": 0.9, "learning_rate": 2.8737543454822993e-07, "loss": 0.0931, "step": 4512 }, { "epoch": 0.9, "learning_rate": 2.862102288200186e-07, "loss": 0.0769, "step": 4513 }, { "epoch": 0.9, "learning_rate": 2.850473205356774e-07, "loss": 0.0825, "step": 4514 }, { "epoch": 0.9, "learning_rate": 2.838867102619952e-07, "loss": 0.085, "step": 4515 }, { "epoch": 0.9, "learning_rate": 2.8272839856463783e-07, "loss": 0.0828, "step": 4516 }, { "epoch": 0.9, "learning_rate": 2.815723860081537e-07, "loss": 0.0913, "step": 4517 }, { "epoch": 0.9, "learning_rate": 2.804186731559677e-07, "loss": 0.1001, "step": 4518 }, { "epoch": 0.9, "learning_rate": 2.792672605703867e-07, "loss": 0.0815, "step": 4519 }, { "epoch": 0.9, "learning_rate": 2.7811814881259503e-07, "loss": 0.0833, "step": 4520 }, { "epoch": 0.9, "learning_rate": 2.7697133844265535e-07, "loss": 0.0827, "step": 4521 }, { "epoch": 0.9, "learning_rate": 2.758268300195094e-07, "loss": 0.0927, "step": 4522 }, { "epoch": 0.9, "learning_rate": 2.746846241009765e-07, "loss": 0.0837, "step": 4523 }, { "epoch": 0.9, "learning_rate": 2.735447212437531e-07, "loss": 0.0875, "step": 4524 }, { "epoch": 0.91, "learning_rate": 2.724071220034158e-07, "loss": 0.0877, "step": 4525 }, { "epoch": 0.91, "learning_rate": 2.712718269344161e-07, "loss": 0.0876, "step": 4526 }, { "epoch": 0.91, "learning_rate": 2.701388365900831e-07, "loss": 0.0847, "step": 4527 }, { "epoch": 0.91, "learning_rate": 2.690081515226206e-07, "loss": 0.0969, "step": 4528 }, { "epoch": 0.91, "learning_rate": 2.6787977228311336e-07, "loss": 0.08, "step": 4529 }, { "epoch": 0.91, "learning_rate": 2.6675369942151864e-07, "loss": 0.0878, "step": 4530 }, { "epoch": 0.91, "learning_rate": 2.656299334866702e-07, "loss": 0.0848, "step": 4531 }, { "epoch": 0.91, "learning_rate": 2.6450847502627883e-07, "loss": 0.077, "step": 4532 }, { "epoch": 0.91, "learning_rate": 2.6338932458692847e-07, "loss": 0.0911, "step": 4533 }, { "epoch": 0.91, "learning_rate": 2.622724827140816e-07, "loss": 0.0795, "step": 4534 }, { "epoch": 0.91, "learning_rate": 2.611579499520722e-07, "loss": 0.0758, "step": 4535 }, { "epoch": 0.91, "learning_rate": 2.600457268441092e-07, "loss": 0.0806, "step": 4536 }, { "epoch": 0.91, "learning_rate": 2.589358139322767e-07, "loss": 0.0859, "step": 4537 }, { "epoch": 0.91, "learning_rate": 2.578282117575343e-07, "loss": 0.079, "step": 4538 }, { "epoch": 0.91, "learning_rate": 2.5672292085971276e-07, "loss": 0.0825, "step": 4539 }, { "epoch": 0.91, "learning_rate": 2.556199417775174e-07, "loss": 0.0797, "step": 4540 }, { "epoch": 0.91, "learning_rate": 2.5451927504852757e-07, "loss": 0.0913, "step": 4541 }, { "epoch": 0.91, "learning_rate": 2.534209212091937e-07, "loss": 0.0821, "step": 4542 }, { "epoch": 0.91, "learning_rate": 2.523248807948403e-07, "loss": 0.0767, "step": 4543 }, { "epoch": 0.91, "learning_rate": 2.5123115433966615e-07, "loss": 0.0854, "step": 4544 }, { "epoch": 0.91, "learning_rate": 2.5013974237673824e-07, "loss": 0.0825, "step": 4545 }, { "epoch": 0.91, "learning_rate": 2.4905064543799706e-07, "loss": 0.0856, "step": 4546 }, { "epoch": 0.91, "learning_rate": 2.479638640542564e-07, "loss": 0.0903, "step": 4547 }, { "epoch": 0.91, "learning_rate": 2.4687939875519984e-07, "loss": 0.1022, "step": 4548 }, { "epoch": 0.91, "learning_rate": 2.457972500693834e-07, "loss": 0.0871, "step": 4549 }, { "epoch": 0.91, "learning_rate": 2.447174185242324e-07, "loss": 0.1023, "step": 4550 }, { "epoch": 0.91, "learning_rate": 2.4363990464604357e-07, "loss": 0.0772, "step": 4551 }, { "epoch": 0.91, "learning_rate": 2.4256470895998363e-07, "loss": 0.0812, "step": 4552 }, { "epoch": 0.91, "learning_rate": 2.414918319900922e-07, "loss": 0.0761, "step": 4553 }, { "epoch": 0.91, "learning_rate": 2.404212742592743e-07, "loss": 0.0817, "step": 4554 }, { "epoch": 0.91, "learning_rate": 2.3935303628930705e-07, "loss": 0.0797, "step": 4555 }, { "epoch": 0.91, "learning_rate": 2.3828711860083676e-07, "loss": 0.0889, "step": 4556 }, { "epoch": 0.91, "learning_rate": 2.3722352171337836e-07, "loss": 0.0831, "step": 4557 }, { "epoch": 0.91, "learning_rate": 2.361622461453178e-07, "loss": 0.087, "step": 4558 }, { "epoch": 0.91, "learning_rate": 2.351032924139063e-07, "loss": 0.0833, "step": 4559 }, { "epoch": 0.91, "learning_rate": 2.3404666103526542e-07, "loss": 0.0808, "step": 4560 }, { "epoch": 0.91, "learning_rate": 2.3299235252438434e-07, "loss": 0.0779, "step": 4561 }, { "epoch": 0.91, "learning_rate": 2.319403673951204e-07, "loss": 0.0815, "step": 4562 }, { "epoch": 0.91, "learning_rate": 2.3089070616019838e-07, "loss": 0.0777, "step": 4563 }, { "epoch": 0.91, "learning_rate": 2.2984336933121076e-07, "loss": 0.0881, "step": 4564 }, { "epoch": 0.91, "learning_rate": 2.287983574186159e-07, "loss": 0.0831, "step": 4565 }, { "epoch": 0.91, "learning_rate": 2.2775567093174022e-07, "loss": 0.0786, "step": 4566 }, { "epoch": 0.91, "learning_rate": 2.2671531037877724e-07, "loss": 0.0851, "step": 4567 }, { "epoch": 0.91, "learning_rate": 2.2567727626678527e-07, "loss": 0.0816, "step": 4568 }, { "epoch": 0.91, "learning_rate": 2.2464156910168954e-07, "loss": 0.0834, "step": 4569 }, { "epoch": 0.91, "learning_rate": 2.2360818938828189e-07, "loss": 0.0869, "step": 4570 }, { "epoch": 0.91, "learning_rate": 2.2257713763021826e-07, "loss": 0.081, "step": 4571 }, { "epoch": 0.91, "learning_rate": 2.2154841433002062e-07, "loss": 0.0818, "step": 4572 }, { "epoch": 0.91, "learning_rate": 2.2052201998907673e-07, "loss": 0.0807, "step": 4573 }, { "epoch": 0.91, "learning_rate": 2.1949795510763872e-07, "loss": 0.0827, "step": 4574 }, { "epoch": 0.92, "learning_rate": 2.1847622018482283e-07, "loss": 0.081, "step": 4575 }, { "epoch": 0.92, "learning_rate": 2.174568157186102e-07, "loss": 0.082, "step": 4576 }, { "epoch": 0.92, "learning_rate": 2.1643974220584729e-07, "loss": 0.0821, "step": 4577 }, { "epoch": 0.92, "learning_rate": 2.154250001422431e-07, "loss": 0.0826, "step": 4578 }, { "epoch": 0.92, "learning_rate": 2.1441259002236924e-07, "loss": 0.0829, "step": 4579 }, { "epoch": 0.92, "learning_rate": 2.134025123396638e-07, "loss": 0.0832, "step": 4580 }, { "epoch": 0.92, "learning_rate": 2.123947675864252e-07, "loss": 0.0799, "step": 4581 }, { "epoch": 0.92, "learning_rate": 2.1138935625381663e-07, "loss": 0.0774, "step": 4582 }, { "epoch": 0.92, "learning_rate": 2.103862788318628e-07, "loss": 0.0994, "step": 4583 }, { "epoch": 0.92, "learning_rate": 2.0938553580945208e-07, "loss": 0.0876, "step": 4584 }, { "epoch": 0.92, "learning_rate": 2.083871276743338e-07, "loss": 0.0962, "step": 4585 }, { "epoch": 0.92, "learning_rate": 2.0739105491312028e-07, "loss": 0.0849, "step": 4586 }, { "epoch": 0.92, "learning_rate": 2.0639731801128603e-07, "loss": 0.0812, "step": 4587 }, { "epoch": 0.92, "learning_rate": 2.054059174531653e-07, "loss": 0.0809, "step": 4588 }, { "epoch": 0.92, "learning_rate": 2.0441685372195487e-07, "loss": 0.0879, "step": 4589 }, { "epoch": 0.92, "learning_rate": 2.0343012729971244e-07, "loss": 0.0802, "step": 4590 }, { "epoch": 0.92, "learning_rate": 2.0244573866735673e-07, "loss": 0.0835, "step": 4591 }, { "epoch": 0.92, "learning_rate": 2.0146368830466668e-07, "loss": 0.0866, "step": 4592 }, { "epoch": 0.92, "learning_rate": 2.0048397669028164e-07, "loss": 0.0795, "step": 4593 }, { "epoch": 0.92, "learning_rate": 1.995066043017013e-07, "loss": 0.0766, "step": 4594 }, { "epoch": 0.92, "learning_rate": 1.9853157161528468e-07, "loss": 0.0849, "step": 4595 }, { "epoch": 0.92, "learning_rate": 1.9755887910625103e-07, "loss": 0.087, "step": 4596 }, { "epoch": 0.92, "learning_rate": 1.9658852724868005e-07, "loss": 0.0828, "step": 4597 }, { "epoch": 0.92, "learning_rate": 1.9562051651550784e-07, "loss": 0.0825, "step": 4598 }, { "epoch": 0.92, "learning_rate": 1.9465484737853092e-07, "loss": 0.0834, "step": 4599 }, { "epoch": 0.92, "learning_rate": 1.9369152030840553e-07, "loss": 0.089, "step": 4600 }, { "epoch": 0.92, "learning_rate": 1.927305357746462e-07, "loss": 0.073, "step": 4601 }, { "epoch": 0.92, "learning_rate": 1.917718942456237e-07, "loss": 0.0842, "step": 4602 }, { "epoch": 0.92, "learning_rate": 1.9081559618856938e-07, "loss": 0.11, "step": 4603 }, { "epoch": 0.92, "learning_rate": 1.8986164206957037e-07, "loss": 0.0836, "step": 4604 }, { "epoch": 0.92, "learning_rate": 1.8891003235357307e-07, "loss": 0.0819, "step": 4605 }, { "epoch": 0.92, "learning_rate": 1.8796076750438096e-07, "loss": 0.095, "step": 4606 }, { "epoch": 0.92, "learning_rate": 1.8701384798465284e-07, "loss": 0.0915, "step": 4607 }, { "epoch": 0.92, "learning_rate": 1.8606927425590616e-07, "loss": 0.0826, "step": 4608 }, { "epoch": 0.92, "learning_rate": 1.8512704677851489e-07, "loss": 0.0816, "step": 4609 }, { "epoch": 0.92, "learning_rate": 1.841871660117095e-07, "loss": 0.0843, "step": 4610 }, { "epoch": 0.92, "learning_rate": 1.832496324135763e-07, "loss": 0.0827, "step": 4611 }, { "epoch": 0.92, "learning_rate": 1.8231444644105755e-07, "loss": 0.0931, "step": 4612 }, { "epoch": 0.92, "learning_rate": 1.8138160854995145e-07, "loss": 0.0816, "step": 4613 }, { "epoch": 0.92, "learning_rate": 1.804511191949121e-07, "loss": 0.089, "step": 4614 }, { "epoch": 0.92, "learning_rate": 1.7952297882945e-07, "loss": 0.0848, "step": 4615 }, { "epoch": 0.92, "learning_rate": 1.785971879059273e-07, "loss": 0.0888, "step": 4616 }, { "epoch": 0.92, "learning_rate": 1.7767374687556405e-07, "loss": 0.0959, "step": 4617 }, { "epoch": 0.92, "learning_rate": 1.7675265618843361e-07, "loss": 0.2668, "step": 4618 }, { "epoch": 0.92, "learning_rate": 1.758339162934658e-07, "loss": 0.1048, "step": 4619 }, { "epoch": 0.92, "learning_rate": 1.7491752763844294e-07, "loss": 0.0832, "step": 4620 }, { "epoch": 0.92, "learning_rate": 1.740034906700011e-07, "loss": 0.0823, "step": 4621 }, { "epoch": 0.92, "learning_rate": 1.7309180583363062e-07, "loss": 0.0821, "step": 4622 }, { "epoch": 0.92, "learning_rate": 1.7218247357367656e-07, "loss": 0.0803, "step": 4623 }, { "epoch": 0.92, "learning_rate": 1.7127549433333557e-07, "loss": 0.0871, "step": 4624 }, { "epoch": 0.93, "learning_rate": 1.7037086855465902e-07, "loss": 0.097, "step": 4625 }, { "epoch": 0.93, "learning_rate": 1.6946859667854977e-07, "loss": 0.0889, "step": 4626 }, { "epoch": 0.93, "learning_rate": 1.6856867914476492e-07, "loss": 0.0887, "step": 4627 }, { "epoch": 0.93, "learning_rate": 1.6767111639191202e-07, "loss": 0.0815, "step": 4628 }, { "epoch": 0.93, "learning_rate": 1.6677590885745388e-07, "loss": 0.0789, "step": 4629 }, { "epoch": 0.93, "learning_rate": 1.6588305697770313e-07, "loss": 0.075, "step": 4630 }, { "epoch": 0.93, "learning_rate": 1.6499256118782503e-07, "loss": 0.0783, "step": 4631 }, { "epoch": 0.93, "learning_rate": 1.6410442192183574e-07, "loss": 0.0832, "step": 4632 }, { "epoch": 0.93, "learning_rate": 1.6321863961260452e-07, "loss": 0.083, "step": 4633 }, { "epoch": 0.93, "learning_rate": 1.6233521469185054e-07, "loss": 0.0863, "step": 4634 }, { "epoch": 0.93, "learning_rate": 1.6145414759014433e-07, "loss": 0.0905, "step": 4635 }, { "epoch": 0.93, "learning_rate": 1.6057543873690685e-07, "loss": 0.0935, "step": 4636 }, { "epoch": 0.93, "learning_rate": 1.596990885604105e-07, "loss": 0.0912, "step": 4637 }, { "epoch": 0.93, "learning_rate": 1.5882509748777809e-07, "loss": 0.0785, "step": 4638 }, { "epoch": 0.93, "learning_rate": 1.5795346594498162e-07, "loss": 0.0848, "step": 4639 }, { "epoch": 0.93, "learning_rate": 1.5708419435684463e-07, "loss": 0.083, "step": 4640 }, { "epoch": 0.93, "learning_rate": 1.5621728314703822e-07, "loss": 0.093, "step": 4641 }, { "epoch": 0.93, "learning_rate": 1.553527327380855e-07, "loss": 0.106, "step": 4642 }, { "epoch": 0.93, "learning_rate": 1.5449054355135718e-07, "loss": 0.0836, "step": 4643 }, { "epoch": 0.93, "learning_rate": 1.5363071600707435e-07, "loss": 0.0855, "step": 4644 }, { "epoch": 0.93, "learning_rate": 1.5277325052430569e-07, "loss": 0.0856, "step": 4645 }, { "epoch": 0.93, "learning_rate": 1.5191814752097024e-07, "loss": 0.0888, "step": 4646 }, { "epoch": 0.93, "learning_rate": 1.5106540741383402e-07, "loss": 0.0867, "step": 4647 }, { "epoch": 0.93, "learning_rate": 1.502150306185135e-07, "loss": 0.083, "step": 4648 }, { "epoch": 0.93, "learning_rate": 1.4936701754947104e-07, "loss": 0.0929, "step": 4649 }, { "epoch": 0.93, "learning_rate": 1.4852136862001766e-07, "loss": 0.0851, "step": 4650 }, { "epoch": 0.93, "learning_rate": 1.4767808424231312e-07, "loss": 0.0882, "step": 4651 }, { "epoch": 0.93, "learning_rate": 1.4683716482736364e-07, "loss": 0.0804, "step": 4652 }, { "epoch": 0.93, "learning_rate": 1.459986107850231e-07, "loss": 0.084, "step": 4653 }, { "epoch": 0.93, "learning_rate": 1.4516242252399227e-07, "loss": 0.0842, "step": 4654 }, { "epoch": 0.93, "learning_rate": 1.4432860045182019e-07, "loss": 0.0816, "step": 4655 }, { "epoch": 0.93, "learning_rate": 1.4349714497490009e-07, "loss": 0.0758, "step": 4656 }, { "epoch": 0.93, "learning_rate": 1.4266805649847392e-07, "loss": 0.0855, "step": 4657 }, { "epoch": 0.93, "learning_rate": 1.4184133542663014e-07, "loss": 0.0796, "step": 4658 }, { "epoch": 0.93, "learning_rate": 1.4101698216230254e-07, "loss": 0.083, "step": 4659 }, { "epoch": 0.93, "learning_rate": 1.4019499710726913e-07, "loss": 0.1019, "step": 4660 }, { "epoch": 0.93, "learning_rate": 1.3937538066215672e-07, "loss": 0.0898, "step": 4661 }, { "epoch": 0.93, "learning_rate": 1.385581332264363e-07, "loss": 0.0794, "step": 4662 }, { "epoch": 0.93, "learning_rate": 1.3774325519842423e-07, "loss": 0.0829, "step": 4663 }, { "epoch": 0.93, "learning_rate": 1.3693074697528231e-07, "loss": 0.0819, "step": 4664 }, { "epoch": 0.93, "learning_rate": 1.3612060895301759e-07, "loss": 0.0804, "step": 4665 }, { "epoch": 0.93, "learning_rate": 1.3531284152647983e-07, "loss": 0.0799, "step": 4666 }, { "epoch": 0.93, "learning_rate": 1.3450744508936687e-07, "loss": 0.0829, "step": 4667 }, { "epoch": 0.93, "learning_rate": 1.3370442003421913e-07, "loss": 0.0805, "step": 4668 }, { "epoch": 0.93, "learning_rate": 1.3290376675242022e-07, "loss": 0.0848, "step": 4669 }, { "epoch": 0.93, "learning_rate": 1.3210548563419857e-07, "loss": 0.0826, "step": 4670 }, { "epoch": 0.93, "learning_rate": 1.313095770686279e-07, "loss": 0.0861, "step": 4671 }, { "epoch": 0.93, "learning_rate": 1.3051604144362407e-07, "loss": 0.0813, "step": 4672 }, { "epoch": 0.93, "learning_rate": 1.29724879145946e-07, "loss": 0.0849, "step": 4673 }, { "epoch": 0.93, "learning_rate": 1.289360905611975e-07, "loss": 0.0792, "step": 4674 }, { "epoch": 0.94, "learning_rate": 1.2814967607382433e-07, "loss": 0.0871, "step": 4675 }, { "epoch": 0.94, "learning_rate": 1.2736563606711384e-07, "loss": 0.0777, "step": 4676 }, { "epoch": 0.94, "learning_rate": 1.2658397092320028e-07, "loss": 0.079, "step": 4677 }, { "epoch": 0.94, "learning_rate": 1.258046810230562e-07, "loss": 0.0825, "step": 4678 }, { "epoch": 0.94, "learning_rate": 1.2502776674649776e-07, "loss": 0.092, "step": 4679 }, { "epoch": 0.94, "learning_rate": 1.2425322847218368e-07, "loss": 0.086, "step": 4680 }, { "epoch": 0.94, "learning_rate": 1.2348106657761537e-07, "loss": 0.08, "step": 4681 }, { "epoch": 0.94, "learning_rate": 1.2271128143913458e-07, "loss": 0.1004, "step": 4682 }, { "epoch": 0.94, "learning_rate": 1.2194387343192504e-07, "loss": 0.0739, "step": 4683 }, { "epoch": 0.94, "learning_rate": 1.211788429300126e-07, "loss": 0.0834, "step": 4684 }, { "epoch": 0.94, "learning_rate": 1.2041619030626283e-07, "loss": 0.1438, "step": 4685 }, { "epoch": 0.94, "learning_rate": 1.1965591593238513e-07, "loss": 0.0814, "step": 4686 }, { "epoch": 0.94, "learning_rate": 1.1889802017892638e-07, "loss": 0.0844, "step": 4687 }, { "epoch": 0.94, "learning_rate": 1.1814250341527611e-07, "loss": 0.0841, "step": 4688 }, { "epoch": 0.94, "learning_rate": 1.1738936600966366e-07, "loss": 0.086, "step": 4689 }, { "epoch": 0.94, "learning_rate": 1.166386083291604e-07, "loss": 0.0929, "step": 4690 }, { "epoch": 0.94, "learning_rate": 1.1589023073967586e-07, "loss": 0.0828, "step": 4691 }, { "epoch": 0.94, "learning_rate": 1.1514423360595939e-07, "loss": 0.0787, "step": 4692 }, { "epoch": 0.94, "learning_rate": 1.1440061729160235e-07, "loss": 0.0873, "step": 4693 }, { "epoch": 0.94, "learning_rate": 1.136593821590326e-07, "loss": 0.0828, "step": 4694 }, { "epoch": 0.94, "learning_rate": 1.1292052856952063e-07, "loss": 0.095, "step": 4695 }, { "epoch": 0.94, "learning_rate": 1.1218405688317447e-07, "loss": 0.0774, "step": 4696 }, { "epoch": 0.94, "learning_rate": 1.1144996745894033e-07, "loss": 0.0803, "step": 4697 }, { "epoch": 0.94, "learning_rate": 1.107182606546059e-07, "loss": 0.0867, "step": 4698 }, { "epoch": 0.94, "learning_rate": 1.0998893682679479e-07, "loss": 0.0846, "step": 4699 }, { "epoch": 0.94, "learning_rate": 1.0926199633097156e-07, "loss": 0.0833, "step": 4700 }, { "epoch": 0.94, "learning_rate": 1.0853743952143836e-07, "loss": 0.0822, "step": 4701 }, { "epoch": 0.94, "learning_rate": 1.0781526675133492e-07, "loss": 0.1212, "step": 4702 }, { "epoch": 0.94, "learning_rate": 1.0709547837263967e-07, "loss": 0.0803, "step": 4703 }, { "epoch": 0.94, "learning_rate": 1.0637807473616812e-07, "loss": 0.0795, "step": 4704 }, { "epoch": 0.94, "learning_rate": 1.0566305619157502e-07, "loss": 0.0873, "step": 4705 }, { "epoch": 0.94, "learning_rate": 1.0495042308735104e-07, "loss": 0.0805, "step": 4706 }, { "epoch": 0.94, "learning_rate": 1.0424017577082556e-07, "loss": 0.0848, "step": 4707 }, { "epoch": 0.94, "learning_rate": 1.0353231458816338e-07, "loss": 0.0994, "step": 4708 }, { "epoch": 0.94, "learning_rate": 1.0282683988436792e-07, "loss": 0.0877, "step": 4709 }, { "epoch": 0.94, "learning_rate": 1.0212375200327973e-07, "loss": 0.086, "step": 4710 }, { "epoch": 0.94, "learning_rate": 1.0142305128757468e-07, "loss": 0.0747, "step": 4711 }, { "epoch": 0.94, "learning_rate": 1.007247380787657e-07, "loss": 0.0861, "step": 4712 }, { "epoch": 0.94, "learning_rate": 1.0002881271720222e-07, "loss": 0.0889, "step": 4713 }, { "epoch": 0.94, "learning_rate": 9.933527554207012e-08, "loss": 0.0843, "step": 4714 }, { "epoch": 0.94, "learning_rate": 9.864412689139124e-08, "loss": 0.0857, "step": 4715 }, { "epoch": 0.94, "learning_rate": 9.795536710202169e-08, "loss": 0.0826, "step": 4716 }, { "epoch": 0.94, "learning_rate": 9.726899650965626e-08, "loss": 0.0791, "step": 4717 }, { "epoch": 0.94, "learning_rate": 9.658501544882182e-08, "loss": 0.0834, "step": 4718 }, { "epoch": 0.94, "learning_rate": 9.590342425288446e-08, "loss": 0.0834, "step": 4719 }, { "epoch": 0.94, "learning_rate": 9.522422325404234e-08, "loss": 0.0833, "step": 4720 }, { "epoch": 0.94, "learning_rate": 9.454741278333013e-08, "loss": 0.0834, "step": 4721 }, { "epoch": 0.94, "learning_rate": 9.387299317061615e-08, "loss": 0.0824, "step": 4722 }, { "epoch": 0.94, "learning_rate": 9.320096474460527e-08, "loss": 0.0766, "step": 4723 }, { "epoch": 0.94, "learning_rate": 9.253132783283548e-08, "loss": 0.0812, "step": 4724 }, { "epoch": 0.94, "learning_rate": 9.186408276168012e-08, "loss": 0.0792, "step": 4725 }, { "epoch": 0.95, "learning_rate": 9.119922985634633e-08, "loss": 0.0846, "step": 4726 }, { "epoch": 0.95, "learning_rate": 9.053676944087542e-08, "loss": 0.0826, "step": 4727 }, { "epoch": 0.95, "learning_rate": 8.987670183814134e-08, "loss": 0.0814, "step": 4728 }, { "epoch": 0.95, "learning_rate": 8.921902736985399e-08, "loss": 0.0849, "step": 4729 }, { "epoch": 0.95, "learning_rate": 8.856374635655696e-08, "loss": 0.0977, "step": 4730 }, { "epoch": 0.95, "learning_rate": 8.791085911762476e-08, "loss": 0.0768, "step": 4731 }, { "epoch": 0.95, "learning_rate": 8.726036597126619e-08, "loss": 0.0886, "step": 4732 }, { "epoch": 0.95, "learning_rate": 8.661226723452542e-08, "loss": 0.0918, "step": 4733 }, { "epoch": 0.95, "learning_rate": 8.596656322327645e-08, "loss": 0.0856, "step": 4734 }, { "epoch": 0.95, "learning_rate": 8.53232542522292e-08, "loss": 0.0815, "step": 4735 }, { "epoch": 0.95, "learning_rate": 8.468234063492287e-08, "loss": 0.0856, "step": 4736 }, { "epoch": 0.95, "learning_rate": 8.404382268373145e-08, "loss": 0.0835, "step": 4737 }, { "epoch": 0.95, "learning_rate": 8.340770070986215e-08, "loss": 0.0865, "step": 4738 }, { "epoch": 0.95, "learning_rate": 8.277397502335194e-08, "loss": 0.0857, "step": 4739 }, { "epoch": 0.95, "learning_rate": 8.214264593307097e-08, "loss": 0.0941, "step": 4740 }, { "epoch": 0.95, "learning_rate": 8.151371374672146e-08, "loss": 0.0817, "step": 4741 }, { "epoch": 0.95, "learning_rate": 8.088717877083706e-08, "loss": 0.0932, "step": 4742 }, { "epoch": 0.95, "learning_rate": 8.02630413107841e-08, "loss": 0.0876, "step": 4743 }, { "epoch": 0.95, "learning_rate": 7.964130167075923e-08, "loss": 0.0812, "step": 4744 }, { "epoch": 0.95, "learning_rate": 7.90219601537906e-08, "loss": 0.0853, "step": 4745 }, { "epoch": 0.95, "learning_rate": 7.840501706173786e-08, "loss": 0.1338, "step": 4746 }, { "epoch": 0.95, "learning_rate": 7.779047269529105e-08, "loss": 0.086, "step": 4747 }, { "epoch": 0.95, "learning_rate": 7.717832735397335e-08, "loss": 0.08, "step": 4748 }, { "epoch": 0.95, "learning_rate": 7.656858133613498e-08, "loss": 0.0789, "step": 4749 }, { "epoch": 0.95, "learning_rate": 7.59612349389599e-08, "loss": 0.0881, "step": 4750 }, { "epoch": 0.95, "learning_rate": 7.535628845846077e-08, "loss": 0.0856, "step": 4751 }, { "epoch": 0.95, "learning_rate": 7.475374218948118e-08, "loss": 0.0838, "step": 4752 }, { "epoch": 0.95, "learning_rate": 7.415359642569564e-08, "loss": 0.0826, "step": 4753 }, { "epoch": 0.95, "learning_rate": 7.355585145960743e-08, "loss": 0.087, "step": 4754 }, { "epoch": 0.95, "learning_rate": 7.296050758254958e-08, "loss": 0.0835, "step": 4755 }, { "epoch": 0.95, "learning_rate": 7.236756508468612e-08, "loss": 0.0839, "step": 4756 }, { "epoch": 0.95, "learning_rate": 7.177702425500977e-08, "loss": 0.0866, "step": 4757 }, { "epoch": 0.95, "learning_rate": 7.118888538134361e-08, "loss": 0.0863, "step": 4758 }, { "epoch": 0.95, "learning_rate": 7.060314875033836e-08, "loss": 0.0855, "step": 4759 }, { "epoch": 0.95, "learning_rate": 7.001981464747565e-08, "loss": 0.0857, "step": 4760 }, { "epoch": 0.95, "learning_rate": 6.943888335706472e-08, "loss": 0.0808, "step": 4761 }, { "epoch": 0.95, "learning_rate": 6.88603551622452e-08, "loss": 0.0859, "step": 4762 }, { "epoch": 0.95, "learning_rate": 6.828423034498488e-08, "loss": 0.0839, "step": 4763 }, { "epoch": 0.95, "learning_rate": 6.771050918607913e-08, "loss": 0.0838, "step": 4764 }, { "epoch": 0.95, "learning_rate": 6.713919196515317e-08, "loss": 0.0891, "step": 4765 }, { "epoch": 0.95, "learning_rate": 6.657027896065982e-08, "loss": 0.0778, "step": 4766 }, { "epoch": 0.95, "learning_rate": 6.60037704498806e-08, "loss": 0.0844, "step": 4767 }, { "epoch": 0.95, "learning_rate": 6.543966670892465e-08, "loss": 0.0795, "step": 4768 }, { "epoch": 0.95, "learning_rate": 6.487796801272983e-08, "loss": 0.0989, "step": 4769 }, { "epoch": 0.95, "learning_rate": 6.431867463506047e-08, "loss": 0.0917, "step": 4770 }, { "epoch": 0.95, "learning_rate": 6.376178684850965e-08, "loss": 0.082, "step": 4771 }, { "epoch": 0.95, "learning_rate": 6.3207304924498e-08, "loss": 0.0842, "step": 4772 }, { "epoch": 0.95, "learning_rate": 6.265522913327326e-08, "loss": 0.0781, "step": 4773 }, { "epoch": 0.95, "learning_rate": 6.210555974391075e-08, "loss": 0.0922, "step": 4774 }, { "epoch": 0.95, "learning_rate": 6.15582970243117e-08, "loss": 0.093, "step": 4775 }, { "epoch": 0.96, "learning_rate": 6.101344124120557e-08, "loss": 0.0819, "step": 4776 }, { "epoch": 0.96, "learning_rate": 6.047099266014877e-08, "loss": 0.0787, "step": 4777 }, { "epoch": 0.96, "learning_rate": 5.993095154552431e-08, "loss": 0.0783, "step": 4778 }, { "epoch": 0.96, "learning_rate": 5.939331816054161e-08, "loss": 0.0806, "step": 4779 }, { "epoch": 0.96, "learning_rate": 5.8858092767236084e-08, "loss": 0.0863, "step": 4780 }, { "epoch": 0.96, "learning_rate": 5.8325275626470166e-08, "loss": 0.0816, "step": 4781 }, { "epoch": 0.96, "learning_rate": 5.7794866997933355e-08, "loss": 0.0876, "step": 4782 }, { "epoch": 0.96, "learning_rate": 5.726686714013996e-08, "loss": 0.0761, "step": 4783 }, { "epoch": 0.96, "learning_rate": 5.674127631043025e-08, "loss": 0.0894, "step": 4784 }, { "epoch": 0.96, "learning_rate": 5.621809476497098e-08, "loss": 0.0888, "step": 4785 }, { "epoch": 0.96, "learning_rate": 5.569732275875428e-08, "loss": 0.0781, "step": 4786 }, { "epoch": 0.96, "learning_rate": 5.517896054559879e-08, "loss": 0.0868, "step": 4787 }, { "epoch": 0.96, "learning_rate": 5.466300837814797e-08, "loss": 0.0784, "step": 4788 }, { "epoch": 0.96, "learning_rate": 5.414946650786957e-08, "loss": 0.0894, "step": 4789 }, { "epoch": 0.96, "learning_rate": 5.363833518505834e-08, "loss": 0.096, "step": 4790 }, { "epoch": 0.96, "learning_rate": 5.312961465883393e-08, "loss": 0.0843, "step": 4791 }, { "epoch": 0.96, "learning_rate": 5.262330517713965e-08, "loss": 0.0954, "step": 4792 }, { "epoch": 0.96, "learning_rate": 5.2119406986745336e-08, "loss": 0.0839, "step": 4793 }, { "epoch": 0.96, "learning_rate": 5.161792033324398e-08, "loss": 0.0864, "step": 4794 }, { "epoch": 0.96, "learning_rate": 5.111884546105506e-08, "loss": 0.0828, "step": 4795 }, { "epoch": 0.96, "learning_rate": 5.062218261342122e-08, "loss": 0.0861, "step": 4796 }, { "epoch": 0.96, "learning_rate": 5.012793203240995e-08, "loss": 0.0797, "step": 4797 }, { "epoch": 0.96, "learning_rate": 4.9636093958913e-08, "loss": 0.0819, "step": 4798 }, { "epoch": 0.96, "learning_rate": 4.914666863264528e-08, "loss": 0.0844, "step": 4799 }, { "epoch": 0.96, "learning_rate": 4.865965629214819e-08, "loss": 0.0831, "step": 4800 }, { "epoch": 0.96, "learning_rate": 4.8175057174785766e-08, "loss": 0.0857, "step": 4801 }, { "epoch": 0.96, "learning_rate": 4.769287151674407e-08, "loss": 0.0812, "step": 4802 }, { "epoch": 0.96, "learning_rate": 4.7213099553035655e-08, "loss": 0.093, "step": 4803 }, { "epoch": 0.96, "learning_rate": 4.6735741517495715e-08, "loss": 0.0823, "step": 4804 }, { "epoch": 0.96, "learning_rate": 4.626079764278202e-08, "loss": 0.0767, "step": 4805 }, { "epoch": 0.96, "learning_rate": 4.578826816037718e-08, "loss": 0.0831, "step": 4806 }, { "epoch": 0.96, "learning_rate": 4.531815330058586e-08, "loss": 0.087, "step": 4807 }, { "epoch": 0.96, "learning_rate": 4.485045329253646e-08, "loss": 0.0828, "step": 4808 }, { "epoch": 0.96, "learning_rate": 4.438516836417994e-08, "loss": 0.0848, "step": 4809 }, { "epoch": 0.96, "learning_rate": 4.392229874229159e-08, "loss": 0.082, "step": 4810 }, { "epoch": 0.96, "learning_rate": 4.346184465246761e-08, "loss": 0.0902, "step": 4811 }, { "epoch": 0.96, "learning_rate": 4.3003806319127376e-08, "loss": 0.0865, "step": 4812 }, { "epoch": 0.96, "learning_rate": 4.2548183965513415e-08, "loss": 0.084, "step": 4813 }, { "epoch": 0.96, "learning_rate": 4.209497781369143e-08, "loss": 0.0808, "step": 4814 }, { "epoch": 0.96, "learning_rate": 4.164418808454806e-08, "loss": 0.0807, "step": 4815 }, { "epoch": 0.96, "learning_rate": 4.1195814997792014e-08, "loss": 0.081, "step": 4816 }, { "epoch": 0.96, "learning_rate": 4.0749858771956253e-08, "loss": 0.0841, "step": 4817 }, { "epoch": 0.96, "learning_rate": 4.030631962439302e-08, "loss": 0.114, "step": 4818 }, { "epoch": 0.96, "learning_rate": 3.986519777127884e-08, "loss": 0.0916, "step": 4819 }, { "epoch": 0.96, "learning_rate": 3.9426493427611177e-08, "loss": 0.0824, "step": 4820 }, { "epoch": 0.96, "learning_rate": 3.899020680720844e-08, "loss": 0.0833, "step": 4821 }, { "epoch": 0.96, "learning_rate": 3.855633812271165e-08, "loss": 0.0779, "step": 4822 }, { "epoch": 0.96, "learning_rate": 3.812488758558386e-08, "loss": 0.0875, "step": 4823 }, { "epoch": 0.96, "learning_rate": 3.769585540610799e-08, "loss": 0.0826, "step": 4824 }, { "epoch": 0.96, "learning_rate": 3.726924179339009e-08, "loss": 0.0915, "step": 4825 }, { "epoch": 0.97, "learning_rate": 3.684504695535496e-08, "loss": 0.0843, "step": 4826 }, { "epoch": 0.97, "learning_rate": 3.642327109875166e-08, "loss": 0.0919, "step": 4827 }, { "epoch": 0.97, "learning_rate": 3.600391442914741e-08, "loss": 0.0815, "step": 4828 }, { "epoch": 0.97, "learning_rate": 3.558697715093207e-08, "loss": 0.0859, "step": 4829 }, { "epoch": 0.97, "learning_rate": 3.517245946731529e-08, "loss": 0.0826, "step": 4830 }, { "epoch": 0.97, "learning_rate": 3.47603615803288e-08, "loss": 0.0795, "step": 4831 }, { "epoch": 0.97, "learning_rate": 3.435068369082306e-08, "loss": 0.0882, "step": 4832 }, { "epoch": 0.97, "learning_rate": 3.394342599847111e-08, "loss": 0.0926, "step": 4833 }, { "epoch": 0.97, "learning_rate": 3.3538588701765296e-08, "loss": 0.0828, "step": 4834 }, { "epoch": 0.97, "learning_rate": 3.313617199801777e-08, "loss": 0.0738, "step": 4835 }, { "epoch": 0.97, "learning_rate": 3.2736176083362216e-08, "loss": 0.0766, "step": 4836 }, { "epoch": 0.97, "learning_rate": 3.2338601152751e-08, "loss": 0.0856, "step": 4837 }, { "epoch": 0.97, "learning_rate": 3.194344739995803e-08, "loss": 0.0853, "step": 4838 }, { "epoch": 0.97, "learning_rate": 3.1550715017575895e-08, "loss": 0.094, "step": 4839 }, { "epoch": 0.97, "learning_rate": 3.1160404197018155e-08, "loss": 0.086, "step": 4840 }, { "epoch": 0.97, "learning_rate": 3.077251512851709e-08, "loss": 0.0768, "step": 4841 }, { "epoch": 0.97, "learning_rate": 3.038704800112535e-08, "loss": 0.0904, "step": 4842 }, { "epoch": 0.97, "learning_rate": 3.0004003002714886e-08, "loss": 0.0782, "step": 4843 }, { "epoch": 0.97, "learning_rate": 2.9623380319976912e-08, "loss": 0.0852, "step": 4844 }, { "epoch": 0.97, "learning_rate": 2.9245180138423033e-08, "loss": 0.083, "step": 4845 }, { "epoch": 0.97, "learning_rate": 2.8869402642382473e-08, "loss": 0.082, "step": 4846 }, { "epoch": 0.97, "learning_rate": 2.8496048015005385e-08, "loss": 0.084, "step": 4847 }, { "epoch": 0.97, "learning_rate": 2.8125116438260104e-08, "loss": 0.0882, "step": 4848 }, { "epoch": 0.97, "learning_rate": 2.7756608092933678e-08, "loss": 0.0819, "step": 4849 }, { "epoch": 0.97, "learning_rate": 2.7390523158633552e-08, "loss": 0.081, "step": 4850 }, { "epoch": 0.97, "learning_rate": 2.7026861813783668e-08, "loss": 0.0687, "step": 4851 }, { "epoch": 0.97, "learning_rate": 2.6665624235629463e-08, "loss": 0.0987, "step": 4852 }, { "epoch": 0.97, "learning_rate": 2.6306810600233435e-08, "loss": 0.0914, "step": 4853 }, { "epoch": 0.97, "learning_rate": 2.5950421082476805e-08, "loss": 0.0862, "step": 4854 }, { "epoch": 0.97, "learning_rate": 2.5596455856058966e-08, "loss": 0.0885, "step": 4855 }, { "epoch": 0.97, "learning_rate": 2.5244915093499134e-08, "loss": 0.0801, "step": 4856 }, { "epoch": 0.97, "learning_rate": 2.489579896613359e-08, "loss": 0.0818, "step": 4857 }, { "epoch": 0.97, "learning_rate": 2.4549107644117888e-08, "loss": 0.0843, "step": 4858 }, { "epoch": 0.97, "learning_rate": 2.4204841296424086e-08, "loss": 0.0818, "step": 4859 }, { "epoch": 0.97, "learning_rate": 2.386300009084408e-08, "loss": 0.0781, "step": 4860 }, { "epoch": 0.97, "learning_rate": 2.3523584193986816e-08, "loss": 0.0722, "step": 4861 }, { "epoch": 0.97, "learning_rate": 2.3186593771280518e-08, "loss": 0.0849, "step": 4862 }, { "epoch": 0.97, "learning_rate": 2.285202898696881e-08, "loss": 0.082, "step": 4863 }, { "epoch": 0.97, "learning_rate": 2.251989000411514e-08, "loss": 0.1066, "step": 4864 }, { "epoch": 0.97, "learning_rate": 2.219017698460002e-08, "loss": 0.0909, "step": 4865 }, { "epoch": 0.97, "learning_rate": 2.1862890089121567e-08, "loss": 0.0821, "step": 4866 }, { "epoch": 0.97, "learning_rate": 2.1538029477195522e-08, "loss": 0.0838, "step": 4867 }, { "epoch": 0.97, "learning_rate": 2.1215595307154667e-08, "loss": 0.0836, "step": 4868 }, { "epoch": 0.97, "learning_rate": 2.0895587736149414e-08, "loss": 0.0823, "step": 4869 }, { "epoch": 0.97, "learning_rate": 2.057800692014833e-08, "loss": 0.0804, "step": 4870 }, { "epoch": 0.97, "learning_rate": 2.026285301393538e-08, "loss": 0.0879, "step": 4871 }, { "epoch": 0.97, "learning_rate": 1.995012617111436e-08, "loss": 0.0789, "step": 4872 }, { "epoch": 0.97, "learning_rate": 1.963982654410279e-08, "loss": 0.0988, "step": 4873 }, { "epoch": 0.97, "learning_rate": 1.9331954284137476e-08, "loss": 0.084, "step": 4874 }, { "epoch": 0.97, "learning_rate": 1.9026509541272276e-08, "loss": 0.0931, "step": 4875 }, { "epoch": 0.98, "learning_rate": 1.8723492464376992e-08, "loss": 0.0943, "step": 4876 }, { "epoch": 0.98, "learning_rate": 1.842290320113793e-08, "loss": 0.1301, "step": 4877 }, { "epoch": 0.98, "learning_rate": 1.8124741898058462e-08, "loss": 0.0828, "step": 4878 }, { "epoch": 0.98, "learning_rate": 1.7829008700460116e-08, "loss": 0.0823, "step": 4879 }, { "epoch": 0.98, "learning_rate": 1.753570375247815e-08, "loss": 0.0876, "step": 4880 }, { "epoch": 0.98, "learning_rate": 1.7244827197067103e-08, "loss": 0.077, "step": 4881 }, { "epoch": 0.98, "learning_rate": 1.6956379175995796e-08, "loss": 0.0879, "step": 4882 }, { "epoch": 0.98, "learning_rate": 1.6670359829850657e-08, "loss": 0.0836, "step": 4883 }, { "epoch": 0.98, "learning_rate": 1.6386769298034067e-08, "loss": 0.0978, "step": 4884 }, { "epoch": 0.98, "learning_rate": 1.610560771876435e-08, "loss": 0.0871, "step": 4885 }, { "epoch": 0.98, "learning_rate": 1.582687522907633e-08, "loss": 0.0818, "step": 4886 }, { "epoch": 0.98, "learning_rate": 1.5550571964820793e-08, "loss": 0.0864, "step": 4887 }, { "epoch": 0.98, "learning_rate": 1.5276698060665007e-08, "loss": 0.0819, "step": 4888 }, { "epoch": 0.98, "learning_rate": 1.500525365009109e-08, "loss": 0.0859, "step": 4889 }, { "epoch": 0.98, "learning_rate": 1.4736238865398766e-08, "loss": 0.0821, "step": 4890 }, { "epoch": 0.98, "learning_rate": 1.4469653837701491e-08, "loss": 0.0806, "step": 4891 }, { "epoch": 0.98, "learning_rate": 1.4205498696930332e-08, "loss": 0.0814, "step": 4892 }, { "epoch": 0.98, "learning_rate": 1.3943773571831188e-08, "loss": 0.0794, "step": 4893 }, { "epoch": 0.98, "learning_rate": 1.3684478589964801e-08, "loss": 0.0779, "step": 4894 }, { "epoch": 0.98, "learning_rate": 1.3427613877709523e-08, "loss": 0.082, "step": 4895 }, { "epoch": 0.98, "learning_rate": 1.3173179560257432e-08, "loss": 0.0829, "step": 4896 }, { "epoch": 0.98, "learning_rate": 1.292117576161711e-08, "loss": 0.0851, "step": 4897 }, { "epoch": 0.98, "learning_rate": 1.2671602604612531e-08, "loss": 0.0833, "step": 4898 }, { "epoch": 0.98, "learning_rate": 1.2424460210881394e-08, "loss": 0.0854, "step": 4899 }, { "epoch": 0.98, "learning_rate": 1.2179748700879013e-08, "loss": 0.0845, "step": 4900 }, { "epoch": 0.98, "learning_rate": 1.1937468193873869e-08, "loss": 0.0817, "step": 4901 }, { "epoch": 0.98, "learning_rate": 1.1697618807951504e-08, "loss": 0.0875, "step": 4902 }, { "epoch": 0.98, "learning_rate": 1.146020066001119e-08, "loss": 0.0838, "step": 4903 }, { "epoch": 0.98, "learning_rate": 1.1225213865767026e-08, "loss": 0.0804, "step": 4904 }, { "epoch": 0.98, "learning_rate": 1.0992658539750179e-08, "loss": 0.0795, "step": 4905 }, { "epoch": 0.98, "learning_rate": 1.076253479530387e-08, "loss": 0.0877, "step": 4906 }, { "epoch": 0.98, "learning_rate": 1.0534842744588381e-08, "loss": 0.0852, "step": 4907 }, { "epoch": 0.98, "learning_rate": 1.030958249857772e-08, "loss": 0.0861, "step": 4908 }, { "epoch": 0.98, "learning_rate": 1.008675416706073e-08, "loss": 0.0856, "step": 4909 }, { "epoch": 0.98, "learning_rate": 9.866357858642206e-09, "loss": 0.0825, "step": 4910 }, { "epoch": 0.98, "learning_rate": 9.64839368074011e-09, "loss": 0.0811, "step": 4911 }, { "epoch": 0.98, "learning_rate": 9.432861739586685e-09, "loss": 0.0836, "step": 4912 }, { "epoch": 0.98, "learning_rate": 9.219762140231237e-09, "loss": 0.0869, "step": 4913 }, { "epoch": 0.98, "learning_rate": 9.009094986534572e-09, "loss": 0.0851, "step": 4914 }, { "epoch": 0.98, "learning_rate": 8.800860381173448e-09, "loss": 0.082, "step": 4915 }, { "epoch": 0.98, "learning_rate": 8.595058425640012e-09, "loss": 0.0805, "step": 4916 }, { "epoch": 0.98, "learning_rate": 8.391689220238474e-09, "loss": 0.0785, "step": 4917 }, { "epoch": 0.98, "learning_rate": 8.190752864088436e-09, "loss": 0.0841, "step": 4918 }, { "epoch": 0.98, "learning_rate": 7.992249455124889e-09, "loss": 0.1025, "step": 4919 }, { "epoch": 0.98, "learning_rate": 7.796179090094891e-09, "loss": 0.0792, "step": 4920 }, { "epoch": 0.98, "learning_rate": 7.602541864561442e-09, "loss": 0.082, "step": 4921 }, { "epoch": 0.98, "learning_rate": 7.411337872900715e-09, "loss": 0.0792, "step": 4922 }, { "epoch": 0.98, "learning_rate": 7.222567208303721e-09, "loss": 0.083, "step": 4923 }, { "epoch": 0.98, "learning_rate": 7.036229962774088e-09, "loss": 0.0904, "step": 4924 }, { "epoch": 0.98, "learning_rate": 6.852326227130835e-09, "loss": 0.0786, "step": 4925 }, { "epoch": 0.99, "learning_rate": 6.670856091006151e-09, "loss": 0.0817, "step": 4926 }, { "epoch": 0.99, "learning_rate": 6.491819642846509e-09, "loss": 0.0798, "step": 4927 }, { "epoch": 0.99, "learning_rate": 6.315216969912663e-09, "loss": 0.0826, "step": 4928 }, { "epoch": 0.99, "learning_rate": 6.141048158277429e-09, "loss": 0.0889, "step": 4929 }, { "epoch": 0.99, "learning_rate": 5.969313292830126e-09, "loss": 0.0817, "step": 4930 }, { "epoch": 0.99, "learning_rate": 5.800012457270466e-09, "loss": 0.0814, "step": 4931 }, { "epoch": 0.99, "learning_rate": 5.633145734114665e-09, "loss": 0.0815, "step": 4932 }, { "epoch": 0.99, "learning_rate": 5.468713204692111e-09, "loss": 0.0771, "step": 4933 }, { "epoch": 0.99, "learning_rate": 5.306714949143699e-09, "loss": 0.0833, "step": 4934 }, { "epoch": 0.99, "learning_rate": 5.147151046426824e-09, "loss": 0.0824, "step": 4935 }, { "epoch": 0.99, "learning_rate": 4.990021574309834e-09, "loss": 0.1026, "step": 4936 }, { "epoch": 0.99, "learning_rate": 4.835326609376468e-09, "loss": 0.0838, "step": 4937 }, { "epoch": 0.99, "learning_rate": 4.683066227023081e-09, "loss": 0.083, "step": 4938 }, { "epoch": 0.99, "learning_rate": 4.533240501459202e-09, "loss": 0.087, "step": 4939 }, { "epoch": 0.99, "learning_rate": 4.385849505708084e-09, "loss": 0.0763, "step": 4940 }, { "epoch": 0.99, "learning_rate": 4.2408933116072635e-09, "loss": 0.0803, "step": 4941 }, { "epoch": 0.99, "learning_rate": 4.098371989805227e-09, "loss": 0.0818, "step": 4942 }, { "epoch": 0.99, "learning_rate": 3.9582856097658554e-09, "loss": 0.0851, "step": 4943 }, { "epoch": 0.99, "learning_rate": 3.820634239765642e-09, "loss": 0.0835, "step": 4944 }, { "epoch": 0.99, "learning_rate": 3.685417946894254e-09, "loss": 0.0899, "step": 4945 }, { "epoch": 0.99, "learning_rate": 3.5526367970539765e-09, "loss": 0.0837, "step": 4946 }, { "epoch": 0.99, "learning_rate": 3.4222908549608193e-09, "loss": 0.0838, "step": 4947 }, { "epoch": 0.99, "learning_rate": 3.294380184143964e-09, "loss": 0.0806, "step": 4948 }, { "epoch": 0.99, "learning_rate": 3.1689048469457638e-09, "loss": 0.0896, "step": 4949 }, { "epoch": 0.99, "learning_rate": 3.0458649045211897e-09, "loss": 0.0811, "step": 4950 }, { "epoch": 0.99, "learning_rate": 2.9252604168383826e-09, "loss": 0.086, "step": 4951 }, { "epoch": 0.99, "learning_rate": 2.8070914426786555e-09, "loss": 0.091, "step": 4952 }, { "epoch": 0.99, "learning_rate": 2.6913580396359384e-09, "loss": 0.0826, "step": 4953 }, { "epoch": 0.99, "learning_rate": 2.5780602641167774e-09, "loss": 0.0945, "step": 4954 }, { "epoch": 0.99, "learning_rate": 2.4671981713420003e-09, "loss": 0.083, "step": 4955 }, { "epoch": 0.99, "learning_rate": 2.358771815344496e-09, "loss": 0.0799, "step": 4956 }, { "epoch": 0.99, "learning_rate": 2.2527812489692156e-09, "loss": 0.0812, "step": 4957 }, { "epoch": 0.99, "learning_rate": 2.149226523874837e-09, "loss": 0.0862, "step": 4958 }, { "epoch": 0.99, "learning_rate": 2.0481076905332074e-09, "loss": 0.0873, "step": 4959 }, { "epoch": 0.99, "learning_rate": 1.9494247982282386e-09, "loss": 0.0892, "step": 4960 }, { "epoch": 0.99, "learning_rate": 1.8531778950564572e-09, "loss": 0.0769, "step": 4961 }, { "epoch": 0.99, "learning_rate": 1.759367027927561e-09, "loss": 0.0831, "step": 4962 }, { "epoch": 0.99, "learning_rate": 1.6679922425638651e-09, "loss": 0.0852, "step": 4963 }, { "epoch": 0.99, "learning_rate": 1.5790535835003006e-09, "loss": 0.0854, "step": 4964 }, { "epoch": 0.99, "learning_rate": 1.4925510940844157e-09, "loss": 0.086, "step": 4965 }, { "epoch": 0.99, "learning_rate": 1.4084848164763742e-09, "loss": 0.085, "step": 4966 }, { "epoch": 0.99, "learning_rate": 1.3268547916495124e-09, "loss": 0.0857, "step": 4967 }, { "epoch": 0.99, "learning_rate": 1.247661059389227e-09, "loss": 0.0799, "step": 4968 }, { "epoch": 0.99, "learning_rate": 1.170903658293532e-09, "loss": 0.0824, "step": 4969 }, { "epoch": 0.99, "learning_rate": 1.096582625772502e-09, "loss": 0.0873, "step": 4970 }, { "epoch": 0.99, "learning_rate": 1.0246979980499395e-09, "loss": 0.0876, "step": 4971 }, { "epoch": 0.99, "learning_rate": 9.55249810161152e-10, "loss": 0.086, "step": 4972 }, { "epoch": 0.99, "learning_rate": 8.88238095955174e-10, "loss": 0.0796, "step": 4973 }, { "epoch": 0.99, "learning_rate": 8.236628880914365e-10, "loss": 0.0956, "step": 4974 }, { "epoch": 0.99, "learning_rate": 7.615242180436521e-10, "loss": 0.0968, "step": 4975 }, { "epoch": 1.0, "learning_rate": 7.018221160981498e-10, "loss": 0.0901, "step": 4976 }, { "epoch": 1.0, "learning_rate": 6.445566113516544e-10, "loss": 0.0865, "step": 4977 }, { "epoch": 1.0, "learning_rate": 5.897277317157279e-10, "loss": 0.084, "step": 4978 }, { "epoch": 1.0, "learning_rate": 5.373355039128836e-10, "loss": 0.0984, "step": 4979 }, { "epoch": 1.0, "learning_rate": 4.87379953478806e-10, "loss": 0.0796, "step": 4980 }, { "epoch": 1.0, "learning_rate": 4.398611047612411e-10, "loss": 0.0848, "step": 4981 }, { "epoch": 1.0, "learning_rate": 3.9477898091944135e-10, "loss": 0.0778, "step": 4982 }, { "epoch": 1.0, "learning_rate": 3.521336039263856e-10, "loss": 0.0819, "step": 4983 }, { "epoch": 1.0, "learning_rate": 3.1192499456766947e-10, "loss": 0.0888, "step": 4984 }, { "epoch": 1.0, "learning_rate": 2.741531724392843e-10, "loss": 0.0854, "step": 4985 }, { "epoch": 1.0, "learning_rate": 2.388181559515035e-10, "loss": 0.0799, "step": 4986 }, { "epoch": 1.0, "learning_rate": 2.0591996232610656e-10, "loss": 0.0753, "step": 4987 }, { "epoch": 1.0, "learning_rate": 1.7545860759693446e-10, "loss": 0.0839, "step": 4988 }, { "epoch": 1.0, "learning_rate": 1.4743410661044454e-10, "loss": 0.08, "step": 4989 }, { "epoch": 1.0, "learning_rate": 1.2184647302626585e-10, "loss": 0.0822, "step": 4990 }, { "epoch": 1.0, "learning_rate": 9.869571931442334e-11, "loss": 0.0989, "step": 4991 }, { "epoch": 1.0, "learning_rate": 7.798185675866876e-11, "loss": 0.0854, "step": 4992 }, { "epoch": 1.0, "learning_rate": 5.970489545537028e-11, "loss": 0.0807, "step": 4993 }, { "epoch": 1.0, "learning_rate": 4.3864844311847235e-11, "loss": 0.0879, "step": 4994 }, { "epoch": 1.0, "learning_rate": 3.0461711048035415e-11, "loss": 0.09, "step": 4995 }, { "epoch": 1.0, "learning_rate": 1.9495502197042214e-11, "loss": 0.086, "step": 4996 }, { "epoch": 1.0, "learning_rate": 1.0966223103481278e-11, "loss": 0.0834, "step": 4997 }, { "epoch": 1.0, "learning_rate": 4.873877924582715e-12, "loss": 0.0871, "step": 4998 }, { "epoch": 1.0, "learning_rate": 1.2184696296380083e-12, "loss": 0.0837, "step": 4999 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 0.0862, "step": 5000 }, { "epoch": 1.0, "step": 5000, "total_flos": 1.65603302178816e+18, "train_loss": 0.11027394030094147, "train_runtime": 20556.8372, "train_samples_per_second": 15.567, "train_steps_per_second": 0.243 } ], "max_steps": 5000, "num_train_epochs": 9223372036854775807, "total_flos": 1.65603302178816e+18, "trial_name": null, "trial_params": null }