{ "best_metric": 2.3293075561523438, "best_model_checkpoint": "/mnt/bn/qingyi-bn-lq/llama/saved_bloomfirefly/checkpoint-19000", "epoch": 2.952531261381571, "global_step": 19000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.9999999999999995e-05, "loss": 2.9733, "step": 20 }, { "epoch": 0.01, "learning_rate": 0.00011999999999999999, "loss": 2.7809, "step": 40 }, { "epoch": 0.01, "learning_rate": 0.00017999999999999998, "loss": 2.6052, "step": 60 }, { "epoch": 0.01, "learning_rate": 0.00023999999999999998, "loss": 2.4925, "step": 80 }, { "epoch": 0.02, "learning_rate": 0.0003, "loss": 2.458, "step": 100 }, { "epoch": 0.02, "learning_rate": 0.00029968758135902107, "loss": 2.4281, "step": 120 }, { "epoch": 0.02, "learning_rate": 0.00029937516271804216, "loss": 2.4178, "step": 140 }, { "epoch": 0.02, "learning_rate": 0.00029906274407706326, "loss": 2.3839, "step": 160 }, { "epoch": 0.03, "learning_rate": 0.0002987503254360843, "loss": 2.3521, "step": 180 }, { "epoch": 0.03, "learning_rate": 0.00029843790679510545, "loss": 2.338, "step": 200 }, { "epoch": 0.03, "eval_loss": 2.510117292404175, "eval_runtime": 69.1765, "eval_samples_per_second": 28.912, "eval_steps_per_second": 1.807, "step": 200 }, { "epoch": 0.03, "learning_rate": 0.0002981254881541265, "loss": 2.3401, "step": 220 }, { "epoch": 0.04, "learning_rate": 0.0002978130695131476, "loss": 2.3665, "step": 240 }, { "epoch": 0.04, "learning_rate": 0.0002975006508721687, "loss": 2.3691, "step": 260 }, { "epoch": 0.04, "learning_rate": 0.0002971882322311898, "loss": 2.3514, "step": 280 }, { "epoch": 0.05, "learning_rate": 0.0002968758135902109, "loss": 2.3203, "step": 300 }, { "epoch": 0.05, "learning_rate": 0.00029656339494923197, "loss": 2.3393, "step": 320 }, { "epoch": 0.05, "learning_rate": 0.000296250976308253, "loss": 2.3289, "step": 340 }, { "epoch": 0.06, "learning_rate": 0.00029593855766727416, "loss": 2.3407, "step": 360 }, { "epoch": 0.06, "learning_rate": 0.0002956261390262952, "loss": 2.3163, "step": 380 }, { "epoch": 0.06, "learning_rate": 0.0002953137203853163, "loss": 2.3212, "step": 400 }, { "epoch": 0.06, "eval_loss": 2.473245620727539, "eval_runtime": 69.0219, "eval_samples_per_second": 28.976, "eval_steps_per_second": 1.811, "step": 400 }, { "epoch": 0.07, "learning_rate": 0.0002950013017443374, "loss": 2.2927, "step": 420 }, { "epoch": 0.07, "learning_rate": 0.0002946888831033585, "loss": 2.2927, "step": 440 }, { "epoch": 0.07, "learning_rate": 0.0002943764644623796, "loss": 2.29, "step": 460 }, { "epoch": 0.07, "learning_rate": 0.0002940640458214007, "loss": 2.3099, "step": 480 }, { "epoch": 0.08, "learning_rate": 0.0002937516271804217, "loss": 2.3286, "step": 500 }, { "epoch": 0.08, "learning_rate": 0.0002934392085394428, "loss": 2.2928, "step": 520 }, { "epoch": 0.08, "learning_rate": 0.0002931267898984639, "loss": 2.2956, "step": 540 }, { "epoch": 0.09, "learning_rate": 0.000292814371257485, "loss": 2.2627, "step": 560 }, { "epoch": 0.09, "learning_rate": 0.0002925019526165061, "loss": 2.2897, "step": 580 }, { "epoch": 0.09, "learning_rate": 0.0002921895339755272, "loss": 2.2994, "step": 600 }, { "epoch": 0.09, "eval_loss": 2.455402374267578, "eval_runtime": 69.1315, "eval_samples_per_second": 28.93, "eval_steps_per_second": 1.808, "step": 600 }, { "epoch": 0.1, "learning_rate": 0.00029187711533454824, "loss": 2.3232, "step": 620 }, { "epoch": 0.1, "learning_rate": 0.0002915646966935694, "loss": 2.2515, "step": 640 }, { "epoch": 0.1, "learning_rate": 0.00029125227805259043, "loss": 2.2856, "step": 660 }, { "epoch": 0.11, "learning_rate": 0.0002909398594116115, "loss": 2.252, "step": 680 }, { "epoch": 0.11, "learning_rate": 0.0002906274407706326, "loss": 2.2891, "step": 700 }, { "epoch": 0.11, "learning_rate": 0.0002903150221296537, "loss": 2.2769, "step": 720 }, { "epoch": 0.11, "learning_rate": 0.0002900026034886748, "loss": 2.2763, "step": 740 }, { "epoch": 0.12, "learning_rate": 0.0002896901848476959, "loss": 2.278, "step": 760 }, { "epoch": 0.12, "learning_rate": 0.00028937776620671695, "loss": 2.3126, "step": 780 }, { "epoch": 0.12, "learning_rate": 0.0002890653475657381, "loss": 2.2698, "step": 800 }, { "epoch": 0.12, "eval_loss": 2.4434444904327393, "eval_runtime": 69.7211, "eval_samples_per_second": 28.686, "eval_steps_per_second": 1.793, "step": 800 }, { "epoch": 0.13, "learning_rate": 0.00028875292892475914, "loss": 2.2587, "step": 820 }, { "epoch": 0.13, "learning_rate": 0.00028844051028378023, "loss": 2.2954, "step": 840 }, { "epoch": 0.13, "learning_rate": 0.00028812809164280133, "loss": 2.3102, "step": 860 }, { "epoch": 0.14, "learning_rate": 0.0002878156730018224, "loss": 2.2918, "step": 880 }, { "epoch": 0.14, "learning_rate": 0.0002875032543608435, "loss": 2.2698, "step": 900 }, { "epoch": 0.14, "learning_rate": 0.0002871908357198646, "loss": 2.2514, "step": 920 }, { "epoch": 0.15, "learning_rate": 0.00028687841707888566, "loss": 2.2684, "step": 940 }, { "epoch": 0.15, "learning_rate": 0.00028656599843790675, "loss": 2.2833, "step": 960 }, { "epoch": 0.15, "learning_rate": 0.00028625357979692785, "loss": 2.2709, "step": 980 }, { "epoch": 0.16, "learning_rate": 0.00028594116115594894, "loss": 2.2596, "step": 1000 }, { "epoch": 0.16, "eval_loss": 2.436037302017212, "eval_runtime": 69.727, "eval_samples_per_second": 28.683, "eval_steps_per_second": 1.793, "step": 1000 }, { "epoch": 0.16, "learning_rate": 0.00028562874251497004, "loss": 2.2743, "step": 1020 }, { "epoch": 0.16, "learning_rate": 0.00028531632387399113, "loss": 2.23, "step": 1040 }, { "epoch": 0.16, "learning_rate": 0.0002850039052330122, "loss": 2.2723, "step": 1060 }, { "epoch": 0.17, "learning_rate": 0.0002846914865920333, "loss": 2.2585, "step": 1080 }, { "epoch": 0.17, "learning_rate": 0.00028437906795105437, "loss": 2.2463, "step": 1100 }, { "epoch": 0.17, "learning_rate": 0.00028406664931007546, "loss": 2.2264, "step": 1120 }, { "epoch": 0.18, "learning_rate": 0.00028375423066909656, "loss": 2.223, "step": 1140 }, { "epoch": 0.18, "learning_rate": 0.00028344181202811765, "loss": 2.2412, "step": 1160 }, { "epoch": 0.18, "learning_rate": 0.00028312939338713875, "loss": 2.2714, "step": 1180 }, { "epoch": 0.19, "learning_rate": 0.00028281697474615984, "loss": 2.2638, "step": 1200 }, { "epoch": 0.19, "eval_loss": 2.4272871017456055, "eval_runtime": 69.3748, "eval_samples_per_second": 28.829, "eval_steps_per_second": 1.802, "step": 1200 }, { "epoch": 0.19, "learning_rate": 0.0002825045561051809, "loss": 2.2303, "step": 1220 }, { "epoch": 0.19, "learning_rate": 0.000282192137464202, "loss": 2.2491, "step": 1240 }, { "epoch": 0.2, "learning_rate": 0.00028187971882322313, "loss": 2.2598, "step": 1260 }, { "epoch": 0.2, "learning_rate": 0.00028156730018224417, "loss": 2.2566, "step": 1280 }, { "epoch": 0.2, "learning_rate": 0.00028125488154126527, "loss": 2.2642, "step": 1300 }, { "epoch": 0.21, "learning_rate": 0.00028094246290028636, "loss": 2.2976, "step": 1320 }, { "epoch": 0.21, "learning_rate": 0.00028063004425930746, "loss": 2.2144, "step": 1340 }, { "epoch": 0.21, "learning_rate": 0.00028031762561832855, "loss": 2.2618, "step": 1360 }, { "epoch": 0.21, "learning_rate": 0.00028000520697734965, "loss": 2.2232, "step": 1380 }, { "epoch": 0.22, "learning_rate": 0.0002796927883363707, "loss": 2.2349, "step": 1400 }, { "epoch": 0.22, "eval_loss": 2.422177314758301, "eval_runtime": 69.7796, "eval_samples_per_second": 28.662, "eval_steps_per_second": 1.791, "step": 1400 }, { "epoch": 0.22, "learning_rate": 0.00027938036969539184, "loss": 2.2655, "step": 1420 }, { "epoch": 0.22, "learning_rate": 0.0002790679510544129, "loss": 2.265, "step": 1440 }, { "epoch": 0.23, "learning_rate": 0.000278755532413434, "loss": 2.2552, "step": 1460 }, { "epoch": 0.23, "learning_rate": 0.00027844311377245507, "loss": 2.252, "step": 1480 }, { "epoch": 0.23, "learning_rate": 0.00027813069513147617, "loss": 2.255, "step": 1500 }, { "epoch": 0.24, "learning_rate": 0.00027781827649049726, "loss": 2.1869, "step": 1520 }, { "epoch": 0.24, "learning_rate": 0.00027750585784951836, "loss": 2.2601, "step": 1540 }, { "epoch": 0.24, "learning_rate": 0.0002771934392085394, "loss": 2.2607, "step": 1560 }, { "epoch": 0.25, "learning_rate": 0.0002768810205675605, "loss": 2.2245, "step": 1580 }, { "epoch": 0.25, "learning_rate": 0.0002765686019265816, "loss": 2.2561, "step": 1600 }, { "epoch": 0.25, "eval_loss": 2.4173202514648438, "eval_runtime": 69.7813, "eval_samples_per_second": 28.661, "eval_steps_per_second": 1.791, "step": 1600 }, { "epoch": 0.25, "learning_rate": 0.0002762561832856027, "loss": 2.2472, "step": 1620 }, { "epoch": 0.25, "learning_rate": 0.0002759437646446238, "loss": 2.2952, "step": 1640 }, { "epoch": 0.26, "learning_rate": 0.0002756313460036449, "loss": 2.1941, "step": 1660 }, { "epoch": 0.26, "learning_rate": 0.0002753189273626659, "loss": 2.2396, "step": 1680 }, { "epoch": 0.26, "learning_rate": 0.00027500650872168707, "loss": 2.2325, "step": 1700 }, { "epoch": 0.27, "learning_rate": 0.0002746940900807081, "loss": 2.2458, "step": 1720 }, { "epoch": 0.27, "learning_rate": 0.0002743816714397292, "loss": 2.2464, "step": 1740 }, { "epoch": 0.27, "learning_rate": 0.0002740692527987503, "loss": 2.2487, "step": 1760 }, { "epoch": 0.28, "learning_rate": 0.0002737568341577714, "loss": 2.2609, "step": 1780 }, { "epoch": 0.28, "learning_rate": 0.0002734444155167925, "loss": 2.3016, "step": 1800 }, { "epoch": 0.28, "eval_loss": 2.4146716594696045, "eval_runtime": 69.513, "eval_samples_per_second": 28.772, "eval_steps_per_second": 1.798, "step": 1800 }, { "epoch": 0.28, "learning_rate": 0.0002731319968758136, "loss": 2.2415, "step": 1820 }, { "epoch": 0.29, "learning_rate": 0.0002728195782348346, "loss": 2.2512, "step": 1840 }, { "epoch": 0.29, "learning_rate": 0.0002725071595938558, "loss": 2.2186, "step": 1860 }, { "epoch": 0.29, "learning_rate": 0.0002721947409528768, "loss": 2.1982, "step": 1880 }, { "epoch": 0.3, "learning_rate": 0.0002718823223118979, "loss": 2.2358, "step": 1900 }, { "epoch": 0.3, "learning_rate": 0.000271569903670919, "loss": 2.2359, "step": 1920 }, { "epoch": 0.3, "learning_rate": 0.0002712574850299401, "loss": 2.2367, "step": 1940 }, { "epoch": 0.3, "learning_rate": 0.0002709450663889612, "loss": 2.2209, "step": 1960 }, { "epoch": 0.31, "learning_rate": 0.0002706326477479823, "loss": 2.2026, "step": 1980 }, { "epoch": 0.31, "learning_rate": 0.00027032022910700333, "loss": 2.2302, "step": 2000 }, { "epoch": 0.31, "eval_loss": 2.4096806049346924, "eval_runtime": 69.8744, "eval_samples_per_second": 28.623, "eval_steps_per_second": 1.789, "step": 2000 }, { "epoch": 0.31, "learning_rate": 0.00027000781046602443, "loss": 2.2516, "step": 2020 }, { "epoch": 0.32, "learning_rate": 0.0002696953918250455, "loss": 2.2173, "step": 2040 }, { "epoch": 0.32, "learning_rate": 0.0002693829731840666, "loss": 2.2414, "step": 2060 }, { "epoch": 0.32, "learning_rate": 0.0002690705545430877, "loss": 2.1922, "step": 2080 }, { "epoch": 0.33, "learning_rate": 0.0002687581359021088, "loss": 2.2396, "step": 2100 }, { "epoch": 0.33, "learning_rate": 0.00026844571726112985, "loss": 2.2602, "step": 2120 }, { "epoch": 0.33, "learning_rate": 0.000268133298620151, "loss": 2.2263, "step": 2140 }, { "epoch": 0.34, "learning_rate": 0.00026782087997917204, "loss": 2.2082, "step": 2160 }, { "epoch": 0.34, "learning_rate": 0.00026750846133819314, "loss": 2.2144, "step": 2180 }, { "epoch": 0.34, "learning_rate": 0.00026719604269721423, "loss": 2.2066, "step": 2200 }, { "epoch": 0.34, "eval_loss": 2.4065375328063965, "eval_runtime": 69.933, "eval_samples_per_second": 28.599, "eval_steps_per_second": 1.787, "step": 2200 }, { "epoch": 0.34, "learning_rate": 0.00026688362405623533, "loss": 2.2494, "step": 2220 }, { "epoch": 0.35, "learning_rate": 0.0002665712054152564, "loss": 2.2471, "step": 2240 }, { "epoch": 0.35, "learning_rate": 0.0002662587867742775, "loss": 2.2512, "step": 2260 }, { "epoch": 0.35, "learning_rate": 0.00026594636813329856, "loss": 2.2249, "step": 2280 }, { "epoch": 0.36, "learning_rate": 0.0002656339494923197, "loss": 2.2526, "step": 2300 }, { "epoch": 0.36, "learning_rate": 0.00026532153085134075, "loss": 2.2375, "step": 2320 }, { "epoch": 0.36, "learning_rate": 0.00026500911221036185, "loss": 2.169, "step": 2340 }, { "epoch": 0.37, "learning_rate": 0.00026469669356938294, "loss": 2.2206, "step": 2360 }, { "epoch": 0.37, "learning_rate": 0.00026438427492840404, "loss": 2.2284, "step": 2380 }, { "epoch": 0.37, "learning_rate": 0.00026407185628742513, "loss": 2.2116, "step": 2400 }, { "epoch": 0.37, "eval_loss": 2.402400255203247, "eval_runtime": 70.6508, "eval_samples_per_second": 28.308, "eval_steps_per_second": 1.769, "step": 2400 }, { "epoch": 0.38, "learning_rate": 0.00026375943764644623, "loss": 2.2228, "step": 2420 }, { "epoch": 0.38, "learning_rate": 0.0002634470190054673, "loss": 2.2264, "step": 2440 }, { "epoch": 0.38, "learning_rate": 0.00026313460036448837, "loss": 2.2212, "step": 2460 }, { "epoch": 0.39, "learning_rate": 0.0002628221817235095, "loss": 2.2164, "step": 2480 }, { "epoch": 0.39, "learning_rate": 0.00026250976308253056, "loss": 2.2523, "step": 2500 }, { "epoch": 0.39, "learning_rate": 0.00026219734444155165, "loss": 2.2272, "step": 2520 }, { "epoch": 0.39, "learning_rate": 0.00026188492580057275, "loss": 2.2381, "step": 2540 }, { "epoch": 0.4, "learning_rate": 0.00026157250715959384, "loss": 2.2149, "step": 2560 }, { "epoch": 0.4, "learning_rate": 0.00026126008851861494, "loss": 2.228, "step": 2580 }, { "epoch": 0.4, "learning_rate": 0.00026094766987763603, "loss": 2.2145, "step": 2600 }, { "epoch": 0.4, "eval_loss": 2.399576425552368, "eval_runtime": 69.9194, "eval_samples_per_second": 28.604, "eval_steps_per_second": 1.788, "step": 2600 }, { "epoch": 0.41, "learning_rate": 0.0002606352512366571, "loss": 2.18, "step": 2620 }, { "epoch": 0.41, "learning_rate": 0.0002603228325956782, "loss": 2.1965, "step": 2640 }, { "epoch": 0.41, "learning_rate": 0.00026001041395469927, "loss": 2.178, "step": 2660 }, { "epoch": 0.42, "learning_rate": 0.00025969799531372036, "loss": 2.194, "step": 2680 }, { "epoch": 0.42, "learning_rate": 0.00025938557667274146, "loss": 2.2024, "step": 2700 }, { "epoch": 0.42, "learning_rate": 0.00025907315803176255, "loss": 2.2427, "step": 2720 }, { "epoch": 0.43, "learning_rate": 0.00025876073939078365, "loss": 2.2246, "step": 2740 }, { "epoch": 0.43, "learning_rate": 0.00025844832074980474, "loss": 2.2169, "step": 2760 }, { "epoch": 0.43, "learning_rate": 0.0002581359021088258, "loss": 2.2154, "step": 2780 }, { "epoch": 0.44, "learning_rate": 0.0002578234834678469, "loss": 2.1732, "step": 2800 }, { "epoch": 0.44, "eval_loss": 2.3982491493225098, "eval_runtime": 70.2191, "eval_samples_per_second": 28.482, "eval_steps_per_second": 1.78, "step": 2800 }, { "epoch": 0.44, "learning_rate": 0.000257511064826868, "loss": 2.1951, "step": 2820 }, { "epoch": 0.44, "learning_rate": 0.00025719864618588907, "loss": 2.2139, "step": 2840 }, { "epoch": 0.44, "learning_rate": 0.00025688622754491017, "loss": 2.197, "step": 2860 }, { "epoch": 0.45, "learning_rate": 0.00025657380890393126, "loss": 2.2317, "step": 2880 }, { "epoch": 0.45, "learning_rate": 0.0002562613902629523, "loss": 2.2107, "step": 2900 }, { "epoch": 0.45, "learning_rate": 0.00025594897162197345, "loss": 2.2087, "step": 2920 }, { "epoch": 0.46, "learning_rate": 0.0002556365529809945, "loss": 2.2124, "step": 2940 }, { "epoch": 0.46, "learning_rate": 0.0002553241343400156, "loss": 2.1762, "step": 2960 }, { "epoch": 0.46, "learning_rate": 0.0002550117156990367, "loss": 2.2488, "step": 2980 }, { "epoch": 0.47, "learning_rate": 0.0002546992970580578, "loss": 2.2316, "step": 3000 }, { "epoch": 0.47, "eval_loss": 2.394296646118164, "eval_runtime": 70.2494, "eval_samples_per_second": 28.47, "eval_steps_per_second": 1.779, "step": 3000 }, { "epoch": 0.47, "learning_rate": 0.0002543868784170789, "loss": 2.2386, "step": 3020 }, { "epoch": 0.47, "learning_rate": 0.00025407445977609997, "loss": 2.224, "step": 3040 }, { "epoch": 0.48, "learning_rate": 0.000253762041135121, "loss": 2.2479, "step": 3060 }, { "epoch": 0.48, "learning_rate": 0.0002534496224941421, "loss": 2.2396, "step": 3080 }, { "epoch": 0.48, "learning_rate": 0.0002531372038531632, "loss": 2.2405, "step": 3100 }, { "epoch": 0.48, "learning_rate": 0.0002528247852121843, "loss": 2.1969, "step": 3120 }, { "epoch": 0.49, "learning_rate": 0.0002525123665712054, "loss": 2.2095, "step": 3140 }, { "epoch": 0.49, "learning_rate": 0.0002521999479302265, "loss": 2.2202, "step": 3160 }, { "epoch": 0.49, "learning_rate": 0.0002518875292892476, "loss": 2.2088, "step": 3180 }, { "epoch": 0.5, "learning_rate": 0.0002515751106482687, "loss": 2.2075, "step": 3200 }, { "epoch": 0.5, "eval_loss": 2.3918581008911133, "eval_runtime": 69.2896, "eval_samples_per_second": 28.864, "eval_steps_per_second": 1.804, "step": 3200 }, { "epoch": 0.5, "learning_rate": 0.0002512626920072897, "loss": 2.1993, "step": 3220 }, { "epoch": 0.5, "learning_rate": 0.0002509502733663108, "loss": 2.2406, "step": 3240 }, { "epoch": 0.51, "learning_rate": 0.0002506378547253319, "loss": 2.2352, "step": 3260 }, { "epoch": 0.51, "learning_rate": 0.000250325436084353, "loss": 2.236, "step": 3280 }, { "epoch": 0.51, "learning_rate": 0.0002500130174433741, "loss": 2.1805, "step": 3300 }, { "epoch": 0.52, "learning_rate": 0.0002497005988023952, "loss": 2.2249, "step": 3320 }, { "epoch": 0.52, "learning_rate": 0.00024938818016141624, "loss": 2.2153, "step": 3340 }, { "epoch": 0.52, "learning_rate": 0.0002490757615204374, "loss": 2.2115, "step": 3360 }, { "epoch": 0.53, "learning_rate": 0.00024876334287945843, "loss": 2.2284, "step": 3380 }, { "epoch": 0.53, "learning_rate": 0.0002484509242384795, "loss": 2.184, "step": 3400 }, { "epoch": 0.53, "eval_loss": 2.3887791633605957, "eval_runtime": 69.2387, "eval_samples_per_second": 28.886, "eval_steps_per_second": 1.805, "step": 3400 }, { "epoch": 0.53, "learning_rate": 0.0002481385055975006, "loss": 2.2172, "step": 3420 }, { "epoch": 0.53, "learning_rate": 0.0002478260869565217, "loss": 2.2347, "step": 3440 }, { "epoch": 0.54, "learning_rate": 0.0002475136683155428, "loss": 2.2213, "step": 3460 }, { "epoch": 0.54, "learning_rate": 0.0002472012496745639, "loss": 2.2215, "step": 3480 }, { "epoch": 0.54, "learning_rate": 0.00024688883103358495, "loss": 2.2058, "step": 3500 }, { "epoch": 0.55, "learning_rate": 0.00024657641239260604, "loss": 2.1918, "step": 3520 }, { "epoch": 0.55, "learning_rate": 0.0002462639937516272, "loss": 2.2021, "step": 3540 }, { "epoch": 0.55, "learning_rate": 0.00024595157511064824, "loss": 2.1832, "step": 3560 }, { "epoch": 0.56, "learning_rate": 0.00024563915646966933, "loss": 2.2199, "step": 3580 }, { "epoch": 0.56, "learning_rate": 0.0002453267378286904, "loss": 2.1997, "step": 3600 }, { "epoch": 0.56, "eval_loss": 2.386540412902832, "eval_runtime": 69.2123, "eval_samples_per_second": 28.897, "eval_steps_per_second": 1.806, "step": 3600 }, { "epoch": 0.56, "learning_rate": 0.0002450143191877115, "loss": 2.2009, "step": 3620 }, { "epoch": 0.57, "learning_rate": 0.0002447019005467326, "loss": 2.2045, "step": 3640 }, { "epoch": 0.57, "learning_rate": 0.0002443894819057537, "loss": 2.2231, "step": 3660 }, { "epoch": 0.57, "learning_rate": 0.00024407706326477478, "loss": 2.211, "step": 3680 }, { "epoch": 0.57, "learning_rate": 0.00024376464462379588, "loss": 2.1904, "step": 3700 }, { "epoch": 0.58, "learning_rate": 0.00024345222598281694, "loss": 2.1492, "step": 3720 }, { "epoch": 0.58, "learning_rate": 0.00024313980734183807, "loss": 2.2368, "step": 3740 }, { "epoch": 0.58, "learning_rate": 0.00024282738870085914, "loss": 2.1753, "step": 3760 }, { "epoch": 0.59, "learning_rate": 0.00024251497005988023, "loss": 2.179, "step": 3780 }, { "epoch": 0.59, "learning_rate": 0.0002422025514189013, "loss": 2.1811, "step": 3800 }, { "epoch": 0.59, "eval_loss": 2.3864212036132812, "eval_runtime": 69.2951, "eval_samples_per_second": 28.862, "eval_steps_per_second": 1.804, "step": 3800 }, { "epoch": 0.59, "learning_rate": 0.0002418901327779224, "loss": 2.1496, "step": 3820 }, { "epoch": 0.6, "learning_rate": 0.0002415777141369435, "loss": 2.2071, "step": 3840 }, { "epoch": 0.6, "learning_rate": 0.00024126529549596459, "loss": 2.189, "step": 3860 }, { "epoch": 0.6, "learning_rate": 0.00024095287685498565, "loss": 2.1838, "step": 3880 }, { "epoch": 0.61, "learning_rate": 0.00024064045821400675, "loss": 2.2292, "step": 3900 }, { "epoch": 0.61, "learning_rate": 0.00024032803957302782, "loss": 2.1931, "step": 3920 }, { "epoch": 0.61, "learning_rate": 0.00024001562093204894, "loss": 2.2293, "step": 3940 }, { "epoch": 0.62, "learning_rate": 0.00023970320229107, "loss": 2.2112, "step": 3960 }, { "epoch": 0.62, "learning_rate": 0.0002393907836500911, "loss": 2.1479, "step": 3980 }, { "epoch": 0.62, "learning_rate": 0.00023907836500911217, "loss": 2.1661, "step": 4000 }, { "epoch": 0.62, "eval_loss": 2.383505344390869, "eval_runtime": 69.2876, "eval_samples_per_second": 28.865, "eval_steps_per_second": 1.804, "step": 4000 }, { "epoch": 0.62, "learning_rate": 0.0002387659463681333, "loss": 2.1783, "step": 4020 }, { "epoch": 0.63, "learning_rate": 0.00023845352772715436, "loss": 2.1975, "step": 4040 }, { "epoch": 0.63, "learning_rate": 0.00023814110908617546, "loss": 2.2268, "step": 4060 }, { "epoch": 0.63, "learning_rate": 0.00023782869044519653, "loss": 2.1815, "step": 4080 }, { "epoch": 0.64, "learning_rate": 0.00023751627180421765, "loss": 2.2305, "step": 4100 }, { "epoch": 0.64, "learning_rate": 0.00023720385316323872, "loss": 2.2087, "step": 4120 }, { "epoch": 0.64, "learning_rate": 0.0002368914345222598, "loss": 2.2204, "step": 4140 }, { "epoch": 0.65, "learning_rate": 0.00023657901588128088, "loss": 2.2138, "step": 4160 }, { "epoch": 0.65, "learning_rate": 0.000236266597240302, "loss": 2.2071, "step": 4180 }, { "epoch": 0.65, "learning_rate": 0.00023595417859932307, "loss": 2.1728, "step": 4200 }, { "epoch": 0.65, "eval_loss": 2.3820013999938965, "eval_runtime": 69.3049, "eval_samples_per_second": 28.858, "eval_steps_per_second": 1.804, "step": 4200 }, { "epoch": 0.66, "learning_rate": 0.00023564175995834417, "loss": 2.182, "step": 4220 }, { "epoch": 0.66, "learning_rate": 0.00023532934131736524, "loss": 2.1948, "step": 4240 }, { "epoch": 0.66, "learning_rate": 0.00023501692267638633, "loss": 2.2178, "step": 4260 }, { "epoch": 0.67, "learning_rate": 0.00023470450403540743, "loss": 2.1979, "step": 4280 }, { "epoch": 0.67, "learning_rate": 0.00023439208539442852, "loss": 2.222, "step": 4300 }, { "epoch": 0.67, "learning_rate": 0.0002340796667534496, "loss": 2.221, "step": 4320 }, { "epoch": 0.67, "learning_rate": 0.00023376724811247069, "loss": 2.208, "step": 4340 }, { "epoch": 0.68, "learning_rate": 0.00023345482947149175, "loss": 2.1502, "step": 4360 }, { "epoch": 0.68, "learning_rate": 0.00023314241083051288, "loss": 2.1628, "step": 4380 }, { "epoch": 0.68, "learning_rate": 0.00023282999218953395, "loss": 2.1933, "step": 4400 }, { "epoch": 0.68, "eval_loss": 2.380128860473633, "eval_runtime": 69.2864, "eval_samples_per_second": 28.866, "eval_steps_per_second": 1.804, "step": 4400 }, { "epoch": 0.69, "learning_rate": 0.00023251757354855504, "loss": 2.2204, "step": 4420 }, { "epoch": 0.69, "learning_rate": 0.0002322051549075761, "loss": 2.218, "step": 4440 }, { "epoch": 0.69, "learning_rate": 0.00023189273626659723, "loss": 2.199, "step": 4460 }, { "epoch": 0.7, "learning_rate": 0.0002315803176256183, "loss": 2.1826, "step": 4480 }, { "epoch": 0.7, "learning_rate": 0.0002312678989846394, "loss": 2.174, "step": 4500 }, { "epoch": 0.7, "learning_rate": 0.00023095548034366046, "loss": 2.2011, "step": 4520 }, { "epoch": 0.71, "learning_rate": 0.00023064306170268159, "loss": 2.1951, "step": 4540 }, { "epoch": 0.71, "learning_rate": 0.00023033064306170265, "loss": 2.2189, "step": 4560 }, { "epoch": 0.71, "learning_rate": 0.00023001822442072375, "loss": 2.1891, "step": 4580 }, { "epoch": 0.71, "learning_rate": 0.00022970580577974482, "loss": 2.1873, "step": 4600 }, { "epoch": 0.71, "eval_loss": 2.379713296890259, "eval_runtime": 69.3005, "eval_samples_per_second": 28.86, "eval_steps_per_second": 1.804, "step": 4600 }, { "epoch": 0.72, "learning_rate": 0.00022939338713876591, "loss": 2.2191, "step": 4620 }, { "epoch": 0.72, "learning_rate": 0.000229080968497787, "loss": 2.1966, "step": 4640 }, { "epoch": 0.72, "learning_rate": 0.0002287685498568081, "loss": 2.2062, "step": 4660 }, { "epoch": 0.73, "learning_rate": 0.00022845613121582917, "loss": 2.1888, "step": 4680 }, { "epoch": 0.73, "learning_rate": 0.00022814371257485027, "loss": 2.1938, "step": 4700 }, { "epoch": 0.73, "learning_rate": 0.0002278312939338714, "loss": 2.206, "step": 4720 }, { "epoch": 0.74, "learning_rate": 0.00022751887529289246, "loss": 2.1584, "step": 4740 }, { "epoch": 0.74, "learning_rate": 0.00022720645665191355, "loss": 2.1933, "step": 4760 }, { "epoch": 0.74, "learning_rate": 0.00022689403801093462, "loss": 2.2087, "step": 4780 }, { "epoch": 0.75, "learning_rate": 0.00022658161936995575, "loss": 2.2239, "step": 4800 }, { "epoch": 0.75, "eval_loss": 2.3774757385253906, "eval_runtime": 69.3137, "eval_samples_per_second": 28.854, "eval_steps_per_second": 1.803, "step": 4800 }, { "epoch": 0.75, "learning_rate": 0.00022626920072897681, "loss": 2.2136, "step": 4820 }, { "epoch": 0.75, "learning_rate": 0.0002259567820879979, "loss": 2.2046, "step": 4840 }, { "epoch": 0.76, "learning_rate": 0.00022564436344701898, "loss": 2.2031, "step": 4860 }, { "epoch": 0.76, "learning_rate": 0.0002253319448060401, "loss": 2.171, "step": 4880 }, { "epoch": 0.76, "learning_rate": 0.00022501952616506117, "loss": 2.2101, "step": 4900 }, { "epoch": 0.76, "learning_rate": 0.00022470710752408226, "loss": 2.1306, "step": 4920 }, { "epoch": 0.77, "learning_rate": 0.00022439468888310333, "loss": 2.1754, "step": 4940 }, { "epoch": 0.77, "learning_rate": 0.00022408227024212443, "loss": 2.1972, "step": 4960 }, { "epoch": 0.77, "learning_rate": 0.00022376985160114552, "loss": 2.2175, "step": 4980 }, { "epoch": 0.78, "learning_rate": 0.00022345743296016662, "loss": 2.139, "step": 5000 }, { "epoch": 0.78, "eval_loss": 2.3760337829589844, "eval_runtime": 69.3092, "eval_samples_per_second": 28.856, "eval_steps_per_second": 1.804, "step": 5000 }, { "epoch": 0.78, "learning_rate": 0.0002231450143191877, "loss": 2.1912, "step": 5020 }, { "epoch": 0.78, "learning_rate": 0.00022283259567820878, "loss": 2.2036, "step": 5040 }, { "epoch": 0.79, "learning_rate": 0.00022252017703722985, "loss": 2.1852, "step": 5060 }, { "epoch": 0.79, "learning_rate": 0.00022220775839625097, "loss": 2.1672, "step": 5080 }, { "epoch": 0.79, "learning_rate": 0.00022189533975527204, "loss": 2.1828, "step": 5100 }, { "epoch": 0.8, "learning_rate": 0.00022158292111429314, "loss": 2.1875, "step": 5120 }, { "epoch": 0.8, "learning_rate": 0.0002212705024733142, "loss": 2.1997, "step": 5140 }, { "epoch": 0.8, "learning_rate": 0.00022095808383233533, "loss": 2.2162, "step": 5160 }, { "epoch": 0.8, "learning_rate": 0.0002206456651913564, "loss": 2.2213, "step": 5180 }, { "epoch": 0.81, "learning_rate": 0.0002203332465503775, "loss": 2.1972, "step": 5200 }, { "epoch": 0.81, "eval_loss": 2.374734878540039, "eval_runtime": 69.2582, "eval_samples_per_second": 28.877, "eval_steps_per_second": 1.805, "step": 5200 }, { "epoch": 0.81, "learning_rate": 0.00022002082790939856, "loss": 2.175, "step": 5220 }, { "epoch": 0.81, "learning_rate": 0.00021970840926841968, "loss": 2.1951, "step": 5240 }, { "epoch": 0.82, "learning_rate": 0.00021939599062744075, "loss": 2.1493, "step": 5260 }, { "epoch": 0.82, "learning_rate": 0.00021908357198646185, "loss": 2.1611, "step": 5280 }, { "epoch": 0.82, "learning_rate": 0.00021877115334548291, "loss": 2.1621, "step": 5300 }, { "epoch": 0.83, "learning_rate": 0.00021845873470450404, "loss": 2.1875, "step": 5320 }, { "epoch": 0.83, "learning_rate": 0.0002181463160635251, "loss": 2.1733, "step": 5340 }, { "epoch": 0.83, "learning_rate": 0.0002178338974225462, "loss": 2.242, "step": 5360 }, { "epoch": 0.84, "learning_rate": 0.00021752147878156727, "loss": 2.2154, "step": 5380 }, { "epoch": 0.84, "learning_rate": 0.00021720906014058836, "loss": 2.1969, "step": 5400 }, { "epoch": 0.84, "eval_loss": 2.372680902481079, "eval_runtime": 69.283, "eval_samples_per_second": 28.867, "eval_steps_per_second": 1.804, "step": 5400 }, { "epoch": 0.84, "learning_rate": 0.00021689664149960946, "loss": 2.1245, "step": 5420 }, { "epoch": 0.85, "learning_rate": 0.00021658422285863056, "loss": 2.2049, "step": 5440 }, { "epoch": 0.85, "learning_rate": 0.00021627180421765162, "loss": 2.1716, "step": 5460 }, { "epoch": 0.85, "learning_rate": 0.00021595938557667272, "loss": 2.1891, "step": 5480 }, { "epoch": 0.85, "learning_rate": 0.0002156469669356938, "loss": 2.1963, "step": 5500 }, { "epoch": 0.86, "learning_rate": 0.0002153345482947149, "loss": 2.1946, "step": 5520 }, { "epoch": 0.86, "learning_rate": 0.00021502212965373598, "loss": 2.1982, "step": 5540 }, { "epoch": 0.86, "learning_rate": 0.00021470971101275707, "loss": 2.1759, "step": 5560 }, { "epoch": 0.87, "learning_rate": 0.00021439729237177814, "loss": 2.1661, "step": 5580 }, { "epoch": 0.87, "learning_rate": 0.00021408487373079926, "loss": 2.2051, "step": 5600 }, { "epoch": 0.87, "eval_loss": 2.3719565868377686, "eval_runtime": 69.321, "eval_samples_per_second": 28.851, "eval_steps_per_second": 1.803, "step": 5600 }, { "epoch": 0.87, "learning_rate": 0.00021377245508982033, "loss": 2.1605, "step": 5620 }, { "epoch": 0.88, "learning_rate": 0.00021346003644884143, "loss": 2.1375, "step": 5640 }, { "epoch": 0.88, "learning_rate": 0.0002131476178078625, "loss": 2.1293, "step": 5660 }, { "epoch": 0.88, "learning_rate": 0.00021283519916688362, "loss": 2.2189, "step": 5680 }, { "epoch": 0.89, "learning_rate": 0.0002125227805259047, "loss": 2.1784, "step": 5700 }, { "epoch": 0.89, "learning_rate": 0.00021221036188492578, "loss": 2.1764, "step": 5720 }, { "epoch": 0.89, "learning_rate": 0.00021189794324394685, "loss": 2.1569, "step": 5740 }, { "epoch": 0.9, "learning_rate": 0.00021158552460296795, "loss": 2.1704, "step": 5760 }, { "epoch": 0.9, "learning_rate": 0.00021127310596198904, "loss": 2.1614, "step": 5780 }, { "epoch": 0.9, "learning_rate": 0.00021096068732101014, "loss": 2.2078, "step": 5800 }, { "epoch": 0.9, "eval_loss": 2.370939016342163, "eval_runtime": 69.2728, "eval_samples_per_second": 28.871, "eval_steps_per_second": 1.804, "step": 5800 }, { "epoch": 0.9, "learning_rate": 0.0002106482686800312, "loss": 2.198, "step": 5820 }, { "epoch": 0.91, "learning_rate": 0.0002103358500390523, "loss": 2.1735, "step": 5840 }, { "epoch": 0.91, "learning_rate": 0.00021002343139807342, "loss": 2.1936, "step": 5860 }, { "epoch": 0.91, "learning_rate": 0.0002097110127570945, "loss": 2.1559, "step": 5880 }, { "epoch": 0.92, "learning_rate": 0.0002093985941161156, "loss": 2.1856, "step": 5900 }, { "epoch": 0.92, "learning_rate": 0.00020908617547513666, "loss": 2.194, "step": 5920 }, { "epoch": 0.92, "learning_rate": 0.00020877375683415778, "loss": 2.1983, "step": 5940 }, { "epoch": 0.93, "learning_rate": 0.00020846133819317885, "loss": 2.1788, "step": 5960 }, { "epoch": 0.93, "learning_rate": 0.00020814891955219994, "loss": 2.2126, "step": 5980 }, { "epoch": 0.93, "learning_rate": 0.000207836500911221, "loss": 2.1454, "step": 6000 }, { "epoch": 0.93, "eval_loss": 2.369137763977051, "eval_runtime": 69.3036, "eval_samples_per_second": 28.859, "eval_steps_per_second": 1.804, "step": 6000 }, { "epoch": 0.94, "learning_rate": 0.00020752408227024213, "loss": 2.1603, "step": 6020 }, { "epoch": 0.94, "learning_rate": 0.0002072116636292632, "loss": 2.2075, "step": 6040 }, { "epoch": 0.94, "learning_rate": 0.0002068992449882843, "loss": 2.1817, "step": 6060 }, { "epoch": 0.94, "learning_rate": 0.00020658682634730537, "loss": 2.1917, "step": 6080 }, { "epoch": 0.95, "learning_rate": 0.00020627440770632646, "loss": 2.1727, "step": 6100 }, { "epoch": 0.95, "learning_rate": 0.00020596198906534756, "loss": 2.1985, "step": 6120 }, { "epoch": 0.95, "learning_rate": 0.00020564957042436865, "loss": 2.1888, "step": 6140 }, { "epoch": 0.96, "learning_rate": 0.00020533715178338972, "loss": 2.1425, "step": 6160 }, { "epoch": 0.96, "learning_rate": 0.00020502473314241082, "loss": 2.1659, "step": 6180 }, { "epoch": 0.96, "learning_rate": 0.00020471231450143188, "loss": 2.1768, "step": 6200 }, { "epoch": 0.96, "eval_loss": 2.368589162826538, "eval_runtime": 69.4033, "eval_samples_per_second": 28.817, "eval_steps_per_second": 1.801, "step": 6200 }, { "epoch": 0.97, "learning_rate": 0.000204399895860453, "loss": 2.1744, "step": 6220 }, { "epoch": 0.97, "learning_rate": 0.00020408747721947407, "loss": 2.1484, "step": 6240 }, { "epoch": 0.97, "learning_rate": 0.00020377505857849517, "loss": 2.2154, "step": 6260 }, { "epoch": 0.98, "learning_rate": 0.00020346263993751624, "loss": 2.1358, "step": 6280 }, { "epoch": 0.98, "learning_rate": 0.00020315022129653736, "loss": 2.1809, "step": 6300 }, { "epoch": 0.98, "learning_rate": 0.00020283780265555843, "loss": 2.1813, "step": 6320 }, { "epoch": 0.99, "learning_rate": 0.00020252538401457952, "loss": 2.1903, "step": 6340 }, { "epoch": 0.99, "learning_rate": 0.0002022129653736006, "loss": 2.1971, "step": 6360 }, { "epoch": 0.99, "learning_rate": 0.00020190054673262172, "loss": 2.2041, "step": 6380 }, { "epoch": 0.99, "learning_rate": 0.00020158812809164278, "loss": 2.2169, "step": 6400 }, { "epoch": 0.99, "eval_loss": 2.3672330379486084, "eval_runtime": 69.3516, "eval_samples_per_second": 28.839, "eval_steps_per_second": 1.802, "step": 6400 }, { "epoch": 1.0, "learning_rate": 0.00020127570945066388, "loss": 2.2101, "step": 6420 }, { "epoch": 1.0, "learning_rate": 0.00020096329080968495, "loss": 2.1739, "step": 6440 }, { "epoch": 1.0, "learning_rate": 0.00020065087216870604, "loss": 2.1764, "step": 6460 }, { "epoch": 1.01, "learning_rate": 0.00020033845352772714, "loss": 2.1718, "step": 6480 }, { "epoch": 1.01, "learning_rate": 0.00020002603488674823, "loss": 2.1688, "step": 6500 }, { "epoch": 1.01, "learning_rate": 0.0001997136162457693, "loss": 2.1322, "step": 6520 }, { "epoch": 1.02, "learning_rate": 0.0001994011976047904, "loss": 2.1593, "step": 6540 }, { "epoch": 1.02, "learning_rate": 0.0001990887789638115, "loss": 2.179, "step": 6560 }, { "epoch": 1.02, "learning_rate": 0.0001987763603228326, "loss": 2.139, "step": 6580 }, { "epoch": 1.03, "learning_rate": 0.00019846394168185366, "loss": 2.1594, "step": 6600 }, { "epoch": 1.03, "eval_loss": 2.367051839828491, "eval_runtime": 69.3473, "eval_samples_per_second": 28.84, "eval_steps_per_second": 1.803, "step": 6600 }, { "epoch": 1.03, "learning_rate": 0.00019815152304087475, "loss": 2.2033, "step": 6620 }, { "epoch": 1.03, "learning_rate": 0.00019783910439989582, "loss": 2.183, "step": 6640 }, { "epoch": 1.03, "learning_rate": 0.00019752668575891694, "loss": 2.1517, "step": 6660 }, { "epoch": 1.04, "learning_rate": 0.000197214267117938, "loss": 2.183, "step": 6680 }, { "epoch": 1.04, "learning_rate": 0.0001969018484769591, "loss": 2.197, "step": 6700 }, { "epoch": 1.04, "learning_rate": 0.00019658942983598017, "loss": 2.1778, "step": 6720 }, { "epoch": 1.05, "learning_rate": 0.0001962770111950013, "loss": 2.1745, "step": 6740 }, { "epoch": 1.05, "learning_rate": 0.00019596459255402237, "loss": 2.1585, "step": 6760 }, { "epoch": 1.05, "learning_rate": 0.00019565217391304346, "loss": 2.1708, "step": 6780 }, { "epoch": 1.06, "learning_rate": 0.00019533975527206453, "loss": 2.1649, "step": 6800 }, { "epoch": 1.06, "eval_loss": 2.363710880279541, "eval_runtime": 69.2642, "eval_samples_per_second": 28.875, "eval_steps_per_second": 1.805, "step": 6800 }, { "epoch": 1.06, "learning_rate": 0.00019502733663108565, "loss": 2.1391, "step": 6820 }, { "epoch": 1.06, "learning_rate": 0.00019471491799010672, "loss": 2.1939, "step": 6840 }, { "epoch": 1.07, "learning_rate": 0.00019440249934912782, "loss": 2.1558, "step": 6860 }, { "epoch": 1.07, "learning_rate": 0.00019409008070814888, "loss": 2.173, "step": 6880 }, { "epoch": 1.07, "learning_rate": 0.00019377766206716998, "loss": 2.1821, "step": 6900 }, { "epoch": 1.08, "learning_rate": 0.00019346524342619107, "loss": 2.16, "step": 6920 }, { "epoch": 1.08, "learning_rate": 0.00019315282478521217, "loss": 2.1808, "step": 6940 }, { "epoch": 1.08, "learning_rate": 0.00019284040614423324, "loss": 2.1355, "step": 6960 }, { "epoch": 1.08, "learning_rate": 0.00019252798750325433, "loss": 2.1813, "step": 6980 }, { "epoch": 1.09, "learning_rate": 0.00019221556886227546, "loss": 2.1677, "step": 7000 }, { "epoch": 1.09, "eval_loss": 2.3648109436035156, "eval_runtime": 69.3675, "eval_samples_per_second": 28.832, "eval_steps_per_second": 1.802, "step": 7000 }, { "epoch": 1.09, "learning_rate": 0.00019190315022129652, "loss": 2.1479, "step": 7020 }, { "epoch": 1.09, "learning_rate": 0.00019159073158031762, "loss": 2.1852, "step": 7040 }, { "epoch": 1.1, "learning_rate": 0.0001912783129393387, "loss": 2.14, "step": 7060 }, { "epoch": 1.1, "learning_rate": 0.0001909658942983598, "loss": 2.1332, "step": 7080 }, { "epoch": 1.1, "learning_rate": 0.00019065347565738088, "loss": 2.178, "step": 7100 }, { "epoch": 1.11, "learning_rate": 0.00019034105701640197, "loss": 2.1661, "step": 7120 }, { "epoch": 1.11, "learning_rate": 0.00019002863837542304, "loss": 2.1902, "step": 7140 }, { "epoch": 1.11, "learning_rate": 0.00018971621973444417, "loss": 2.1775, "step": 7160 }, { "epoch": 1.12, "learning_rate": 0.00018940380109346523, "loss": 2.2007, "step": 7180 }, { "epoch": 1.12, "learning_rate": 0.00018909138245248633, "loss": 2.2078, "step": 7200 }, { "epoch": 1.12, "eval_loss": 2.3642289638519287, "eval_runtime": 69.5476, "eval_samples_per_second": 28.757, "eval_steps_per_second": 1.797, "step": 7200 }, { "epoch": 1.12, "learning_rate": 0.0001887789638115074, "loss": 2.185, "step": 7220 }, { "epoch": 1.13, "learning_rate": 0.0001884665451705285, "loss": 2.1856, "step": 7240 }, { "epoch": 1.13, "learning_rate": 0.0001881541265295496, "loss": 2.2049, "step": 7260 }, { "epoch": 1.13, "learning_rate": 0.00018784170788857068, "loss": 2.1376, "step": 7280 }, { "epoch": 1.13, "learning_rate": 0.00018752928924759175, "loss": 2.1693, "step": 7300 }, { "epoch": 1.14, "learning_rate": 0.00018721687060661285, "loss": 2.1825, "step": 7320 }, { "epoch": 1.14, "learning_rate": 0.00018690445196563392, "loss": 2.1649, "step": 7340 }, { "epoch": 1.14, "learning_rate": 0.00018659203332465504, "loss": 2.1936, "step": 7360 }, { "epoch": 1.15, "learning_rate": 0.0001862796146836761, "loss": 2.143, "step": 7380 }, { "epoch": 1.15, "learning_rate": 0.0001859671960426972, "loss": 2.1617, "step": 7400 }, { "epoch": 1.15, "eval_loss": 2.362150192260742, "eval_runtime": 69.3218, "eval_samples_per_second": 28.851, "eval_steps_per_second": 1.803, "step": 7400 }, { "epoch": 1.15, "learning_rate": 0.00018565477740171827, "loss": 2.1555, "step": 7420 }, { "epoch": 1.16, "learning_rate": 0.0001853423587607394, "loss": 2.1639, "step": 7440 }, { "epoch": 1.16, "learning_rate": 0.00018502994011976046, "loss": 2.1678, "step": 7460 }, { "epoch": 1.16, "learning_rate": 0.00018471752147878156, "loss": 2.1775, "step": 7480 }, { "epoch": 1.17, "learning_rate": 0.00018440510283780263, "loss": 2.1784, "step": 7500 }, { "epoch": 1.17, "learning_rate": 0.00018409268419682375, "loss": 2.1499, "step": 7520 }, { "epoch": 1.17, "learning_rate": 0.00018378026555584482, "loss": 2.154, "step": 7540 }, { "epoch": 1.17, "learning_rate": 0.0001834678469148659, "loss": 2.1793, "step": 7560 }, { "epoch": 1.18, "learning_rate": 0.00018315542827388698, "loss": 2.2292, "step": 7580 }, { "epoch": 1.18, "learning_rate": 0.00018284300963290808, "loss": 2.1578, "step": 7600 }, { "epoch": 1.18, "eval_loss": 2.3628857135772705, "eval_runtime": 69.2564, "eval_samples_per_second": 28.878, "eval_steps_per_second": 1.805, "step": 7600 }, { "epoch": 1.18, "learning_rate": 0.00018253059099192917, "loss": 2.1494, "step": 7620 }, { "epoch": 1.19, "learning_rate": 0.00018221817235095027, "loss": 2.1669, "step": 7640 }, { "epoch": 1.19, "learning_rate": 0.00018190575370997133, "loss": 2.1447, "step": 7660 }, { "epoch": 1.19, "learning_rate": 0.00018159333506899243, "loss": 2.1663, "step": 7680 }, { "epoch": 1.2, "learning_rate": 0.0001812809164280135, "loss": 2.1871, "step": 7700 }, { "epoch": 1.2, "learning_rate": 0.00018096849778703462, "loss": 2.1338, "step": 7720 }, { "epoch": 1.2, "learning_rate": 0.0001806560791460557, "loss": 2.1767, "step": 7740 }, { "epoch": 1.21, "learning_rate": 0.00018034366050507678, "loss": 2.1694, "step": 7760 }, { "epoch": 1.21, "learning_rate": 0.00018003124186409785, "loss": 2.1674, "step": 7780 }, { "epoch": 1.21, "learning_rate": 0.00017971882322311898, "loss": 2.1863, "step": 7800 }, { "epoch": 1.21, "eval_loss": 2.3613035678863525, "eval_runtime": 69.2881, "eval_samples_per_second": 28.865, "eval_steps_per_second": 1.804, "step": 7800 }, { "epoch": 1.22, "learning_rate": 0.00017940640458214004, "loss": 2.1441, "step": 7820 }, { "epoch": 1.22, "learning_rate": 0.00017909398594116114, "loss": 2.1885, "step": 7840 }, { "epoch": 1.22, "learning_rate": 0.0001787815673001822, "loss": 2.1514, "step": 7860 }, { "epoch": 1.22, "learning_rate": 0.00017846914865920333, "loss": 2.2002, "step": 7880 }, { "epoch": 1.23, "learning_rate": 0.0001781567300182244, "loss": 2.1759, "step": 7900 }, { "epoch": 1.23, "learning_rate": 0.0001778443113772455, "loss": 2.1611, "step": 7920 }, { "epoch": 1.23, "learning_rate": 0.00017753189273626656, "loss": 2.1667, "step": 7940 }, { "epoch": 1.24, "learning_rate": 0.00017721947409528768, "loss": 2.1717, "step": 7960 }, { "epoch": 1.24, "learning_rate": 0.00017690705545430875, "loss": 2.1983, "step": 7980 }, { "epoch": 1.24, "learning_rate": 0.00017659463681332985, "loss": 2.2092, "step": 8000 }, { "epoch": 1.24, "eval_loss": 2.3608274459838867, "eval_runtime": 69.3364, "eval_samples_per_second": 28.845, "eval_steps_per_second": 1.803, "step": 8000 }, { "epoch": 1.25, "learning_rate": 0.00017628221817235092, "loss": 2.1305, "step": 8020 }, { "epoch": 1.25, "learning_rate": 0.000175969799531372, "loss": 2.1431, "step": 8040 }, { "epoch": 1.25, "learning_rate": 0.0001756573808903931, "loss": 2.1384, "step": 8060 }, { "epoch": 1.26, "learning_rate": 0.0001753449622494142, "loss": 2.2093, "step": 8080 }, { "epoch": 1.26, "learning_rate": 0.00017503254360843527, "loss": 2.1271, "step": 8100 }, { "epoch": 1.26, "learning_rate": 0.00017472012496745637, "loss": 2.1466, "step": 8120 }, { "epoch": 1.26, "learning_rate": 0.0001744077063264775, "loss": 2.1578, "step": 8140 }, { "epoch": 1.27, "learning_rate": 0.00017409528768549856, "loss": 2.1632, "step": 8160 }, { "epoch": 1.27, "learning_rate": 0.00017378286904451965, "loss": 2.1465, "step": 8180 }, { "epoch": 1.27, "learning_rate": 0.00017347045040354072, "loss": 2.2226, "step": 8200 }, { "epoch": 1.27, "eval_loss": 2.35835599899292, "eval_runtime": 69.2657, "eval_samples_per_second": 28.874, "eval_steps_per_second": 1.805, "step": 8200 }, { "epoch": 1.28, "learning_rate": 0.00017315803176256184, "loss": 2.1585, "step": 8220 }, { "epoch": 1.28, "learning_rate": 0.0001728456131215829, "loss": 2.1529, "step": 8240 }, { "epoch": 1.28, "learning_rate": 0.000172533194480604, "loss": 2.1663, "step": 8260 }, { "epoch": 1.29, "learning_rate": 0.00017222077583962508, "loss": 2.1422, "step": 8280 }, { "epoch": 1.29, "learning_rate": 0.00017190835719864617, "loss": 2.158, "step": 8300 }, { "epoch": 1.29, "learning_rate": 0.00017159593855766727, "loss": 2.1984, "step": 8320 }, { "epoch": 1.3, "learning_rate": 0.00017128351991668836, "loss": 2.1395, "step": 8340 }, { "epoch": 1.3, "learning_rate": 0.00017097110127570943, "loss": 2.14, "step": 8360 }, { "epoch": 1.3, "learning_rate": 0.00017065868263473053, "loss": 2.1657, "step": 8380 }, { "epoch": 1.31, "learning_rate": 0.00017036188492580056, "loss": 2.167, "step": 8400 }, { "epoch": 1.31, "eval_loss": 2.35697603225708, "eval_runtime": 69.2685, "eval_samples_per_second": 28.873, "eval_steps_per_second": 1.805, "step": 8400 }, { "epoch": 1.31, "learning_rate": 0.00017004946628482165, "loss": 2.1396, "step": 8420 }, { "epoch": 1.31, "learning_rate": 0.00016973704764384272, "loss": 2.1777, "step": 8440 }, { "epoch": 1.31, "learning_rate": 0.00016942462900286384, "loss": 2.1366, "step": 8460 }, { "epoch": 1.32, "learning_rate": 0.0001691122103618849, "loss": 2.1625, "step": 8480 }, { "epoch": 1.32, "learning_rate": 0.000168799791720906, "loss": 2.1859, "step": 8500 }, { "epoch": 1.32, "learning_rate": 0.00016848737307992707, "loss": 2.1705, "step": 8520 }, { "epoch": 1.33, "learning_rate": 0.0001681749544389482, "loss": 2.1971, "step": 8540 }, { "epoch": 1.33, "learning_rate": 0.00016786253579796927, "loss": 2.1937, "step": 8560 }, { "epoch": 1.33, "learning_rate": 0.00016755011715699036, "loss": 2.1436, "step": 8580 }, { "epoch": 1.34, "learning_rate": 0.00016723769851601143, "loss": 2.1592, "step": 8600 }, { "epoch": 1.34, "eval_loss": 2.3576247692108154, "eval_runtime": 69.277, "eval_samples_per_second": 28.87, "eval_steps_per_second": 1.804, "step": 8600 }, { "epoch": 1.34, "learning_rate": 0.00016692527987503252, "loss": 2.1745, "step": 8620 }, { "epoch": 1.34, "learning_rate": 0.00016661286123405362, "loss": 2.1517, "step": 8640 }, { "epoch": 1.35, "learning_rate": 0.00016630044259307472, "loss": 2.1921, "step": 8660 }, { "epoch": 1.35, "learning_rate": 0.00016598802395209578, "loss": 2.1703, "step": 8680 }, { "epoch": 1.35, "learning_rate": 0.00016567560531111688, "loss": 2.1223, "step": 8700 }, { "epoch": 1.36, "learning_rate": 0.00016536318667013795, "loss": 2.1748, "step": 8720 }, { "epoch": 1.36, "learning_rate": 0.00016505076802915907, "loss": 2.145, "step": 8740 }, { "epoch": 1.36, "learning_rate": 0.00016473834938818014, "loss": 2.1077, "step": 8760 }, { "epoch": 1.36, "learning_rate": 0.00016442593074720123, "loss": 2.1571, "step": 8780 }, { "epoch": 1.37, "learning_rate": 0.0001641135121062223, "loss": 2.1946, "step": 8800 }, { "epoch": 1.37, "eval_loss": 2.3559648990631104, "eval_runtime": 69.3886, "eval_samples_per_second": 28.823, "eval_steps_per_second": 1.801, "step": 8800 }, { "epoch": 1.37, "learning_rate": 0.00016380109346524342, "loss": 2.1635, "step": 8820 }, { "epoch": 1.37, "learning_rate": 0.0001634886748242645, "loss": 2.1546, "step": 8840 }, { "epoch": 1.38, "learning_rate": 0.0001631762561832856, "loss": 2.1359, "step": 8860 }, { "epoch": 1.38, "learning_rate": 0.00016286383754230666, "loss": 2.1741, "step": 8880 }, { "epoch": 1.38, "learning_rate": 0.00016255141890132778, "loss": 2.1382, "step": 8900 }, { "epoch": 1.39, "learning_rate": 0.00016223900026034885, "loss": 2.1514, "step": 8920 }, { "epoch": 1.39, "learning_rate": 0.00016192658161936994, "loss": 2.17, "step": 8940 }, { "epoch": 1.39, "learning_rate": 0.000161614162978391, "loss": 2.1784, "step": 8960 }, { "epoch": 1.4, "learning_rate": 0.0001613017443374121, "loss": 2.1869, "step": 8980 }, { "epoch": 1.4, "learning_rate": 0.0001609893256964332, "loss": 2.155, "step": 9000 }, { "epoch": 1.4, "eval_loss": 2.3562612533569336, "eval_runtime": 70.7208, "eval_samples_per_second": 28.28, "eval_steps_per_second": 1.768, "step": 9000 }, { "epoch": 1.4, "learning_rate": 0.0001606769070554543, "loss": 2.1467, "step": 9020 }, { "epoch": 1.4, "learning_rate": 0.00016036448841447537, "loss": 2.1662, "step": 9040 }, { "epoch": 1.41, "learning_rate": 0.00016005206977349646, "loss": 2.1928, "step": 9060 }, { "epoch": 1.41, "learning_rate": 0.00015973965113251756, "loss": 2.1084, "step": 9080 }, { "epoch": 1.41, "learning_rate": 0.00015942723249153865, "loss": 2.182, "step": 9100 }, { "epoch": 1.42, "learning_rate": 0.00015911481385055975, "loss": 2.1502, "step": 9120 }, { "epoch": 1.42, "learning_rate": 0.00015880239520958082, "loss": 2.1645, "step": 9140 }, { "epoch": 1.42, "learning_rate": 0.00015848997656860194, "loss": 2.1246, "step": 9160 }, { "epoch": 1.43, "learning_rate": 0.000158177557927623, "loss": 2.1769, "step": 9180 }, { "epoch": 1.43, "learning_rate": 0.0001578651392866441, "loss": 2.1772, "step": 9200 }, { "epoch": 1.43, "eval_loss": 2.354128360748291, "eval_runtime": 70.4883, "eval_samples_per_second": 28.374, "eval_steps_per_second": 1.773, "step": 9200 }, { "epoch": 1.43, "learning_rate": 0.00015755272064566517, "loss": 2.1777, "step": 9220 }, { "epoch": 1.44, "learning_rate": 0.0001572403020046863, "loss": 2.1749, "step": 9240 }, { "epoch": 1.44, "learning_rate": 0.00015692788336370736, "loss": 2.1861, "step": 9260 }, { "epoch": 1.44, "learning_rate": 0.00015661546472272846, "loss": 2.1567, "step": 9280 }, { "epoch": 1.45, "learning_rate": 0.00015630304608174952, "loss": 2.1426, "step": 9300 }, { "epoch": 1.45, "learning_rate": 0.00015599062744077062, "loss": 2.1658, "step": 9320 }, { "epoch": 1.45, "learning_rate": 0.00015567820879979172, "loss": 2.1639, "step": 9340 }, { "epoch": 1.45, "learning_rate": 0.0001553657901588128, "loss": 2.1897, "step": 9360 }, { "epoch": 1.46, "learning_rate": 0.00015505337151783388, "loss": 2.1439, "step": 9380 }, { "epoch": 1.46, "learning_rate": 0.00015474095287685497, "loss": 2.1326, "step": 9400 }, { "epoch": 1.46, "eval_loss": 2.352673292160034, "eval_runtime": 69.2871, "eval_samples_per_second": 28.865, "eval_steps_per_second": 1.804, "step": 9400 }, { "epoch": 1.46, "learning_rate": 0.00015442853423587604, "loss": 2.139, "step": 9420 }, { "epoch": 1.47, "learning_rate": 0.00015411611559489717, "loss": 2.1087, "step": 9440 }, { "epoch": 1.47, "learning_rate": 0.00015380369695391823, "loss": 2.1528, "step": 9460 }, { "epoch": 1.47, "learning_rate": 0.00015349127831293933, "loss": 2.1866, "step": 9480 }, { "epoch": 1.48, "learning_rate": 0.0001531788596719604, "loss": 2.1436, "step": 9500 }, { "epoch": 1.48, "learning_rate": 0.00015286644103098152, "loss": 2.1699, "step": 9520 }, { "epoch": 1.48, "learning_rate": 0.0001525540223900026, "loss": 2.1415, "step": 9540 }, { "epoch": 1.49, "learning_rate": 0.00015224160374902368, "loss": 2.1092, "step": 9560 }, { "epoch": 1.49, "learning_rate": 0.00015192918510804475, "loss": 2.1422, "step": 9580 }, { "epoch": 1.49, "learning_rate": 0.00015161676646706587, "loss": 2.1677, "step": 9600 }, { "epoch": 1.49, "eval_loss": 2.3518292903900146, "eval_runtime": 69.3029, "eval_samples_per_second": 28.859, "eval_steps_per_second": 1.804, "step": 9600 }, { "epoch": 1.49, "learning_rate": 0.00015130434782608694, "loss": 2.1594, "step": 9620 }, { "epoch": 1.5, "learning_rate": 0.00015099192918510804, "loss": 2.1539, "step": 9640 }, { "epoch": 1.5, "learning_rate": 0.0001506795105441291, "loss": 2.1343, "step": 9660 }, { "epoch": 1.5, "learning_rate": 0.00015036709190315023, "loss": 2.1386, "step": 9680 }, { "epoch": 1.51, "learning_rate": 0.0001500546732621713, "loss": 2.1512, "step": 9700 }, { "epoch": 1.51, "learning_rate": 0.0001497422546211924, "loss": 2.1669, "step": 9720 }, { "epoch": 1.51, "learning_rate": 0.0001494298359802135, "loss": 2.158, "step": 9740 }, { "epoch": 1.52, "learning_rate": 0.00014911741733923456, "loss": 2.1643, "step": 9760 }, { "epoch": 1.52, "learning_rate": 0.00014880499869825565, "loss": 2.1612, "step": 9780 }, { "epoch": 1.52, "learning_rate": 0.00014849258005727675, "loss": 2.1441, "step": 9800 }, { "epoch": 1.52, "eval_loss": 2.35211181640625, "eval_runtime": 69.2821, "eval_samples_per_second": 28.867, "eval_steps_per_second": 1.804, "step": 9800 }, { "epoch": 1.53, "learning_rate": 0.00014818016141629784, "loss": 2.1704, "step": 9820 }, { "epoch": 1.53, "learning_rate": 0.0001478677427753189, "loss": 2.1546, "step": 9840 }, { "epoch": 1.53, "learning_rate": 0.00014755532413434, "loss": 2.1909, "step": 9860 }, { "epoch": 1.54, "learning_rate": 0.0001472429054933611, "loss": 2.149, "step": 9880 }, { "epoch": 1.54, "learning_rate": 0.00014693048685238217, "loss": 2.1419, "step": 9900 }, { "epoch": 1.54, "learning_rate": 0.00014661806821140327, "loss": 2.1465, "step": 9920 }, { "epoch": 1.54, "learning_rate": 0.00014630564957042436, "loss": 2.1551, "step": 9940 }, { "epoch": 1.55, "learning_rate": 0.00014599323092944546, "loss": 2.1526, "step": 9960 }, { "epoch": 1.55, "learning_rate": 0.00014568081228846653, "loss": 2.1437, "step": 9980 }, { "epoch": 1.55, "learning_rate": 0.00014536839364748762, "loss": 2.1659, "step": 10000 }, { "epoch": 1.55, "eval_loss": 2.3507654666900635, "eval_runtime": 69.2997, "eval_samples_per_second": 28.86, "eval_steps_per_second": 1.804, "step": 10000 }, { "epoch": 1.56, "learning_rate": 0.00014505597500650872, "loss": 2.14, "step": 10020 }, { "epoch": 1.56, "learning_rate": 0.0001447435563655298, "loss": 2.1289, "step": 10040 }, { "epoch": 1.56, "learning_rate": 0.00014443113772455088, "loss": 2.1226, "step": 10060 }, { "epoch": 1.57, "learning_rate": 0.00014411871908357198, "loss": 2.1627, "step": 10080 }, { "epoch": 1.57, "learning_rate": 0.00014380630044259307, "loss": 2.1759, "step": 10100 }, { "epoch": 1.57, "learning_rate": 0.00014349388180161414, "loss": 2.1511, "step": 10120 }, { "epoch": 1.58, "learning_rate": 0.00014318146316063523, "loss": 2.1275, "step": 10140 }, { "epoch": 1.58, "learning_rate": 0.00014286904451965633, "loss": 2.1638, "step": 10160 }, { "epoch": 1.58, "learning_rate": 0.00014255662587867743, "loss": 2.1494, "step": 10180 }, { "epoch": 1.59, "learning_rate": 0.0001422442072376985, "loss": 2.1554, "step": 10200 }, { "epoch": 1.59, "eval_loss": 2.349271059036255, "eval_runtime": 69.2627, "eval_samples_per_second": 28.876, "eval_steps_per_second": 1.805, "step": 10200 }, { "epoch": 1.59, "learning_rate": 0.0001419317885967196, "loss": 2.133, "step": 10220 }, { "epoch": 1.59, "learning_rate": 0.00014161936995574068, "loss": 2.1515, "step": 10240 }, { "epoch": 1.59, "learning_rate": 0.00014130695131476178, "loss": 2.1262, "step": 10260 }, { "epoch": 1.6, "learning_rate": 0.00014099453267378285, "loss": 2.142, "step": 10280 }, { "epoch": 1.6, "learning_rate": 0.00014068211403280394, "loss": 2.1578, "step": 10300 }, { "epoch": 1.6, "learning_rate": 0.00014036969539182504, "loss": 2.1583, "step": 10320 }, { "epoch": 1.61, "learning_rate": 0.0001400572767508461, "loss": 2.1043, "step": 10340 }, { "epoch": 1.61, "learning_rate": 0.0001397448581098672, "loss": 2.1539, "step": 10360 }, { "epoch": 1.61, "learning_rate": 0.0001394324394688883, "loss": 2.1189, "step": 10380 }, { "epoch": 1.62, "learning_rate": 0.0001391200208279094, "loss": 2.1484, "step": 10400 }, { "epoch": 1.62, "eval_loss": 2.3479487895965576, "eval_runtime": 69.2625, "eval_samples_per_second": 28.876, "eval_steps_per_second": 1.805, "step": 10400 }, { "epoch": 1.62, "learning_rate": 0.00013880760218693046, "loss": 2.1993, "step": 10420 }, { "epoch": 1.62, "learning_rate": 0.00013849518354595156, "loss": 2.1869, "step": 10440 }, { "epoch": 1.63, "learning_rate": 0.00013818276490497265, "loss": 2.1644, "step": 10460 }, { "epoch": 1.63, "learning_rate": 0.00013787034626399375, "loss": 2.1751, "step": 10480 }, { "epoch": 1.63, "learning_rate": 0.00013755792762301482, "loss": 2.1416, "step": 10500 }, { "epoch": 1.63, "learning_rate": 0.0001372455089820359, "loss": 2.1809, "step": 10520 }, { "epoch": 1.64, "learning_rate": 0.000136933090341057, "loss": 2.1653, "step": 10540 }, { "epoch": 1.64, "learning_rate": 0.00013662067170007808, "loss": 2.1026, "step": 10560 }, { "epoch": 1.64, "learning_rate": 0.00013630825305909917, "loss": 2.1503, "step": 10580 }, { "epoch": 1.65, "learning_rate": 0.00013599583441812027, "loss": 2.1289, "step": 10600 }, { "epoch": 1.65, "eval_loss": 2.3468515872955322, "eval_runtime": 69.2274, "eval_samples_per_second": 28.89, "eval_steps_per_second": 1.806, "step": 10600 }, { "epoch": 1.65, "learning_rate": 0.00013568341577714136, "loss": 2.1929, "step": 10620 }, { "epoch": 1.65, "learning_rate": 0.00013537099713616243, "loss": 2.1547, "step": 10640 }, { "epoch": 1.66, "learning_rate": 0.00013505857849518353, "loss": 2.1571, "step": 10660 }, { "epoch": 1.66, "learning_rate": 0.00013474615985420462, "loss": 2.1649, "step": 10680 }, { "epoch": 1.66, "learning_rate": 0.00013443374121322572, "loss": 2.1647, "step": 10700 }, { "epoch": 1.67, "learning_rate": 0.00013412132257224679, "loss": 2.206, "step": 10720 }, { "epoch": 1.67, "learning_rate": 0.00013380890393126788, "loss": 2.1377, "step": 10740 }, { "epoch": 1.67, "learning_rate": 0.00013349648529028898, "loss": 2.1347, "step": 10760 }, { "epoch": 1.68, "learning_rate": 0.00013318406664931004, "loss": 2.1948, "step": 10780 }, { "epoch": 1.68, "learning_rate": 0.00013287164800833114, "loss": 2.1844, "step": 10800 }, { "epoch": 1.68, "eval_loss": 2.347837209701538, "eval_runtime": 69.2425, "eval_samples_per_second": 28.884, "eval_steps_per_second": 1.805, "step": 10800 }, { "epoch": 1.68, "learning_rate": 0.00013255922936735224, "loss": 2.1515, "step": 10820 }, { "epoch": 1.68, "learning_rate": 0.00013224681072637333, "loss": 2.1885, "step": 10840 }, { "epoch": 1.69, "learning_rate": 0.00013193439208539443, "loss": 2.143, "step": 10860 }, { "epoch": 1.69, "learning_rate": 0.00013162197344441552, "loss": 2.1671, "step": 10880 }, { "epoch": 1.69, "learning_rate": 0.0001313095548034366, "loss": 2.1426, "step": 10900 }, { "epoch": 1.7, "learning_rate": 0.00013099713616245769, "loss": 2.1653, "step": 10920 }, { "epoch": 1.7, "learning_rate": 0.00013068471752147878, "loss": 2.1774, "step": 10940 }, { "epoch": 1.7, "learning_rate": 0.00013037229888049988, "loss": 2.1344, "step": 10960 }, { "epoch": 1.71, "learning_rate": 0.00013005988023952094, "loss": 2.1217, "step": 10980 }, { "epoch": 1.71, "learning_rate": 0.00012974746159854204, "loss": 2.1281, "step": 11000 }, { "epoch": 1.71, "eval_loss": 2.345808982849121, "eval_runtime": 69.2499, "eval_samples_per_second": 28.881, "eval_steps_per_second": 1.805, "step": 11000 }, { "epoch": 1.71, "learning_rate": 0.00012943504295756314, "loss": 2.1459, "step": 11020 }, { "epoch": 1.72, "learning_rate": 0.0001291226243165842, "loss": 2.1294, "step": 11040 }, { "epoch": 1.72, "learning_rate": 0.0001288102056756053, "loss": 2.1455, "step": 11060 }, { "epoch": 1.72, "learning_rate": 0.0001284977870346264, "loss": 2.1219, "step": 11080 }, { "epoch": 1.72, "learning_rate": 0.0001281853683936475, "loss": 2.1696, "step": 11100 }, { "epoch": 1.73, "learning_rate": 0.00012787294975266856, "loss": 2.1474, "step": 11120 }, { "epoch": 1.73, "learning_rate": 0.00012756053111168965, "loss": 2.1436, "step": 11140 }, { "epoch": 1.73, "learning_rate": 0.00012724811247071075, "loss": 2.1785, "step": 11160 }, { "epoch": 1.74, "learning_rate": 0.00012693569382973184, "loss": 2.1677, "step": 11180 }, { "epoch": 1.74, "learning_rate": 0.0001266232751887529, "loss": 2.1564, "step": 11200 }, { "epoch": 1.74, "eval_loss": 2.3451294898986816, "eval_runtime": 69.2454, "eval_samples_per_second": 28.883, "eval_steps_per_second": 1.805, "step": 11200 }, { "epoch": 1.74, "learning_rate": 0.000126310856547774, "loss": 2.1793, "step": 11220 }, { "epoch": 1.75, "learning_rate": 0.0001259984379067951, "loss": 2.1583, "step": 11240 }, { "epoch": 1.75, "learning_rate": 0.00012568601926581617, "loss": 2.1482, "step": 11260 }, { "epoch": 1.75, "learning_rate": 0.00012537360062483727, "loss": 2.1393, "step": 11280 }, { "epoch": 1.76, "learning_rate": 0.00012506118198385836, "loss": 2.1586, "step": 11300 }, { "epoch": 1.76, "learning_rate": 0.00012474876334287946, "loss": 2.1533, "step": 11320 }, { "epoch": 1.76, "learning_rate": 0.00012443634470190053, "loss": 2.1516, "step": 11340 }, { "epoch": 1.77, "learning_rate": 0.00012412392606092162, "loss": 2.1184, "step": 11360 }, { "epoch": 1.77, "learning_rate": 0.00012381150741994272, "loss": 2.1162, "step": 11380 }, { "epoch": 1.77, "learning_rate": 0.0001234990887789638, "loss": 2.1588, "step": 11400 }, { "epoch": 1.77, "eval_loss": 2.3451669216156006, "eval_runtime": 69.2383, "eval_samples_per_second": 28.886, "eval_steps_per_second": 1.805, "step": 11400 }, { "epoch": 1.77, "learning_rate": 0.00012318667013798488, "loss": 2.1588, "step": 11420 }, { "epoch": 1.78, "learning_rate": 0.00012287425149700598, "loss": 2.1463, "step": 11440 }, { "epoch": 1.78, "learning_rate": 0.00012256183285602707, "loss": 2.1498, "step": 11460 }, { "epoch": 1.78, "learning_rate": 0.00012224941421504814, "loss": 2.1663, "step": 11480 }, { "epoch": 1.79, "learning_rate": 0.00012193699557406924, "loss": 2.1306, "step": 11500 }, { "epoch": 1.79, "learning_rate": 0.00012162457693309033, "loss": 2.1542, "step": 11520 }, { "epoch": 1.79, "learning_rate": 0.00012131215829211141, "loss": 2.1513, "step": 11540 }, { "epoch": 1.8, "learning_rate": 0.00012099973965113251, "loss": 2.2031, "step": 11560 }, { "epoch": 1.8, "learning_rate": 0.00012068732101015359, "loss": 2.1438, "step": 11580 }, { "epoch": 1.8, "learning_rate": 0.00012037490236917469, "loss": 2.1431, "step": 11600 }, { "epoch": 1.8, "eval_loss": 2.3447554111480713, "eval_runtime": 69.2865, "eval_samples_per_second": 28.866, "eval_steps_per_second": 1.804, "step": 11600 }, { "epoch": 1.81, "learning_rate": 0.00012006248372819577, "loss": 2.1272, "step": 11620 }, { "epoch": 1.81, "learning_rate": 0.00011975006508721686, "loss": 2.1584, "step": 11640 }, { "epoch": 1.81, "learning_rate": 0.00011943764644623794, "loss": 2.128, "step": 11660 }, { "epoch": 1.82, "learning_rate": 0.00011912522780525903, "loss": 2.1461, "step": 11680 }, { "epoch": 1.82, "learning_rate": 0.00011881280916428012, "loss": 2.1411, "step": 11700 }, { "epoch": 1.82, "learning_rate": 0.0001185003905233012, "loss": 2.1592, "step": 11720 }, { "epoch": 1.82, "learning_rate": 0.0001181879718823223, "loss": 2.1642, "step": 11740 }, { "epoch": 1.83, "learning_rate": 0.00011787555324134338, "loss": 2.1914, "step": 11760 }, { "epoch": 1.83, "learning_rate": 0.00011756313460036448, "loss": 2.1612, "step": 11780 }, { "epoch": 1.83, "learning_rate": 0.00011725071595938556, "loss": 2.1452, "step": 11800 }, { "epoch": 1.83, "eval_loss": 2.3442630767822266, "eval_runtime": 69.2459, "eval_samples_per_second": 28.883, "eval_steps_per_second": 1.805, "step": 11800 }, { "epoch": 1.84, "learning_rate": 0.00011693829731840665, "loss": 2.1453, "step": 11820 }, { "epoch": 1.84, "learning_rate": 0.00011662587867742774, "loss": 2.1251, "step": 11840 }, { "epoch": 1.84, "learning_rate": 0.00011631346003644882, "loss": 2.1412, "step": 11860 }, { "epoch": 1.85, "learning_rate": 0.00011600104139546991, "loss": 2.1033, "step": 11880 }, { "epoch": 1.85, "learning_rate": 0.000115688622754491, "loss": 2.1219, "step": 11900 }, { "epoch": 1.85, "learning_rate": 0.00011537620411351209, "loss": 2.1831, "step": 11920 }, { "epoch": 1.86, "learning_rate": 0.00011506378547253317, "loss": 2.1434, "step": 11940 }, { "epoch": 1.86, "learning_rate": 0.00011475136683155427, "loss": 2.1439, "step": 11960 }, { "epoch": 1.86, "learning_rate": 0.00011443894819057536, "loss": 2.1377, "step": 11980 }, { "epoch": 1.86, "learning_rate": 0.00011412652954959646, "loss": 2.1345, "step": 12000 }, { "epoch": 1.86, "eval_loss": 2.342855453491211, "eval_runtime": 69.2714, "eval_samples_per_second": 28.872, "eval_steps_per_second": 1.804, "step": 12000 }, { "epoch": 1.87, "learning_rate": 0.00011381411090861754, "loss": 2.1527, "step": 12020 }, { "epoch": 1.87, "learning_rate": 0.00011350169226763864, "loss": 2.1737, "step": 12040 }, { "epoch": 1.87, "learning_rate": 0.00011318927362665972, "loss": 2.137, "step": 12060 }, { "epoch": 1.88, "learning_rate": 0.00011287685498568081, "loss": 2.1616, "step": 12080 }, { "epoch": 1.88, "learning_rate": 0.0001125644363447019, "loss": 2.1688, "step": 12100 }, { "epoch": 1.88, "learning_rate": 0.00011225201770372299, "loss": 2.1746, "step": 12120 }, { "epoch": 1.89, "learning_rate": 0.00011193959906274407, "loss": 2.1552, "step": 12140 }, { "epoch": 1.89, "learning_rate": 0.00011162718042176515, "loss": 2.1643, "step": 12160 }, { "epoch": 1.89, "learning_rate": 0.00011131476178078625, "loss": 2.1494, "step": 12180 }, { "epoch": 1.9, "learning_rate": 0.00011100234313980733, "loss": 2.1112, "step": 12200 }, { "epoch": 1.9, "eval_loss": 2.34304141998291, "eval_runtime": 72.1422, "eval_samples_per_second": 27.723, "eval_steps_per_second": 1.733, "step": 12200 }, { "epoch": 1.9, "learning_rate": 0.00011068992449882843, "loss": 2.1505, "step": 12220 }, { "epoch": 1.9, "learning_rate": 0.00011037750585784951, "loss": 2.1722, "step": 12240 }, { "epoch": 1.91, "learning_rate": 0.0001100650872168706, "loss": 2.1582, "step": 12260 }, { "epoch": 1.91, "learning_rate": 0.00010975266857589169, "loss": 2.1806, "step": 12280 }, { "epoch": 1.91, "learning_rate": 0.00010944024993491278, "loss": 2.1508, "step": 12300 }, { "epoch": 1.91, "learning_rate": 0.00010912783129393386, "loss": 2.1654, "step": 12320 }, { "epoch": 1.92, "learning_rate": 0.00010881541265295496, "loss": 2.131, "step": 12340 }, { "epoch": 1.92, "learning_rate": 0.00010850299401197604, "loss": 2.1301, "step": 12360 }, { "epoch": 1.92, "learning_rate": 0.00010819057537099712, "loss": 2.1312, "step": 12380 }, { "epoch": 1.93, "learning_rate": 0.00010787815673001822, "loss": 2.1301, "step": 12400 }, { "epoch": 1.93, "eval_loss": 2.3404922485351562, "eval_runtime": 71.3367, "eval_samples_per_second": 28.036, "eval_steps_per_second": 1.752, "step": 12400 }, { "epoch": 1.93, "learning_rate": 0.00010758135902108825, "loss": 2.1398, "step": 12420 }, { "epoch": 1.93, "learning_rate": 0.00010726894038010933, "loss": 2.1449, "step": 12440 }, { "epoch": 1.94, "learning_rate": 0.00010695652173913043, "loss": 2.1498, "step": 12460 }, { "epoch": 1.94, "learning_rate": 0.00010664410309815151, "loss": 2.1484, "step": 12480 }, { "epoch": 1.94, "learning_rate": 0.0001063316844571726, "loss": 2.1705, "step": 12500 }, { "epoch": 1.95, "learning_rate": 0.00010601926581619368, "loss": 2.1236, "step": 12520 }, { "epoch": 1.95, "learning_rate": 0.00010570684717521478, "loss": 2.1435, "step": 12540 }, { "epoch": 1.95, "learning_rate": 0.00010539442853423586, "loss": 2.1656, "step": 12560 }, { "epoch": 1.95, "learning_rate": 0.00010508200989325696, "loss": 2.1459, "step": 12580 }, { "epoch": 1.96, "learning_rate": 0.00010476959125227804, "loss": 2.1392, "step": 12600 }, { "epoch": 1.96, "eval_loss": 2.3410892486572266, "eval_runtime": 72.1407, "eval_samples_per_second": 27.724, "eval_steps_per_second": 1.733, "step": 12600 }, { "epoch": 1.96, "learning_rate": 0.00010445717261129913, "loss": 2.1399, "step": 12620 }, { "epoch": 1.96, "learning_rate": 0.00010414475397032022, "loss": 2.1979, "step": 12640 }, { "epoch": 1.97, "learning_rate": 0.0001038323353293413, "loss": 2.1596, "step": 12660 }, { "epoch": 1.97, "learning_rate": 0.0001035199166883624, "loss": 2.1817, "step": 12680 }, { "epoch": 1.97, "learning_rate": 0.00010320749804738348, "loss": 2.0972, "step": 12700 }, { "epoch": 1.98, "learning_rate": 0.00010289507940640457, "loss": 2.1293, "step": 12720 }, { "epoch": 1.98, "learning_rate": 0.00010258266076542565, "loss": 2.1362, "step": 12740 }, { "epoch": 1.98, "learning_rate": 0.00010227024212444675, "loss": 2.1474, "step": 12760 }, { "epoch": 1.99, "learning_rate": 0.00010195782348346783, "loss": 2.2004, "step": 12780 }, { "epoch": 1.99, "learning_rate": 0.00010164540484248893, "loss": 2.1221, "step": 12800 }, { "epoch": 1.99, "eval_loss": 2.340029716491699, "eval_runtime": 72.0796, "eval_samples_per_second": 27.747, "eval_steps_per_second": 1.734, "step": 12800 }, { "epoch": 1.99, "learning_rate": 0.00010133298620151001, "loss": 2.1782, "step": 12820 }, { "epoch": 2.0, "learning_rate": 0.00010102056756053109, "loss": 2.1358, "step": 12840 }, { "epoch": 2.0, "learning_rate": 0.00010070814891955218, "loss": 2.122, "step": 12860 }, { "epoch": 2.0, "learning_rate": 0.00010039573027857327, "loss": 2.1494, "step": 12880 }, { "epoch": 2.0, "learning_rate": 0.00010008331163759436, "loss": 2.1522, "step": 12900 }, { "epoch": 2.01, "learning_rate": 9.977089299661544e-05, "loss": 2.1241, "step": 12920 }, { "epoch": 2.01, "learning_rate": 9.945847435563654e-05, "loss": 2.1456, "step": 12940 }, { "epoch": 2.01, "learning_rate": 9.914605571465763e-05, "loss": 2.1495, "step": 12960 }, { "epoch": 2.02, "learning_rate": 9.883363707367873e-05, "loss": 2.1734, "step": 12980 }, { "epoch": 2.02, "learning_rate": 9.852121843269981e-05, "loss": 2.1711, "step": 13000 }, { "epoch": 2.02, "eval_loss": 2.339312791824341, "eval_runtime": 69.2994, "eval_samples_per_second": 28.86, "eval_steps_per_second": 1.804, "step": 13000 }, { "epoch": 2.02, "learning_rate": 9.820879979172091e-05, "loss": 2.1483, "step": 13020 }, { "epoch": 2.03, "learning_rate": 9.789638115074199e-05, "loss": 2.124, "step": 13040 }, { "epoch": 2.03, "learning_rate": 9.758396250976308e-05, "loss": 2.1337, "step": 13060 }, { "epoch": 2.03, "learning_rate": 9.727154386878417e-05, "loss": 2.137, "step": 13080 }, { "epoch": 2.04, "learning_rate": 9.695912522780526e-05, "loss": 2.1225, "step": 13100 }, { "epoch": 2.04, "learning_rate": 9.664670658682634e-05, "loss": 2.1384, "step": 13120 }, { "epoch": 2.04, "learning_rate": 9.633428794584743e-05, "loss": 2.1052, "step": 13140 }, { "epoch": 2.05, "learning_rate": 9.602186930486852e-05, "loss": 2.1489, "step": 13160 }, { "epoch": 2.05, "learning_rate": 9.57094506638896e-05, "loss": 2.1154, "step": 13180 }, { "epoch": 2.05, "learning_rate": 9.53970320229107e-05, "loss": 2.1476, "step": 13200 }, { "epoch": 2.05, "eval_loss": 2.3396096229553223, "eval_runtime": 69.2833, "eval_samples_per_second": 28.867, "eval_steps_per_second": 1.804, "step": 13200 }, { "epoch": 2.05, "learning_rate": 9.508461338193178e-05, "loss": 2.1109, "step": 13220 }, { "epoch": 2.06, "learning_rate": 9.477219474095288e-05, "loss": 2.0973, "step": 13240 }, { "epoch": 2.06, "learning_rate": 9.445977609997396e-05, "loss": 2.1281, "step": 13260 }, { "epoch": 2.06, "learning_rate": 9.414735745899505e-05, "loss": 2.1216, "step": 13280 }, { "epoch": 2.07, "learning_rate": 9.383493881801614e-05, "loss": 2.1323, "step": 13300 }, { "epoch": 2.07, "learning_rate": 9.352252017703723e-05, "loss": 2.1477, "step": 13320 }, { "epoch": 2.07, "learning_rate": 9.321010153605831e-05, "loss": 2.1309, "step": 13340 }, { "epoch": 2.08, "learning_rate": 9.28976828950794e-05, "loss": 2.0899, "step": 13360 }, { "epoch": 2.08, "learning_rate": 9.258526425410049e-05, "loss": 2.1402, "step": 13380 }, { "epoch": 2.08, "learning_rate": 9.227284561312157e-05, "loss": 2.0768, "step": 13400 }, { "epoch": 2.08, "eval_loss": 2.3376858234405518, "eval_runtime": 69.4568, "eval_samples_per_second": 28.795, "eval_steps_per_second": 1.8, "step": 13400 }, { "epoch": 2.09, "learning_rate": 9.196042697214267e-05, "loss": 2.1405, "step": 13420 }, { "epoch": 2.09, "learning_rate": 9.164800833116375e-05, "loss": 2.1118, "step": 13440 }, { "epoch": 2.09, "learning_rate": 9.133558969018484e-05, "loss": 2.1525, "step": 13460 }, { "epoch": 2.09, "learning_rate": 9.102317104920593e-05, "loss": 2.1369, "step": 13480 }, { "epoch": 2.1, "learning_rate": 9.071075240822702e-05, "loss": 2.1683, "step": 13500 }, { "epoch": 2.1, "learning_rate": 9.03983337672481e-05, "loss": 2.1193, "step": 13520 }, { "epoch": 2.1, "learning_rate": 9.00859151262692e-05, "loss": 2.1222, "step": 13540 }, { "epoch": 2.11, "learning_rate": 8.977349648529028e-05, "loss": 2.1461, "step": 13560 }, { "epoch": 2.11, "learning_rate": 8.946107784431136e-05, "loss": 2.1106, "step": 13580 }, { "epoch": 2.11, "learning_rate": 8.914865920333246e-05, "loss": 2.1307, "step": 13600 }, { "epoch": 2.11, "eval_loss": 2.3381118774414062, "eval_runtime": 69.5609, "eval_samples_per_second": 28.752, "eval_steps_per_second": 1.797, "step": 13600 }, { "epoch": 2.12, "learning_rate": 8.883624056235354e-05, "loss": 2.1679, "step": 13620 }, { "epoch": 2.12, "learning_rate": 8.852382192137464e-05, "loss": 2.1418, "step": 13640 }, { "epoch": 2.12, "learning_rate": 8.821140328039572e-05, "loss": 2.1238, "step": 13660 }, { "epoch": 2.13, "learning_rate": 8.789898463941681e-05, "loss": 2.0995, "step": 13680 }, { "epoch": 2.13, "learning_rate": 8.75865659984379e-05, "loss": 2.1596, "step": 13700 }, { "epoch": 2.13, "learning_rate": 8.727414735745899e-05, "loss": 2.1478, "step": 13720 }, { "epoch": 2.14, "learning_rate": 8.696172871648007e-05, "loss": 2.1299, "step": 13740 }, { "epoch": 2.14, "learning_rate": 8.664931007550115e-05, "loss": 2.1405, "step": 13760 }, { "epoch": 2.14, "learning_rate": 8.633689143452225e-05, "loss": 2.174, "step": 13780 }, { "epoch": 2.14, "learning_rate": 8.602447279354333e-05, "loss": 2.129, "step": 13800 }, { "epoch": 2.14, "eval_loss": 2.337769031524658, "eval_runtime": 69.7472, "eval_samples_per_second": 28.675, "eval_steps_per_second": 1.792, "step": 13800 }, { "epoch": 2.15, "learning_rate": 8.571205415256443e-05, "loss": 2.1368, "step": 13820 }, { "epoch": 2.15, "learning_rate": 8.539963551158551e-05, "loss": 2.1573, "step": 13840 }, { "epoch": 2.15, "learning_rate": 8.50872168706066e-05, "loss": 2.1132, "step": 13860 }, { "epoch": 2.16, "learning_rate": 8.477479822962769e-05, "loss": 2.1131, "step": 13880 }, { "epoch": 2.16, "learning_rate": 8.446237958864878e-05, "loss": 2.1351, "step": 13900 }, { "epoch": 2.16, "learning_rate": 8.414996094766986e-05, "loss": 2.1738, "step": 13920 }, { "epoch": 2.17, "learning_rate": 8.383754230669096e-05, "loss": 2.1551, "step": 13940 }, { "epoch": 2.17, "learning_rate": 8.352512366571204e-05, "loss": 2.1195, "step": 13960 }, { "epoch": 2.17, "learning_rate": 8.321270502473312e-05, "loss": 2.1125, "step": 13980 }, { "epoch": 2.18, "learning_rate": 8.290028638375422e-05, "loss": 2.1549, "step": 14000 }, { "epoch": 2.18, "eval_loss": 2.337301731109619, "eval_runtime": 69.7462, "eval_samples_per_second": 28.675, "eval_steps_per_second": 1.792, "step": 14000 }, { "epoch": 2.18, "learning_rate": 8.25878677427753e-05, "loss": 2.1573, "step": 14020 }, { "epoch": 2.18, "learning_rate": 8.22754491017964e-05, "loss": 2.1125, "step": 14040 }, { "epoch": 2.18, "learning_rate": 8.196303046081748e-05, "loss": 2.161, "step": 14060 }, { "epoch": 2.19, "learning_rate": 8.165061181983857e-05, "loss": 2.1511, "step": 14080 }, { "epoch": 2.19, "learning_rate": 8.133819317885967e-05, "loss": 2.1737, "step": 14100 }, { "epoch": 2.19, "learning_rate": 8.102577453788076e-05, "loss": 2.1158, "step": 14120 }, { "epoch": 2.2, "learning_rate": 8.071335589690184e-05, "loss": 2.1398, "step": 14140 }, { "epoch": 2.2, "learning_rate": 8.040093725592294e-05, "loss": 2.1183, "step": 14160 }, { "epoch": 2.2, "learning_rate": 8.008851861494402e-05, "loss": 2.1295, "step": 14180 }, { "epoch": 2.21, "learning_rate": 7.977609997396512e-05, "loss": 2.1416, "step": 14200 }, { "epoch": 2.21, "eval_loss": 2.336796760559082, "eval_runtime": 69.3578, "eval_samples_per_second": 28.836, "eval_steps_per_second": 1.802, "step": 14200 }, { "epoch": 2.21, "learning_rate": 7.94636813329862e-05, "loss": 2.1461, "step": 14220 }, { "epoch": 2.21, "learning_rate": 7.91512626920073e-05, "loss": 2.0931, "step": 14240 }, { "epoch": 2.22, "learning_rate": 7.883884405102838e-05, "loss": 2.1341, "step": 14260 }, { "epoch": 2.22, "learning_rate": 7.852642541004946e-05, "loss": 2.1369, "step": 14280 }, { "epoch": 2.22, "learning_rate": 7.821400676907055e-05, "loss": 2.1431, "step": 14300 }, { "epoch": 2.23, "learning_rate": 7.790158812809164e-05, "loss": 2.1508, "step": 14320 }, { "epoch": 2.23, "learning_rate": 7.758916948711273e-05, "loss": 2.1456, "step": 14340 }, { "epoch": 2.23, "learning_rate": 7.727675084613381e-05, "loss": 2.1448, "step": 14360 }, { "epoch": 2.23, "learning_rate": 7.696433220515491e-05, "loss": 2.1637, "step": 14380 }, { "epoch": 2.24, "learning_rate": 7.665191356417599e-05, "loss": 2.114, "step": 14400 }, { "epoch": 2.24, "eval_loss": 2.3362655639648438, "eval_runtime": 69.5792, "eval_samples_per_second": 28.744, "eval_steps_per_second": 1.797, "step": 14400 }, { "epoch": 2.24, "learning_rate": 7.633949492319709e-05, "loss": 2.1222, "step": 14420 }, { "epoch": 2.24, "learning_rate": 7.602707628221817e-05, "loss": 2.1776, "step": 14440 }, { "epoch": 2.25, "learning_rate": 7.57302785732882e-05, "loss": 2.1414, "step": 14460 }, { "epoch": 2.25, "learning_rate": 7.541785993230929e-05, "loss": 2.1231, "step": 14480 }, { "epoch": 2.25, "learning_rate": 7.510544129133038e-05, "loss": 2.1345, "step": 14500 }, { "epoch": 2.26, "learning_rate": 7.479302265035147e-05, "loss": 2.1339, "step": 14520 }, { "epoch": 2.26, "learning_rate": 7.448060400937255e-05, "loss": 2.1562, "step": 14540 }, { "epoch": 2.26, "learning_rate": 7.416818536839363e-05, "loss": 2.1649, "step": 14560 }, { "epoch": 2.27, "learning_rate": 7.385576672741473e-05, "loss": 2.1339, "step": 14580 }, { "epoch": 2.27, "learning_rate": 7.354334808643581e-05, "loss": 2.1347, "step": 14600 }, { "epoch": 2.27, "eval_loss": 2.335818290710449, "eval_runtime": 69.5131, "eval_samples_per_second": 28.772, "eval_steps_per_second": 1.798, "step": 14600 }, { "epoch": 2.27, "learning_rate": 7.323092944545691e-05, "loss": 2.1078, "step": 14620 }, { "epoch": 2.28, "learning_rate": 7.291851080447799e-05, "loss": 2.1446, "step": 14640 }, { "epoch": 2.28, "learning_rate": 7.260609216349908e-05, "loss": 2.1076, "step": 14660 }, { "epoch": 2.28, "learning_rate": 7.229367352252017e-05, "loss": 2.1548, "step": 14680 }, { "epoch": 2.28, "learning_rate": 7.198125488154126e-05, "loss": 2.1317, "step": 14700 }, { "epoch": 2.29, "learning_rate": 7.166883624056234e-05, "loss": 2.0991, "step": 14720 }, { "epoch": 2.29, "learning_rate": 7.135641759958343e-05, "loss": 2.1507, "step": 14740 }, { "epoch": 2.29, "learning_rate": 7.104399895860452e-05, "loss": 2.1173, "step": 14760 }, { "epoch": 2.3, "learning_rate": 7.073158031762562e-05, "loss": 2.104, "step": 14780 }, { "epoch": 2.3, "learning_rate": 7.043478260869565e-05, "loss": 2.1118, "step": 14800 }, { "epoch": 2.3, "eval_loss": 2.334048271179199, "eval_runtime": 69.3816, "eval_samples_per_second": 28.826, "eval_steps_per_second": 1.802, "step": 14800 }, { "epoch": 2.3, "learning_rate": 7.012236396771674e-05, "loss": 2.0738, "step": 14820 }, { "epoch": 2.31, "learning_rate": 6.980994532673782e-05, "loss": 2.1221, "step": 14840 }, { "epoch": 2.31, "learning_rate": 6.94975266857589e-05, "loss": 2.1531, "step": 14860 }, { "epoch": 2.31, "learning_rate": 6.918510804478e-05, "loss": 2.1318, "step": 14880 }, { "epoch": 2.32, "learning_rate": 6.887268940380108e-05, "loss": 2.1251, "step": 14900 }, { "epoch": 2.32, "learning_rate": 6.856027076282218e-05, "loss": 2.1212, "step": 14920 }, { "epoch": 2.32, "learning_rate": 6.824785212184326e-05, "loss": 2.0927, "step": 14940 }, { "epoch": 2.32, "learning_rate": 6.793543348086436e-05, "loss": 2.1277, "step": 14960 }, { "epoch": 2.33, "learning_rate": 6.762301483988544e-05, "loss": 2.156, "step": 14980 }, { "epoch": 2.33, "learning_rate": 6.731059619890653e-05, "loss": 2.1276, "step": 15000 }, { "epoch": 2.33, "eval_loss": 2.3340351581573486, "eval_runtime": 69.2926, "eval_samples_per_second": 28.863, "eval_steps_per_second": 1.804, "step": 15000 }, { "epoch": 2.33, "learning_rate": 6.699817755792761e-05, "loss": 2.1313, "step": 15020 }, { "epoch": 2.34, "learning_rate": 6.668575891694871e-05, "loss": 2.1452, "step": 15040 }, { "epoch": 2.34, "learning_rate": 6.637334027596979e-05, "loss": 2.1148, "step": 15060 }, { "epoch": 2.34, "learning_rate": 6.606092163499087e-05, "loss": 2.1193, "step": 15080 }, { "epoch": 2.35, "learning_rate": 6.574850299401197e-05, "loss": 2.1672, "step": 15100 }, { "epoch": 2.35, "learning_rate": 6.543608435303305e-05, "loss": 2.0789, "step": 15120 }, { "epoch": 2.35, "learning_rate": 6.512366571205415e-05, "loss": 2.1438, "step": 15140 }, { "epoch": 2.36, "learning_rate": 6.481124707107523e-05, "loss": 2.1597, "step": 15160 }, { "epoch": 2.36, "learning_rate": 6.449882843009632e-05, "loss": 2.11, "step": 15180 }, { "epoch": 2.36, "learning_rate": 6.418640978911742e-05, "loss": 2.1279, "step": 15200 }, { "epoch": 2.36, "eval_loss": 2.3344008922576904, "eval_runtime": 69.3363, "eval_samples_per_second": 28.845, "eval_steps_per_second": 1.803, "step": 15200 }, { "epoch": 2.37, "learning_rate": 6.38739911481385e-05, "loss": 2.1459, "step": 15220 }, { "epoch": 2.37, "learning_rate": 6.35615725071596e-05, "loss": 2.1702, "step": 15240 }, { "epoch": 2.37, "learning_rate": 6.324915386618068e-05, "loss": 2.1262, "step": 15260 }, { "epoch": 2.37, "learning_rate": 6.293673522520177e-05, "loss": 2.0988, "step": 15280 }, { "epoch": 2.38, "learning_rate": 6.262431658422286e-05, "loss": 2.1224, "step": 15300 }, { "epoch": 2.38, "learning_rate": 6.231189794324394e-05, "loss": 2.1102, "step": 15320 }, { "epoch": 2.38, "learning_rate": 6.199947930226503e-05, "loss": 2.1168, "step": 15340 }, { "epoch": 2.39, "learning_rate": 6.168706066128611e-05, "loss": 2.1205, "step": 15360 }, { "epoch": 2.39, "learning_rate": 6.137464202030721e-05, "loss": 2.0855, "step": 15380 }, { "epoch": 2.39, "learning_rate": 6.106222337932829e-05, "loss": 2.1548, "step": 15400 }, { "epoch": 2.39, "eval_loss": 2.333451271057129, "eval_runtime": 69.3334, "eval_samples_per_second": 28.846, "eval_steps_per_second": 1.803, "step": 15400 }, { "epoch": 2.4, "learning_rate": 6.074980473834938e-05, "loss": 2.1433, "step": 15420 }, { "epoch": 2.4, "learning_rate": 6.043738609737047e-05, "loss": 2.123, "step": 15440 }, { "epoch": 2.4, "learning_rate": 6.012496745639156e-05, "loss": 2.0965, "step": 15460 }, { "epoch": 2.41, "learning_rate": 5.9812548815412647e-05, "loss": 2.1498, "step": 15480 }, { "epoch": 2.41, "learning_rate": 5.9500130174433735e-05, "loss": 2.1456, "step": 15500 }, { "epoch": 2.41, "learning_rate": 5.9187711533454824e-05, "loss": 2.1295, "step": 15520 }, { "epoch": 2.41, "learning_rate": 5.887529289247591e-05, "loss": 2.108, "step": 15540 }, { "epoch": 2.42, "learning_rate": 5.8562874251497e-05, "loss": 2.1592, "step": 15560 }, { "epoch": 2.42, "learning_rate": 5.825045561051809e-05, "loss": 2.1214, "step": 15580 }, { "epoch": 2.42, "learning_rate": 5.793803696953918e-05, "loss": 2.1561, "step": 15600 }, { "epoch": 2.42, "eval_loss": 2.3329403400421143, "eval_runtime": 69.6034, "eval_samples_per_second": 28.734, "eval_steps_per_second": 1.796, "step": 15600 }, { "epoch": 2.43, "learning_rate": 5.762561832856026e-05, "loss": 2.1382, "step": 15620 }, { "epoch": 2.43, "learning_rate": 5.731319968758135e-05, "loss": 2.109, "step": 15640 }, { "epoch": 2.43, "learning_rate": 5.700078104660244e-05, "loss": 2.1283, "step": 15660 }, { "epoch": 2.44, "learning_rate": 5.6688362405623526e-05, "loss": 2.15, "step": 15680 }, { "epoch": 2.44, "learning_rate": 5.6375943764644615e-05, "loss": 2.1125, "step": 15700 }, { "epoch": 2.44, "learning_rate": 5.6063525123665704e-05, "loss": 2.1709, "step": 15720 }, { "epoch": 2.45, "learning_rate": 5.575110648268679e-05, "loss": 2.1622, "step": 15740 }, { "epoch": 2.45, "learning_rate": 5.543868784170789e-05, "loss": 2.0769, "step": 15760 }, { "epoch": 2.45, "learning_rate": 5.5126269200728976e-05, "loss": 2.137, "step": 15780 }, { "epoch": 2.46, "learning_rate": 5.4813850559750065e-05, "loss": 2.1294, "step": 15800 }, { "epoch": 2.46, "eval_loss": 2.3324475288391113, "eval_runtime": 69.559, "eval_samples_per_second": 28.753, "eval_steps_per_second": 1.797, "step": 15800 }, { "epoch": 2.46, "learning_rate": 5.4501431918771154e-05, "loss": 2.1425, "step": 15820 }, { "epoch": 2.46, "learning_rate": 5.418901327779224e-05, "loss": 2.128, "step": 15840 }, { "epoch": 2.46, "learning_rate": 5.387659463681333e-05, "loss": 2.1553, "step": 15860 }, { "epoch": 2.47, "learning_rate": 5.356417599583441e-05, "loss": 2.1339, "step": 15880 }, { "epoch": 2.47, "learning_rate": 5.32517573548555e-05, "loss": 2.1536, "step": 15900 }, { "epoch": 2.47, "learning_rate": 5.293933871387659e-05, "loss": 2.1669, "step": 15920 }, { "epoch": 2.48, "learning_rate": 5.262692007289768e-05, "loss": 2.122, "step": 15940 }, { "epoch": 2.48, "learning_rate": 5.231450143191877e-05, "loss": 2.1435, "step": 15960 }, { "epoch": 2.48, "learning_rate": 5.2002082790939856e-05, "loss": 2.1406, "step": 15980 }, { "epoch": 2.49, "learning_rate": 5.1689664149960945e-05, "loss": 2.1174, "step": 16000 }, { "epoch": 2.49, "eval_loss": 2.332836866378784, "eval_runtime": 69.3739, "eval_samples_per_second": 28.829, "eval_steps_per_second": 1.802, "step": 16000 }, { "epoch": 2.49, "learning_rate": 5.137724550898203e-05, "loss": 2.1286, "step": 16020 }, { "epoch": 2.49, "learning_rate": 5.106482686800312e-05, "loss": 2.1343, "step": 16040 }, { "epoch": 2.5, "learning_rate": 5.075240822702421e-05, "loss": 2.1134, "step": 16060 }, { "epoch": 2.5, "learning_rate": 5.043998958604529e-05, "loss": 2.1633, "step": 16080 }, { "epoch": 2.5, "learning_rate": 5.012757094506638e-05, "loss": 2.1473, "step": 16100 }, { "epoch": 2.5, "learning_rate": 4.981515230408747e-05, "loss": 2.1535, "step": 16120 }, { "epoch": 2.51, "learning_rate": 4.950273366310856e-05, "loss": 2.112, "step": 16140 }, { "epoch": 2.51, "learning_rate": 4.919031502212965e-05, "loss": 2.1399, "step": 16160 }, { "epoch": 2.51, "learning_rate": 4.8877896381150736e-05, "loss": 2.0913, "step": 16180 }, { "epoch": 2.52, "learning_rate": 4.8565477740171824e-05, "loss": 2.1179, "step": 16200 }, { "epoch": 2.52, "eval_loss": 2.332409143447876, "eval_runtime": 69.3294, "eval_samples_per_second": 28.848, "eval_steps_per_second": 1.803, "step": 16200 }, { "epoch": 2.52, "learning_rate": 4.825305909919291e-05, "loss": 2.1756, "step": 16220 }, { "epoch": 2.52, "learning_rate": 4.7940640458214e-05, "loss": 2.1466, "step": 16240 }, { "epoch": 2.53, "learning_rate": 4.762822181723509e-05, "loss": 2.1443, "step": 16260 }, { "epoch": 2.53, "learning_rate": 4.731580317625618e-05, "loss": 2.1207, "step": 16280 }, { "epoch": 2.53, "learning_rate": 4.700338453527726e-05, "loss": 2.1275, "step": 16300 }, { "epoch": 2.54, "learning_rate": 4.669096589429835e-05, "loss": 2.1305, "step": 16320 }, { "epoch": 2.54, "learning_rate": 4.6378547253319445e-05, "loss": 2.134, "step": 16340 }, { "epoch": 2.54, "learning_rate": 4.6066128612340534e-05, "loss": 2.1681, "step": 16360 }, { "epoch": 2.55, "learning_rate": 4.575370997136162e-05, "loss": 2.1627, "step": 16380 }, { "epoch": 2.55, "learning_rate": 4.544129133038271e-05, "loss": 2.1421, "step": 16400 }, { "epoch": 2.55, "eval_loss": 2.3318614959716797, "eval_runtime": 69.3251, "eval_samples_per_second": 28.85, "eval_steps_per_second": 1.803, "step": 16400 }, { "epoch": 2.55, "learning_rate": 4.51288726894038e-05, "loss": 2.1225, "step": 16420 }, { "epoch": 2.55, "learning_rate": 4.481645404842489e-05, "loss": 2.156, "step": 16440 }, { "epoch": 2.56, "learning_rate": 4.450403540744598e-05, "loss": 2.1573, "step": 16460 }, { "epoch": 2.56, "learning_rate": 4.4191616766467066e-05, "loss": 2.1295, "step": 16480 }, { "epoch": 2.56, "learning_rate": 4.3879198125488154e-05, "loss": 2.14, "step": 16500 }, { "epoch": 2.57, "learning_rate": 4.356677948450924e-05, "loss": 2.1046, "step": 16520 }, { "epoch": 2.57, "learning_rate": 4.3254360843530325e-05, "loss": 2.1201, "step": 16540 }, { "epoch": 2.57, "learning_rate": 4.2941942202551413e-05, "loss": 2.1767, "step": 16560 }, { "epoch": 2.58, "learning_rate": 4.26295235615725e-05, "loss": 2.1244, "step": 16580 }, { "epoch": 2.58, "learning_rate": 4.231710492059359e-05, "loss": 2.1301, "step": 16600 }, { "epoch": 2.58, "eval_loss": 2.331899881362915, "eval_runtime": 69.3398, "eval_samples_per_second": 28.843, "eval_steps_per_second": 1.803, "step": 16600 }, { "epoch": 2.58, "learning_rate": 4.200468627961468e-05, "loss": 2.1022, "step": 16620 }, { "epoch": 2.59, "learning_rate": 4.169226763863577e-05, "loss": 2.1121, "step": 16640 }, { "epoch": 2.59, "learning_rate": 4.137984899765686e-05, "loss": 2.1014, "step": 16660 }, { "epoch": 2.59, "learning_rate": 4.1067430356677945e-05, "loss": 2.1867, "step": 16680 }, { "epoch": 2.6, "learning_rate": 4.0755011715699034e-05, "loss": 2.1055, "step": 16700 }, { "epoch": 2.6, "learning_rate": 4.044259307472012e-05, "loss": 2.1435, "step": 16720 }, { "epoch": 2.6, "learning_rate": 4.013017443374121e-05, "loss": 2.09, "step": 16740 }, { "epoch": 2.6, "learning_rate": 3.981775579276229e-05, "loss": 2.1317, "step": 16760 }, { "epoch": 2.61, "learning_rate": 3.950533715178338e-05, "loss": 2.0683, "step": 16780 }, { "epoch": 2.61, "learning_rate": 3.919291851080447e-05, "loss": 2.1249, "step": 16800 }, { "epoch": 2.61, "eval_loss": 2.331566572189331, "eval_runtime": 69.3154, "eval_samples_per_second": 28.854, "eval_steps_per_second": 1.803, "step": 16800 }, { "epoch": 2.61, "learning_rate": 3.888049986982556e-05, "loss": 2.164, "step": 16820 }, { "epoch": 2.62, "learning_rate": 3.856808122884665e-05, "loss": 2.16, "step": 16840 }, { "epoch": 2.62, "learning_rate": 3.8255662587867736e-05, "loss": 2.1603, "step": 16860 }, { "epoch": 2.62, "learning_rate": 3.7943243946888825e-05, "loss": 2.1346, "step": 16880 }, { "epoch": 2.63, "learning_rate": 3.7630825305909914e-05, "loss": 2.1082, "step": 16900 }, { "epoch": 2.63, "learning_rate": 3.7318406664931e-05, "loss": 2.1014, "step": 16920 }, { "epoch": 2.63, "learning_rate": 3.700598802395209e-05, "loss": 2.1088, "step": 16940 }, { "epoch": 2.64, "learning_rate": 3.669356938297318e-05, "loss": 2.0975, "step": 16960 }, { "epoch": 2.64, "learning_rate": 3.638115074199427e-05, "loss": 2.1212, "step": 16980 }, { "epoch": 2.64, "learning_rate": 3.606873210101536e-05, "loss": 2.1226, "step": 17000 }, { "epoch": 2.64, "eval_loss": 2.3310983180999756, "eval_runtime": 69.3945, "eval_samples_per_second": 28.821, "eval_steps_per_second": 1.801, "step": 17000 }, { "epoch": 2.64, "learning_rate": 3.5756313460036446e-05, "loss": 2.1318, "step": 17020 }, { "epoch": 2.65, "learning_rate": 3.5443894819057534e-05, "loss": 2.1073, "step": 17040 }, { "epoch": 2.65, "learning_rate": 3.513147617807862e-05, "loss": 2.1411, "step": 17060 }, { "epoch": 2.65, "learning_rate": 3.481905753709971e-05, "loss": 2.0959, "step": 17080 }, { "epoch": 2.66, "learning_rate": 3.45066388961208e-05, "loss": 2.0858, "step": 17100 }, { "epoch": 2.66, "learning_rate": 3.419422025514189e-05, "loss": 2.1174, "step": 17120 }, { "epoch": 2.66, "learning_rate": 3.388180161416298e-05, "loss": 2.1459, "step": 17140 }, { "epoch": 2.67, "learning_rate": 3.3569382973184066e-05, "loss": 2.1425, "step": 17160 }, { "epoch": 2.67, "learning_rate": 3.3256964332205155e-05, "loss": 2.0971, "step": 17180 }, { "epoch": 2.67, "learning_rate": 3.2944545691226243e-05, "loss": 2.1176, "step": 17200 }, { "epoch": 2.67, "eval_loss": 2.330962896347046, "eval_runtime": 69.3407, "eval_samples_per_second": 28.843, "eval_steps_per_second": 1.803, "step": 17200 }, { "epoch": 2.68, "learning_rate": 3.2632127050247325e-05, "loss": 2.1471, "step": 17220 }, { "epoch": 2.68, "learning_rate": 3.2319708409268414e-05, "loss": 2.1064, "step": 17240 }, { "epoch": 2.68, "learning_rate": 3.20072897682895e-05, "loss": 2.1347, "step": 17260 }, { "epoch": 2.69, "learning_rate": 3.169487112731059e-05, "loss": 2.142, "step": 17280 }, { "epoch": 2.69, "learning_rate": 3.138245248633168e-05, "loss": 2.1773, "step": 17300 }, { "epoch": 2.69, "learning_rate": 3.107003384535277e-05, "loss": 2.1489, "step": 17320 }, { "epoch": 2.69, "learning_rate": 3.075761520437386e-05, "loss": 2.1257, "step": 17340 }, { "epoch": 2.7, "learning_rate": 3.044519656339495e-05, "loss": 2.1288, "step": 17360 }, { "epoch": 2.7, "learning_rate": 3.0132777922416038e-05, "loss": 2.1258, "step": 17380 }, { "epoch": 2.7, "learning_rate": 2.9820359281437123e-05, "loss": 2.1322, "step": 17400 }, { "epoch": 2.7, "eval_loss": 2.3309593200683594, "eval_runtime": 69.3923, "eval_samples_per_second": 28.822, "eval_steps_per_second": 1.801, "step": 17400 }, { "epoch": 2.71, "learning_rate": 2.9507940640458212e-05, "loss": 2.1495, "step": 17420 }, { "epoch": 2.71, "learning_rate": 2.91955219994793e-05, "loss": 2.0843, "step": 17440 }, { "epoch": 2.71, "learning_rate": 2.888310335850039e-05, "loss": 2.11, "step": 17460 }, { "epoch": 2.72, "learning_rate": 2.8570684717521478e-05, "loss": 2.1005, "step": 17480 }, { "epoch": 2.72, "learning_rate": 2.827388700859151e-05, "loss": 2.1302, "step": 17500 }, { "epoch": 2.72, "learning_rate": 2.79614683676126e-05, "loss": 2.1086, "step": 17520 }, { "epoch": 2.73, "learning_rate": 2.7649049726633688e-05, "loss": 2.1302, "step": 17540 }, { "epoch": 2.73, "learning_rate": 2.7336631085654777e-05, "loss": 2.1417, "step": 17560 }, { "epoch": 2.73, "learning_rate": 2.7024212444675862e-05, "loss": 2.1369, "step": 17580 }, { "epoch": 2.73, "learning_rate": 2.671179380369695e-05, "loss": 2.1384, "step": 17600 }, { "epoch": 2.73, "eval_loss": 2.33089017868042, "eval_runtime": 69.3747, "eval_samples_per_second": 28.829, "eval_steps_per_second": 1.802, "step": 17600 }, { "epoch": 2.74, "learning_rate": 2.639937516271804e-05, "loss": 2.1243, "step": 17620 }, { "epoch": 2.74, "learning_rate": 2.6086956521739128e-05, "loss": 2.1161, "step": 17640 }, { "epoch": 2.74, "learning_rate": 2.5774537880760217e-05, "loss": 2.1051, "step": 17660 }, { "epoch": 2.75, "learning_rate": 2.5462119239781302e-05, "loss": 2.0762, "step": 17680 }, { "epoch": 2.75, "learning_rate": 2.514970059880239e-05, "loss": 2.1105, "step": 17700 }, { "epoch": 2.75, "learning_rate": 2.483728195782348e-05, "loss": 2.1535, "step": 17720 }, { "epoch": 2.76, "learning_rate": 2.452486331684457e-05, "loss": 2.1706, "step": 17740 }, { "epoch": 2.76, "learning_rate": 2.421244467586566e-05, "loss": 2.0857, "step": 17760 }, { "epoch": 2.76, "learning_rate": 2.390002603488675e-05, "loss": 2.1553, "step": 17780 }, { "epoch": 2.77, "learning_rate": 2.3587607393907834e-05, "loss": 2.0983, "step": 17800 }, { "epoch": 2.77, "eval_loss": 2.3304569721221924, "eval_runtime": 69.35, "eval_samples_per_second": 28.839, "eval_steps_per_second": 1.802, "step": 17800 }, { "epoch": 2.77, "learning_rate": 2.3275188752928923e-05, "loss": 2.1212, "step": 17820 }, { "epoch": 2.77, "learning_rate": 2.296277011195001e-05, "loss": 2.0816, "step": 17840 }, { "epoch": 2.78, "learning_rate": 2.26503514709711e-05, "loss": 2.0935, "step": 17860 }, { "epoch": 2.78, "learning_rate": 2.233793282999219e-05, "loss": 2.1576, "step": 17880 }, { "epoch": 2.78, "learning_rate": 2.2025514189013274e-05, "loss": 2.1076, "step": 17900 }, { "epoch": 2.78, "learning_rate": 2.1713095548034362e-05, "loss": 2.1184, "step": 17920 }, { "epoch": 2.79, "learning_rate": 2.140067690705545e-05, "loss": 2.1169, "step": 17940 }, { "epoch": 2.79, "learning_rate": 2.108825826607654e-05, "loss": 2.1442, "step": 17960 }, { "epoch": 2.79, "learning_rate": 2.077583962509763e-05, "loss": 2.1332, "step": 17980 }, { "epoch": 2.8, "learning_rate": 2.0463420984118717e-05, "loss": 2.1553, "step": 18000 }, { "epoch": 2.8, "eval_loss": 2.330599069595337, "eval_runtime": 69.346, "eval_samples_per_second": 28.841, "eval_steps_per_second": 1.803, "step": 18000 }, { "epoch": 2.8, "learning_rate": 2.0151002343139802e-05, "loss": 2.1055, "step": 18020 }, { "epoch": 2.8, "learning_rate": 1.9838583702160894e-05, "loss": 2.0778, "step": 18040 }, { "epoch": 2.81, "learning_rate": 1.9526165061181983e-05, "loss": 2.143, "step": 18060 }, { "epoch": 2.81, "learning_rate": 1.921374642020307e-05, "loss": 2.0886, "step": 18080 }, { "epoch": 2.81, "learning_rate": 1.890132777922416e-05, "loss": 2.1236, "step": 18100 }, { "epoch": 2.82, "learning_rate": 1.858890913824525e-05, "loss": 2.1307, "step": 18120 }, { "epoch": 2.82, "learning_rate": 1.8276490497266334e-05, "loss": 2.1192, "step": 18140 }, { "epoch": 2.82, "learning_rate": 1.7964071856287423e-05, "loss": 2.0999, "step": 18160 }, { "epoch": 2.83, "learning_rate": 1.765165321530851e-05, "loss": 2.0792, "step": 18180 }, { "epoch": 2.83, "learning_rate": 1.73392345743296e-05, "loss": 2.1015, "step": 18200 }, { "epoch": 2.83, "eval_loss": 2.330050230026245, "eval_runtime": 69.3278, "eval_samples_per_second": 28.848, "eval_steps_per_second": 1.803, "step": 18200 }, { "epoch": 2.83, "learning_rate": 1.702681593335069e-05, "loss": 2.1226, "step": 18220 }, { "epoch": 2.83, "learning_rate": 1.6714397292371778e-05, "loss": 2.0924, "step": 18240 }, { "epoch": 2.84, "learning_rate": 1.6401978651392866e-05, "loss": 2.1272, "step": 18260 }, { "epoch": 2.84, "learning_rate": 1.6089560010413955e-05, "loss": 2.1175, "step": 18280 }, { "epoch": 2.84, "learning_rate": 1.577714136943504e-05, "loss": 2.1396, "step": 18300 }, { "epoch": 2.85, "learning_rate": 1.546472272845613e-05, "loss": 2.1514, "step": 18320 }, { "epoch": 2.85, "learning_rate": 1.5152304087477217e-05, "loss": 2.1257, "step": 18340 }, { "epoch": 2.85, "learning_rate": 1.4839885446498306e-05, "loss": 2.1459, "step": 18360 }, { "epoch": 2.86, "learning_rate": 1.4527466805519396e-05, "loss": 2.09, "step": 18380 }, { "epoch": 2.86, "learning_rate": 1.4215048164540483e-05, "loss": 2.1442, "step": 18400 }, { "epoch": 2.86, "eval_loss": 2.330048084259033, "eval_runtime": 69.2975, "eval_samples_per_second": 28.861, "eval_steps_per_second": 1.804, "step": 18400 }, { "epoch": 2.86, "learning_rate": 1.3902629523561572e-05, "loss": 2.1816, "step": 18420 }, { "epoch": 2.87, "learning_rate": 1.3590210882582659e-05, "loss": 2.0965, "step": 18440 }, { "epoch": 2.87, "learning_rate": 1.3277792241603748e-05, "loss": 2.1178, "step": 18460 }, { "epoch": 2.87, "learning_rate": 1.2965373600624836e-05, "loss": 2.1562, "step": 18480 }, { "epoch": 2.87, "learning_rate": 1.2652954959645923e-05, "loss": 2.095, "step": 18500 }, { "epoch": 2.88, "learning_rate": 1.2340536318667012e-05, "loss": 2.1522, "step": 18520 }, { "epoch": 2.88, "learning_rate": 1.2028117677688102e-05, "loss": 2.1729, "step": 18540 }, { "epoch": 2.88, "learning_rate": 1.1715699036709189e-05, "loss": 2.141, "step": 18560 }, { "epoch": 2.89, "learning_rate": 1.1403280395730278e-05, "loss": 2.148, "step": 18580 }, { "epoch": 2.89, "learning_rate": 1.1090861754751366e-05, "loss": 2.1619, "step": 18600 }, { "epoch": 2.89, "eval_loss": 2.329728603363037, "eval_runtime": 69.3412, "eval_samples_per_second": 28.843, "eval_steps_per_second": 1.803, "step": 18600 }, { "epoch": 2.89, "learning_rate": 1.0778443113772453e-05, "loss": 2.1199, "step": 18620 }, { "epoch": 2.9, "learning_rate": 1.0466024472793542e-05, "loss": 2.131, "step": 18640 }, { "epoch": 2.9, "learning_rate": 1.0153605831814629e-05, "loss": 2.1512, "step": 18660 }, { "epoch": 2.9, "learning_rate": 9.84118719083572e-06, "loss": 2.1292, "step": 18680 }, { "epoch": 2.91, "learning_rate": 9.528768549856808e-06, "loss": 2.0928, "step": 18700 }, { "epoch": 2.91, "learning_rate": 9.216349908877897e-06, "loss": 2.1168, "step": 18720 }, { "epoch": 2.91, "learning_rate": 8.903931267898984e-06, "loss": 2.1316, "step": 18740 }, { "epoch": 2.92, "learning_rate": 8.591512626920072e-06, "loss": 2.1198, "step": 18760 }, { "epoch": 2.92, "learning_rate": 8.279093985941161e-06, "loss": 2.1226, "step": 18780 }, { "epoch": 2.92, "learning_rate": 7.96667534496225e-06, "loss": 2.1234, "step": 18800 }, { "epoch": 2.92, "eval_loss": 2.3294034004211426, "eval_runtime": 69.3303, "eval_samples_per_second": 28.847, "eval_steps_per_second": 1.803, "step": 18800 }, { "epoch": 2.92, "learning_rate": 7.654256703983337e-06, "loss": 2.1251, "step": 18820 }, { "epoch": 2.93, "learning_rate": 7.341838063004425e-06, "loss": 2.1278, "step": 18840 }, { "epoch": 2.93, "learning_rate": 7.029419422025514e-06, "loss": 2.1115, "step": 18860 }, { "epoch": 2.93, "learning_rate": 6.717000781046602e-06, "loss": 2.1468, "step": 18880 }, { "epoch": 2.94, "learning_rate": 6.4045821400676894e-06, "loss": 2.0903, "step": 18900 }, { "epoch": 2.94, "learning_rate": 6.092163499088779e-06, "loss": 2.1271, "step": 18920 }, { "epoch": 2.94, "learning_rate": 5.779744858109867e-06, "loss": 2.1253, "step": 18940 }, { "epoch": 2.95, "learning_rate": 5.4673262171309545e-06, "loss": 2.0903, "step": 18960 }, { "epoch": 2.95, "learning_rate": 5.154907576152043e-06, "loss": 2.1566, "step": 18980 }, { "epoch": 2.95, "learning_rate": 4.842488935173132e-06, "loss": 2.1477, "step": 19000 }, { "epoch": 2.95, "eval_loss": 2.3293075561523438, "eval_runtime": 69.6518, "eval_samples_per_second": 28.714, "eval_steps_per_second": 1.795, "step": 19000 } ], "max_steps": 19305, "num_train_epochs": 3, "total_flos": 5.3158443458154725e+19, "trial_name": null, "trial_params": null }