{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "global_step": 22840, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.9956217162872154e-05, "loss": 13.2845, "step": 50 }, { "epoch": 0.01, "learning_rate": 1.991243432574431e-05, "loss": 5.8932, "step": 100 }, { "epoch": 0.01, "learning_rate": 1.9868651488616462e-05, "loss": 1.2995, "step": 150 }, { "epoch": 0.02, "learning_rate": 1.9824868651488618e-05, "loss": 0.6063, "step": 200 }, { "epoch": 0.02, "learning_rate": 1.978108581436077e-05, "loss": 0.5446, "step": 250 }, { "epoch": 0.03, "learning_rate": 1.9737302977232926e-05, "loss": 0.469, "step": 300 }, { "epoch": 0.03, "learning_rate": 1.969352014010508e-05, "loss": 0.4239, "step": 350 }, { "epoch": 0.04, "learning_rate": 1.9649737302977235e-05, "loss": 0.3874, "step": 400 }, { "epoch": 0.04, "learning_rate": 1.9605954465849387e-05, "loss": 0.3522, "step": 450 }, { "epoch": 0.04, "learning_rate": 1.9562171628721543e-05, "loss": 0.3294, "step": 500 }, { "epoch": 0.05, "learning_rate": 1.9518388791593695e-05, "loss": 0.3258, "step": 550 }, { "epoch": 0.05, "learning_rate": 1.947460595446585e-05, "loss": 0.3032, "step": 600 }, { "epoch": 0.06, "learning_rate": 1.9430823117338004e-05, "loss": 0.3083, "step": 650 }, { "epoch": 0.06, "learning_rate": 1.938704028021016e-05, "loss": 0.298, "step": 700 }, { "epoch": 0.07, "learning_rate": 1.9343257443082312e-05, "loss": 0.2915, "step": 750 }, { "epoch": 0.07, "learning_rate": 1.9299474605954468e-05, "loss": 0.2927, "step": 800 }, { "epoch": 0.07, "learning_rate": 1.925569176882662e-05, "loss": 0.2854, "step": 850 }, { "epoch": 0.08, "learning_rate": 1.9211908931698776e-05, "loss": 0.2821, "step": 900 }, { "epoch": 0.08, "learning_rate": 1.916812609457093e-05, "loss": 0.2876, "step": 950 }, { "epoch": 0.09, "learning_rate": 1.9124343257443084e-05, "loss": 0.2695, "step": 1000 }, { "epoch": 0.09, "learning_rate": 1.9080560420315237e-05, "loss": 0.2801, "step": 1050 }, { "epoch": 0.1, "learning_rate": 1.9036777583187393e-05, "loss": 0.2772, "step": 1100 }, { "epoch": 0.1, "learning_rate": 1.8992994746059545e-05, "loss": 0.2702, "step": 1150 }, { "epoch": 0.11, "learning_rate": 1.89492119089317e-05, "loss": 0.277, "step": 1200 }, { "epoch": 0.11, "learning_rate": 1.8905429071803853e-05, "loss": 0.2749, "step": 1250 }, { "epoch": 0.11, "learning_rate": 1.886164623467601e-05, "loss": 0.2751, "step": 1300 }, { "epoch": 0.12, "learning_rate": 1.881786339754816e-05, "loss": 0.2747, "step": 1350 }, { "epoch": 0.12, "learning_rate": 1.8774080560420317e-05, "loss": 0.2768, "step": 1400 }, { "epoch": 0.13, "learning_rate": 1.873029772329247e-05, "loss": 0.269, "step": 1450 }, { "epoch": 0.13, "learning_rate": 1.8686514886164622e-05, "loss": 0.2711, "step": 1500 }, { "epoch": 0.14, "learning_rate": 1.8642732049036778e-05, "loss": 0.2669, "step": 1550 }, { "epoch": 0.14, "learning_rate": 1.8598949211908934e-05, "loss": 0.2617, "step": 1600 }, { "epoch": 0.14, "learning_rate": 1.855516637478109e-05, "loss": 0.269, "step": 1650 }, { "epoch": 0.15, "learning_rate": 1.8511383537653242e-05, "loss": 0.2629, "step": 1700 }, { "epoch": 0.15, "learning_rate": 1.8467600700525398e-05, "loss": 0.2606, "step": 1750 }, { "epoch": 0.16, "learning_rate": 1.842381786339755e-05, "loss": 0.2623, "step": 1800 }, { "epoch": 0.16, "learning_rate": 1.8380035026269706e-05, "loss": 0.2638, "step": 1850 }, { "epoch": 0.17, "learning_rate": 1.833625218914186e-05, "loss": 0.2617, "step": 1900 }, { "epoch": 0.17, "learning_rate": 1.829246935201401e-05, "loss": 0.2547, "step": 1950 }, { "epoch": 0.18, "learning_rate": 1.8248686514886167e-05, "loss": 0.2656, "step": 2000 }, { "epoch": 0.18, "learning_rate": 1.820490367775832e-05, "loss": 0.2635, "step": 2050 }, { "epoch": 0.18, "learning_rate": 1.8161120840630475e-05, "loss": 0.2598, "step": 2100 }, { "epoch": 0.19, "learning_rate": 1.8117338003502628e-05, "loss": 0.2927, "step": 2150 }, { "epoch": 0.19, "learning_rate": 1.8073555166374784e-05, "loss": 0.2635, "step": 2200 }, { "epoch": 0.2, "learning_rate": 1.8029772329246936e-05, "loss": 0.2532, "step": 2250 }, { "epoch": 0.2, "learning_rate": 1.7985989492119092e-05, "loss": 0.2665, "step": 2300 }, { "epoch": 0.21, "learning_rate": 1.7942206654991244e-05, "loss": 0.2518, "step": 2350 }, { "epoch": 0.21, "learning_rate": 1.78984238178634e-05, "loss": 0.2622, "step": 2400 }, { "epoch": 0.21, "learning_rate": 1.7854640980735553e-05, "loss": 0.2599, "step": 2450 }, { "epoch": 0.22, "learning_rate": 1.781085814360771e-05, "loss": 0.2571, "step": 2500 }, { "epoch": 0.22, "learning_rate": 1.776707530647986e-05, "loss": 0.2593, "step": 2550 }, { "epoch": 0.23, "learning_rate": 1.7723292469352017e-05, "loss": 0.2659, "step": 2600 }, { "epoch": 0.23, "learning_rate": 1.767950963222417e-05, "loss": 0.2544, "step": 2650 }, { "epoch": 0.24, "learning_rate": 1.7635726795096325e-05, "loss": 0.2523, "step": 2700 }, { "epoch": 0.24, "learning_rate": 1.7591943957968477e-05, "loss": 0.2618, "step": 2750 }, { "epoch": 0.25, "learning_rate": 1.7548161120840633e-05, "loss": 0.2539, "step": 2800 }, { "epoch": 0.25, "learning_rate": 1.7504378283712786e-05, "loss": 0.2617, "step": 2850 }, { "epoch": 0.25, "learning_rate": 1.746059544658494e-05, "loss": 0.2503, "step": 2900 }, { "epoch": 0.26, "learning_rate": 1.7416812609457094e-05, "loss": 0.2548, "step": 2950 }, { "epoch": 0.26, "learning_rate": 1.737302977232925e-05, "loss": 0.2547, "step": 3000 }, { "epoch": 0.27, "learning_rate": 1.7329246935201402e-05, "loss": 0.2516, "step": 3050 }, { "epoch": 0.27, "learning_rate": 1.7285464098073558e-05, "loss": 0.2525, "step": 3100 }, { "epoch": 0.28, "learning_rate": 1.724168126094571e-05, "loss": 0.2549, "step": 3150 }, { "epoch": 0.28, "learning_rate": 1.7197898423817866e-05, "loss": 0.2495, "step": 3200 }, { "epoch": 0.28, "learning_rate": 1.715411558669002e-05, "loss": 0.262, "step": 3250 }, { "epoch": 0.29, "learning_rate": 1.7110332749562174e-05, "loss": 0.2557, "step": 3300 }, { "epoch": 0.29, "learning_rate": 1.7066549912434327e-05, "loss": 0.2493, "step": 3350 }, { "epoch": 0.3, "learning_rate": 1.7022767075306483e-05, "loss": 0.2561, "step": 3400 }, { "epoch": 0.3, "learning_rate": 1.6978984238178635e-05, "loss": 0.2473, "step": 3450 }, { "epoch": 0.31, "learning_rate": 1.6935201401050788e-05, "loss": 0.2507, "step": 3500 }, { "epoch": 0.31, "learning_rate": 1.6891418563922943e-05, "loss": 0.253, "step": 3550 }, { "epoch": 0.32, "learning_rate": 1.6847635726795096e-05, "loss": 0.2442, "step": 3600 }, { "epoch": 0.32, "learning_rate": 1.6803852889667252e-05, "loss": 0.2447, "step": 3650 }, { "epoch": 0.32, "learning_rate": 1.6760070052539404e-05, "loss": 0.2544, "step": 3700 }, { "epoch": 0.33, "learning_rate": 1.671628721541156e-05, "loss": 0.2554, "step": 3750 }, { "epoch": 0.33, "learning_rate": 1.6672504378283712e-05, "loss": 0.2571, "step": 3800 }, { "epoch": 0.34, "learning_rate": 1.6628721541155868e-05, "loss": 0.2503, "step": 3850 }, { "epoch": 0.34, "learning_rate": 1.658493870402802e-05, "loss": 0.2483, "step": 3900 }, { "epoch": 0.35, "learning_rate": 1.6541155866900177e-05, "loss": 0.2543, "step": 3950 }, { "epoch": 0.35, "learning_rate": 1.649737302977233e-05, "loss": 0.2366, "step": 4000 }, { "epoch": 0.35, "learning_rate": 1.6453590192644485e-05, "loss": 0.2476, "step": 4050 }, { "epoch": 0.36, "learning_rate": 1.6409807355516637e-05, "loss": 0.248, "step": 4100 }, { "epoch": 0.36, "learning_rate": 1.6366024518388793e-05, "loss": 0.2474, "step": 4150 }, { "epoch": 0.37, "learning_rate": 1.6322241681260946e-05, "loss": 0.2493, "step": 4200 }, { "epoch": 0.37, "learning_rate": 1.62784588441331e-05, "loss": 0.2383, "step": 4250 }, { "epoch": 0.38, "learning_rate": 1.6234676007005254e-05, "loss": 0.2493, "step": 4300 }, { "epoch": 0.38, "learning_rate": 1.619089316987741e-05, "loss": 0.251, "step": 4350 }, { "epoch": 0.39, "learning_rate": 1.6147110332749562e-05, "loss": 0.2558, "step": 4400 }, { "epoch": 0.39, "learning_rate": 1.6103327495621718e-05, "loss": 0.2448, "step": 4450 }, { "epoch": 0.39, "learning_rate": 1.605954465849387e-05, "loss": 0.2502, "step": 4500 }, { "epoch": 0.4, "learning_rate": 1.6015761821366026e-05, "loss": 0.2507, "step": 4550 }, { "epoch": 0.4, "learning_rate": 1.597197898423818e-05, "loss": 0.2468, "step": 4600 }, { "epoch": 0.41, "learning_rate": 1.5928196147110334e-05, "loss": 0.24, "step": 4650 }, { "epoch": 0.41, "learning_rate": 1.5884413309982487e-05, "loss": 0.2485, "step": 4700 }, { "epoch": 0.42, "learning_rate": 1.5840630472854643e-05, "loss": 0.2521, "step": 4750 }, { "epoch": 0.42, "learning_rate": 1.57968476357268e-05, "loss": 0.2504, "step": 4800 }, { "epoch": 0.42, "learning_rate": 1.575306479859895e-05, "loss": 0.2481, "step": 4850 }, { "epoch": 0.43, "learning_rate": 1.5709281961471107e-05, "loss": 0.2469, "step": 4900 }, { "epoch": 0.43, "learning_rate": 1.566549912434326e-05, "loss": 0.2446, "step": 4950 }, { "epoch": 0.44, "learning_rate": 1.5621716287215415e-05, "loss": 0.2494, "step": 5000 }, { "epoch": 0.44, "learning_rate": 1.5577933450087568e-05, "loss": 0.2442, "step": 5050 }, { "epoch": 0.45, "learning_rate": 1.5534150612959723e-05, "loss": 0.2458, "step": 5100 }, { "epoch": 0.45, "learning_rate": 1.5490367775831876e-05, "loss": 0.2579, "step": 5150 }, { "epoch": 0.46, "learning_rate": 1.544658493870403e-05, "loss": 0.2456, "step": 5200 }, { "epoch": 0.46, "learning_rate": 1.5402802101576184e-05, "loss": 0.2478, "step": 5250 }, { "epoch": 0.46, "learning_rate": 1.535901926444834e-05, "loss": 0.2491, "step": 5300 }, { "epoch": 0.47, "learning_rate": 1.5315236427320492e-05, "loss": 0.2434, "step": 5350 }, { "epoch": 0.47, "learning_rate": 1.5271453590192645e-05, "loss": 0.2438, "step": 5400 }, { "epoch": 0.48, "learning_rate": 1.52276707530648e-05, "loss": 0.2435, "step": 5450 }, { "epoch": 0.48, "learning_rate": 1.5183887915936955e-05, "loss": 0.2524, "step": 5500 }, { "epoch": 0.49, "learning_rate": 1.5140105078809109e-05, "loss": 0.2511, "step": 5550 }, { "epoch": 0.49, "learning_rate": 1.5096322241681263e-05, "loss": 0.2534, "step": 5600 }, { "epoch": 0.49, "learning_rate": 1.5052539404553417e-05, "loss": 0.2477, "step": 5650 }, { "epoch": 0.5, "learning_rate": 1.5008756567425571e-05, "loss": 0.255, "step": 5700 }, { "epoch": 0.5, "learning_rate": 1.4964973730297725e-05, "loss": 0.2446, "step": 5750 }, { "epoch": 0.51, "learning_rate": 1.492119089316988e-05, "loss": 0.2492, "step": 5800 }, { "epoch": 0.51, "learning_rate": 1.4877408056042034e-05, "loss": 0.25, "step": 5850 }, { "epoch": 0.52, "learning_rate": 1.4833625218914188e-05, "loss": 0.241, "step": 5900 }, { "epoch": 0.52, "learning_rate": 1.4789842381786342e-05, "loss": 0.2517, "step": 5950 }, { "epoch": 0.53, "learning_rate": 1.4746059544658496e-05, "loss": 0.2453, "step": 6000 }, { "epoch": 0.53, "learning_rate": 1.470227670753065e-05, "loss": 0.2485, "step": 6050 }, { "epoch": 0.53, "learning_rate": 1.4658493870402803e-05, "loss": 0.2494, "step": 6100 }, { "epoch": 0.54, "learning_rate": 1.4614711033274957e-05, "loss": 0.2471, "step": 6150 }, { "epoch": 0.54, "learning_rate": 1.4570928196147111e-05, "loss": 0.2508, "step": 6200 }, { "epoch": 0.55, "learning_rate": 1.4527145359019265e-05, "loss": 0.2423, "step": 6250 }, { "epoch": 0.55, "learning_rate": 1.448336252189142e-05, "loss": 0.2433, "step": 6300 }, { "epoch": 0.56, "learning_rate": 1.4439579684763573e-05, "loss": 0.2405, "step": 6350 }, { "epoch": 0.56, "learning_rate": 1.4395796847635727e-05, "loss": 0.2496, "step": 6400 }, { "epoch": 0.56, "learning_rate": 1.4352014010507882e-05, "loss": 0.2433, "step": 6450 }, { "epoch": 0.57, "learning_rate": 1.4308231173380036e-05, "loss": 0.2429, "step": 6500 }, { "epoch": 0.57, "learning_rate": 1.426444833625219e-05, "loss": 0.246, "step": 6550 }, { "epoch": 0.58, "learning_rate": 1.4220665499124344e-05, "loss": 0.2455, "step": 6600 }, { "epoch": 0.58, "learning_rate": 1.4176882661996498e-05, "loss": 0.2437, "step": 6650 }, { "epoch": 0.59, "learning_rate": 1.4133099824868652e-05, "loss": 0.2455, "step": 6700 }, { "epoch": 0.59, "learning_rate": 1.4089316987740806e-05, "loss": 0.2457, "step": 6750 }, { "epoch": 0.6, "learning_rate": 1.404553415061296e-05, "loss": 0.2304, "step": 6800 }, { "epoch": 0.6, "learning_rate": 1.4001751313485115e-05, "loss": 0.241, "step": 6850 }, { "epoch": 0.6, "learning_rate": 1.3957968476357269e-05, "loss": 0.2398, "step": 6900 }, { "epoch": 0.61, "learning_rate": 1.3914185639229423e-05, "loss": 0.2459, "step": 6950 }, { "epoch": 0.61, "learning_rate": 1.3870402802101577e-05, "loss": 0.2452, "step": 7000 }, { "epoch": 0.62, "learning_rate": 1.3826619964973731e-05, "loss": 0.2416, "step": 7050 }, { "epoch": 0.62, "learning_rate": 1.3782837127845885e-05, "loss": 0.2365, "step": 7100 }, { "epoch": 0.63, "learning_rate": 1.373905429071804e-05, "loss": 0.245, "step": 7150 }, { "epoch": 0.63, "learning_rate": 1.3695271453590194e-05, "loss": 0.2472, "step": 7200 }, { "epoch": 0.63, "learning_rate": 1.3651488616462348e-05, "loss": 0.2462, "step": 7250 }, { "epoch": 0.64, "learning_rate": 1.3607705779334502e-05, "loss": 0.2426, "step": 7300 }, { "epoch": 0.64, "learning_rate": 1.3563922942206656e-05, "loss": 0.2463, "step": 7350 }, { "epoch": 0.65, "learning_rate": 1.352014010507881e-05, "loss": 0.2416, "step": 7400 }, { "epoch": 0.65, "learning_rate": 1.3476357267950964e-05, "loss": 0.2476, "step": 7450 }, { "epoch": 0.66, "learning_rate": 1.3432574430823118e-05, "loss": 0.2424, "step": 7500 }, { "epoch": 0.66, "learning_rate": 1.3388791593695273e-05, "loss": 0.2354, "step": 7550 }, { "epoch": 0.67, "learning_rate": 1.3345008756567425e-05, "loss": 0.243, "step": 7600 }, { "epoch": 0.67, "learning_rate": 1.3301225919439579e-05, "loss": 0.2362, "step": 7650 }, { "epoch": 0.67, "learning_rate": 1.3257443082311733e-05, "loss": 0.2374, "step": 7700 }, { "epoch": 0.68, "learning_rate": 1.3213660245183887e-05, "loss": 0.2379, "step": 7750 }, { "epoch": 0.68, "learning_rate": 1.3169877408056041e-05, "loss": 0.2348, "step": 7800 }, { "epoch": 0.69, "learning_rate": 1.3126094570928196e-05, "loss": 0.2432, "step": 7850 }, { "epoch": 0.69, "learning_rate": 1.308231173380035e-05, "loss": 0.2381, "step": 7900 }, { "epoch": 0.7, "learning_rate": 1.3038528896672507e-05, "loss": 0.2472, "step": 7950 }, { "epoch": 0.7, "learning_rate": 1.2994746059544661e-05, "loss": 0.2409, "step": 8000 }, { "epoch": 0.7, "learning_rate": 1.2950963222416814e-05, "loss": 0.2448, "step": 8050 }, { "epoch": 0.71, "learning_rate": 1.2907180385288968e-05, "loss": 0.2421, "step": 8100 }, { "epoch": 0.71, "learning_rate": 1.2863397548161122e-05, "loss": 0.2392, "step": 8150 }, { "epoch": 0.72, "learning_rate": 1.2819614711033276e-05, "loss": 0.2432, "step": 8200 }, { "epoch": 0.72, "learning_rate": 1.277583187390543e-05, "loss": 0.2336, "step": 8250 }, { "epoch": 0.73, "learning_rate": 1.2732049036777585e-05, "loss": 0.2416, "step": 8300 }, { "epoch": 0.73, "learning_rate": 1.2688266199649739e-05, "loss": 0.236, "step": 8350 }, { "epoch": 0.74, "learning_rate": 1.2644483362521893e-05, "loss": 0.2387, "step": 8400 }, { "epoch": 0.74, "learning_rate": 1.2600700525394047e-05, "loss": 0.2398, "step": 8450 }, { "epoch": 0.74, "learning_rate": 1.2556917688266201e-05, "loss": 0.2413, "step": 8500 }, { "epoch": 0.75, "learning_rate": 1.2513134851138355e-05, "loss": 0.2445, "step": 8550 }, { "epoch": 0.75, "learning_rate": 1.246935201401051e-05, "loss": 0.2405, "step": 8600 }, { "epoch": 0.76, "learning_rate": 1.2425569176882663e-05, "loss": 0.2389, "step": 8650 }, { "epoch": 0.76, "learning_rate": 1.2381786339754818e-05, "loss": 0.2348, "step": 8700 }, { "epoch": 0.77, "learning_rate": 1.2338003502626972e-05, "loss": 0.2397, "step": 8750 }, { "epoch": 0.77, "learning_rate": 1.2294220665499126e-05, "loss": 0.233, "step": 8800 }, { "epoch": 0.77, "learning_rate": 1.225043782837128e-05, "loss": 0.2395, "step": 8850 }, { "epoch": 0.78, "learning_rate": 1.2206654991243434e-05, "loss": 0.2366, "step": 8900 }, { "epoch": 0.78, "learning_rate": 1.2162872154115588e-05, "loss": 0.234, "step": 8950 }, { "epoch": 0.79, "learning_rate": 1.2119089316987742e-05, "loss": 0.2391, "step": 9000 }, { "epoch": 0.79, "learning_rate": 1.2075306479859897e-05, "loss": 0.2382, "step": 9050 }, { "epoch": 0.8, "learning_rate": 1.203152364273205e-05, "loss": 0.231, "step": 9100 }, { "epoch": 0.8, "learning_rate": 1.1987740805604205e-05, "loss": 0.2383, "step": 9150 }, { "epoch": 0.81, "learning_rate": 1.1943957968476359e-05, "loss": 0.2443, "step": 9200 }, { "epoch": 0.81, "learning_rate": 1.1900175131348513e-05, "loss": 0.2446, "step": 9250 }, { "epoch": 0.81, "learning_rate": 1.1856392294220667e-05, "loss": 0.2404, "step": 9300 }, { "epoch": 0.82, "learning_rate": 1.1812609457092821e-05, "loss": 0.2371, "step": 9350 }, { "epoch": 0.82, "learning_rate": 1.1768826619964975e-05, "loss": 0.2427, "step": 9400 }, { "epoch": 0.83, "learning_rate": 1.172504378283713e-05, "loss": 0.2354, "step": 9450 }, { "epoch": 0.83, "learning_rate": 1.1681260945709284e-05, "loss": 0.2332, "step": 9500 }, { "epoch": 0.84, "learning_rate": 1.1637478108581436e-05, "loss": 0.2394, "step": 9550 }, { "epoch": 0.84, "learning_rate": 1.159369527145359e-05, "loss": 0.238, "step": 9600 }, { "epoch": 0.85, "learning_rate": 1.1549912434325744e-05, "loss": 0.2365, "step": 9650 }, { "epoch": 0.85, "learning_rate": 1.1506129597197899e-05, "loss": 0.235, "step": 9700 }, { "epoch": 0.85, "learning_rate": 1.1462346760070053e-05, "loss": 0.2374, "step": 9750 }, { "epoch": 0.86, "learning_rate": 1.1418563922942207e-05, "loss": 0.2291, "step": 9800 }, { "epoch": 0.86, "learning_rate": 1.1374781085814361e-05, "loss": 0.2404, "step": 9850 }, { "epoch": 0.87, "learning_rate": 1.1330998248686515e-05, "loss": 0.2373, "step": 9900 }, { "epoch": 0.87, "learning_rate": 1.128721541155867e-05, "loss": 0.2426, "step": 9950 }, { "epoch": 0.88, "learning_rate": 1.1243432574430823e-05, "loss": 0.2476, "step": 10000 }, { "epoch": 0.88, "learning_rate": 1.1199649737302978e-05, "loss": 0.2404, "step": 10050 }, { "epoch": 0.88, "learning_rate": 1.1155866900175132e-05, "loss": 0.2401, "step": 10100 }, { "epoch": 0.89, "learning_rate": 1.1112084063047286e-05, "loss": 0.2443, "step": 10150 }, { "epoch": 0.89, "learning_rate": 1.106830122591944e-05, "loss": 0.2349, "step": 10200 }, { "epoch": 0.9, "learning_rate": 1.1024518388791594e-05, "loss": 0.2408, "step": 10250 }, { "epoch": 0.9, "learning_rate": 1.0980735551663748e-05, "loss": 0.2393, "step": 10300 }, { "epoch": 0.91, "learning_rate": 1.0936952714535902e-05, "loss": 0.2358, "step": 10350 }, { "epoch": 0.91, "learning_rate": 1.0893169877408056e-05, "loss": 0.2412, "step": 10400 }, { "epoch": 0.92, "learning_rate": 1.084938704028021e-05, "loss": 0.2536, "step": 10450 }, { "epoch": 0.92, "learning_rate": 1.0805604203152365e-05, "loss": 0.2344, "step": 10500 }, { "epoch": 0.92, "learning_rate": 1.0761821366024519e-05, "loss": 0.2413, "step": 10550 }, { "epoch": 0.93, "learning_rate": 1.0718038528896673e-05, "loss": 0.2487, "step": 10600 }, { "epoch": 0.93, "learning_rate": 1.0674255691768827e-05, "loss": 0.2374, "step": 10650 }, { "epoch": 0.94, "learning_rate": 1.0630472854640981e-05, "loss": 0.2374, "step": 10700 }, { "epoch": 0.94, "learning_rate": 1.0586690017513135e-05, "loss": 0.2396, "step": 10750 }, { "epoch": 0.95, "learning_rate": 1.054290718038529e-05, "loss": 0.236, "step": 10800 }, { "epoch": 0.95, "learning_rate": 1.0499124343257444e-05, "loss": 0.2393, "step": 10850 }, { "epoch": 0.95, "learning_rate": 1.0455341506129598e-05, "loss": 0.238, "step": 10900 }, { "epoch": 0.96, "learning_rate": 1.0411558669001752e-05, "loss": 0.2385, "step": 10950 }, { "epoch": 0.96, "learning_rate": 1.0367775831873904e-05, "loss": 0.2376, "step": 11000 }, { "epoch": 0.97, "learning_rate": 1.0323992994746059e-05, "loss": 0.2347, "step": 11050 }, { "epoch": 0.97, "learning_rate": 1.0280210157618213e-05, "loss": 0.2337, "step": 11100 }, { "epoch": 0.98, "learning_rate": 1.023642732049037e-05, "loss": 0.2423, "step": 11150 }, { "epoch": 0.98, "learning_rate": 1.0192644483362524e-05, "loss": 0.2347, "step": 11200 }, { "epoch": 0.99, "learning_rate": 1.0148861646234678e-05, "loss": 0.2387, "step": 11250 }, { "epoch": 0.99, "learning_rate": 1.0105078809106833e-05, "loss": 0.2427, "step": 11300 }, { "epoch": 0.99, "learning_rate": 1.0061295971978987e-05, "loss": 0.2243, "step": 11350 }, { "epoch": 1.0, "learning_rate": 1.0017513134851141e-05, "loss": 0.2386, "step": 11400 }, { "epoch": 1.0, "eval_loss": 0.2167460024356842, "eval_runtime": 72.1234, "eval_samples_per_second": 63.336, "eval_steps_per_second": 15.834, "step": 11420 }, { "epoch": 1.0, "learning_rate": 9.973730297723293e-06, "loss": 0.2285, "step": 11450 }, { "epoch": 1.01, "learning_rate": 9.929947460595447e-06, "loss": 0.2315, "step": 11500 }, { "epoch": 1.01, "learning_rate": 9.886164623467602e-06, "loss": 0.2317, "step": 11550 }, { "epoch": 1.02, "learning_rate": 9.842381786339756e-06, "loss": 0.2314, "step": 11600 }, { "epoch": 1.02, "learning_rate": 9.79859894921191e-06, "loss": 0.2392, "step": 11650 }, { "epoch": 1.02, "learning_rate": 9.754816112084064e-06, "loss": 0.2311, "step": 11700 }, { "epoch": 1.03, "learning_rate": 9.711033274956218e-06, "loss": 0.223, "step": 11750 }, { "epoch": 1.03, "learning_rate": 9.667250437828372e-06, "loss": 0.2176, "step": 11800 }, { "epoch": 1.04, "learning_rate": 9.623467600700526e-06, "loss": 0.2311, "step": 11850 }, { "epoch": 1.04, "learning_rate": 9.57968476357268e-06, "loss": 0.2242, "step": 11900 }, { "epoch": 1.05, "learning_rate": 9.535901926444835e-06, "loss": 0.2203, "step": 11950 }, { "epoch": 1.05, "learning_rate": 9.492119089316989e-06, "loss": 0.2301, "step": 12000 }, { "epoch": 1.06, "learning_rate": 9.448336252189143e-06, "loss": 0.2356, "step": 12050 }, { "epoch": 1.06, "learning_rate": 9.404553415061297e-06, "loss": 0.2261, "step": 12100 }, { "epoch": 1.06, "learning_rate": 9.360770577933451e-06, "loss": 0.2269, "step": 12150 }, { "epoch": 1.07, "learning_rate": 9.316987740805605e-06, "loss": 0.2224, "step": 12200 }, { "epoch": 1.07, "learning_rate": 9.27320490367776e-06, "loss": 0.2214, "step": 12250 }, { "epoch": 1.08, "learning_rate": 9.229422066549914e-06, "loss": 0.2327, "step": 12300 }, { "epoch": 1.08, "learning_rate": 9.185639229422068e-06, "loss": 0.2245, "step": 12350 }, { "epoch": 1.09, "learning_rate": 9.141856392294222e-06, "loss": 0.2304, "step": 12400 }, { "epoch": 1.09, "learning_rate": 9.098073555166376e-06, "loss": 0.2387, "step": 12450 }, { "epoch": 1.09, "learning_rate": 9.05429071803853e-06, "loss": 0.2285, "step": 12500 }, { "epoch": 1.1, "learning_rate": 9.010507880910684e-06, "loss": 0.2274, "step": 12550 }, { "epoch": 1.1, "learning_rate": 8.966725043782838e-06, "loss": 0.2278, "step": 12600 }, { "epoch": 1.11, "learning_rate": 8.922942206654993e-06, "loss": 0.2277, "step": 12650 }, { "epoch": 1.11, "learning_rate": 8.879159369527147e-06, "loss": 0.2261, "step": 12700 }, { "epoch": 1.12, "learning_rate": 8.8353765323993e-06, "loss": 0.2274, "step": 12750 }, { "epoch": 1.12, "learning_rate": 8.791593695271455e-06, "loss": 0.227, "step": 12800 }, { "epoch": 1.13, "learning_rate": 8.747810858143609e-06, "loss": 0.2363, "step": 12850 }, { "epoch": 1.13, "learning_rate": 8.704028021015763e-06, "loss": 0.2244, "step": 12900 }, { "epoch": 1.13, "learning_rate": 8.660245183887916e-06, "loss": 0.2272, "step": 12950 }, { "epoch": 1.14, "learning_rate": 8.61646234676007e-06, "loss": 0.2277, "step": 13000 }, { "epoch": 1.14, "learning_rate": 8.572679509632224e-06, "loss": 0.232, "step": 13050 }, { "epoch": 1.15, "learning_rate": 8.528896672504378e-06, "loss": 0.2309, "step": 13100 }, { "epoch": 1.15, "learning_rate": 8.485113835376532e-06, "loss": 0.2292, "step": 13150 }, { "epoch": 1.16, "learning_rate": 8.441330998248686e-06, "loss": 0.2308, "step": 13200 }, { "epoch": 1.16, "learning_rate": 8.39754816112084e-06, "loss": 0.2291, "step": 13250 }, { "epoch": 1.16, "learning_rate": 8.353765323992995e-06, "loss": 0.2326, "step": 13300 }, { "epoch": 1.17, "learning_rate": 8.309982486865149e-06, "loss": 0.2215, "step": 13350 }, { "epoch": 1.17, "learning_rate": 8.266199649737303e-06, "loss": 0.2312, "step": 13400 }, { "epoch": 1.18, "learning_rate": 8.222416812609457e-06, "loss": 0.2216, "step": 13450 }, { "epoch": 1.18, "learning_rate": 8.178633975481613e-06, "loss": 0.2269, "step": 13500 }, { "epoch": 1.19, "learning_rate": 8.134851138353767e-06, "loss": 0.229, "step": 13550 }, { "epoch": 1.19, "learning_rate": 8.091068301225921e-06, "loss": 0.2319, "step": 13600 }, { "epoch": 1.2, "learning_rate": 8.047285464098075e-06, "loss": 0.226, "step": 13650 }, { "epoch": 1.2, "learning_rate": 8.00350262697023e-06, "loss": 0.2294, "step": 13700 }, { "epoch": 1.2, "learning_rate": 7.959719789842383e-06, "loss": 0.2347, "step": 13750 }, { "epoch": 1.21, "learning_rate": 7.915936952714538e-06, "loss": 0.2315, "step": 13800 }, { "epoch": 1.21, "learning_rate": 7.872154115586692e-06, "loss": 0.2209, "step": 13850 }, { "epoch": 1.22, "learning_rate": 7.828371278458846e-06, "loss": 0.2248, "step": 13900 }, { "epoch": 1.22, "learning_rate": 7.784588441330998e-06, "loss": 0.2305, "step": 13950 }, { "epoch": 1.23, "learning_rate": 7.740805604203152e-06, "loss": 0.2288, "step": 14000 }, { "epoch": 1.23, "learning_rate": 7.697022767075307e-06, "loss": 0.2286, "step": 14050 }, { "epoch": 1.23, "learning_rate": 7.65323992994746e-06, "loss": 0.2274, "step": 14100 }, { "epoch": 1.24, "learning_rate": 7.609457092819616e-06, "loss": 0.2235, "step": 14150 }, { "epoch": 1.24, "learning_rate": 7.56567425569177e-06, "loss": 0.2328, "step": 14200 }, { "epoch": 1.25, "learning_rate": 7.521891418563924e-06, "loss": 0.2267, "step": 14250 }, { "epoch": 1.25, "learning_rate": 7.478108581436077e-06, "loss": 0.2231, "step": 14300 }, { "epoch": 1.26, "learning_rate": 7.434325744308231e-06, "loss": 0.2256, "step": 14350 }, { "epoch": 1.26, "learning_rate": 7.3905429071803855e-06, "loss": 0.2276, "step": 14400 }, { "epoch": 1.27, "learning_rate": 7.34676007005254e-06, "loss": 0.2312, "step": 14450 }, { "epoch": 1.27, "learning_rate": 7.302977232924694e-06, "loss": 0.2307, "step": 14500 }, { "epoch": 1.27, "learning_rate": 7.259194395796848e-06, "loss": 0.2265, "step": 14550 }, { "epoch": 1.28, "learning_rate": 7.215411558669002e-06, "loss": 0.2351, "step": 14600 }, { "epoch": 1.28, "learning_rate": 7.171628721541156e-06, "loss": 0.2264, "step": 14650 }, { "epoch": 1.29, "learning_rate": 7.12784588441331e-06, "loss": 0.2309, "step": 14700 }, { "epoch": 1.29, "learning_rate": 7.0840630472854645e-06, "loss": 0.2325, "step": 14750 }, { "epoch": 1.3, "learning_rate": 7.040280210157619e-06, "loss": 0.2329, "step": 14800 }, { "epoch": 1.3, "learning_rate": 6.996497373029773e-06, "loss": 0.2301, "step": 14850 }, { "epoch": 1.3, "learning_rate": 6.952714535901927e-06, "loss": 0.2308, "step": 14900 }, { "epoch": 1.31, "learning_rate": 6.908931698774081e-06, "loss": 0.2273, "step": 14950 }, { "epoch": 1.31, "learning_rate": 6.865148861646235e-06, "loss": 0.2347, "step": 15000 }, { "epoch": 1.32, "learning_rate": 6.8213660245183884e-06, "loss": 0.2303, "step": 15050 }, { "epoch": 1.32, "learning_rate": 6.777583187390544e-06, "loss": 0.2293, "step": 15100 }, { "epoch": 1.33, "learning_rate": 6.733800350262698e-06, "loss": 0.2299, "step": 15150 }, { "epoch": 1.33, "learning_rate": 6.6900175131348525e-06, "loss": 0.2271, "step": 15200 }, { "epoch": 1.34, "learning_rate": 6.646234676007006e-06, "loss": 0.2246, "step": 15250 }, { "epoch": 1.34, "learning_rate": 6.60245183887916e-06, "loss": 0.2262, "step": 15300 }, { "epoch": 1.34, "learning_rate": 6.558669001751314e-06, "loss": 0.2176, "step": 15350 }, { "epoch": 1.35, "learning_rate": 6.514886164623468e-06, "loss": 0.2277, "step": 15400 }, { "epoch": 1.35, "learning_rate": 6.471103327495622e-06, "loss": 0.2184, "step": 15450 }, { "epoch": 1.36, "learning_rate": 6.4273204903677765e-06, "loss": 0.2365, "step": 15500 }, { "epoch": 1.36, "learning_rate": 6.383537653239931e-06, "loss": 0.2288, "step": 15550 }, { "epoch": 1.37, "learning_rate": 6.339754816112085e-06, "loss": 0.2252, "step": 15600 }, { "epoch": 1.37, "learning_rate": 6.295971978984239e-06, "loss": 0.2293, "step": 15650 }, { "epoch": 1.37, "learning_rate": 6.252189141856393e-06, "loss": 0.2252, "step": 15700 }, { "epoch": 1.38, "learning_rate": 6.208406304728547e-06, "loss": 0.2382, "step": 15750 }, { "epoch": 1.38, "learning_rate": 6.164623467600701e-06, "loss": 0.2293, "step": 15800 }, { "epoch": 1.39, "learning_rate": 6.120840630472855e-06, "loss": 0.2274, "step": 15850 }, { "epoch": 1.39, "learning_rate": 6.0770577933450096e-06, "loss": 0.2227, "step": 15900 }, { "epoch": 1.4, "learning_rate": 6.033274956217164e-06, "loss": 0.2311, "step": 15950 }, { "epoch": 1.4, "learning_rate": 5.989492119089317e-06, "loss": 0.2227, "step": 16000 }, { "epoch": 1.41, "learning_rate": 5.945709281961471e-06, "loss": 0.2242, "step": 16050 }, { "epoch": 1.41, "learning_rate": 5.901926444833625e-06, "loss": 0.2341, "step": 16100 }, { "epoch": 1.41, "learning_rate": 5.858143607705779e-06, "loss": 0.2341, "step": 16150 }, { "epoch": 1.42, "learning_rate": 5.8143607705779335e-06, "loss": 0.2306, "step": 16200 }, { "epoch": 1.42, "learning_rate": 5.770577933450088e-06, "loss": 0.224, "step": 16250 }, { "epoch": 1.43, "learning_rate": 5.726795096322242e-06, "loss": 0.2283, "step": 16300 }, { "epoch": 1.43, "learning_rate": 5.683012259194396e-06, "loss": 0.2266, "step": 16350 }, { "epoch": 1.44, "learning_rate": 5.63922942206655e-06, "loss": 0.2283, "step": 16400 }, { "epoch": 1.44, "learning_rate": 5.595446584938704e-06, "loss": 0.2296, "step": 16450 }, { "epoch": 1.44, "learning_rate": 5.551663747810858e-06, "loss": 0.2214, "step": 16500 }, { "epoch": 1.45, "learning_rate": 5.5078809106830125e-06, "loss": 0.2229, "step": 16550 }, { "epoch": 1.45, "learning_rate": 5.464098073555167e-06, "loss": 0.2333, "step": 16600 }, { "epoch": 1.46, "learning_rate": 5.420315236427321e-06, "loss": 0.2276, "step": 16650 }, { "epoch": 1.46, "learning_rate": 5.376532399299476e-06, "loss": 0.2261, "step": 16700 }, { "epoch": 1.47, "learning_rate": 5.33274956217163e-06, "loss": 0.2305, "step": 16750 }, { "epoch": 1.47, "learning_rate": 5.288966725043784e-06, "loss": 0.2314, "step": 16800 }, { "epoch": 1.48, "learning_rate": 5.245183887915938e-06, "loss": 0.2269, "step": 16850 }, { "epoch": 1.48, "learning_rate": 5.201401050788092e-06, "loss": 0.2313, "step": 16900 }, { "epoch": 1.48, "learning_rate": 5.157618213660246e-06, "loss": 0.2341, "step": 16950 }, { "epoch": 1.49, "learning_rate": 5.1138353765324e-06, "loss": 0.225, "step": 17000 }, { "epoch": 1.49, "learning_rate": 5.070052539404554e-06, "loss": 0.2189, "step": 17050 }, { "epoch": 1.5, "learning_rate": 5.026269702276708e-06, "loss": 0.231, "step": 17100 }, { "epoch": 1.5, "learning_rate": 4.982486865148862e-06, "loss": 0.229, "step": 17150 }, { "epoch": 1.51, "learning_rate": 4.938704028021016e-06, "loss": 0.2324, "step": 17200 }, { "epoch": 1.51, "learning_rate": 4.89492119089317e-06, "loss": 0.2303, "step": 17250 }, { "epoch": 1.51, "learning_rate": 4.8511383537653245e-06, "loss": 0.2274, "step": 17300 }, { "epoch": 1.52, "learning_rate": 4.807355516637479e-06, "loss": 0.2244, "step": 17350 }, { "epoch": 1.52, "learning_rate": 4.763572679509633e-06, "loss": 0.2273, "step": 17400 }, { "epoch": 1.53, "learning_rate": 4.719789842381787e-06, "loss": 0.2265, "step": 17450 }, { "epoch": 1.53, "learning_rate": 4.676007005253941e-06, "loss": 0.2253, "step": 17500 }, { "epoch": 1.54, "learning_rate": 4.632224168126095e-06, "loss": 0.2349, "step": 17550 }, { "epoch": 1.54, "learning_rate": 4.588441330998249e-06, "loss": 0.2202, "step": 17600 }, { "epoch": 1.55, "learning_rate": 4.544658493870403e-06, "loss": 0.2304, "step": 17650 }, { "epoch": 1.55, "learning_rate": 4.500875656742557e-06, "loss": 0.2321, "step": 17700 }, { "epoch": 1.55, "learning_rate": 4.457092819614711e-06, "loss": 0.2194, "step": 17750 }, { "epoch": 1.56, "learning_rate": 4.413309982486865e-06, "loss": 0.2278, "step": 17800 }, { "epoch": 1.56, "learning_rate": 4.36952714535902e-06, "loss": 0.2237, "step": 17850 }, { "epoch": 1.57, "learning_rate": 4.325744308231174e-06, "loss": 0.2241, "step": 17900 }, { "epoch": 1.57, "learning_rate": 4.281961471103328e-06, "loss": 0.2244, "step": 17950 }, { "epoch": 1.58, "learning_rate": 4.238178633975482e-06, "loss": 0.2247, "step": 18000 }, { "epoch": 1.58, "learning_rate": 4.1943957968476365e-06, "loss": 0.2307, "step": 18050 }, { "epoch": 1.58, "learning_rate": 4.150612959719791e-06, "loss": 0.2212, "step": 18100 }, { "epoch": 1.59, "learning_rate": 4.106830122591945e-06, "loss": 0.224, "step": 18150 }, { "epoch": 1.59, "learning_rate": 4.063047285464098e-06, "loss": 0.221, "step": 18200 }, { "epoch": 1.6, "learning_rate": 4.019264448336252e-06, "loss": 0.2237, "step": 18250 }, { "epoch": 1.6, "learning_rate": 3.975481611208406e-06, "loss": 0.2294, "step": 18300 }, { "epoch": 1.61, "learning_rate": 3.9316987740805604e-06, "loss": 0.222, "step": 18350 }, { "epoch": 1.61, "learning_rate": 3.8879159369527146e-06, "loss": 0.2235, "step": 18400 }, { "epoch": 1.62, "learning_rate": 3.844133099824869e-06, "loss": 0.2299, "step": 18450 }, { "epoch": 1.62, "learning_rate": 3.800350262697023e-06, "loss": 0.2308, "step": 18500 }, { "epoch": 1.62, "learning_rate": 3.756567425569177e-06, "loss": 0.2279, "step": 18550 }, { "epoch": 1.63, "learning_rate": 3.712784588441331e-06, "loss": 0.2293, "step": 18600 }, { "epoch": 1.63, "learning_rate": 3.6690017513134857e-06, "loss": 0.2279, "step": 18650 }, { "epoch": 1.64, "learning_rate": 3.62521891418564e-06, "loss": 0.2292, "step": 18700 }, { "epoch": 1.64, "learning_rate": 3.581436077057794e-06, "loss": 0.2276, "step": 18750 }, { "epoch": 1.65, "learning_rate": 3.537653239929948e-06, "loss": 0.2276, "step": 18800 }, { "epoch": 1.65, "learning_rate": 3.4938704028021018e-06, "loss": 0.2209, "step": 18850 }, { "epoch": 1.65, "learning_rate": 3.450087565674256e-06, "loss": 0.2251, "step": 18900 }, { "epoch": 1.66, "learning_rate": 3.40630472854641e-06, "loss": 0.2266, "step": 18950 }, { "epoch": 1.66, "learning_rate": 3.362521891418564e-06, "loss": 0.228, "step": 19000 }, { "epoch": 1.67, "learning_rate": 3.3187390542907183e-06, "loss": 0.2274, "step": 19050 }, { "epoch": 1.67, "learning_rate": 3.2749562171628725e-06, "loss": 0.2256, "step": 19100 }, { "epoch": 1.68, "learning_rate": 3.2311733800350266e-06, "loss": 0.2279, "step": 19150 }, { "epoch": 1.68, "learning_rate": 3.1873905429071807e-06, "loss": 0.2254, "step": 19200 }, { "epoch": 1.69, "learning_rate": 3.1436077057793344e-06, "loss": 0.2273, "step": 19250 }, { "epoch": 1.69, "learning_rate": 3.0998248686514886e-06, "loss": 0.2277, "step": 19300 }, { "epoch": 1.69, "learning_rate": 3.0560420315236427e-06, "loss": 0.2291, "step": 19350 }, { "epoch": 1.7, "learning_rate": 3.012259194395797e-06, "loss": 0.2258, "step": 19400 }, { "epoch": 1.7, "learning_rate": 2.9684763572679514e-06, "loss": 0.2247, "step": 19450 }, { "epoch": 1.71, "learning_rate": 2.9246935201401055e-06, "loss": 0.2246, "step": 19500 }, { "epoch": 1.71, "learning_rate": 2.8809106830122597e-06, "loss": 0.2228, "step": 19550 }, { "epoch": 1.72, "learning_rate": 2.837127845884414e-06, "loss": 0.2281, "step": 19600 }, { "epoch": 1.72, "learning_rate": 2.793345008756568e-06, "loss": 0.2396, "step": 19650 }, { "epoch": 1.73, "learning_rate": 2.749562171628722e-06, "loss": 0.2214, "step": 19700 }, { "epoch": 1.73, "learning_rate": 2.7057793345008758e-06, "loss": 0.2286, "step": 19750 }, { "epoch": 1.73, "learning_rate": 2.66199649737303e-06, "loss": 0.2289, "step": 19800 }, { "epoch": 1.74, "learning_rate": 2.618213660245184e-06, "loss": 0.2282, "step": 19850 }, { "epoch": 1.74, "learning_rate": 2.574430823117338e-06, "loss": 0.2299, "step": 19900 }, { "epoch": 1.75, "learning_rate": 2.5306479859894923e-06, "loss": 0.2276, "step": 19950 }, { "epoch": 1.75, "learning_rate": 2.4868651488616464e-06, "loss": 0.2257, "step": 20000 }, { "epoch": 1.76, "learning_rate": 2.4430823117338006e-06, "loss": 0.228, "step": 20050 }, { "epoch": 1.76, "learning_rate": 2.3992994746059547e-06, "loss": 0.2245, "step": 20100 }, { "epoch": 1.76, "learning_rate": 2.355516637478109e-06, "loss": 0.2199, "step": 20150 }, { "epoch": 1.77, "learning_rate": 2.311733800350263e-06, "loss": 0.2264, "step": 20200 }, { "epoch": 1.77, "learning_rate": 2.267950963222417e-06, "loss": 0.228, "step": 20250 }, { "epoch": 1.78, "learning_rate": 2.2241681260945713e-06, "loss": 0.2311, "step": 20300 }, { "epoch": 1.78, "learning_rate": 2.180385288966725e-06, "loss": 0.2252, "step": 20350 }, { "epoch": 1.79, "learning_rate": 2.136602451838879e-06, "loss": 0.2259, "step": 20400 }, { "epoch": 1.79, "learning_rate": 2.0928196147110337e-06, "loss": 0.2254, "step": 20450 }, { "epoch": 1.8, "learning_rate": 2.0490367775831878e-06, "loss": 0.2273, "step": 20500 }, { "epoch": 1.8, "learning_rate": 2.005253940455342e-06, "loss": 0.2188, "step": 20550 }, { "epoch": 1.8, "learning_rate": 1.9614711033274956e-06, "loss": 0.2295, "step": 20600 }, { "epoch": 1.81, "learning_rate": 1.9176882661996498e-06, "loss": 0.2257, "step": 20650 }, { "epoch": 1.81, "learning_rate": 1.873905429071804e-06, "loss": 0.2296, "step": 20700 }, { "epoch": 1.82, "learning_rate": 1.830122591943958e-06, "loss": 0.2304, "step": 20750 }, { "epoch": 1.82, "learning_rate": 1.7863397548161122e-06, "loss": 0.2316, "step": 20800 }, { "epoch": 1.83, "learning_rate": 1.7425569176882665e-06, "loss": 0.2311, "step": 20850 }, { "epoch": 1.83, "learning_rate": 1.6987740805604204e-06, "loss": 0.2254, "step": 20900 }, { "epoch": 1.83, "learning_rate": 1.6549912434325746e-06, "loss": 0.2234, "step": 20950 }, { "epoch": 1.84, "learning_rate": 1.6112084063047287e-06, "loss": 0.2193, "step": 21000 }, { "epoch": 1.84, "learning_rate": 1.5674255691768828e-06, "loss": 0.236, "step": 21050 }, { "epoch": 1.85, "learning_rate": 1.5236427320490368e-06, "loss": 0.2282, "step": 21100 }, { "epoch": 1.85, "learning_rate": 1.479859894921191e-06, "loss": 0.23, "step": 21150 }, { "epoch": 1.86, "learning_rate": 1.436077057793345e-06, "loss": 0.232, "step": 21200 }, { "epoch": 1.86, "learning_rate": 1.3922942206654994e-06, "loss": 0.2283, "step": 21250 }, { "epoch": 1.87, "learning_rate": 1.3485113835376535e-06, "loss": 0.2334, "step": 21300 }, { "epoch": 1.87, "learning_rate": 1.3047285464098074e-06, "loss": 0.2297, "step": 21350 }, { "epoch": 1.87, "learning_rate": 1.2609457092819616e-06, "loss": 0.223, "step": 21400 }, { "epoch": 1.88, "learning_rate": 1.2171628721541157e-06, "loss": 0.223, "step": 21450 }, { "epoch": 1.88, "learning_rate": 1.1733800350262698e-06, "loss": 0.2252, "step": 21500 }, { "epoch": 1.89, "learning_rate": 1.129597197898424e-06, "loss": 0.2256, "step": 21550 }, { "epoch": 1.89, "learning_rate": 1.085814360770578e-06, "loss": 0.2228, "step": 21600 }, { "epoch": 1.9, "learning_rate": 1.042031523642732e-06, "loss": 0.2288, "step": 21650 }, { "epoch": 1.9, "learning_rate": 9.982486865148862e-07, "loss": 0.2342, "step": 21700 }, { "epoch": 1.9, "learning_rate": 9.544658493870403e-07, "loss": 0.2267, "step": 21750 }, { "epoch": 1.91, "learning_rate": 9.106830122591944e-07, "loss": 0.2234, "step": 21800 }, { "epoch": 1.91, "learning_rate": 8.669001751313486e-07, "loss": 0.2191, "step": 21850 }, { "epoch": 1.92, "learning_rate": 8.231173380035026e-07, "loss": 0.2219, "step": 21900 }, { "epoch": 1.92, "learning_rate": 7.793345008756568e-07, "loss": 0.2239, "step": 21950 }, { "epoch": 1.93, "learning_rate": 7.35551663747811e-07, "loss": 0.2262, "step": 22000 }, { "epoch": 1.93, "learning_rate": 6.91768826619965e-07, "loss": 0.2185, "step": 22050 }, { "epoch": 1.94, "learning_rate": 6.47985989492119e-07, "loss": 0.2239, "step": 22100 }, { "epoch": 1.94, "learning_rate": 6.042031523642733e-07, "loss": 0.2286, "step": 22150 }, { "epoch": 1.94, "learning_rate": 5.604203152364274e-07, "loss": 0.2253, "step": 22200 }, { "epoch": 1.95, "learning_rate": 5.166374781085814e-07, "loss": 0.2252, "step": 22250 }, { "epoch": 1.95, "learning_rate": 4.728546409807356e-07, "loss": 0.2245, "step": 22300 }, { "epoch": 1.96, "learning_rate": 4.290718038528897e-07, "loss": 0.2285, "step": 22350 }, { "epoch": 1.96, "learning_rate": 3.8528896672504383e-07, "loss": 0.2256, "step": 22400 }, { "epoch": 1.97, "learning_rate": 3.415061295971979e-07, "loss": 0.2334, "step": 22450 }, { "epoch": 1.97, "learning_rate": 2.9772329246935205e-07, "loss": 0.2313, "step": 22500 }, { "epoch": 1.97, "learning_rate": 2.5394045534150613e-07, "loss": 0.2202, "step": 22550 }, { "epoch": 1.98, "learning_rate": 2.1015761821366026e-07, "loss": 0.2235, "step": 22600 }, { "epoch": 1.98, "learning_rate": 1.6637478108581437e-07, "loss": 0.2311, "step": 22650 }, { "epoch": 1.99, "learning_rate": 1.2259194395796848e-07, "loss": 0.2213, "step": 22700 }, { "epoch": 1.99, "learning_rate": 7.88091068301226e-08, "loss": 0.2246, "step": 22750 }, { "epoch": 2.0, "learning_rate": 3.502626970227671e-08, "loss": 0.2186, "step": 22800 }, { "epoch": 2.0, "eval_loss": 0.20982445776462555, "eval_runtime": 72.0375, "eval_samples_per_second": 63.411, "eval_steps_per_second": 15.853, "step": 22840 } ], "max_steps": 22840, "num_train_epochs": 2, "total_flos": 1.0789666943297126e+17, "trial_name": null, "trial_params": null }